linux/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
<<
>>
Prefs
   1/*
   2 * Copyright 2014 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 *
  22 */
  23#include <linux/firmware.h>
  24#include "drmP.h"
  25#include "amdgpu.h"
  26#include "amdgpu_ih.h"
  27#include "amdgpu_gfx.h"
  28#include "cikd.h"
  29#include "cik.h"
  30#include "atom.h"
  31#include "amdgpu_ucode.h"
  32#include "clearstate_ci.h"
  33
  34#include "uvd/uvd_4_2_d.h"
  35
  36#include "dce/dce_8_0_d.h"
  37#include "dce/dce_8_0_sh_mask.h"
  38
  39#include "bif/bif_4_1_d.h"
  40#include "bif/bif_4_1_sh_mask.h"
  41
  42#include "gca/gfx_7_0_d.h"
  43#include "gca/gfx_7_2_enum.h"
  44#include "gca/gfx_7_2_sh_mask.h"
  45
  46#include "gmc/gmc_7_0_d.h"
  47#include "gmc/gmc_7_0_sh_mask.h"
  48
  49#include "oss/oss_2_0_d.h"
  50#include "oss/oss_2_0_sh_mask.h"
  51
  /*
   * Ring counts for GFX7: one graphics ring and eight compute rings.
   * NOTE(review): the users of these macros are outside this chunk;
   * presumably they bound the gfx/compute ring setup - confirm at the
   * use sites.
   */
  52#define GFX7_NUM_GFX_RINGS     1
  53#define GFX7_NUM_COMPUTE_RINGS 8
  54
  /*
   * Forward declarations.  The three static helpers take only the device
   * pointer and return nothing; their definitions are not part of this
   * chunk.  gfx_v7_0_get_cu_info() is declared without "static", so it has
   * external linkage; it takes a device pointer and an amdgpu_cu_info
   * pointer and returns an int.
   */
  55static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev);
  56static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev);
  57static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev);
  58int gfx_v7_0_get_cu_info(struct amdgpu_device *, struct amdgpu_cu_info *);
  59
  /*
   * Firmware image names for the chips handled here (bonaire, hawaii,
   * kaveri, kabini, mullins): pfp, me, ce, rlc and mec for each chip, plus
   * an extra mec2 image for kaveri only.  The pfp/me/ce/mec/mec2 names match
   * the "radeon/%s_<block>.bin" strings that gfx_v7_0_init_microcode()
   * formats before calling request_firmware().
   * NOTE(review): MODULE_FIRMWARE() is defined outside this file; it is
   * expected to record each name in the module metadata so the images can
   * be bundled with the module - confirm against <linux/module.h>.
   */
  60MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
  61MODULE_FIRMWARE("radeon/bonaire_me.bin");
  62MODULE_FIRMWARE("radeon/bonaire_ce.bin");
  63MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
  64MODULE_FIRMWARE("radeon/bonaire_mec.bin");
  65
  66MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
  67MODULE_FIRMWARE("radeon/hawaii_me.bin");
  68MODULE_FIRMWARE("radeon/hawaii_ce.bin");
  69MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
  70MODULE_FIRMWARE("radeon/hawaii_mec.bin");
  71
  72MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
  73MODULE_FIRMWARE("radeon/kaveri_me.bin");
  74MODULE_FIRMWARE("radeon/kaveri_ce.bin");
  75MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
  76MODULE_FIRMWARE("radeon/kaveri_mec.bin");
  /* kaveri is the only chip in this list with a second MEC image */
  77MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
  78
  79MODULE_FIRMWARE("radeon/kabini_pfp.bin");
  80MODULE_FIRMWARE("radeon/kabini_me.bin");
  81MODULE_FIRMWARE("radeon/kabini_ce.bin");
  82MODULE_FIRMWARE("radeon/kabini_rlc.bin");
  83MODULE_FIRMWARE("radeon/kabini_mec.bin");
  84
  85MODULE_FIRMWARE("radeon/mullins_pfp.bin");
  86MODULE_FIRMWARE("radeon/mullins_me.bin");
  87MODULE_FIRMWARE("radeon/mullins_ce.bin");
  88MODULE_FIRMWARE("radeon/mullins_rlc.bin");
  89MODULE_FIRMWARE("radeon/mullins_mec.bin");
  90
  91static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
  92{
  93        {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
  94        {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
  95        {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
  96        {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
  97        {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
  98        {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
  99        {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
 100        {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
 101        {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
 102        {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
 103        {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
 104        {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
 105        {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
 106        {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
 107        {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
 108        {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
 109};
 110
 /*
  * "spectre" register list, stored as a flat array of u32 words.
  *
  * Layout as visible in the initializer:
  *  - Most entries are two words: (hi << 16) | (offset >> 2), followed by
  *    a 0x00000000 word.
  *  - The hi half-word is mostly 0x0e00; 0x0400, 0x0600, 0x0000/0x0001 and
  *    the series 0x4e00..0xbe00 also appear, the series repeating a single
  *    offset once per prefix.
  *  - A bare 0x3 word and a bare 0x5 word split the list into three runs;
  *    the five entries after 0x5 carry no trailing 0x00000000 word.
  *
  * NOTE(review): the name suggests this is the RLC save/restore list, with
  * "offset >> 2" turning a byte offset into a dword register index, the
  * upper half-word acting as an instance selector and the zero word as a
  * value slot.  None of that is established in this chunk - confirm against
  * the code that consumes the array.  Entry order is presumably significant.
  */
 111static const u32 spectre_rlc_save_restore_register_list[] =
 112{
 113        (0x0e00 << 16) | (0xc12c >> 2),
 114        0x00000000,
 115        (0x0e00 << 16) | (0xc140 >> 2),
 116        0x00000000,
 117        (0x0e00 << 16) | (0xc150 >> 2),
 118        0x00000000,
 119        (0x0e00 << 16) | (0xc15c >> 2),
 120        0x00000000,
 121        (0x0e00 << 16) | (0xc168 >> 2),
 122        0x00000000,
 123        (0x0e00 << 16) | (0xc170 >> 2),
 124        0x00000000,
 125        (0x0e00 << 16) | (0xc178 >> 2),
 126        0x00000000,
 127        (0x0e00 << 16) | (0xc204 >> 2),
 128        0x00000000,
 129        (0x0e00 << 16) | (0xc2b4 >> 2),
 130        0x00000000,
 131        (0x0e00 << 16) | (0xc2b8 >> 2),
 132        0x00000000,
 133        (0x0e00 << 16) | (0xc2bc >> 2),
 134        0x00000000,
 135        (0x0e00 << 16) | (0xc2c0 >> 2),
 136        0x00000000,
 137        (0x0e00 << 16) | (0x8228 >> 2),
 138        0x00000000,
 139        (0x0e00 << 16) | (0x829c >> 2),
 140        0x00000000,
 141        (0x0e00 << 16) | (0x869c >> 2),
 142        0x00000000,
 143        (0x0600 << 16) | (0x98f4 >> 2),
 144        0x00000000,
 145        (0x0e00 << 16) | (0x98f8 >> 2),
 146        0x00000000,
 147        (0x0e00 << 16) | (0x9900 >> 2),
 148        0x00000000,
 149        (0x0e00 << 16) | (0xc260 >> 2),
 150        0x00000000,
 151        (0x0e00 << 16) | (0x90e8 >> 2),
 152        0x00000000,
 153        (0x0e00 << 16) | (0x3c000 >> 2),
 154        0x00000000,
 155        (0x0e00 << 16) | (0x3c00c >> 2),
 156        0x00000000,
 157        (0x0e00 << 16) | (0x8c1c >> 2),
 158        0x00000000,
 159        (0x0e00 << 16) | (0x9700 >> 2),
 160        0x00000000,
 161        (0x0e00 << 16) | (0xcd20 >> 2),
 162        0x00000000,
 163        (0x4e00 << 16) | (0xcd20 >> 2),
 164        0x00000000,
 165        (0x5e00 << 16) | (0xcd20 >> 2),
 166        0x00000000,
 167        (0x6e00 << 16) | (0xcd20 >> 2),
 168        0x00000000,
 169        (0x7e00 << 16) | (0xcd20 >> 2),
 170        0x00000000,
 171        (0x8e00 << 16) | (0xcd20 >> 2),
 172        0x00000000,
 173        (0x9e00 << 16) | (0xcd20 >> 2),
 174        0x00000000,
 175        (0xae00 << 16) | (0xcd20 >> 2),
 176        0x00000000,
 177        (0xbe00 << 16) | (0xcd20 >> 2),
 178        0x00000000,
 179        (0x0e00 << 16) | (0x89bc >> 2),
 180        0x00000000,
 181        (0x0e00 << 16) | (0x8900 >> 2),
 182        0x00000000,
 183        0x3, /* NOTE(review): bare word, not an entry pair - meaning not shown in this chunk */
 184        (0x0e00 << 16) | (0xc130 >> 2),
 185        0x00000000,
 186        (0x0e00 << 16) | (0xc134 >> 2),
 187        0x00000000,
 188        (0x0e00 << 16) | (0xc1fc >> 2),
 189        0x00000000,
 190        (0x0e00 << 16) | (0xc208 >> 2),
 191        0x00000000,
 192        (0x0e00 << 16) | (0xc264 >> 2),
 193        0x00000000,
 194        (0x0e00 << 16) | (0xc268 >> 2),
 195        0x00000000,
 196        (0x0e00 << 16) | (0xc26c >> 2),
 197        0x00000000,
 198        (0x0e00 << 16) | (0xc270 >> 2),
 199        0x00000000,
 200        (0x0e00 << 16) | (0xc274 >> 2),
 201        0x00000000,
 202        (0x0e00 << 16) | (0xc278 >> 2),
 203        0x00000000,
 204        (0x0e00 << 16) | (0xc27c >> 2),
 205        0x00000000,
 206        (0x0e00 << 16) | (0xc280 >> 2),
 207        0x00000000,
 208        (0x0e00 << 16) | (0xc284 >> 2),
 209        0x00000000,
 210        (0x0e00 << 16) | (0xc288 >> 2),
 211        0x00000000,
 212        (0x0e00 << 16) | (0xc28c >> 2),
 213        0x00000000,
 214        (0x0e00 << 16) | (0xc290 >> 2),
 215        0x00000000,
 216        (0x0e00 << 16) | (0xc294 >> 2),
 217        0x00000000,
 218        (0x0e00 << 16) | (0xc298 >> 2),
 219        0x00000000,
 220        (0x0e00 << 16) | (0xc29c >> 2),
 221        0x00000000,
 222        (0x0e00 << 16) | (0xc2a0 >> 2),
 223        0x00000000,
 224        (0x0e00 << 16) | (0xc2a4 >> 2),
 225        0x00000000,
 226        (0x0e00 << 16) | (0xc2a8 >> 2),
 227        0x00000000,
 228        (0x0e00 << 16) | (0xc2ac  >> 2),
 229        0x00000000,
 230        (0x0e00 << 16) | (0xc2b0 >> 2),
 231        0x00000000,
 232        (0x0e00 << 16) | (0x301d0 >> 2),
 233        0x00000000,
 234        (0x0e00 << 16) | (0x30238 >> 2),
 235        0x00000000,
 236        (0x0e00 << 16) | (0x30250 >> 2),
 237        0x00000000,
 238        (0x0e00 << 16) | (0x30254 >> 2),
 239        0x00000000,
 240        (0x0e00 << 16) | (0x30258 >> 2),
 241        0x00000000,
 242        (0x0e00 << 16) | (0x3025c >> 2),
 243        0x00000000,
 244        (0x4e00 << 16) | (0xc900 >> 2),
 245        0x00000000,
 246        (0x5e00 << 16) | (0xc900 >> 2),
 247        0x00000000,
 248        (0x6e00 << 16) | (0xc900 >> 2),
 249        0x00000000,
 250        (0x7e00 << 16) | (0xc900 >> 2),
 251        0x00000000,
 252        (0x8e00 << 16) | (0xc900 >> 2),
 253        0x00000000,
 254        (0x9e00 << 16) | (0xc900 >> 2),
 255        0x00000000,
 256        (0xae00 << 16) | (0xc900 >> 2),
 257        0x00000000,
 258        (0xbe00 << 16) | (0xc900 >> 2),
 259        0x00000000,
 260        (0x4e00 << 16) | (0xc904 >> 2),
 261        0x00000000,
 262        (0x5e00 << 16) | (0xc904 >> 2),
 263        0x00000000,
 264        (0x6e00 << 16) | (0xc904 >> 2),
 265        0x00000000,
 266        (0x7e00 << 16) | (0xc904 >> 2),
 267        0x00000000,
 268        (0x8e00 << 16) | (0xc904 >> 2),
 269        0x00000000,
 270        (0x9e00 << 16) | (0xc904 >> 2),
 271        0x00000000,
 272        (0xae00 << 16) | (0xc904 >> 2),
 273        0x00000000,
 274        (0xbe00 << 16) | (0xc904 >> 2),
 275        0x00000000,
 276        (0x4e00 << 16) | (0xc908 >> 2),
 277        0x00000000,
 278        (0x5e00 << 16) | (0xc908 >> 2),
 279        0x00000000,
 280        (0x6e00 << 16) | (0xc908 >> 2),
 281        0x00000000,
 282        (0x7e00 << 16) | (0xc908 >> 2),
 283        0x00000000,
 284        (0x8e00 << 16) | (0xc908 >> 2),
 285        0x00000000,
 286        (0x9e00 << 16) | (0xc908 >> 2),
 287        0x00000000,
 288        (0xae00 << 16) | (0xc908 >> 2),
 289        0x00000000,
 290        (0xbe00 << 16) | (0xc908 >> 2),
 291        0x00000000,
 292        (0x4e00 << 16) | (0xc90c >> 2),
 293        0x00000000,
 294        (0x5e00 << 16) | (0xc90c >> 2),
 295        0x00000000,
 296        (0x6e00 << 16) | (0xc90c >> 2),
 297        0x00000000,
 298        (0x7e00 << 16) | (0xc90c >> 2),
 299        0x00000000,
 300        (0x8e00 << 16) | (0xc90c >> 2),
 301        0x00000000,
 302        (0x9e00 << 16) | (0xc90c >> 2),
 303        0x00000000,
 304        (0xae00 << 16) | (0xc90c >> 2),
 305        0x00000000,
 306        (0xbe00 << 16) | (0xc90c >> 2),
 307        0x00000000,
 308        (0x4e00 << 16) | (0xc910 >> 2),
 309        0x00000000,
 310        (0x5e00 << 16) | (0xc910 >> 2),
 311        0x00000000,
 312        (0x6e00 << 16) | (0xc910 >> 2),
 313        0x00000000,
 314        (0x7e00 << 16) | (0xc910 >> 2),
 315        0x00000000,
 316        (0x8e00 << 16) | (0xc910 >> 2),
 317        0x00000000,
 318        (0x9e00 << 16) | (0xc910 >> 2),
 319        0x00000000,
 320        (0xae00 << 16) | (0xc910 >> 2),
 321        0x00000000,
 322        (0xbe00 << 16) | (0xc910 >> 2),
 323        0x00000000,
 324        (0x0e00 << 16) | (0xc99c >> 2),
 325        0x00000000,
 326        (0x0e00 << 16) | (0x9834 >> 2),
 327        0x00000000,
 328        (0x0000 << 16) | (0x30f00 >> 2),
 329        0x00000000,
 330        (0x0001 << 16) | (0x30f00 >> 2),
 331        0x00000000,
 332        (0x0000 << 16) | (0x30f04 >> 2),
 333        0x00000000,
 334        (0x0001 << 16) | (0x30f04 >> 2),
 335        0x00000000,
 336        (0x0000 << 16) | (0x30f08 >> 2),
 337        0x00000000,
 338        (0x0001 << 16) | (0x30f08 >> 2),
 339        0x00000000,
 340        (0x0000 << 16) | (0x30f0c >> 2),
 341        0x00000000,
 342        (0x0001 << 16) | (0x30f0c >> 2),
 343        0x00000000,
 344        (0x0600 << 16) | (0x9b7c >> 2),
 345        0x00000000,
 346        (0x0e00 << 16) | (0x8a14 >> 2),
 347        0x00000000,
 348        (0x0e00 << 16) | (0x8a18 >> 2),
 349        0x00000000,
 350        (0x0600 << 16) | (0x30a00 >> 2),
 351        0x00000000,
 352        (0x0e00 << 16) | (0x8bf0 >> 2),
 353        0x00000000,
 354        (0x0e00 << 16) | (0x8bcc >> 2),
 355        0x00000000,
 356        (0x0e00 << 16) | (0x8b24 >> 2),
 357        0x00000000,
 358        (0x0e00 << 16) | (0x30a04 >> 2),
 359        0x00000000,
 360        (0x0600 << 16) | (0x30a10 >> 2),
 361        0x00000000,
 362        (0x0600 << 16) | (0x30a14 >> 2),
 363        0x00000000,
 364        (0x0600 << 16) | (0x30a18 >> 2),
 365        0x00000000,
 366        (0x0600 << 16) | (0x30a2c >> 2),
 367        0x00000000,
 368        (0x0e00 << 16) | (0xc700 >> 2),
 369        0x00000000,
 370        (0x0e00 << 16) | (0xc704 >> 2),
 371        0x00000000,
 372        (0x0e00 << 16) | (0xc708 >> 2),
 373        0x00000000,
 374        (0x0e00 << 16) | (0xc768 >> 2),
 375        0x00000000,
 376        (0x0400 << 16) | (0xc770 >> 2),
 377        0x00000000,
 378        (0x0400 << 16) | (0xc774 >> 2),
 379        0x00000000,
 380        (0x0400 << 16) | (0xc778 >> 2),
 381        0x00000000,
 382        (0x0400 << 16) | (0xc77c >> 2),
 383        0x00000000,
 384        (0x0400 << 16) | (0xc780 >> 2),
 385        0x00000000,
 386        (0x0400 << 16) | (0xc784 >> 2),
 387        0x00000000,
 388        (0x0400 << 16) | (0xc788 >> 2),
 389        0x00000000,
 390        (0x0400 << 16) | (0xc78c >> 2),
 391        0x00000000,
 392        (0x0400 << 16) | (0xc798 >> 2),
 393        0x00000000,
 394        (0x0400 << 16) | (0xc79c >> 2),
 395        0x00000000,
 396        (0x0400 << 16) | (0xc7a0 >> 2),
 397        0x00000000,
 398        (0x0400 << 16) | (0xc7a4 >> 2),
 399        0x00000000,
 400        (0x0400 << 16) | (0xc7a8 >> 2),
 401        0x00000000,
 402        (0x0400 << 16) | (0xc7ac >> 2),
 403        0x00000000,
 404        (0x0400 << 16) | (0xc7b0 >> 2),
 405        0x00000000,
 406        (0x0400 << 16) | (0xc7b4 >> 2),
 407        0x00000000,
 408        (0x0e00 << 16) | (0x9100 >> 2),
 409        0x00000000,
 410        (0x0e00 << 16) | (0x3c010 >> 2),
 411        0x00000000,
 412        (0x0e00 << 16) | (0x92a8 >> 2),
 413        0x00000000,
 414        (0x0e00 << 16) | (0x92ac >> 2),
 415        0x00000000,
 416        (0x0e00 << 16) | (0x92b4 >> 2),
 417        0x00000000,
 418        (0x0e00 << 16) | (0x92b8 >> 2),
 419        0x00000000,
 420        (0x0e00 << 16) | (0x92bc >> 2),
 421        0x00000000,
 422        (0x0e00 << 16) | (0x92c0 >> 2),
 423        0x00000000,
 424        (0x0e00 << 16) | (0x92c4 >> 2),
 425        0x00000000,
 426        (0x0e00 << 16) | (0x92c8 >> 2),
 427        0x00000000,
 428        (0x0e00 << 16) | (0x92cc >> 2),
 429        0x00000000,
 430        (0x0e00 << 16) | (0x92d0 >> 2),
 431        0x00000000,
 432        (0x0e00 << 16) | (0x8c00 >> 2),
 433        0x00000000,
 434        (0x0e00 << 16) | (0x8c04 >> 2),
 435        0x00000000,
 436        (0x0e00 << 16) | (0x8c20 >> 2),
 437        0x00000000,
 438        (0x0e00 << 16) | (0x8c38 >> 2),
 439        0x00000000,
 440        (0x0e00 << 16) | (0x8c3c >> 2),
 441        0x00000000,
 442        (0x0e00 << 16) | (0xae00 >> 2),
 443        0x00000000,
 444        (0x0e00 << 16) | (0x9604 >> 2),
 445        0x00000000,
 446        (0x0e00 << 16) | (0xac08 >> 2),
 447        0x00000000,
 448        (0x0e00 << 16) | (0xac0c >> 2),
 449        0x00000000,
 450        (0x0e00 << 16) | (0xac10 >> 2),
 451        0x00000000,
 452        (0x0e00 << 16) | (0xac14 >> 2),
 453        0x00000000,
 454        (0x0e00 << 16) | (0xac58 >> 2),
 455        0x00000000,
 456        (0x0e00 << 16) | (0xac68 >> 2),
 457        0x00000000,
 458        (0x0e00 << 16) | (0xac6c >> 2),
 459        0x00000000,
 460        (0x0e00 << 16) | (0xac70 >> 2),
 461        0x00000000,
 462        (0x0e00 << 16) | (0xac74 >> 2),
 463        0x00000000,
 464        (0x0e00 << 16) | (0xac78 >> 2),
 465        0x00000000,
 466        (0x0e00 << 16) | (0xac7c >> 2),
 467        0x00000000,
 468        (0x0e00 << 16) | (0xac80 >> 2),
 469        0x00000000,
 470        (0x0e00 << 16) | (0xac84 >> 2),
 471        0x00000000,
 472        (0x0e00 << 16) | (0xac88 >> 2),
 473        0x00000000,
 474        (0x0e00 << 16) | (0xac8c >> 2),
 475        0x00000000,
 476        (0x0e00 << 16) | (0x970c >> 2),
 477        0x00000000,
 478        (0x0e00 << 16) | (0x9714 >> 2),
 479        0x00000000,
 480        (0x0e00 << 16) | (0x9718 >> 2),
 481        0x00000000,
 482        (0x0e00 << 16) | (0x971c >> 2),
 483        0x00000000,
 484        (0x0e00 << 16) | (0x31068 >> 2),
 485        0x00000000,
 486        (0x4e00 << 16) | (0x31068 >> 2),
 487        0x00000000,
 488        (0x5e00 << 16) | (0x31068 >> 2),
 489        0x00000000,
 490        (0x6e00 << 16) | (0x31068 >> 2),
 491        0x00000000,
 492        (0x7e00 << 16) | (0x31068 >> 2),
 493        0x00000000,
 494        (0x8e00 << 16) | (0x31068 >> 2),
 495        0x00000000,
 496        (0x9e00 << 16) | (0x31068 >> 2),
 497        0x00000000,
 498        (0xae00 << 16) | (0x31068 >> 2),
 499        0x00000000,
 500        (0xbe00 << 16) | (0x31068 >> 2),
 501        0x00000000,
 502        (0x0e00 << 16) | (0xcd10 >> 2),
 503        0x00000000,
 504        (0x0e00 << 16) | (0xcd14 >> 2),
 505        0x00000000,
 506        (0x0e00 << 16) | (0x88b0 >> 2),
 507        0x00000000,
 508        (0x0e00 << 16) | (0x88b4 >> 2),
 509        0x00000000,
 510        (0x0e00 << 16) | (0x88b8 >> 2),
 511        0x00000000,
 512        (0x0e00 << 16) | (0x88bc >> 2),
 513        0x00000000,
 514        (0x0400 << 16) | (0x89c0 >> 2),
 515        0x00000000,
 516        (0x0e00 << 16) | (0x88c4 >> 2),
 517        0x00000000,
 518        (0x0e00 << 16) | (0x88c8 >> 2),
 519        0x00000000,
 520        (0x0e00 << 16) | (0x88d0 >> 2),
 521        0x00000000,
 522        (0x0e00 << 16) | (0x88d4 >> 2),
 523        0x00000000,
 524        (0x0e00 << 16) | (0x88d8 >> 2),
 525        0x00000000,
 526        (0x0e00 << 16) | (0x8980 >> 2),
 527        0x00000000,
 528        (0x0e00 << 16) | (0x30938 >> 2),
 529        0x00000000,
 530        (0x0e00 << 16) | (0x3093c >> 2),
 531        0x00000000,
 532        (0x0e00 << 16) | (0x30940 >> 2),
 533        0x00000000,
 534        (0x0e00 << 16) | (0x89a0 >> 2),
 535        0x00000000,
 536        (0x0e00 << 16) | (0x30900 >> 2),
 537        0x00000000,
 538        (0x0e00 << 16) | (0x30904 >> 2),
 539        0x00000000,
 540        (0x0e00 << 16) | (0x89b4 >> 2),
 541        0x00000000,
 542        (0x0e00 << 16) | (0x3c210 >> 2),
 543        0x00000000,
 544        (0x0e00 << 16) | (0x3c214 >> 2),
 545        0x00000000,
 546        (0x0e00 << 16) | (0x3c218 >> 2),
 547        0x00000000,
 548        (0x0e00 << 16) | (0x8904 >> 2),
 549        0x00000000,
 550        0x5, /* NOTE(review): bare word; the five entries that follow have no 0x00000000 word - meaning not shown in this chunk */
 551        (0x0e00 << 16) | (0x8c28 >> 2),
 552        (0x0e00 << 16) | (0x8c2c >> 2),
 553        (0x0e00 << 16) | (0x8c30 >> 2),
 554        (0x0e00 << 16) | (0x8c34 >> 2),
 555        (0x0e00 << 16) | (0x9600 >> 2),
 556};
 557
 /*
  * "kalindi" register list, stored as a flat array of u32 words.
  *
  * Layout as visible in the initializer:
  *  - Most entries are two words: (hi << 16) | (offset >> 2), followed by
  *    a 0x00000000 word.
  *  - The hi half-word is mostly 0x0e00; 0x0000, 0x0400, 0x0600 and the
  *    series 0x4e00..0x7e00 also appear, the series repeating a single
  *    offset once per prefix.
  *  - A bare 0x3 word and a bare 0x5 word split the list into three runs;
  *    the five entries after 0x5 carry no trailing 0x00000000 word.
  *
  * NOTE(review): the name suggests this is the RLC save/restore list, with
  * "offset >> 2" turning a byte offset into a dword register index, the
  * upper half-word acting as an instance selector and the zero word as a
  * value slot.  None of that is established in this chunk - confirm against
  * the code that consumes the array.  Entry order is presumably significant.
  */
 558static const u32 kalindi_rlc_save_restore_register_list[] =
 559{
 560        (0x0e00 << 16) | (0xc12c >> 2),
 561        0x00000000,
 562        (0x0e00 << 16) | (0xc140 >> 2),
 563        0x00000000,
 564        (0x0e00 << 16) | (0xc150 >> 2),
 565        0x00000000,
 566        (0x0e00 << 16) | (0xc15c >> 2),
 567        0x00000000,
 568        (0x0e00 << 16) | (0xc168 >> 2),
 569        0x00000000,
 570        (0x0e00 << 16) | (0xc170 >> 2),
 571        0x00000000,
 572        (0x0e00 << 16) | (0xc204 >> 2),
 573        0x00000000,
 574        (0x0e00 << 16) | (0xc2b4 >> 2),
 575        0x00000000,
 576        (0x0e00 << 16) | (0xc2b8 >> 2),
 577        0x00000000,
 578        (0x0e00 << 16) | (0xc2bc >> 2),
 579        0x00000000,
 580        (0x0e00 << 16) | (0xc2c0 >> 2),
 581        0x00000000,
 582        (0x0e00 << 16) | (0x8228 >> 2),
 583        0x00000000,
 584        (0x0e00 << 16) | (0x829c >> 2),
 585        0x00000000,
 586        (0x0e00 << 16) | (0x869c >> 2),
 587        0x00000000,
 588        (0x0600 << 16) | (0x98f4 >> 2),
 589        0x00000000,
 590        (0x0e00 << 16) | (0x98f8 >> 2),
 591        0x00000000,
 592        (0x0e00 << 16) | (0x9900 >> 2),
 593        0x00000000,
 594        (0x0e00 << 16) | (0xc260 >> 2),
 595        0x00000000,
 596        (0x0e00 << 16) | (0x90e8 >> 2),
 597        0x00000000,
 598        (0x0e00 << 16) | (0x3c000 >> 2),
 599        0x00000000,
 600        (0x0e00 << 16) | (0x3c00c >> 2),
 601        0x00000000,
 602        (0x0e00 << 16) | (0x8c1c >> 2),
 603        0x00000000,
 604        (0x0e00 << 16) | (0x9700 >> 2),
 605        0x00000000,
 606        (0x0e00 << 16) | (0xcd20 >> 2),
 607        0x00000000,
 608        (0x4e00 << 16) | (0xcd20 >> 2),
 609        0x00000000,
 610        (0x5e00 << 16) | (0xcd20 >> 2),
 611        0x00000000,
 612        (0x6e00 << 16) | (0xcd20 >> 2),
 613        0x00000000,
 614        (0x7e00 << 16) | (0xcd20 >> 2),
 615        0x00000000,
 616        (0x0e00 << 16) | (0x89bc >> 2),
 617        0x00000000,
 618        (0x0e00 << 16) | (0x8900 >> 2),
 619        0x00000000,
 620        0x3, /* NOTE(review): bare word, not an entry pair - meaning not shown in this chunk */
 621        (0x0e00 << 16) | (0xc130 >> 2),
 622        0x00000000,
 623        (0x0e00 << 16) | (0xc134 >> 2),
 624        0x00000000,
 625        (0x0e00 << 16) | (0xc1fc >> 2),
 626        0x00000000,
 627        (0x0e00 << 16) | (0xc208 >> 2),
 628        0x00000000,
 629        (0x0e00 << 16) | (0xc264 >> 2),
 630        0x00000000,
 631        (0x0e00 << 16) | (0xc268 >> 2),
 632        0x00000000,
 633        (0x0e00 << 16) | (0xc26c >> 2),
 634        0x00000000,
 635        (0x0e00 << 16) | (0xc270 >> 2),
 636        0x00000000,
 637        (0x0e00 << 16) | (0xc274 >> 2),
 638        0x00000000,
 639        (0x0e00 << 16) | (0xc28c >> 2),
 640        0x00000000,
 641        (0x0e00 << 16) | (0xc290 >> 2),
 642        0x00000000,
 643        (0x0e00 << 16) | (0xc294 >> 2),
 644        0x00000000,
 645        (0x0e00 << 16) | (0xc298 >> 2),
 646        0x00000000,
 647        (0x0e00 << 16) | (0xc2a0 >> 2),
 648        0x00000000,
 649        (0x0e00 << 16) | (0xc2a4 >> 2),
 650        0x00000000,
 651        (0x0e00 << 16) | (0xc2a8 >> 2),
 652        0x00000000,
 653        (0x0e00 << 16) | (0xc2ac >> 2),
 654        0x00000000,
 655        (0x0e00 << 16) | (0x301d0 >> 2),
 656        0x00000000,
 657        (0x0e00 << 16) | (0x30238 >> 2),
 658        0x00000000,
 659        (0x0e00 << 16) | (0x30250 >> 2),
 660        0x00000000,
 661        (0x0e00 << 16) | (0x30254 >> 2),
 662        0x00000000,
 663        (0x0e00 << 16) | (0x30258 >> 2),
 664        0x00000000,
 665        (0x0e00 << 16) | (0x3025c >> 2),
 666        0x00000000,
 667        (0x4e00 << 16) | (0xc900 >> 2),
 668        0x00000000,
 669        (0x5e00 << 16) | (0xc900 >> 2),
 670        0x00000000,
 671        (0x6e00 << 16) | (0xc900 >> 2),
 672        0x00000000,
 673        (0x7e00 << 16) | (0xc900 >> 2),
 674        0x00000000,
 675        (0x4e00 << 16) | (0xc904 >> 2),
 676        0x00000000,
 677        (0x5e00 << 16) | (0xc904 >> 2),
 678        0x00000000,
 679        (0x6e00 << 16) | (0xc904 >> 2),
 680        0x00000000,
 681        (0x7e00 << 16) | (0xc904 >> 2),
 682        0x00000000,
 683        (0x4e00 << 16) | (0xc908 >> 2),
 684        0x00000000,
 685        (0x5e00 << 16) | (0xc908 >> 2),
 686        0x00000000,
 687        (0x6e00 << 16) | (0xc908 >> 2),
 688        0x00000000,
 689        (0x7e00 << 16) | (0xc908 >> 2),
 690        0x00000000,
 691        (0x4e00 << 16) | (0xc90c >> 2),
 692        0x00000000,
 693        (0x5e00 << 16) | (0xc90c >> 2),
 694        0x00000000,
 695        (0x6e00 << 16) | (0xc90c >> 2),
 696        0x00000000,
 697        (0x7e00 << 16) | (0xc90c >> 2),
 698        0x00000000,
 699        (0x4e00 << 16) | (0xc910 >> 2),
 700        0x00000000,
 701        (0x5e00 << 16) | (0xc910 >> 2),
 702        0x00000000,
 703        (0x6e00 << 16) | (0xc910 >> 2),
 704        0x00000000,
 705        (0x7e00 << 16) | (0xc910 >> 2),
 706        0x00000000,
 707        (0x0e00 << 16) | (0xc99c >> 2),
 708        0x00000000,
 709        (0x0e00 << 16) | (0x9834 >> 2),
 710        0x00000000,
 711        (0x0000 << 16) | (0x30f00 >> 2),
 712        0x00000000,
 713        (0x0000 << 16) | (0x30f04 >> 2),
 714        0x00000000,
 715        (0x0000 << 16) | (0x30f08 >> 2),
 716        0x00000000,
 717        (0x0000 << 16) | (0x30f0c >> 2),
 718        0x00000000,
 719        (0x0600 << 16) | (0x9b7c >> 2),
 720        0x00000000,
 721        (0x0e00 << 16) | (0x8a14 >> 2),
 722        0x00000000,
 723        (0x0e00 << 16) | (0x8a18 >> 2),
 724        0x00000000,
 725        (0x0600 << 16) | (0x30a00 >> 2),
 726        0x00000000,
 727        (0x0e00 << 16) | (0x8bf0 >> 2),
 728        0x00000000,
 729        (0x0e00 << 16) | (0x8bcc >> 2),
 730        0x00000000,
 731        (0x0e00 << 16) | (0x8b24 >> 2),
 732        0x00000000,
 733        (0x0e00 << 16) | (0x30a04 >> 2),
 734        0x00000000,
 735        (0x0600 << 16) | (0x30a10 >> 2),
 736        0x00000000,
 737        (0x0600 << 16) | (0x30a14 >> 2),
 738        0x00000000,
 739        (0x0600 << 16) | (0x30a18 >> 2),
 740        0x00000000,
 741        (0x0600 << 16) | (0x30a2c >> 2),
 742        0x00000000,
 743        (0x0e00 << 16) | (0xc700 >> 2),
 744        0x00000000,
 745        (0x0e00 << 16) | (0xc704 >> 2),
 746        0x00000000,
 747        (0x0e00 << 16) | (0xc708 >> 2),
 748        0x00000000,
 749        (0x0e00 << 16) | (0xc768 >> 2),
 750        0x00000000,
 751        (0x0400 << 16) | (0xc770 >> 2),
 752        0x00000000,
 753        (0x0400 << 16) | (0xc774 >> 2),
 754        0x00000000,
 755        (0x0400 << 16) | (0xc798 >> 2),
 756        0x00000000,
 757        (0x0400 << 16) | (0xc79c >> 2),
 758        0x00000000,
 759        (0x0e00 << 16) | (0x9100 >> 2),
 760        0x00000000,
 761        (0x0e00 << 16) | (0x3c010 >> 2),
 762        0x00000000,
 763        (0x0e00 << 16) | (0x8c00 >> 2),
 764        0x00000000,
 765        (0x0e00 << 16) | (0x8c04 >> 2),
 766        0x00000000,
 767        (0x0e00 << 16) | (0x8c20 >> 2),
 768        0x00000000,
 769        (0x0e00 << 16) | (0x8c38 >> 2),
 770        0x00000000,
 771        (0x0e00 << 16) | (0x8c3c >> 2),
 772        0x00000000,
 773        (0x0e00 << 16) | (0xae00 >> 2),
 774        0x00000000,
 775        (0x0e00 << 16) | (0x9604 >> 2),
 776        0x00000000,
 777        (0x0e00 << 16) | (0xac08 >> 2),
 778        0x00000000,
 779        (0x0e00 << 16) | (0xac0c >> 2),
 780        0x00000000,
 781        (0x0e00 << 16) | (0xac10 >> 2),
 782        0x00000000,
 783        (0x0e00 << 16) | (0xac14 >> 2),
 784        0x00000000,
 785        (0x0e00 << 16) | (0xac58 >> 2),
 786        0x00000000,
 787        (0x0e00 << 16) | (0xac68 >> 2),
 788        0x00000000,
 789        (0x0e00 << 16) | (0xac6c >> 2),
 790        0x00000000,
 791        (0x0e00 << 16) | (0xac70 >> 2),
 792        0x00000000,
 793        (0x0e00 << 16) | (0xac74 >> 2),
 794        0x00000000,
 795        (0x0e00 << 16) | (0xac78 >> 2),
 796        0x00000000,
 797        (0x0e00 << 16) | (0xac7c >> 2),
 798        0x00000000,
 799        (0x0e00 << 16) | (0xac80 >> 2),
 800        0x00000000,
 801        (0x0e00 << 16) | (0xac84 >> 2),
 802        0x00000000,
 803        (0x0e00 << 16) | (0xac88 >> 2),
 804        0x00000000,
 805        (0x0e00 << 16) | (0xac8c >> 2),
 806        0x00000000,
 807        (0x0e00 << 16) | (0x970c >> 2),
 808        0x00000000,
 809        (0x0e00 << 16) | (0x9714 >> 2),
 810        0x00000000,
 811        (0x0e00 << 16) | (0x9718 >> 2),
 812        0x00000000,
 813        (0x0e00 << 16) | (0x971c >> 2),
 814        0x00000000,
 815        (0x0e00 << 16) | (0x31068 >> 2),
 816        0x00000000,
 817        (0x4e00 << 16) | (0x31068 >> 2),
 818        0x00000000,
 819        (0x5e00 << 16) | (0x31068 >> 2),
 820        0x00000000,
 821        (0x6e00 << 16) | (0x31068 >> 2),
 822        0x00000000,
 823        (0x7e00 << 16) | (0x31068 >> 2),
 824        0x00000000,
 825        (0x0e00 << 16) | (0xcd10 >> 2),
 826        0x00000000,
 827        (0x0e00 << 16) | (0xcd14 >> 2),
 828        0x00000000,
 829        (0x0e00 << 16) | (0x88b0 >> 2),
 830        0x00000000,
 831        (0x0e00 << 16) | (0x88b4 >> 2),
 832        0x00000000,
 833        (0x0e00 << 16) | (0x88b8 >> 2),
 834        0x00000000,
 835        (0x0e00 << 16) | (0x88bc >> 2),
 836        0x00000000,
 837        (0x0400 << 16) | (0x89c0 >> 2),
 838        0x00000000,
 839        (0x0e00 << 16) | (0x88c4 >> 2),
 840        0x00000000,
 841        (0x0e00 << 16) | (0x88c8 >> 2),
 842        0x00000000,
 843        (0x0e00 << 16) | (0x88d0 >> 2),
 844        0x00000000,
 845        (0x0e00 << 16) | (0x88d4 >> 2),
 846        0x00000000,
 847        (0x0e00 << 16) | (0x88d8 >> 2),
 848        0x00000000,
 849        (0x0e00 << 16) | (0x8980 >> 2),
 850        0x00000000,
 851        (0x0e00 << 16) | (0x30938 >> 2),
 852        0x00000000,
 853        (0x0e00 << 16) | (0x3093c >> 2),
 854        0x00000000,
 855        (0x0e00 << 16) | (0x30940 >> 2),
 856        0x00000000,
 857        (0x0e00 << 16) | (0x89a0 >> 2),
 858        0x00000000,
 859        (0x0e00 << 16) | (0x30900 >> 2),
 860        0x00000000,
 861        (0x0e00 << 16) | (0x30904 >> 2),
 862        0x00000000,
 863        (0x0e00 << 16) | (0x89b4 >> 2),
 864        0x00000000,
 865        (0x0e00 << 16) | (0x3e1fc >> 2),
 866        0x00000000,
 867        (0x0e00 << 16) | (0x3c210 >> 2),
 868        0x00000000,
 869        (0x0e00 << 16) | (0x3c214 >> 2),
 870        0x00000000,
 871        (0x0e00 << 16) | (0x3c218 >> 2),
 872        0x00000000,
 873        (0x0e00 << 16) | (0x8904 >> 2),
 874        0x00000000,
 875        0x5, /* NOTE(review): bare word; the five entries that follow have no 0x00000000 word - meaning not shown in this chunk */
 876        (0x0e00 << 16) | (0x8c28 >> 2),
 877        (0x0e00 << 16) | (0x8c2c >> 2),
 878        (0x0e00 << 16) | (0x8c30 >> 2),
 879        (0x0e00 << 16) | (0x8c34 >> 2),
 880        (0x0e00 << 16) | (0x9600 >> 2),
 881};
 882
 /*
  * Forward declarations of file-local helpers; their definitions are not
  * part of this chunk.  gfx_v7_0_get_csb_size() returns a u32;
  * gfx_v7_0_get_csb_buffer() takes a volatile u32 buffer pointer in
  * addition to the device; the two init helpers take only the device.
  * NOTE(review): "csb" presumably refers to the clear-state buffer and "pg"
  * to power gating - confirm against the definitions.
  */
 883static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev);
 884static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer);
 885static void gfx_v7_0_init_cp_pg_table(struct amdgpu_device *adev);
 886static void gfx_v7_0_init_pg(struct amdgpu_device *adev);
 887
 888/*
 889 * Core functions
 890 */
 891/**
 892 * gfx_v7_0_init_microcode - load ucode images from disk
 893 *
 894 * @adev: amdgpu_device pointer
 895 *
 896 * Use the firmware interface to load the ucode images into
 897 * the driver (not loaded into hw).
 898 * Returns 0 on success, error on failure.
 899 */
 900static int gfx_v7_0_init_microcode(struct amdgpu_device *adev)
 901{
 902        const char *chip_name;
 903        char fw_name[30];
 904        int err;
 905
 906        DRM_DEBUG("\n");
 907
 908        switch (adev->asic_type) {
 909        case CHIP_BONAIRE:
 910                chip_name = "bonaire";
 911                break;
 912        case CHIP_HAWAII:
 913                chip_name = "hawaii";
 914                break;
 915        case CHIP_KAVERI:
 916                chip_name = "kaveri";
 917                break;
 918        case CHIP_KABINI:
 919                chip_name = "kabini";
 920                break;
 921        case CHIP_MULLINS:
 922                chip_name = "mullins";
 923                break;
 924        default: BUG();
 925        }
 926
 927        snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
 928        err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
 929        if (err)
 930                goto out;
 931        err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
 932        if (err)
 933                goto out;
 934
 935        snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
 936        err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
 937        if (err)
 938                goto out;
 939        err = amdgpu_ucode_validate(adev->gfx.me_fw);
 940        if (err)
 941                goto out;
 942
 943        snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
 944        err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
 945        if (err)
 946                goto out;
 947        err = amdgpu_ucode_validate(adev->gfx.ce_fw);
 948        if (err)
 949                goto out;
 950
 951        snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
 952        err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
 953        if (err)
 954                goto out;
 955        err = amdgpu_ucode_validate(adev->gfx.mec_fw);
 956        if (err)
 957                goto out;
 958
 959        if (adev->asic_type == CHIP_KAVERI) {
 960                snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", chip_name);
 961                err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
 962                if (err)
 963                        goto out;
 964                err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
 965                if (err)
 966                        goto out;
 967        }
 968
 969        snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
 970        err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
 971        if (err)
 972                goto out;
 973        err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
 974
 975out:
 976        if (err) {
 977                printk(KERN_ERR
 978                       "gfx7: Failed to load firmware \"%s\"\n",
 979                       fw_name);
 980                release_firmware(adev->gfx.pfp_fw);
 981                adev->gfx.pfp_fw = NULL;
 982                release_firmware(adev->gfx.me_fw);
 983                adev->gfx.me_fw = NULL;
 984                release_firmware(adev->gfx.ce_fw);
 985                adev->gfx.ce_fw = NULL;
 986                release_firmware(adev->gfx.mec_fw);
 987                adev->gfx.mec_fw = NULL;
 988                release_firmware(adev->gfx.mec2_fw);
 989                adev->gfx.mec2_fw = NULL;
 990                release_firmware(adev->gfx.rlc_fw);
 991                adev->gfx.rlc_fw = NULL;
 992        }
 993        return err;
 994}
 995
 996/**
 997 * gfx_v7_0_tiling_mode_table_init - init the hw tiling table
 998 *
 999 * @adev: amdgpu_device pointer
1000 *
1001 * Starting with SI, the tiling setup is done globally in a
1002 * set of 32 tiling modes.  Rather than selecting each set of
1003 * parameters per surface as on older asics, we just select
1004 * which index in the tiling table we want to use, and the
1005 * surface uses those parameters (CIK).
1006 */
1007static void gfx_v7_0_tiling_mode_table_init(struct amdgpu_device *adev)
1008{
1009        const u32 num_tile_mode_states = 32;
1010        const u32 num_secondary_tile_mode_states = 16;
1011        u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1012
1013        switch (adev->gfx.config.mem_row_size_in_kb) {
1014        case 1:
1015                split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1016                break;
1017        case 2:
1018        default:
1019                split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1020                break;
1021        case 4:
1022                split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1023                break;
1024        }
1025
1026        switch (adev->asic_type) {
1027        case CHIP_BONAIRE:
1028                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1029                        switch (reg_offset) {
1030                        case 0:
1031                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1032                                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1033                                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1034                                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1035                                break;
1036                        case 1:
1037                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1038                                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1039                                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1040                                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1041                                break;
1042                        case 2:
1043                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1044                                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1045                                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1046                                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1047                                break;
1048                        case 3:
1049                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1050                                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1051                                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1052                                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1053                                break;
1054                        case 4:
1055                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1056                                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1057                                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1058                                                TILE_SPLIT(split_equal_to_row_size));
1059                                break;
1060                        case 5:
1061                                gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1062                                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1063                                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1064                                break;
1065                        case 6:
1066                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1067                                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1068                                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1069                                                TILE_SPLIT(split_equal_to_row_size));
1070                                break;
1071                        case 7:
1072                                gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size));
1073                                break;
1074
1075                        case 8:
1076                                gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1077                                                PIPE_CONFIG(ADDR_SURF_P4_16x16));
1078                                break;
1079                        case 9:
1080                                gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1081                                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1082                                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1083                                break;
1084                        case 10:
1085                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1086                                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1087                                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1088                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1089                                break;
1090                        case 11:
1091                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1092                                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1093                                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1094                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1095                                break;
1096                        case 12:
1097                                gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size));
1098                                break;
1099                        case 13:
1100                                gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1101                                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1102                                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1103                                break;
1104                        case 14:
1105                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1106                                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1107                                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1108                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1109                                break;
1110                        case 15:
1111                                gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1112                                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1113                                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1114                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1115                                break;
1116                        case 16:
1117                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1118                                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1119                                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1120                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1121                                break;
1122                        case 17:
1123                                gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size));
1124                                break;
1125                        case 18:
1126                                gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1127                                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1128                                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1129                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1130                                break;
1131                        case 19:
1132                                gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1133                                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1134                                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1135                                break;
1136                        case 20:
1137                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1138                                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1139                                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1140                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1141                                break;
1142                        case 21:
1143                                gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1144                                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1145                                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1146                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1147                                break;
1148                        case 22:
1149                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1150                                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1151                                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1152                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1153                                break;
1154                        case 23:
1155                                gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size));
1156                                break;
1157                        case 24:
1158                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1159                                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1160                                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1161                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1162                                break;
1163                        case 25:
1164                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1165                                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1166                                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1167                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1168                                break;
1169                        case 26:
1170                                gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1171                                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1172                                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1173                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1174                                break;
1175                        case 27:
1176                                gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1177                                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1178                                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1179                                break;
1180                        case 28:
1181                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1182                                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1183                                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1184                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1185                                break;
1186                        case 29:
1187                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1188                                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1189                                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1190                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1191                                break;
1192                        case 30:
1193                                gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size));
1194                                break;
1195                        default:
1196                                gb_tile_moden = 0;
1197                                break;
1198                        }
1199                        adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden;
1200                        WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden);
1201                }
1202                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1203                        switch (reg_offset) {
1204                        case 0:
1205                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1206                                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1207                                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1208                                                NUM_BANKS(ADDR_SURF_16_BANK));
1209                                break;
1210                        case 1:
1211                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1212                                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1213                                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1214                                                NUM_BANKS(ADDR_SURF_16_BANK));
1215                                break;
1216                        case 2:
1217                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1218                                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1219                                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1220                                                NUM_BANKS(ADDR_SURF_16_BANK));
1221                                break;
1222                        case 3:
1223                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1224                                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1225                                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1226                                                NUM_BANKS(ADDR_SURF_16_BANK));
1227                                break;
1228                        case 4:
1229                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1230                                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1231                                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1232                                                NUM_BANKS(ADDR_SURF_16_BANK));
1233                                break;
1234                        case 5:
1235                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1236                                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1237                                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1238                                                NUM_BANKS(ADDR_SURF_8_BANK));
1239                                break;
1240                        case 6:
1241                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1242                                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1243                                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1244                                                NUM_BANKS(ADDR_SURF_4_BANK));
1245                                break;
1246                        case 8:
1247                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1248                                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1249                                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1250                                                NUM_BANKS(ADDR_SURF_16_BANK));
1251                                break;
1252                        case 9:
1253                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1254                                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1255                                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1256                                                NUM_BANKS(ADDR_SURF_16_BANK));
1257                                break;
1258                        case 10:
1259                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1260                                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1261                                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1262                                                NUM_BANKS(ADDR_SURF_16_BANK));
1263                                break;
1264                        case 11:
1265                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1266                                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1267                                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1268                                                NUM_BANKS(ADDR_SURF_16_BANK));
1269                                break;
1270                        case 12:
1271                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1272                                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1273                                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1274                                                NUM_BANKS(ADDR_SURF_16_BANK));
1275                                break;
1276                        case 13:
1277                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1278                                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1279                                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1280                                                NUM_BANKS(ADDR_SURF_8_BANK));
1281                                break;
1282                        case 14:
1283                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1284                                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1285                                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1286                                                NUM_BANKS(ADDR_SURF_4_BANK));
1287                                break;
1288                        default:
1289                                gb_tile_moden = 0;
1290                                break;
1291                        }
1292                        adev->gfx.config.macrotile_mode_array[reg_offset] = gb_tile_moden;
1293                        WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden);
1294                }
1295                break;
1296        case CHIP_HAWAII:
1297                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1298                        switch (reg_offset) {
1299                        case 0:
1300                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1301                                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1302                                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1303                                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1304                                break;
1305                        case 1:
1306                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1307                                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1308                                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1309                                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1310                                break;
1311                        case 2:
1312                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1313                                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1314                                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1315                                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1316                                break;
1317                        case 3:
1318                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1319                                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1320                                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1321                                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1322                                break;
1323                        case 4:
1324                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1325                                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1326                                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1327                                                TILE_SPLIT(split_equal_to_row_size));
1328                                break;
1329                        case 5:
1330                                gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1331                                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1332                                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1333                                                TILE_SPLIT(split_equal_to_row_size));
1334                                break;
1335                        case 6:
1336                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1337                                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1338                                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1339                                                TILE_SPLIT(split_equal_to_row_size));
1340                                break;
1341                        case 7:
1342                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1343                                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1344                                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1345                                                TILE_SPLIT(split_equal_to_row_size));
1346                                break;
1347
1348                        case 8:
1349                                gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1350                                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
1351                                break;
1352                        case 9:
1353                                gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1354                                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1355                                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1356                                break;
1357                        case 10:
1358                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1359                                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1360                                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1361                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1362                                break;
1363                        case 11:
1364                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1365                                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1366                                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1367                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1368                                break;
1369                        case 12:
1370                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1371                                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1372                                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1373                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1374                                break;
1375                        case 13:
1376                                gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1377                                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1378                                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1379                                break;
1380                        case 14:
1381                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1382                                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1383                                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1384                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1385                                break;
1386                        case 15:
1387                                gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1388                                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1389                                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1390                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1391                                break;
1392                        case 16:
1393                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1394                                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1395                                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1396                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1397                                break;
1398                        case 17:
1399                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1400                                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1401                                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1402                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1403                                break;
1404                        case 18:
1405                                gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1406                                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1407                                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1408                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1409                                break;
1410                        case 19:
1411                                gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1412                                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1413                                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING));
1414                                break;
1415                        case 20:
1416                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1417                                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1418                                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1419                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1420                                break;
1421                        case 21:
1422                                gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1423                                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1424                                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1425                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1426                                break;
1427                        case 22:
1428                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1429                                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1430                                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1431                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1432                                break;
1433                        case 23:
1434                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1435                                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1436                                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1437                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1438                                break;
1439                        case 24:
1440                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1441                                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1442                                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1443                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1444                                break;
1445                        case 25:
1446                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1447                                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1448                                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1449                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1450                                break;
1451                        case 26:
1452                                gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1453                                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1454                                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1455                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1456                                break;
1457                        case 27:
1458                                gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1459                                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1460                                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1461                                break;
1462                        case 28:
1463                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1464                                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1465                                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1466                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1467                                break;
1468                        case 29:
1469                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1470                                                PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1471                                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1472                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1473                                break;
1474                        case 30:
1475                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1476                                                PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1477                                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1478                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1479                                break;
1480                        default:
1481                                gb_tile_moden = 0;
1482                                break;
1483                        }
1484                        adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden;
1485                        WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden);
1486                }
1487                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1488                        switch (reg_offset) {
1489                        case 0:
1490                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1491                                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1492                                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1493                                                NUM_BANKS(ADDR_SURF_16_BANK));
1494                                break;
1495                        case 1:
1496                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1497                                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1498                                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1499                                                NUM_BANKS(ADDR_SURF_16_BANK));
1500                                break;
1501                        case 2:
1502                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1503                                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1504                                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1505                                                NUM_BANKS(ADDR_SURF_16_BANK));
1506                                break;
1507                        case 3:
1508                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1509                                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1510                                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1511                                                NUM_BANKS(ADDR_SURF_16_BANK));
1512                                break;
1513                        case 4:
1514                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1515                                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1516                                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1517                                                NUM_BANKS(ADDR_SURF_8_BANK));
1518                                break;
1519                        case 5:
1520                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1521                                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1522                                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1523                                                NUM_BANKS(ADDR_SURF_4_BANK));
1524                                break;
1525                        case 6:
1526                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1527                                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1528                                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1529                                                NUM_BANKS(ADDR_SURF_4_BANK));
1530                                break;
1531                        case 8:
1532                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1533                                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1534                                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1535                                                NUM_BANKS(ADDR_SURF_16_BANK));
1536                                break;
1537                        case 9:
1538                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1539                                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1540                                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1541                                                NUM_BANKS(ADDR_SURF_16_BANK));
1542                                break;
1543                        case 10:
1544                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1545                                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1546                                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1547                                                NUM_BANKS(ADDR_SURF_16_BANK));
1548                                break;
1549                        case 11:
1550                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1551                                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1552                                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1553                                                NUM_BANKS(ADDR_SURF_8_BANK));
1554                                break;
1555                        case 12:
1556                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1557                                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1558                                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1559                                                NUM_BANKS(ADDR_SURF_16_BANK));
1560                                break;
1561                        case 13:
1562                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1563                                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1564                                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1565                                                NUM_BANKS(ADDR_SURF_8_BANK));
1566                                break;
1567                        case 14:
1568                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1569                                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1570                                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1571                                                NUM_BANKS(ADDR_SURF_4_BANK));
1572                                break;
1573                        default:
1574                                gb_tile_moden = 0;
1575                                break;
1576                        }
1577                        adev->gfx.config.macrotile_mode_array[reg_offset] = gb_tile_moden;
1578                        WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden);
1579                }
1580                break;
1581        case CHIP_KABINI:
1582        case CHIP_KAVERI:
1583        case CHIP_MULLINS:
1584        default:
1585                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1586                        switch (reg_offset) {
1587                        case 0:
1588                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1589                                                PIPE_CONFIG(ADDR_SURF_P2) |
1590                                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1591                                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1592                                break;
1593                        case 1:
1594                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1595                                                PIPE_CONFIG(ADDR_SURF_P2) |
1596                                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1597                                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1598                                break;
1599                        case 2:
1600                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1601                                                PIPE_CONFIG(ADDR_SURF_P2) |
1602                                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1603                                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1604                                break;
1605                        case 3:
1606                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1607                                                PIPE_CONFIG(ADDR_SURF_P2) |
1608                                                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1609                                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1610                                break;
1611                        case 4:
1612                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1613                                                PIPE_CONFIG(ADDR_SURF_P2) |
1614                                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1615                                                TILE_SPLIT(split_equal_to_row_size));
1616                                break;
1617                        case 5:
1618                                gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1619                                                PIPE_CONFIG(ADDR_SURF_P2) |
1620                                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1621                                break;
1622                        case 6:
1623                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1624                                                PIPE_CONFIG(ADDR_SURF_P2) |
1625                                                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1626                                                TILE_SPLIT(split_equal_to_row_size));
1627                                break;
1628                        case 7:
1629                                gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size));
1630                                break;
1631
1632                        case 8:
1633                                gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1634                                                PIPE_CONFIG(ADDR_SURF_P2));
1635                                break;
1636                        case 9:
1637                                gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1638                                                PIPE_CONFIG(ADDR_SURF_P2) |
1639                                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1640                                break;
1641                        case 10:
1642                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1643                                                PIPE_CONFIG(ADDR_SURF_P2) |
1644                                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1645                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1646                                break;
1647                        case 11:
1648                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1649                                                PIPE_CONFIG(ADDR_SURF_P2) |
1650                                                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1651                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1652                                break;
1653                        case 12:
1654                                gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size));
1655                                break;
1656                        case 13:
1657                                gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1658                                                PIPE_CONFIG(ADDR_SURF_P2) |
1659                                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1660                                break;
1661                        case 14:
1662                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1663                                                PIPE_CONFIG(ADDR_SURF_P2) |
1664                                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1665                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1666                                break;
1667                        case 15:
1668                                gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1669                                                PIPE_CONFIG(ADDR_SURF_P2) |
1670                                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1671                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1672                                break;
1673                        case 16:
1674                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1675                                                PIPE_CONFIG(ADDR_SURF_P2) |
1676                                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1677                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1678                                break;
1679                        case 17:
1680                                gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size));
1681                                break;
1682                        case 18:
1683                                gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1684                                                PIPE_CONFIG(ADDR_SURF_P2) |
1685                                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1686                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1687                                break;
1688                        case 19:
1689                                gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1690                                                PIPE_CONFIG(ADDR_SURF_P2) |
1691                                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING));
1692                                break;
1693                        case 20:
1694                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1695                                                PIPE_CONFIG(ADDR_SURF_P2) |
1696                                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1697                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1698                                break;
1699                        case 21:
1700                                gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1701                                                PIPE_CONFIG(ADDR_SURF_P2) |
1702                                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1703                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1704                                break;
1705                        case 22:
1706                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1707                                                PIPE_CONFIG(ADDR_SURF_P2) |
1708                                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1709                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1710                                break;
1711                        case 23:
1712                                gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size));
1713                                break;
1714                        case 24:
1715                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1716                                                PIPE_CONFIG(ADDR_SURF_P2) |
1717                                                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1718                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1719                                break;
1720                        case 25:
1721                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1722                                                PIPE_CONFIG(ADDR_SURF_P2) |
1723                                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1724                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1725                                break;
1726                        case 26:
1727                                gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1728                                                PIPE_CONFIG(ADDR_SURF_P2) |
1729                                                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1730                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1731                                break;
1732                        case 27:
1733                                gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1734                                                PIPE_CONFIG(ADDR_SURF_P2) |
1735                                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1736                                break;
1737                        case 28:
1738                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1739                                                PIPE_CONFIG(ADDR_SURF_P2) |
1740                                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1741                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1742                                break;
1743                        case 29:
1744                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1745                                                PIPE_CONFIG(ADDR_SURF_P2) |
1746                                                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1747                                                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1748                                break;
1749                        case 30:
1750                                gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size));
1751                                break;
1752                        default:
1753                                gb_tile_moden = 0;
1754                                break;
1755                        }
1756                        adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden;
1757                        WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden);
1758                }
1759                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1760                        switch (reg_offset) {
1761                        case 0:
1762                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1763                                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1764                                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1765                                                NUM_BANKS(ADDR_SURF_8_BANK));
1766                                break;
1767                        case 1:
1768                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1769                                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1770                                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1771                                                NUM_BANKS(ADDR_SURF_8_BANK));
1772                                break;
1773                        case 2:
1774                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1775                                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1776                                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1777                                                NUM_BANKS(ADDR_SURF_8_BANK));
1778                                break;
1779                        case 3:
1780                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1781                                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1782                                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1783                                                NUM_BANKS(ADDR_SURF_8_BANK));
1784                                break;
1785                        case 4:
1786                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1787                                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1788                                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1789                                                NUM_BANKS(ADDR_SURF_8_BANK));
1790                                break;
1791                        case 5:
1792                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1793                                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1794                                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1795                                                NUM_BANKS(ADDR_SURF_8_BANK));
1796                                break;
1797                        case 6:
1798                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1799                                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1800                                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1801                                                NUM_BANKS(ADDR_SURF_8_BANK));
1802                                break;
1803                        case 8:
1804                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1805                                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1806                                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1807                                                NUM_BANKS(ADDR_SURF_16_BANK));
1808                                break;
1809                        case 9:
1810                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1811                                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1812                                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1813                                                NUM_BANKS(ADDR_SURF_16_BANK));
1814                                break;
1815                        case 10:
1816                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1817                                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1818                                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1819                                                NUM_BANKS(ADDR_SURF_16_BANK));
1820                                break;
1821                        case 11:
1822                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1823                                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1824                                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1825                                                NUM_BANKS(ADDR_SURF_16_BANK));
1826                                break;
1827                        case 12:
1828                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1829                                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1830                                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1831                                                NUM_BANKS(ADDR_SURF_16_BANK));
1832                                break;
1833                        case 13:
1834                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1835                                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1836                                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1837                                                NUM_BANKS(ADDR_SURF_16_BANK));
1838                                break;
1839                        case 14:
1840                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1841                                                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1842                                                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1843                                                NUM_BANKS(ADDR_SURF_8_BANK));
1844                                break;
1845                        default:
1846                                gb_tile_moden = 0;
1847                                break;
1848                        }
1849                        adev->gfx.config.macrotile_mode_array[reg_offset] = gb_tile_moden;
1850                        WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden);
1851                }
1852                break;
1853        }
1854}
1855
1856/**
1857 * gfx_v7_0_select_se_sh - select which SE, SH to address
1858 *
1859 * @adev: amdgpu_device pointer
1860 * @se_num: shader engine to address
1861 * @sh_num: sh block to address
1862 *
1863 * Select which SE, SH combinations to address. Certain
1864 * registers are instanced per SE or SH.  0xffffffff means
1865 * broadcast to all SEs or SHs (CIK).
1866 */
1867void gfx_v7_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
1868{
1869        u32 data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK;
1870
1871        if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1872                data |= GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
1873                        GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK;
1874        else if (se_num == 0xffffffff)
1875                data |= GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK |
1876                        (sh_num << GRBM_GFX_INDEX__SH_INDEX__SHIFT);
1877        else if (sh_num == 0xffffffff)
1878                data |= GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
1879                        (se_num << GRBM_GFX_INDEX__SE_INDEX__SHIFT);
1880        else
1881                data |= (sh_num << GRBM_GFX_INDEX__SH_INDEX__SHIFT) |
1882                        (se_num << GRBM_GFX_INDEX__SE_INDEX__SHIFT);
1883        WREG32(mmGRBM_GFX_INDEX, data);
1884}
1885
/**
 * gfx_v7_0_create_bitmask - create a bitmask
 *
 * @bit_width: length of the mask
 *
 * Create a mask with the low @bit_width bits set (CIK).  Widths of 32
 * or more yield an all-ones mask; a width of 0 yields 0.
 * Returns the bitmask.
 */
static u32 gfx_v7_0_create_bitmask(u32 bit_width)
{
	/* shifting a 32-bit value by 32 or more is undefined, so saturate */
	if (bit_width >= 32)
		return 0xffffffff;

	return ((u32)1 << bit_width) - 1;
}
1904
1905/**
1906 * gfx_v7_0_get_rb_disabled - computes the mask of disabled RBs
1907 *
1908 * @adev: amdgpu_device pointer
1909 * @max_rb_num: max RBs (render backends) for the asic
1910 * @se_num: number of SEs (shader engines) for the asic
1911 * @sh_per_se: number of SH blocks per SE for the asic
1912 *
1913 * Calculates the bitmask of disabled RBs (CIK).
1914 * Returns the disabled RB bitmask.
1915 */
1916static u32 gfx_v7_0_get_rb_disabled(struct amdgpu_device *adev,
1917                                    u32 max_rb_num_per_se,
1918                                    u32 sh_per_se)
1919{
1920        u32 data, mask;
1921
1922        data = RREG32(mmCC_RB_BACKEND_DISABLE);
1923        if (data & 1)
1924                data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1925        else
1926                data = 0;
1927
1928        data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
1929
1930        data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1931
1932        mask = gfx_v7_0_create_bitmask(max_rb_num_per_se / sh_per_se);
1933
1934        return data & mask;
1935}
1936
1937/**
1938 * gfx_v7_0_setup_rb - setup the RBs on the asic
1939 *
1940 * @adev: amdgpu_device pointer
1941 * @se_num: number of SEs (shader engines) for the asic
1942 * @sh_per_se: number of SH blocks per SE for the asic
1943 * @max_rb_num: max RBs (render backends) for the asic
1944 *
1945 * Configures per-SE/SH RB registers (CIK).
1946 */
1947static void gfx_v7_0_setup_rb(struct amdgpu_device *adev,
1948                              u32 se_num, u32 sh_per_se,
1949                              u32 max_rb_num_per_se)
1950{
1951        int i, j;
1952        u32 data, mask;
1953        u32 disabled_rbs = 0;
1954        u32 enabled_rbs = 0;
1955
1956        mutex_lock(&adev->grbm_idx_mutex);
1957        for (i = 0; i < se_num; i++) {
1958                for (j = 0; j < sh_per_se; j++) {
1959                        gfx_v7_0_select_se_sh(adev, i, j);
1960                        data = gfx_v7_0_get_rb_disabled(adev, max_rb_num_per_se, sh_per_se);
1961                        if (adev->asic_type == CHIP_HAWAII)
1962                                disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
1963                        else
1964                                disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
1965                }
1966        }
1967        gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
1968        mutex_unlock(&adev->grbm_idx_mutex);
1969
1970        mask = 1;
1971        for (i = 0; i < max_rb_num_per_se * se_num; i++) {
1972                if (!(disabled_rbs & mask))
1973                        enabled_rbs |= mask;
1974                mask <<= 1;
1975        }
1976
1977        adev->gfx.config.backend_enable_mask = enabled_rbs;
1978
1979        mutex_lock(&adev->grbm_idx_mutex);
1980        for (i = 0; i < se_num; i++) {
1981                gfx_v7_0_select_se_sh(adev, i, 0xffffffff);
1982                data = 0;
1983                for (j = 0; j < sh_per_se; j++) {
1984                        switch (enabled_rbs & 3) {
1985                        case 0:
1986                                if (j == 0)
1987                                        data |= (RASTER_CONFIG_RB_MAP_3 <<
1988                                                PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
1989                                else
1990                                        data |= (RASTER_CONFIG_RB_MAP_0 <<
1991                                                PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
1992                                break;
1993                        case 1:
1994                                data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
1995                                break;
1996                        case 2:
1997                                data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
1998                                break;
1999                        case 3:
2000                        default:
2001                                data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
2002                                break;
2003                        }
2004                        enabled_rbs >>= 2;
2005                }
2006                WREG32(mmPA_SC_RASTER_CONFIG, data);
2007        }
2008        gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2009        mutex_unlock(&adev->grbm_idx_mutex);
2010}
2011
2012/**
2013 * gmc_v7_0_init_compute_vmid - gart enable
2014 *
2015 * @rdev: amdgpu_device pointer
2016 *
2017 * Initialize compute vmid sh_mem registers
2018 *
2019 */
2020#define DEFAULT_SH_MEM_BASES    (0x6000)
2021#define FIRST_COMPUTE_VMID      (8)
2022#define LAST_COMPUTE_VMID       (16)
2023static void gmc_v7_0_init_compute_vmid(struct amdgpu_device *adev)
2024{
2025        int i;
2026        uint32_t sh_mem_config;
2027        uint32_t sh_mem_bases;
2028
2029        /*
2030         * Configure apertures:
2031         * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2032         * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2033         * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2034        */
2035        sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2036        sh_mem_config = SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2037                        SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2038        sh_mem_config |= MTYPE_NONCACHED << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT;
2039        mutex_lock(&adev->srbm_mutex);
2040        for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2041                cik_srbm_select(adev, 0, 0, 0, i);
2042                /* CP and shaders */
2043                WREG32(mmSH_MEM_CONFIG, sh_mem_config);
2044                WREG32(mmSH_MEM_APE1_BASE, 1);
2045                WREG32(mmSH_MEM_APE1_LIMIT, 0);
2046                WREG32(mmSH_MEM_BASES, sh_mem_bases);
2047        }
2048        cik_srbm_select(adev, 0, 0, 0, 0);
2049        mutex_unlock(&adev->srbm_mutex);
2050}
2051
2052/**
2053 * gfx_v7_0_gpu_init - setup the 3D engine
2054 *
2055 * @adev: amdgpu_device pointer
2056 *
2057 * Configures the 3D engine and tiling configuration
2058 * registers so that the 3D engine is usable.
2059 */
2060static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
2061{
2062        u32 gb_addr_config;
2063        u32 mc_shared_chmap, mc_arb_ramcfg;
2064        u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
2065        u32 sh_mem_cfg;
2066        u32 tmp;
2067        int i;
2068
2069        switch (adev->asic_type) {
2070        case CHIP_BONAIRE:
2071                adev->gfx.config.max_shader_engines = 2;
2072                adev->gfx.config.max_tile_pipes = 4;
2073                adev->gfx.config.max_cu_per_sh = 7;
2074                adev->gfx.config.max_sh_per_se = 1;
2075                adev->gfx.config.max_backends_per_se = 2;
2076                adev->gfx.config.max_texture_channel_caches = 4;
2077                adev->gfx.config.max_gprs = 256;
2078                adev->gfx.config.max_gs_threads = 32;
2079                adev->gfx.config.max_hw_contexts = 8;
2080
2081                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2082                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2083                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2084                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
2085                gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2086                break;
2087        case CHIP_HAWAII:
2088                adev->gfx.config.max_shader_engines = 4;
2089                adev->gfx.config.max_tile_pipes = 16;
2090                adev->gfx.config.max_cu_per_sh = 11;
2091                adev->gfx.config.max_sh_per_se = 1;
2092                adev->gfx.config.max_backends_per_se = 4;
2093                adev->gfx.config.max_texture_channel_caches = 16;
2094                adev->gfx.config.max_gprs = 256;
2095                adev->gfx.config.max_gs_threads = 32;
2096                adev->gfx.config.max_hw_contexts = 8;
2097
2098                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2099                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2100                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2101                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
2102                gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
2103                break;
2104        case CHIP_KAVERI:
2105                adev->gfx.config.max_shader_engines = 1;
2106                adev->gfx.config.max_tile_pipes = 4;
2107                if ((adev->pdev->device == 0x1304) ||
2108                    (adev->pdev->device == 0x1305) ||
2109                    (adev->pdev->device == 0x130C) ||
2110                    (adev->pdev->device == 0x130F) ||
2111                    (adev->pdev->device == 0x1310) ||
2112                    (adev->pdev->device == 0x1311) ||
2113                    (adev->pdev->device == 0x131C)) {
2114                        adev->gfx.config.max_cu_per_sh = 8;
2115                        adev->gfx.config.max_backends_per_se = 2;
2116                } else if ((adev->pdev->device == 0x1309) ||
2117                           (adev->pdev->device == 0x130A) ||
2118                           (adev->pdev->device == 0x130D) ||
2119                           (adev->pdev->device == 0x1313) ||
2120                           (adev->pdev->device == 0x131D)) {
2121                        adev->gfx.config.max_cu_per_sh = 6;
2122                        adev->gfx.config.max_backends_per_se = 2;
2123                } else if ((adev->pdev->device == 0x1306) ||
2124                           (adev->pdev->device == 0x1307) ||
2125                           (adev->pdev->device == 0x130B) ||
2126                           (adev->pdev->device == 0x130E) ||
2127                           (adev->pdev->device == 0x1315) ||
2128                           (adev->pdev->device == 0x131B)) {
2129                        adev->gfx.config.max_cu_per_sh = 4;
2130                        adev->gfx.config.max_backends_per_se = 1;
2131                } else {
2132                        adev->gfx.config.max_cu_per_sh = 3;
2133                        adev->gfx.config.max_backends_per_se = 1;
2134                }
2135                adev->gfx.config.max_sh_per_se = 1;
2136                adev->gfx.config.max_texture_channel_caches = 4;
2137                adev->gfx.config.max_gprs = 256;
2138                adev->gfx.config.max_gs_threads = 16;
2139                adev->gfx.config.max_hw_contexts = 8;
2140
2141                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2142                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2143                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2144                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
2145                gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2146                break;
2147        case CHIP_KABINI:
2148        case CHIP_MULLINS:
2149        default:
2150                adev->gfx.config.max_shader_engines = 1;
2151                adev->gfx.config.max_tile_pipes = 2;
2152                adev->gfx.config.max_cu_per_sh = 2;
2153                adev->gfx.config.max_sh_per_se = 1;
2154                adev->gfx.config.max_backends_per_se = 1;
2155                adev->gfx.config.max_texture_channel_caches = 2;
2156                adev->gfx.config.max_gprs = 256;
2157                adev->gfx.config.max_gs_threads = 16;
2158                adev->gfx.config.max_hw_contexts = 8;
2159
2160                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2161                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2162                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2163                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
2164                gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2165                break;
2166        }
2167
2168        WREG32(mmGRBM_CNTL, (0xff << GRBM_CNTL__READ_TIMEOUT__SHIFT));
2169
2170        mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
2171        adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
2172        mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
2173
2174        adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
2175        adev->gfx.config.mem_max_burst_length_bytes = 256;
2176        if (adev->flags & AMD_IS_APU) {
2177                /* Get memory bank mapping mode. */
2178                tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
2179                dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
2180                dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
2181
2182                tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
2183                dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
2184                dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
2185
2186                /* Validate settings in case only one DIMM installed. */
2187                if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
2188                        dimm00_addr_map = 0;
2189                if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
2190                        dimm01_addr_map = 0;
2191                if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
2192                        dimm10_addr_map = 0;
2193                if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
2194                        dimm11_addr_map = 0;
2195
2196                /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
2197                /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
2198                if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
2199                        adev->gfx.config.mem_row_size_in_kb = 2;
2200                else
2201                        adev->gfx.config.mem_row_size_in_kb = 1;
2202        } else {
2203                tmp = (mc_arb_ramcfg & MC_ARB_RAMCFG__NOOFCOLS_MASK) >> MC_ARB_RAMCFG__NOOFCOLS__SHIFT;
2204                adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2205                if (adev->gfx.config.mem_row_size_in_kb > 4)
2206                        adev->gfx.config.mem_row_size_in_kb = 4;
2207        }
2208        /* XXX use MC settings? */
2209        adev->gfx.config.shader_engine_tile_size = 32;
2210        adev->gfx.config.num_gpus = 1;
2211        adev->gfx.config.multi_gpu_tile_size = 64;
2212
2213        /* fix up row size */
2214        gb_addr_config &= ~GB_ADDR_CONFIG__ROW_SIZE_MASK;
2215        switch (adev->gfx.config.mem_row_size_in_kb) {
2216        case 1:
2217        default:
2218                gb_addr_config |= (0 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
2219                break;
2220        case 2:
2221                gb_addr_config |= (1 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
2222                break;
2223        case 4:
2224                gb_addr_config |= (2 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
2225                break;
2226        }
2227        adev->gfx.config.gb_addr_config = gb_addr_config;
2228
2229        WREG32(mmGB_ADDR_CONFIG, gb_addr_config);
2230        WREG32(mmHDP_ADDR_CONFIG, gb_addr_config);
2231        WREG32(mmDMIF_ADDR_CALC, gb_addr_config);
2232        WREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
2233        WREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
2234        WREG32(mmUVD_UDEC_ADDR_CONFIG, gb_addr_config);
2235        WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
2236        WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
2237
2238        gfx_v7_0_tiling_mode_table_init(adev);
2239
2240        gfx_v7_0_setup_rb(adev, adev->gfx.config.max_shader_engines,
2241                          adev->gfx.config.max_sh_per_se,
2242                          adev->gfx.config.max_backends_per_se);
2243
2244        /* set HW defaults for 3D engine */
2245        WREG32(mmCP_MEQ_THRESHOLDS,
2246                        (0x30 << CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT) |
2247                        (0x60 << CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT));
2248
2249        mutex_lock(&adev->grbm_idx_mutex);
2250        /*
2251         * making sure that the following register writes will be broadcasted
2252         * to all the shaders
2253         */
2254        gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2255
2256        /* XXX SH_MEM regs */
2257        /* where to put LDS, scratch, GPUVM in FSA64 space */
2258        sh_mem_cfg = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 
2259                                   SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2260
2261        mutex_lock(&adev->srbm_mutex);
2262        for (i = 0; i < 16; i++) {
2263                cik_srbm_select(adev, 0, 0, 0, i);
2264                /* CP and shaders */
2265                WREG32(mmSH_MEM_CONFIG, sh_mem_cfg);
2266                WREG32(mmSH_MEM_APE1_BASE, 1);
2267                WREG32(mmSH_MEM_APE1_LIMIT, 0);
2268                WREG32(mmSH_MEM_BASES, 0);
2269        }
2270        cik_srbm_select(adev, 0, 0, 0, 0);
2271        mutex_unlock(&adev->srbm_mutex);
2272
2273        gmc_v7_0_init_compute_vmid(adev);
2274
2275        WREG32(mmSX_DEBUG_1, 0x20);
2276
2277        WREG32(mmTA_CNTL_AUX, 0x00010000);
2278
2279        tmp = RREG32(mmSPI_CONFIG_CNTL);
2280        tmp |= 0x03000000;
2281        WREG32(mmSPI_CONFIG_CNTL, tmp);
2282
2283        WREG32(mmSQ_CONFIG, 1);
2284
2285        WREG32(mmDB_DEBUG, 0);
2286
2287        tmp = RREG32(mmDB_DEBUG2) & ~0xf00fffff;
2288        tmp |= 0x00000400;
2289        WREG32(mmDB_DEBUG2, tmp);
2290
2291        tmp = RREG32(mmDB_DEBUG3) & ~0x0002021c;
2292        tmp |= 0x00020200;
2293        WREG32(mmDB_DEBUG3, tmp);
2294
2295        tmp = RREG32(mmCB_HW_CONTROL) & ~0x00010000;
2296        tmp |= 0x00018208;
2297        WREG32(mmCB_HW_CONTROL, tmp);
2298
2299        WREG32(mmSPI_CONFIG_CNTL_1, (4 << SPI_CONFIG_CNTL_1__VTX_DONE_DELAY__SHIFT));
2300
2301        WREG32(mmPA_SC_FIFO_SIZE,
2302                ((adev->gfx.config.sc_prim_fifo_size_frontend << PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
2303                (adev->gfx.config.sc_prim_fifo_size_backend << PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
2304                (adev->gfx.config.sc_hiz_tile_fifo_size << PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
2305                (adev->gfx.config.sc_earlyz_tile_fifo_size << PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT)));
2306
2307        WREG32(mmVGT_NUM_INSTANCES, 1);
2308
2309        WREG32(mmCP_PERFMON_CNTL, 0);
2310
2311        WREG32(mmSQ_CONFIG, 0);
2312
2313        WREG32(mmPA_SC_FORCE_EOV_MAX_CNTS,
2314                ((4095 << PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_CLK_CNT__SHIFT) |
2315                (255 << PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_REZ_CNT__SHIFT)));
2316
2317        WREG32(mmVGT_CACHE_INVALIDATION,
2318                (VC_AND_TC << VGT_CACHE_INVALIDATION__CACHE_INVALIDATION__SHIFT) |
2319                (ES_AND_GS_AUTO << VGT_CACHE_INVALIDATION__AUTO_INVLD_EN__SHIFT));
2320
2321        WREG32(mmVGT_GS_VERTEX_REUSE, 16);
2322        WREG32(mmPA_SC_LINE_STIPPLE_STATE, 0);
2323
2324        WREG32(mmPA_CL_ENHANCE, PA_CL_ENHANCE__CLIP_VTX_REORDER_ENA_MASK |
2325                        (3 << PA_CL_ENHANCE__NUM_CLIP_SEQ__SHIFT));
2326        WREG32(mmPA_SC_ENHANCE, PA_SC_ENHANCE__ENABLE_PA_SC_OUT_OF_ORDER_MASK);
2327        mutex_unlock(&adev->grbm_idx_mutex);
2328
2329        udelay(50);
2330}
2331
2332/*
2333 * GPU scratch registers helpers function.
2334 */
2335/**
2336 * gfx_v7_0_scratch_init - setup driver info for CP scratch regs
2337 *
2338 * @adev: amdgpu_device pointer
2339 *
2340 * Set up the number and offset of the CP scratch registers.
2341 * NOTE: use of CP scratch registers is a legacy inferface and
2342 * is not used by default on newer asics (r6xx+).  On newer asics,
2343 * memory buffers are used for fences rather than scratch regs.
2344 */
2345static void gfx_v7_0_scratch_init(struct amdgpu_device *adev)
2346{
2347        int i;
2348
2349        adev->gfx.scratch.num_reg = 7;
2350        adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
2351        for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
2352                adev->gfx.scratch.free[i] = true;
2353                adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
2354        }
2355}
2356
2357/**
2358 * gfx_v7_0_ring_test_ring - basic gfx ring test
2359 *
2360 * @adev: amdgpu_device pointer
2361 * @ring: amdgpu_ring structure holding ring information
2362 *
2363 * Allocate a scratch register and write to it using the gfx ring (CIK).
2364 * Provides a basic gfx ring test to verify that the ring is working.
2365 * Used by gfx_v7_0_cp_gfx_resume();
2366 * Returns 0 on success, error on failure.
2367 */
2368static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring)
2369{
2370        struct amdgpu_device *adev = ring->adev;
2371        uint32_t scratch;
2372        uint32_t tmp = 0;
2373        unsigned i;
2374        int r;
2375
2376        r = amdgpu_gfx_scratch_get(adev, &scratch);
2377        if (r) {
2378                DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
2379                return r;
2380        }
2381        WREG32(scratch, 0xCAFEDEAD);
2382        r = amdgpu_ring_lock(ring, 3);
2383        if (r) {
2384                DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", ring->idx, r);
2385                amdgpu_gfx_scratch_free(adev, scratch);
2386                return r;
2387        }
2388        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2389        amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
2390        amdgpu_ring_write(ring, 0xDEADBEEF);
2391        amdgpu_ring_unlock_commit(ring);
2392
2393        for (i = 0; i < adev->usec_timeout; i++) {
2394                tmp = RREG32(scratch);
2395                if (tmp == 0xDEADBEEF)
2396                        break;
2397                DRM_UDELAY(1);
2398        }
2399        if (i < adev->usec_timeout) {
2400                DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2401        } else {
2402                DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
2403                          ring->idx, scratch, tmp);
2404                r = -EINVAL;
2405        }
2406        amdgpu_gfx_scratch_free(adev, scratch);
2407        return r;
2408}
2409
2410/**
2411 * gfx_v7_0_ring_emit_hdp - emit an hdp flush on the cp
2412 *
2413 * @adev: amdgpu_device pointer
2414 * @ridx: amdgpu ring index
2415 *
2416 * Emits an hdp flush on the cp.
2417 */
2418static void gfx_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
2419{
2420        u32 ref_and_mask;
2421        int usepfp = ring->type == AMDGPU_RING_TYPE_COMPUTE ? 0 : 1;
2422
2423        if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
2424                switch (ring->me) {
2425                case 1:
2426                        ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
2427                        break;
2428                case 2:
2429                        ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
2430                        break;
2431                default:
2432                        return;
2433                }
2434        } else {
2435                ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
2436        }
2437
2438        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
2439        amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
2440                                 WAIT_REG_MEM_FUNCTION(3) |  /* == */
2441                                 WAIT_REG_MEM_ENGINE(usepfp)));   /* pfp or me */
2442        amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
2443        amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
2444        amdgpu_ring_write(ring, ref_and_mask);
2445        amdgpu_ring_write(ring, ref_and_mask);
2446        amdgpu_ring_write(ring, 0x20); /* poll interval */
2447}
2448
2449/**
2450 * gfx_v7_0_ring_emit_fence_gfx - emit a fence on the gfx ring
2451 *
2452 * @adev: amdgpu_device pointer
2453 * @fence: amdgpu fence object
2454 *
2455 * Emits a fence sequnce number on the gfx ring and flushes
2456 * GPU caches.
2457 */
2458static void gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
2459                                         u64 seq, unsigned flags)
2460{
2461        bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
2462        bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
2463        /* Workaround for cache flush problems. First send a dummy EOP
2464         * event down the pipe with seq one below.
2465         */
2466        amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
2467        amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
2468                                 EOP_TC_ACTION_EN |
2469                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2470                                 EVENT_INDEX(5)));
2471        amdgpu_ring_write(ring, addr & 0xfffffffc);
2472        amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
2473                                DATA_SEL(1) | INT_SEL(0));
2474        amdgpu_ring_write(ring, lower_32_bits(seq - 1));
2475        amdgpu_ring_write(ring, upper_32_bits(seq - 1));
2476
2477        /* Then send the real EOP event down the pipe. */
2478        amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
2479        amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
2480                                 EOP_TC_ACTION_EN |
2481                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2482                                 EVENT_INDEX(5)));
2483        amdgpu_ring_write(ring, addr & 0xfffffffc);
2484        amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
2485                                DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
2486        amdgpu_ring_write(ring, lower_32_bits(seq));
2487        amdgpu_ring_write(ring, upper_32_bits(seq));
2488}
2489
2490/**
2491 * gfx_v7_0_ring_emit_fence_compute - emit a fence on the compute ring
2492 *
2493 * @adev: amdgpu_device pointer
2494 * @fence: amdgpu fence object
2495 *
2496 * Emits a fence sequnce number on the compute ring and flushes
2497 * GPU caches.
2498 */
2499static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
2500                                             u64 addr, u64 seq,
2501                                             unsigned flags)
2502{
2503        bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
2504        bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
2505
2506        /* RELEASE_MEM - flush caches, send int */
2507        amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
2508        amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
2509                                 EOP_TC_ACTION_EN |
2510                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2511                                 EVENT_INDEX(5)));
2512        amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
2513        amdgpu_ring_write(ring, addr & 0xfffffffc);
2514        amdgpu_ring_write(ring, upper_32_bits(addr));
2515        amdgpu_ring_write(ring, lower_32_bits(seq));
2516        amdgpu_ring_write(ring, upper_32_bits(seq));
2517}
2518
2519/**
2520 * gfx_v7_0_ring_emit_semaphore - emit a semaphore on the CP ring
2521 *
2522 * @ring: amdgpu ring buffer object
2523 * @semaphore: amdgpu semaphore object
2524 * @emit_wait: Is this a sempahore wait?
2525 *
2526 * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
2527 * from running ahead of semaphore waits.
2528 */
2529static bool gfx_v7_0_ring_emit_semaphore(struct amdgpu_ring *ring,
2530                                         struct amdgpu_semaphore *semaphore,
2531                                         bool emit_wait)
2532{
2533        uint64_t addr = semaphore->gpu_addr;
2534        unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
2535
2536        amdgpu_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
2537        amdgpu_ring_write(ring, addr & 0xffffffff);
2538        amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
2539
2540        if (emit_wait && (ring->type == AMDGPU_RING_TYPE_GFX)) {
2541                /* Prevent the PFP from running ahead of the semaphore wait */
2542                amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
2543                amdgpu_ring_write(ring, 0x0);
2544        }
2545
2546        return true;
2547}
2548
2549/*
2550 * IB stuff
2551 */
2552/**
2553 * gfx_v7_0_ring_emit_ib - emit an IB (Indirect Buffer) on the ring
2554 *
2555 * @ring: amdgpu_ring structure holding ring information
2556 * @ib: amdgpu indirect buffer object
2557 *
2558 * Emits an DE (drawing engine) or CE (constant engine) IB
2559 * on the gfx ring.  IBs are usually generated by userspace
2560 * acceleration drivers and submitted to the kernel for
2561 * sheduling on the ring.  This function schedules the IB
2562 * on the gfx ring for execution by the GPU.
2563 */
2564static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
2565                                  struct amdgpu_ib *ib)
2566{
2567        bool need_ctx_switch = ring->current_ctx != ib->ctx;
2568        u32 header, control = 0;
2569        u32 next_rptr = ring->wptr + 5;
2570
2571        /* drop the CE preamble IB for the same context */
2572        if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && !need_ctx_switch)
2573                return;
2574
2575        if (need_ctx_switch)
2576                next_rptr += 2;
2577
2578        next_rptr += 4;
2579        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2580        amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
2581        amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
2582        amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
2583        amdgpu_ring_write(ring, next_rptr);
2584
2585        /* insert SWITCH_BUFFER packet before first IB in the ring frame */
2586        if (need_ctx_switch) {
2587                amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
2588                amdgpu_ring_write(ring, 0);
2589        }
2590
2591        if (ib->flags & AMDGPU_IB_FLAG_CE)
2592                header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
2593        else
2594                header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
2595
2596        control |= ib->length_dw |
2597                (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);
2598
2599        amdgpu_ring_write(ring, header);
2600        amdgpu_ring_write(ring,
2601#ifdef __BIG_ENDIAN
2602                          (2 << 0) |
2603#endif
2604                          (ib->gpu_addr & 0xFFFFFFFC));
2605        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
2606        amdgpu_ring_write(ring, control);
2607}
2608
2609static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
2610                                  struct amdgpu_ib *ib)
2611{
2612        u32 header, control = 0;
2613        u32 next_rptr = ring->wptr + 5;
2614
2615        control |= INDIRECT_BUFFER_VALID;
2616        next_rptr += 4;
2617        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2618        amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
2619        amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
2620        amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
2621        amdgpu_ring_write(ring, next_rptr);
2622
2623        header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
2624
2625        control |= ib->length_dw |
2626                           (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);
2627
2628        amdgpu_ring_write(ring, header);
2629        amdgpu_ring_write(ring,
2630#ifdef __BIG_ENDIAN
2631                                          (2 << 0) |
2632#endif
2633                                          (ib->gpu_addr & 0xFFFFFFFC));
2634        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
2635        amdgpu_ring_write(ring, control);
2636}
2637
2638/**
2639 * gfx_v7_0_ring_test_ib - basic ring IB test
2640 *
2641 * @ring: amdgpu_ring structure holding ring information
2642 *
2643 * Allocate an IB and execute it on the gfx ring (CIK).
2644 * Provides a basic gfx ring test to verify that IBs are working.
2645 * Returns 0 on success, error on failure.
2646 */
2647static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring)
2648{
2649        struct amdgpu_device *adev = ring->adev;
2650        struct amdgpu_ib ib;
2651        struct fence *f = NULL;
2652        uint32_t scratch;
2653        uint32_t tmp = 0;
2654        unsigned i;
2655        int r;
2656
2657        r = amdgpu_gfx_scratch_get(adev, &scratch);
2658        if (r) {
2659                DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
2660                return r;
2661        }
2662        WREG32(scratch, 0xCAFEDEAD);
2663        memset(&ib, 0, sizeof(ib));
2664        r = amdgpu_ib_get(ring, NULL, 256, &ib);
2665        if (r) {
2666                DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
2667                goto err1;
2668        }
2669        ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
2670        ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
2671        ib.ptr[2] = 0xDEADBEEF;
2672        ib.length_dw = 3;
2673
2674        r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL,
2675                                                 AMDGPU_FENCE_OWNER_UNDEFINED,
2676                                                 &f);
2677        if (r)
2678                goto err2;
2679
2680        r = fence_wait(f, false);
2681        if (r) {
2682                DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
2683                goto err2;
2684        }
2685        for (i = 0; i < adev->usec_timeout; i++) {
2686                tmp = RREG32(scratch);
2687                if (tmp == 0xDEADBEEF)
2688                        break;
2689                DRM_UDELAY(1);
2690        }
2691        if (i < adev->usec_timeout) {
2692                DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
2693                         ring->idx, i);
2694                goto err2;
2695        } else {
2696                DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
2697                          scratch, tmp);
2698                r = -EINVAL;
2699        }
2700
2701err2:
2702        fence_put(f);
2703        amdgpu_ib_free(adev, &ib);
2704err1:
2705        amdgpu_gfx_scratch_free(adev, scratch);
2706        return r;
2707}
2708
/*
 * CP.
 * On CIK, gfx and compute now have independent command processors.
 *
 * GFX
 * Gfx consists of a single ring and can process both gfx jobs and
 * compute jobs.  The gfx CP consists of three microengines (ME):
 * PFP - Pre-Fetch Parser
 * ME - Micro Engine
 * CE - Constant Engine
 * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
 * used by the DE so that they can be loaded into cache in parallel
 * while the DE is processing state update packets.
 *
 * Compute
 * The compute CP consists of two microengines (ME):
 * MEC1 - Compute MicroEngine 1
 * MEC2 - Compute MicroEngine 2
 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
 * The queues are exposed to userspace and are programmed directly
 * by the compute runtime.
 */
2732/**
2733 * gfx_v7_0_cp_gfx_enable - enable/disable the gfx CP MEs
2734 *
2735 * @adev: amdgpu_device pointer
2736 * @enable: enable or disable the MEs
2737 *
2738 * Halts or unhalts the gfx MEs.
2739 */
2740static void gfx_v7_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2741{
2742        int i;
2743
2744        if (enable) {
2745                WREG32(mmCP_ME_CNTL, 0);
2746        } else {
2747                WREG32(mmCP_ME_CNTL, (CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK));
2748                for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2749                        adev->gfx.gfx_ring[i].ready = false;
2750        }
2751        udelay(50);
2752}
2753
2754/**
2755 * gfx_v7_0_cp_gfx_load_microcode - load the gfx CP ME ucode
2756 *
2757 * @adev: amdgpu_device pointer
2758 *
2759 * Loads the gfx PFP, ME, and CE ucode.
2760 * Returns 0 for success, -EINVAL if the ucode is not available.
2761 */
2762static int gfx_v7_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2763{
2764        const struct gfx_firmware_header_v1_0 *pfp_hdr;
2765        const struct gfx_firmware_header_v1_0 *ce_hdr;
2766        const struct gfx_firmware_header_v1_0 *me_hdr;
2767        const __le32 *fw_data;
2768        unsigned i, fw_size;
2769
2770        if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2771                return -EINVAL;
2772
2773        pfp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
2774        ce_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
2775        me_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
2776
2777        amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2778        amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2779        amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2780        adev->gfx.pfp_fw_version = le32_to_cpu(pfp_hdr->header.ucode_version);
2781        adev->gfx.ce_fw_version = le32_to_cpu(ce_hdr->header.ucode_version);
2782        adev->gfx.me_fw_version = le32_to_cpu(me_hdr->header.ucode_version);
2783        adev->gfx.me_feature_version = le32_to_cpu(me_hdr->ucode_feature_version);
2784        adev->gfx.ce_feature_version = le32_to_cpu(ce_hdr->ucode_feature_version);
2785        adev->gfx.pfp_feature_version = le32_to_cpu(pfp_hdr->ucode_feature_version);
2786
2787        gfx_v7_0_cp_gfx_enable(adev, false);
2788
2789        /* PFP */
2790        fw_data = (const __le32 *)
2791                (adev->gfx.pfp_fw->data +
2792                 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2793        fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2794        WREG32(mmCP_PFP_UCODE_ADDR, 0);
2795        for (i = 0; i < fw_size; i++)
2796                WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2797        WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2798
2799        /* CE */
2800        fw_data = (const __le32 *)
2801                (adev->gfx.ce_fw->data +
2802                 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2803        fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2804        WREG32(mmCP_CE_UCODE_ADDR, 0);
2805        for (i = 0; i < fw_size; i++)
2806                WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2807        WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2808
2809        /* ME */
2810        fw_data = (const __le32 *)
2811                (adev->gfx.me_fw->data +
2812                 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2813        fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2814        WREG32(mmCP_ME_RAM_WADDR, 0);
2815        for (i = 0; i < fw_size; i++)
2816                WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2817        WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2818
2819        return 0;
2820}
2821
2822/**
2823 * gfx_v7_0_cp_gfx_start - start the gfx ring
2824 *
2825 * @adev: amdgpu_device pointer
2826 *
2827 * Enables the ring and loads the clear state context and other
2828 * packets required to init the ring.
2829 * Returns 0 for success, error for failure.
2830 */
2831static int gfx_v7_0_cp_gfx_start(struct amdgpu_device *adev)
2832{
2833        struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2834        const struct cs_section_def *sect = NULL;
2835        const struct cs_extent_def *ext = NULL;
2836        int r, i;
2837
2838        /* init the CP */
2839        WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
2840        WREG32(mmCP_ENDIAN_SWAP, 0);
2841        WREG32(mmCP_DEVICE_ID, 1);
2842
2843        gfx_v7_0_cp_gfx_enable(adev, true);
2844
2845        r = amdgpu_ring_lock(ring, gfx_v7_0_get_csb_size(adev) + 8);
2846        if (r) {
2847                DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
2848                return r;
2849        }
2850
2851        /* init the CE partitions.  CE only used for gfx on CIK */
2852        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2853        amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2854        amdgpu_ring_write(ring, 0x8000);
2855        amdgpu_ring_write(ring, 0x8000);
2856
2857        /* clear state buffer */
2858        amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2859        amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2860
2861        amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2862        amdgpu_ring_write(ring, 0x80000000);
2863        amdgpu_ring_write(ring, 0x80000000);
2864
2865        for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
2866                for (ext = sect->section; ext->extent != NULL; ++ext) {
2867                        if (sect->id == SECT_CONTEXT) {
2868                                amdgpu_ring_write(ring,
2869                                                  PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
2870                                amdgpu_ring_write(ring, ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
2871                                for (i = 0; i < ext->reg_count; i++)
2872                                        amdgpu_ring_write(ring, ext->extent[i]);
2873                        }
2874                }
2875        }
2876
2877        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2878        amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
2879        switch (adev->asic_type) {
2880        case CHIP_BONAIRE:
2881                amdgpu_ring_write(ring, 0x16000012);
2882                amdgpu_ring_write(ring, 0x00000000);
2883                break;
2884        case CHIP_KAVERI:
2885                amdgpu_ring_write(ring, 0x00000000); /* XXX */
2886                amdgpu_ring_write(ring, 0x00000000);
2887                break;
2888        case CHIP_KABINI:
2889        case CHIP_MULLINS:
2890                amdgpu_ring_write(ring, 0x00000000); /* XXX */
2891                amdgpu_ring_write(ring, 0x00000000);
2892                break;
2893        case CHIP_HAWAII:
2894                amdgpu_ring_write(ring, 0x3a00161a);
2895                amdgpu_ring_write(ring, 0x0000002e);
2896                break;
2897        default:
2898                amdgpu_ring_write(ring, 0x00000000);
2899                amdgpu_ring_write(ring, 0x00000000);
2900                break;
2901        }
2902
2903        amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2904        amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2905
2906        amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2907        amdgpu_ring_write(ring, 0);
2908
2909        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2910        amdgpu_ring_write(ring, 0x00000316);
2911        amdgpu_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
2912        amdgpu_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
2913
2914        amdgpu_ring_unlock_commit(ring);
2915
2916        return 0;
2917}
2918
2919/**
2920 * gfx_v7_0_cp_gfx_resume - setup the gfx ring buffer registers
2921 *
2922 * @adev: amdgpu_device pointer
2923 *
2924 * Program the location and size of the gfx ring buffer
2925 * and test it to make sure it's working.
2926 * Returns 0 for success, error for failure.
2927 */
2928static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev)
2929{
2930        struct amdgpu_ring *ring;
2931        u32 tmp;
2932        u32 rb_bufsz;
2933        u64 rb_addr, rptr_addr;
2934        int r;
2935
2936        WREG32(mmCP_SEM_WAIT_TIMER, 0x0);
2937        if (adev->asic_type != CHIP_HAWAII)
2938                WREG32(mmCP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
2939
2940        /* Set the write pointer delay */
2941        WREG32(mmCP_RB_WPTR_DELAY, 0);
2942
2943        /* set the RB to use vmid 0 */
2944        WREG32(mmCP_RB_VMID, 0);
2945
2946        WREG32(mmSCRATCH_ADDR, 0);
2947
2948        /* ring 0 - compute and gfx */
2949        /* Set ring buffer size */
2950        ring = &adev->gfx.gfx_ring[0];
2951        rb_bufsz = order_base_2(ring->ring_size / 8);
2952        tmp = (order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2953#ifdef __BIG_ENDIAN
2954        tmp |= 2 << CP_RB0_CNTL__BUF_SWAP__SHIFT;
2955#endif
2956        WREG32(mmCP_RB0_CNTL, tmp);
2957
2958        /* Initialize the ring buffer's read and write pointers */
2959        WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
2960        ring->wptr = 0;
2961        WREG32(mmCP_RB0_WPTR, ring->wptr);
2962
2963        /* set the wb address wether it's enabled or not */
2964        rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2965        WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
2966        WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
2967
2968        /* scratch register shadowing is no longer supported */
2969        WREG32(mmSCRATCH_UMSK, 0);
2970
2971        mdelay(1);
2972        WREG32(mmCP_RB0_CNTL, tmp);
2973
2974        rb_addr = ring->gpu_addr >> 8;
2975        WREG32(mmCP_RB0_BASE, rb_addr);
2976        WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
2977
2978        /* start the ring */
2979        gfx_v7_0_cp_gfx_start(adev);
2980        ring->ready = true;
2981        r = amdgpu_ring_test_ring(ring);
2982        if (r) {
2983                ring->ready = false;
2984                return r;
2985        }
2986
2987        return 0;
2988}
2989
2990static u32 gfx_v7_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
2991{
2992        u32 rptr;
2993
2994        rptr = ring->adev->wb.wb[ring->rptr_offs];
2995
2996        return rptr;
2997}
2998
2999static u32 gfx_v7_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
3000{
3001        struct amdgpu_device *adev = ring->adev;
3002        u32 wptr;
3003
3004        wptr = RREG32(mmCP_RB0_WPTR);
3005
3006        return wptr;
3007}
3008
3009static void gfx_v7_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
3010{
3011        struct amdgpu_device *adev = ring->adev;
3012
3013        WREG32(mmCP_RB0_WPTR, ring->wptr);
3014        (void)RREG32(mmCP_RB0_WPTR);
3015}
3016
3017static u32 gfx_v7_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
3018{
3019        u32 rptr;
3020
3021        rptr = ring->adev->wb.wb[ring->rptr_offs];
3022
3023        return rptr;
3024}
3025
3026static u32 gfx_v7_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
3027{
3028        u32 wptr;
3029
3030        /* XXX check if swapping is necessary on BE */
3031        wptr = ring->adev->wb.wb[ring->wptr_offs];
3032
3033        return wptr;
3034}
3035
3036static void gfx_v7_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
3037{
3038        struct amdgpu_device *adev = ring->adev;
3039
3040        /* XXX check if swapping is necessary on BE */
3041        adev->wb.wb[ring->wptr_offs] = ring->wptr;
3042        WDOORBELL32(ring->doorbell_index, ring->wptr);
3043}
3044
3045/**
3046 * gfx_v7_0_cp_compute_enable - enable/disable the compute CP MEs
3047 *
3048 * @adev: amdgpu_device pointer
3049 * @enable: enable or disable the MEs
3050 *
3051 * Halts or unhalts the compute MEs.
3052 */
3053static void gfx_v7_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3054{
3055        int i;
3056
3057        if (enable) {
3058                WREG32(mmCP_MEC_CNTL, 0);
3059        } else {
3060                WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3061                for (i = 0; i < adev->gfx.num_compute_rings; i++)
3062                        adev->gfx.compute_ring[i].ready = false;
3063        }
3064        udelay(50);
3065}
3066
3067/**
3068 * gfx_v7_0_cp_compute_load_microcode - load the compute CP ME ucode
3069 *
3070 * @adev: amdgpu_device pointer
3071 *
3072 * Loads the compute MEC1&2 ucode.
3073 * Returns 0 for success, -EINVAL if the ucode is not available.
3074 */
3075static int gfx_v7_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3076{
3077        const struct gfx_firmware_header_v1_0 *mec_hdr;
3078        const __le32 *fw_data;
3079        unsigned i, fw_size;
3080
3081        if (!adev->gfx.mec_fw)
3082                return -EINVAL;
3083
3084        mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3085        amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3086        adev->gfx.mec_fw_version = le32_to_cpu(mec_hdr->header.ucode_version);
3087        adev->gfx.mec_feature_version = le32_to_cpu(
3088                                        mec_hdr->ucode_feature_version);
3089
3090        gfx_v7_0_cp_compute_enable(adev, false);
3091
3092        /* MEC1 */
3093        fw_data = (const __le32 *)
3094                (adev->gfx.mec_fw->data +
3095                 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3096        fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
3097        WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
3098        for (i = 0; i < fw_size; i++)
3099                WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
3100        WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
3101
3102        if (adev->asic_type == CHIP_KAVERI) {
3103                const struct gfx_firmware_header_v1_0 *mec2_hdr;
3104
3105                if (!adev->gfx.mec2_fw)
3106                        return -EINVAL;
3107
3108                mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
3109                amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
3110                adev->gfx.mec2_fw_version = le32_to_cpu(mec2_hdr->header.ucode_version);
3111                adev->gfx.mec2_feature_version = le32_to_cpu(
3112                                mec2_hdr->ucode_feature_version);
3113
3114                /* MEC2 */
3115                fw_data = (const __le32 *)
3116                        (adev->gfx.mec2_fw->data +
3117                         le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
3118                fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
3119                WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
3120                for (i = 0; i < fw_size; i++)
3121                        WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
3122                WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
3123        }
3124
3125        return 0;
3126}
3127
3128/**
3129 * gfx_v7_0_cp_compute_start - start the compute queues
3130 *
3131 * @adev: amdgpu_device pointer
3132 *
3133 * Enable the compute queues.
3134 * Returns 0 for success, error for failure.
3135 */
3136static int gfx_v7_0_cp_compute_start(struct amdgpu_device *adev)
3137{
3138        gfx_v7_0_cp_compute_enable(adev, true);
3139
3140        return 0;
3141}
3142
3143/**
3144 * gfx_v7_0_cp_compute_fini - stop the compute queues
3145 *
3146 * @adev: amdgpu_device pointer
3147 *
3148 * Stop the compute queues and tear down the driver queue
3149 * info.
3150 */
3151static void gfx_v7_0_cp_compute_fini(struct amdgpu_device *adev)
3152{
3153        int i, r;
3154
3155        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3156                struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3157
3158                if (ring->mqd_obj) {
3159                        r = amdgpu_bo_reserve(ring->mqd_obj, false);
3160                        if (unlikely(r != 0))
3161                                dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
3162
3163                        amdgpu_bo_unpin(ring->mqd_obj);
3164                        amdgpu_bo_unreserve(ring->mqd_obj);
3165
3166                        amdgpu_bo_unref(&ring->mqd_obj);
3167                        ring->mqd_obj = NULL;
3168                }
3169        }
3170}
3171
3172static void gfx_v7_0_mec_fini(struct amdgpu_device *adev)
3173{
3174        int r;
3175
3176        if (adev->gfx.mec.hpd_eop_obj) {
3177                r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
3178                if (unlikely(r != 0))
3179                        dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
3180                amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
3181                amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
3182
3183                amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
3184                adev->gfx.mec.hpd_eop_obj = NULL;
3185        }
3186}
3187
3188#define MEC_HPD_SIZE 2048
3189
3190static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
3191{
3192        int r;
3193        u32 *hpd;
3194
3195        /*
3196         * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
3197         * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
3198         * Nonetheless, we assign only 1 pipe because all other pipes will
3199         * be handled by KFD
3200         */
3201        adev->gfx.mec.num_mec = 1;
3202        adev->gfx.mec.num_pipe = 1;
3203        adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
3204
3205        if (adev->gfx.mec.hpd_eop_obj == NULL) {
3206                r = amdgpu_bo_create(adev,
3207                                     adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
3208                                     PAGE_SIZE, true,
3209                                     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
3210                                     &adev->gfx.mec.hpd_eop_obj);
3211                if (r) {
3212                        dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
3213                        return r;
3214                }
3215        }
3216
3217        r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
3218        if (unlikely(r != 0)) {
3219                gfx_v7_0_mec_fini(adev);
3220                return r;
3221        }
3222        r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
3223                          &adev->gfx.mec.hpd_eop_gpu_addr);
3224        if (r) {
3225                dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
3226                gfx_v7_0_mec_fini(adev);
3227                return r;
3228        }
3229        r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
3230        if (r) {
3231                dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
3232                gfx_v7_0_mec_fini(adev);
3233                return r;
3234        }
3235
3236        /* clear memory.  Not sure if this is required or not */
3237        memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
3238
3239        amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
3240        amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
3241
3242        return 0;
3243}
3244
3245struct hqd_registers
3246{
3247        u32 cp_mqd_base_addr;
3248        u32 cp_mqd_base_addr_hi;
3249        u32 cp_hqd_active;
3250        u32 cp_hqd_vmid;
3251        u32 cp_hqd_persistent_state;
3252        u32 cp_hqd_pipe_priority;
3253        u32 cp_hqd_queue_priority;
3254        u32 cp_hqd_quantum;
3255        u32 cp_hqd_pq_base;
3256        u32 cp_hqd_pq_base_hi;
3257        u32 cp_hqd_pq_rptr;
3258        u32 cp_hqd_pq_rptr_report_addr;
3259        u32 cp_hqd_pq_rptr_report_addr_hi;
3260        u32 cp_hqd_pq_wptr_poll_addr;
3261        u32 cp_hqd_pq_wptr_poll_addr_hi;
3262        u32 cp_hqd_pq_doorbell_control;
3263        u32 cp_hqd_pq_wptr;
3264        u32 cp_hqd_pq_control;
3265        u32 cp_hqd_ib_base_addr;
3266        u32 cp_hqd_ib_base_addr_hi;
3267        u32 cp_hqd_ib_rptr;
3268        u32 cp_hqd_ib_control;
3269        u32 cp_hqd_iq_timer;
3270        u32 cp_hqd_iq_rptr;
3271        u32 cp_hqd_dequeue_request;
3272        u32 cp_hqd_dma_offload;
3273        u32 cp_hqd_sema_cmd;
3274        u32 cp_hqd_msg_type;
3275        u32 cp_hqd_atomic0_preop_lo;
3276        u32 cp_hqd_atomic0_preop_hi;
3277        u32 cp_hqd_atomic1_preop_lo;
3278        u32 cp_hqd_atomic1_preop_hi;
3279        u32 cp_hqd_hq_scheduler0;
3280        u32 cp_hqd_hq_scheduler1;
3281        u32 cp_mqd_control;
3282};
3283
3284struct bonaire_mqd
3285{
3286        u32 header;
3287        u32 dispatch_initiator;
3288        u32 dimensions[3];
3289        u32 start_idx[3];
3290        u32 num_threads[3];
3291        u32 pipeline_stat_enable;
3292        u32 perf_counter_enable;
3293        u32 pgm[2];
3294        u32 tba[2];
3295        u32 tma[2];
3296        u32 pgm_rsrc[2];
3297        u32 vmid;
3298        u32 resource_limits;
3299        u32 static_thread_mgmt01[2];
3300        u32 tmp_ring_size;
3301        u32 static_thread_mgmt23[2];
3302        u32 restart[3];
3303        u32 thread_trace_enable;
3304        u32 reserved1;
3305        u32 user_data[16];
3306        u32 vgtcs_invoke_count[2];
3307        struct hqd_registers queue_state;
3308        u32 dequeue_cntr;
3309        u32 interrupt_queue[64];
3310};
3311
3312/**
3313 * gfx_v7_0_cp_compute_resume - setup the compute queue registers
3314 *
3315 * @adev: amdgpu_device pointer
3316 *
3317 * Program the compute queues and test them to make sure they
3318 * are working.
3319 * Returns 0 for success, error for failure.
3320 */
3321static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
3322{
3323        int r, i, j;
3324        u32 tmp;
3325        bool use_doorbell = true;
3326        u64 hqd_gpu_addr;
3327        u64 mqd_gpu_addr;
3328        u64 eop_gpu_addr;
3329        u64 wb_gpu_addr;
3330        u32 *buf;
3331        struct bonaire_mqd *mqd;
3332
3333        r = gfx_v7_0_cp_compute_start(adev);
3334        if (r)
3335                return r;
3336
3337        /* fix up chicken bits */
3338        tmp = RREG32(mmCP_CPF_DEBUG);
3339        tmp |= (1 << 23);
3340        WREG32(mmCP_CPF_DEBUG, tmp);
3341
3342        /* init the pipes */
3343        mutex_lock(&adev->srbm_mutex);
3344        for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
3345                int me = (i < 4) ? 1 : 2;
3346                int pipe = (i < 4) ? i : (i - 4);
3347
3348                eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
3349
3350                cik_srbm_select(adev, me, pipe, 0, 0);
3351
3352                /* write the EOP addr */
3353                WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
3354                WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
3355
3356                /* set the VMID assigned */
3357                WREG32(mmCP_HPD_EOP_VMID, 0);
3358
3359                /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3360                tmp = RREG32(mmCP_HPD_EOP_CONTROL);
3361                tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK;
3362                tmp |= order_base_2(MEC_HPD_SIZE / 8);
3363                WREG32(mmCP_HPD_EOP_CONTROL, tmp);
3364        }
3365        cik_srbm_select(adev, 0, 0, 0, 0);
3366        mutex_unlock(&adev->srbm_mutex);
3367
3368        /* init the queues.  Just two for now. */
3369        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3370                struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3371
3372                if (ring->mqd_obj == NULL) {
3373                        r = amdgpu_bo_create(adev,
3374                                             sizeof(struct bonaire_mqd),
3375                                             PAGE_SIZE, true,
3376                                             AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
3377                                             &ring->mqd_obj);
3378                        if (r) {
3379                                dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
3380                                return r;
3381                        }
3382                }
3383
3384                r = amdgpu_bo_reserve(ring->mqd_obj, false);
3385                if (unlikely(r != 0)) {
3386                        gfx_v7_0_cp_compute_fini(adev);
3387                        return r;
3388                }
3389                r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
3390                                  &mqd_gpu_addr);
3391                if (r) {
3392                        dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
3393                        gfx_v7_0_cp_compute_fini(adev);
3394                        return r;
3395                }
3396                r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
3397                if (r) {
3398                        dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
3399                        gfx_v7_0_cp_compute_fini(adev);
3400                        return r;
3401                }
3402
3403                /* init the mqd struct */
3404                memset(buf, 0, sizeof(struct bonaire_mqd));
3405
3406                mqd = (struct bonaire_mqd *)buf;
3407                mqd->header = 0xC0310800;
3408                mqd->static_thread_mgmt01[0] = 0xffffffff;
3409                mqd->static_thread_mgmt01[1] = 0xffffffff;
3410                mqd->static_thread_mgmt23[0] = 0xffffffff;
3411                mqd->static_thread_mgmt23[1] = 0xffffffff;
3412
3413                mutex_lock(&adev->srbm_mutex);
3414                cik_srbm_select(adev, ring->me,
3415                                ring->pipe,
3416                                ring->queue, 0);
3417
3418                /* disable wptr polling */
3419                tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
3420                tmp &= ~CP_PQ_WPTR_POLL_CNTL__EN_MASK;
3421                WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
3422
3423                /* enable doorbell? */
3424                mqd->queue_state.cp_hqd_pq_doorbell_control =
3425                        RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
3426                if (use_doorbell)
3427                        mqd->queue_state.cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
3428                else
3429                        mqd->queue_state.cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
3430                WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
3431                       mqd->queue_state.cp_hqd_pq_doorbell_control);
3432
3433                /* disable the queue if it's active */
3434                mqd->queue_state.cp_hqd_dequeue_request = 0;
3435                mqd->queue_state.cp_hqd_pq_rptr = 0;
3436                mqd->queue_state.cp_hqd_pq_wptr= 0;
3437                if (RREG32(mmCP_HQD_ACTIVE) & 1) {
3438                        WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
3439                        for (j = 0; j < adev->usec_timeout; j++) {
3440                                if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
3441                                        break;
3442                                udelay(1);
3443                        }
3444                        WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3445                        WREG32(mmCP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3446                        WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3447                }
3448
3449                /* set the pointer to the MQD */
3450                mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3451                mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3452                WREG32(mmCP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3453                WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3454                /* set MQD vmid to 0 */
3455                mqd->queue_state.cp_mqd_control = RREG32(mmCP_MQD_CONTROL);
3456                mqd->queue_state.cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK;
3457                WREG32(mmCP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3458
3459                /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3460                hqd_gpu_addr = ring->gpu_addr >> 8;
3461                mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3462                mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3463                WREG32(mmCP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3464                WREG32(mmCP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3465
3466                /* set up the HQD, this is similar to CP_RB0_CNTL */
3467                mqd->queue_state.cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL);
3468                mqd->queue_state.cp_hqd_pq_control &=
3469                        ~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK |
3470                                        CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK);
3471
3472                mqd->queue_state.cp_hqd_pq_control |=
3473                        order_base_2(ring->ring_size / 8);
3474                mqd->queue_state.cp_hqd_pq_control |=
3475                        (order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8);
3476#ifdef __BIG_ENDIAN
3477                mqd->queue_state.cp_hqd_pq_control |=
3478                        2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT;
3479#endif
3480                mqd->queue_state.cp_hqd_pq_control &=
3481                        ~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK |
3482                                CP_HQD_PQ_CONTROL__ROQ_PQ_IB_FLIP_MASK |
3483                                CP_HQD_PQ_CONTROL__PQ_VOLATILE_MASK);
3484                mqd->queue_state.cp_hqd_pq_control |=
3485                        CP_HQD_PQ_CONTROL__PRIV_STATE_MASK |
3486                        CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */
3487                WREG32(mmCP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3488
3489                /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3490                wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3491                mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3492                mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3493                WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3494                WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3495                       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3496
3497                /* set the wb address wether it's enabled or not */
3498                wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3499                mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3500                mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3501                        upper_32_bits(wb_gpu_addr) & 0xffff;
3502                WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3503                       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3504                WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3505                       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3506
3507                /* enable the doorbell if requested */
3508                if (use_doorbell) {
3509                        mqd->queue_state.cp_hqd_pq_doorbell_control =
3510                                RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
3511                        mqd->queue_state.cp_hqd_pq_doorbell_control &=
3512                                ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK;
3513                        mqd->queue_state.cp_hqd_pq_doorbell_control |=
3514                                (ring->doorbell_index <<
3515                                 CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT);
3516                        mqd->queue_state.cp_hqd_pq_doorbell_control |=
3517                                CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
3518                        mqd->queue_state.cp_hqd_pq_doorbell_control &=
3519                                ~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK |
3520                                CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK);
3521
3522                } else {
3523                        mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3524                }
3525                WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
3526                       mqd->queue_state.cp_hqd_pq_doorbell_control);
3527
3528                /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3529                ring->wptr = 0;
3530                mqd->queue_state.cp_hqd_pq_wptr = ring->wptr;
3531                WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3532                mqd->queue_state.cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
3533
3534                /* set the vmid for the queue */
3535                mqd->queue_state.cp_hqd_vmid = 0;
3536                WREG32(mmCP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3537
3538                /* activate the queue */
3539                mqd->queue_state.cp_hqd_active = 1;
3540                WREG32(mmCP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3541
3542                cik_srbm_select(adev, 0, 0, 0, 0);
3543                mutex_unlock(&adev->srbm_mutex);
3544
3545                amdgpu_bo_kunmap(ring->mqd_obj);
3546                amdgpu_bo_unreserve(ring->mqd_obj);
3547
3548                ring->ready = true;
3549                r = amdgpu_ring_test_ring(ring);
3550                if (r)
3551                        ring->ready = false;
3552        }
3553
3554        return 0;
3555}
3556
3557static void gfx_v7_0_cp_enable(struct amdgpu_device *adev, bool enable)
3558{
3559        gfx_v7_0_cp_gfx_enable(adev, enable);
3560        gfx_v7_0_cp_compute_enable(adev, enable);
3561}
3562
/**
 * gfx_v7_0_cp_load_microcode - load the gfx and compute CP microcode
 *
 * @adev: amdgpu_device pointer
 *
 * Runs the gfx loader first; the compute loader is only attempted when
 * the gfx loader reported success.
 * Returns 0 on success, otherwise the non-zero value returned by the
 * loader that failed.
 */
static int gfx_v7_0_cp_load_microcode(struct amdgpu_device *adev)
{
	int err = gfx_v7_0_cp_gfx_load_microcode(adev);

	if (err)
		return err;

	return gfx_v7_0_cp_compute_load_microcode(adev);
}
3576
3577static void gfx_v7_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3578                                               bool enable)
3579{
3580        u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3581
3582        if (enable)
3583                tmp |= (CP_INT_CNTL_RING0__CNTX_BUSY_INT_ENABLE_MASK |
3584                                CP_INT_CNTL_RING0__CNTX_EMPTY_INT_ENABLE_MASK);
3585        else
3586                tmp &= ~(CP_INT_CNTL_RING0__CNTX_BUSY_INT_ENABLE_MASK |
3587                                CP_INT_CNTL_RING0__CNTX_EMPTY_INT_ENABLE_MASK);
3588        WREG32(mmCP_INT_CNTL_RING0, tmp);
3589}
3590
3591static int gfx_v7_0_cp_resume(struct amdgpu_device *adev)
3592{
3593        int r;
3594
3595        gfx_v7_0_enable_gui_idle_interrupt(adev, false);
3596
3597        r = gfx_v7_0_cp_load_microcode(adev);
3598        if (r)
3599                return r;
3600
3601        r = gfx_v7_0_cp_gfx_resume(adev);
3602        if (r)
3603                return r;
3604        r = gfx_v7_0_cp_compute_resume(adev);
3605        if (r)
3606                return r;
3607
3608        gfx_v7_0_enable_gui_idle_interrupt(adev, true);
3609
3610        return 0;
3611}
3612
3613/*
3614 * vm
3615 * VMID 0 is the physical GPU addresses as used by the kernel.
3616 * VMIDs 1-15 are used for userspace clients and are handled
3617 * by the amdgpu vm/hsa code.
3618 */
3619/**
3620 * gfx_v7_0_ring_emit_vm_flush - cik vm flush using the CP
3621 *
3622 * @adev: amdgpu_device pointer
3623 *
3624 * Update the page table base and flush the VM TLB
3625 * using the CP (CIK).
3626 */
3627static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
3628                                        unsigned vm_id, uint64_t pd_addr)
3629{
3630        int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
3631        if (usepfp) {
3632                /* synce CE with ME to prevent CE fetch CEIB before context switch done */
3633                amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3634                amdgpu_ring_write(ring, 0);
3635                amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3636                amdgpu_ring_write(ring, 0);
3637        }
3638
3639        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3640        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
3641                                 WRITE_DATA_DST_SEL(0)));
3642        if (vm_id < 8) {
3643                amdgpu_ring_write(ring,
3644                                  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
3645        } else {
3646                amdgpu_ring_write(ring,
3647                                  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
3648        }
3649        amdgpu_ring_write(ring, 0);
3650        amdgpu_ring_write(ring, pd_addr >> 12);
3651
3652        /* bits 0-15 are the VM contexts0-15 */
3653        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3654        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3655                                 WRITE_DATA_DST_SEL(0)));
3656        amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
3657        amdgpu_ring_write(ring, 0);
3658        amdgpu_ring_write(ring, 1 << vm_id);
3659
3660        /* wait for the invalidate to complete */
3661        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3662        amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
3663                                 WAIT_REG_MEM_FUNCTION(0) |  /* always */
3664                                 WAIT_REG_MEM_ENGINE(0))); /* me */
3665        amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
3666        amdgpu_ring_write(ring, 0);
3667        amdgpu_ring_write(ring, 0); /* ref */
3668        amdgpu_ring_write(ring, 0); /* mask */
3669        amdgpu_ring_write(ring, 0x20); /* poll interval */
3670
3671        /* compute doesn't have PFP */
3672        if (usepfp) {
3673                /* sync PFP to ME, otherwise we might get invalid PFP reads */
3674                amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3675                amdgpu_ring_write(ring, 0x0);
3676
3677                /* synce CE with ME to prevent CE fetch CEIB before context switch done */
3678                amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3679                amdgpu_ring_write(ring, 0);
3680                amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3681                amdgpu_ring_write(ring, 0);
3682        }
3683}
3684
3685/*
3686 * RLC
3687 * The RLC is a multi-purpose microengine that handles a
3688 * variety of functions.
3689 */
3690static void gfx_v7_0_rlc_fini(struct amdgpu_device *adev)
3691{
3692        int r;
3693
3694        /* save restore block */
3695        if (adev->gfx.rlc.save_restore_obj) {
3696                r = amdgpu_bo_reserve(adev->gfx.rlc.save_restore_obj, false);
3697                if (unlikely(r != 0))
3698                        dev_warn(adev->dev, "(%d) reserve RLC sr bo failed\n", r);
3699                amdgpu_bo_unpin(adev->gfx.rlc.save_restore_obj);
3700                amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj);
3701
3702                amdgpu_bo_unref(&adev->gfx.rlc.save_restore_obj);
3703                adev->gfx.rlc.save_restore_obj = NULL;
3704        }
3705
3706        /* clear state block */
3707        if (adev->gfx.rlc.clear_state_obj) {
3708                r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
3709                if (unlikely(r != 0))
3710                        dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r);
3711                amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
3712                amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
3713
3714                amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
3715                adev->gfx.rlc.clear_state_obj = NULL;
3716        }
3717
3718        /* clear state block */
3719        if (adev->gfx.rlc.cp_table_obj) {
3720                r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
3721                if (unlikely(r != 0))
3722                        dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
3723                amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
3724                amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
3725
3726                amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
3727                adev->gfx.rlc.cp_table_obj = NULL;
3728        }
3729}
3730
3731static int gfx_v7_0_rlc_init(struct amdgpu_device *adev)
3732{
3733        const u32 *src_ptr;
3734        volatile u32 *dst_ptr;
3735        u32 dws, i;
3736        const struct cs_section_def *cs_data;
3737        int r;
3738
3739        /* allocate rlc buffers */
3740        if (adev->flags & AMD_IS_APU) {
3741                if (adev->asic_type == CHIP_KAVERI) {
3742                        adev->gfx.rlc.reg_list = spectre_rlc_save_restore_register_list;
3743                        adev->gfx.rlc.reg_list_size =
3744                                (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
3745                } else {
3746                        adev->gfx.rlc.reg_list = kalindi_rlc_save_restore_register_list;
3747                        adev->gfx.rlc.reg_list_size =
3748                                (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
3749                }
3750        }
3751        adev->gfx.rlc.cs_data = ci_cs_data;
3752        adev->gfx.rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
3753
3754        src_ptr = adev->gfx.rlc.reg_list;
3755        dws = adev->gfx.rlc.reg_list_size;
3756        dws += (5 * 16) + 48 + 48 + 64;
3757
3758        cs_data = adev->gfx.rlc.cs_data;
3759
3760        if (src_ptr) {
3761                /* save restore block */
3762                if (adev->gfx.rlc.save_restore_obj == NULL) {
3763                        r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
3764                                             AMDGPU_GEM_DOMAIN_VRAM,
3765                                             AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
3766                                             NULL, NULL,
3767                                             &adev->gfx.rlc.save_restore_obj);
3768                        if (r) {
3769                                dev_warn(adev->dev, "(%d) create RLC sr bo failed\n", r);
3770                                return r;
3771                        }
3772                }
3773
3774                r = amdgpu_bo_reserve(adev->gfx.rlc.save_restore_obj, false);
3775                if (unlikely(r != 0)) {
3776                        gfx_v7_0_rlc_fini(adev);
3777                        return r;
3778                }
3779                r = amdgpu_bo_pin(adev->gfx.rlc.save_restore_obj, AMDGPU_GEM_DOMAIN_VRAM,
3780                                  &adev->gfx.rlc.save_restore_gpu_addr);
3781                if (r) {
3782                        amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj);
3783                        dev_warn(adev->dev, "(%d) pin RLC sr bo failed\n", r);
3784                        gfx_v7_0_rlc_fini(adev);
3785                        return r;
3786                }
3787
3788                r = amdgpu_bo_kmap(adev->gfx.rlc.save_restore_obj, (void **)&adev->gfx.rlc.sr_ptr);
3789                if (r) {
3790                        dev_warn(adev->dev, "(%d) map RLC sr bo failed\n", r);
3791                        gfx_v7_0_rlc_fini(adev);
3792                        return r;
3793                }
3794                /* write the sr buffer */
3795                dst_ptr = adev->gfx.rlc.sr_ptr;
3796                for (i = 0; i < adev->gfx.rlc.reg_list_size; i++)
3797                        dst_ptr[i] = cpu_to_le32(src_ptr[i]);
3798                amdgpu_bo_kunmap(adev->gfx.rlc.save_restore_obj);
3799                amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj);
3800        }
3801
3802        if (cs_data) {
3803                /* clear state block */
3804                adev->gfx.rlc.clear_state_size = dws = gfx_v7_0_get_csb_size(adev);
3805
3806                if (adev->gfx.rlc.clear_state_obj == NULL) {
3807                        r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
3808                                             AMDGPU_GEM_DOMAIN_VRAM,
3809                                             AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
3810                                             NULL, NULL,
3811                                             &adev->gfx.rlc.clear_state_obj);
3812                        if (r) {
3813                                dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
3814                                gfx_v7_0_rlc_fini(adev);
3815                                return r;
3816                        }
3817                }
3818                r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
3819                if (unlikely(r != 0)) {
3820                        gfx_v7_0_rlc_fini(adev);
3821                        return r;
3822                }
3823                r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
3824                                  &adev->gfx.rlc.clear_state_gpu_addr);
3825                if (r) {
3826                        amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
3827                        dev_warn(adev->dev, "(%d) pin RLC c bo failed\n", r);
3828                        gfx_v7_0_rlc_fini(adev);
3829                        return r;
3830                }
3831
3832                r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
3833                if (r) {
3834                        dev_warn(adev->dev, "(%d) map RLC c bo failed\n", r);
3835                        gfx_v7_0_rlc_fini(adev);
3836                        return r;
3837                }
3838                /* set up the cs buffer */
3839                dst_ptr = adev->gfx.rlc.cs_ptr;
3840                gfx_v7_0_get_csb_buffer(adev, dst_ptr);
3841                amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
3842                amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
3843        }
3844
3845        if (adev->gfx.rlc.cp_table_size) {
3846                if (adev->gfx.rlc.cp_table_obj == NULL) {
3847                        r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
3848                                             AMDGPU_GEM_DOMAIN_VRAM,
3849                                             AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
3850                                             NULL, NULL,
3851                                             &adev->gfx.rlc.cp_table_obj);
3852                        if (r) {
3853                                dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
3854                                gfx_v7_0_rlc_fini(adev);
3855                                return r;
3856                        }
3857                }
3858
3859                r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
3860                if (unlikely(r != 0)) {
3861                        dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
3862                        gfx_v7_0_rlc_fini(adev);
3863                        return r;
3864                }
3865                r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
3866                                  &adev->gfx.rlc.cp_table_gpu_addr);
3867                if (r) {
3868                        amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
3869                        dev_warn(adev->dev, "(%d) pin RLC cp_table bo failed\n", r);
3870                        gfx_v7_0_rlc_fini(adev);
3871                        return r;
3872                }
3873                r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
3874                if (r) {
3875                        dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
3876                        gfx_v7_0_rlc_fini(adev);
3877                        return r;
3878                }
3879
3880                gfx_v7_0_init_cp_pg_table(adev);
3881
3882                amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
3883                amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
3884
3885        }
3886
3887        return 0;
3888}
3889
3890static void gfx_v7_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
3891{
3892        u32 tmp;
3893
3894        tmp = RREG32(mmRLC_LB_CNTL);
3895        if (enable)
3896                tmp |= RLC_LB_CNTL__LOAD_BALANCE_ENABLE_MASK;
3897        else
3898                tmp &= ~RLC_LB_CNTL__LOAD_BALANCE_ENABLE_MASK;
3899        WREG32(mmRLC_LB_CNTL, tmp);
3900}
3901
3902static void gfx_v7_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3903{
3904        u32 i, j, k;
3905        u32 mask;
3906
3907        mutex_lock(&adev->grbm_idx_mutex);
3908        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3909                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3910                        gfx_v7_0_select_se_sh(adev, i, j);
3911                        for (k = 0; k < adev->usec_timeout; k++) {
3912                                if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3913                                        break;
3914                                udelay(1);
3915                        }
3916                }
3917        }
3918        gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3919        mutex_unlock(&adev->grbm_idx_mutex);
3920
3921        mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3922                RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3923                RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3924                RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3925        for (k = 0; k < adev->usec_timeout; k++) {
3926                if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3927                        break;
3928                udelay(1);
3929        }
3930}
3931
3932static void gfx_v7_0_update_rlc(struct amdgpu_device *adev, u32 rlc)
3933{
3934        u32 tmp;
3935
3936        tmp = RREG32(mmRLC_CNTL);
3937        if (tmp != rlc)
3938                WREG32(mmRLC_CNTL, rlc);
3939}
3940
3941static u32 gfx_v7_0_halt_rlc(struct amdgpu_device *adev)
3942{
3943        u32 data, orig;
3944
3945        orig = data = RREG32(mmRLC_CNTL);
3946
3947        if (data & RLC_CNTL__RLC_ENABLE_F32_MASK) {
3948                u32 i;
3949
3950                data &= ~RLC_CNTL__RLC_ENABLE_F32_MASK;
3951                WREG32(mmRLC_CNTL, data);
3952
3953                for (i = 0; i < adev->usec_timeout; i++) {
3954                        if ((RREG32(mmRLC_GPM_STAT) & RLC_GPM_STAT__RLC_BUSY_MASK) == 0)
3955                                break;
3956                        udelay(1);
3957                }
3958
3959                gfx_v7_0_wait_for_rlc_serdes(adev);
3960        }
3961
3962        return orig;
3963}
3964
3965void gfx_v7_0_enter_rlc_safe_mode(struct amdgpu_device *adev)
3966{
3967        u32 tmp, i, mask;
3968
3969        tmp = 0x1 | (1 << 1);
3970        WREG32(mmRLC_GPR_REG2, tmp);
3971
3972        mask = RLC_GPM_STAT__GFX_POWER_STATUS_MASK |
3973                RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK;
3974        for (i = 0; i < adev->usec_timeout; i++) {
3975                if ((RREG32(mmRLC_GPM_STAT) & mask) == mask)
3976                        break;
3977                udelay(1);
3978        }
3979
3980        for (i = 0; i < adev->usec_timeout; i++) {
3981                if ((RREG32(mmRLC_GPR_REG2) & 0x1) == 0)
3982                        break;
3983                udelay(1);
3984        }
3985}
3986
3987void gfx_v7_0_exit_rlc_safe_mode(struct amdgpu_device *adev)
3988{
3989        u32 tmp;
3990
3991        tmp = 0x1 | (0 << 1);
3992        WREG32(mmRLC_GPR_REG2, tmp);
3993}
3994
3995/**
3996 * gfx_v7_0_rlc_stop - stop the RLC ME
3997 *
3998 * @adev: amdgpu_device pointer
3999 *
4000 * Halt the RLC ME (MicroEngine) (CIK).
4001 */
4002void gfx_v7_0_rlc_stop(struct amdgpu_device *adev)
4003{
4004        WREG32(mmRLC_CNTL, 0);
4005
4006        gfx_v7_0_enable_gui_idle_interrupt(adev, false);
4007
4008        gfx_v7_0_wait_for_rlc_serdes(adev);
4009}
4010
4011/**
4012 * gfx_v7_0_rlc_start - start the RLC ME
4013 *
4014 * @adev: amdgpu_device pointer
4015 *
4016 * Unhalt the RLC ME (MicroEngine) (CIK).
4017 */
4018static void gfx_v7_0_rlc_start(struct amdgpu_device *adev)
4019{
4020        WREG32(mmRLC_CNTL, RLC_CNTL__RLC_ENABLE_F32_MASK);
4021
4022        gfx_v7_0_enable_gui_idle_interrupt(adev, true);
4023
4024        udelay(50);
4025}
4026
4027static void gfx_v7_0_rlc_reset(struct amdgpu_device *adev)
4028{
4029        u32 tmp = RREG32(mmGRBM_SOFT_RESET);
4030
4031        tmp |= GRBM_SOFT_RESET__SOFT_RESET_RLC_MASK;
4032        WREG32(mmGRBM_SOFT_RESET, tmp);
4033        udelay(50);
4034        tmp &= ~GRBM_SOFT_RESET__SOFT_RESET_RLC_MASK;
4035        WREG32(mmGRBM_SOFT_RESET, tmp);
4036        udelay(50);
4037}
4038
4039/**
4040 * gfx_v7_0_rlc_resume - setup the RLC hw
4041 *
4042 * @adev: amdgpu_device pointer
4043 *
4044 * Initialize the RLC registers, load the ucode,
4045 * and start the RLC (CIK).
4046 * Returns 0 for success, -EINVAL if the ucode is not available.
4047 */
4048static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev)
4049{
4050        const struct rlc_firmware_header_v1_0 *hdr;
4051        const __le32 *fw_data;
4052        unsigned i, fw_size;
4053        u32 tmp;
4054
4055        if (!adev->gfx.rlc_fw)
4056                return -EINVAL;
4057
4058        hdr = (const struct rlc_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
4059        amdgpu_ucode_print_rlc_hdr(&hdr->header);
4060        adev->gfx.rlc_fw_version = le32_to_cpu(hdr->header.ucode_version);
4061        adev->gfx.rlc_feature_version = le32_to_cpu(
4062                                        hdr->ucode_feature_version);
4063
4064        gfx_v7_0_rlc_stop(adev);
4065
4066        /* disable CG */
4067        tmp = RREG32(mmRLC_CGCG_CGLS_CTRL) & 0xfffffffc;
4068        WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
4069
4070        gfx_v7_0_rlc_reset(adev);
4071
4072        gfx_v7_0_init_pg(adev);
4073
4074        WREG32(mmRLC_LB_CNTR_INIT, 0);
4075        WREG32(mmRLC_LB_CNTR_MAX, 0x00008000);
4076
4077        mutex_lock(&adev->grbm_idx_mutex);
4078        gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
4079        WREG32(mmRLC_LB_INIT_CU_MASK, 0xffffffff);
4080        WREG32(mmRLC_LB_PARAMS, 0x00600408);
4081        WREG32(mmRLC_LB_CNTL, 0x80000004);
4082        mutex_unlock(&adev->grbm_idx_mutex);
4083
4084        WREG32(mmRLC_MC_CNTL, 0);
4085        WREG32(mmRLC_UCODE_CNTL, 0);
4086
4087        fw_data = (const __le32 *)
4088                (adev->gfx.rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4089        fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4090        WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4091        for (i = 0; i < fw_size; i++)
4092                WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4093        WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4094
4095        /* XXX - find out what chips support lbpw */
4096        gfx_v7_0_enable_lbpw(adev, false);
4097
4098        if (adev->asic_type == CHIP_BONAIRE)
4099                WREG32(mmRLC_DRIVER_CPDMA_STATUS, 0);
4100
4101        gfx_v7_0_rlc_start(adev);
4102
4103        return 0;
4104}
4105
4106static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable)
4107{
4108        u32 data, orig, tmp, tmp2;
4109
4110        orig = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
4111
4112        if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_GFX_CGCG)) {
4113                gfx_v7_0_enable_gui_idle_interrupt(adev, true);
4114
4115                tmp = gfx_v7_0_halt_rlc(adev);
4116
4117                mutex_lock(&adev->grbm_idx_mutex);
4118                gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
4119                WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
4120                WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
4121                tmp2 = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |
4122                        RLC_SERDES_WR_CTRL__CGCG_OVERRIDE_0_MASK |
4123                        RLC_SERDES_WR_CTRL__CGLS_ENABLE_MASK;
4124                WREG32(mmRLC_SERDES_WR_CTRL, tmp2);
4125                mutex_unlock(&adev->grbm_idx_mutex);
4126
4127                gfx_v7_0_update_rlc(adev, tmp);
4128
4129                data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4130        } else {
4131                gfx_v7_0_enable_gui_idle_interrupt(adev, false);
4132
4133                RREG32(mmCB_CGTT_SCLK_CTRL);
4134                RREG32(mmCB_CGTT_SCLK_CTRL);
4135                RREG32(mmCB_CGTT_SCLK_CTRL);
4136                RREG32(mmCB_CGTT_SCLK_CTRL);
4137
4138                data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4139        }
4140
4141        if (orig != data)
4142                WREG32(mmRLC_CGCG_CGLS_CTRL, data);
4143
4144}
4145
4146static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
4147{
4148        u32 data, orig, tmp = 0;
4149
4150        if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_GFX_MGCG)) {
4151                if (adev->cg_flags & AMDGPU_CG_SUPPORT_GFX_MGLS) {
4152                        if (adev->cg_flags & AMDGPU_CG_SUPPORT_GFX_CP_LS) {
4153                                orig = data = RREG32(mmCP_MEM_SLP_CNTL);
4154                                data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4155                                if (orig != data)
4156                                        WREG32(mmCP_MEM_SLP_CNTL, data);
4157                        }
4158                }
4159
4160                orig = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4161                data |= 0x00000001;
4162                data &= 0xfffffffd;
4163                if (orig != data)
4164                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
4165
4166                tmp = gfx_v7_0_halt_rlc(adev);
4167
4168                mutex_lock(&adev->grbm_idx_mutex);
4169                gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
4170                WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
4171                WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
4172                data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |
4173                        RLC_SERDES_WR_CTRL__MGCG_OVERRIDE_0_MASK;
4174                WREG32(mmRLC_SERDES_WR_CTRL, data);
4175                mutex_unlock(&adev->grbm_idx_mutex);
4176
4177                gfx_v7_0_update_rlc(adev, tmp);
4178
4179                if (adev->cg_flags & AMDGPU_CG_SUPPORT_GFX_CGTS) {
4180                        orig = data = RREG32(mmCGTS_SM_CTRL_REG);
4181                        data &= ~CGTS_SM_CTRL_REG__SM_MODE_MASK;
4182                        data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
4183                        data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
4184                        data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
4185                        if ((adev->cg_flags & AMDGPU_CG_SUPPORT_GFX_MGLS) &&
4186                            (adev->cg_flags & AMDGPU_CG_SUPPORT_GFX_CGTS_LS))
4187                                data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
4188                        data &= ~CGTS_SM_CTRL_REG__ON_MONITOR_ADD_MASK;
4189                        data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
4190                        data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
4191                        if (orig != data)
4192                                WREG32(mmCGTS_SM_CTRL_REG, data);
4193                }
4194        } else {
4195                orig = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4196                data |= 0x00000003;
4197                if (orig != data)
4198                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
4199
4200                data = RREG32(mmRLC_MEM_SLP_CNTL);
4201                if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4202                        data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4203                        WREG32(mmRLC_MEM_SLP_CNTL, data);
4204                }
4205
4206                data = RREG32(mmCP_MEM_SLP_CNTL);
4207                if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4208                        data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4209                        WREG32(mmCP_MEM_SLP_CNTL, data);
4210                }
4211
4212                orig = data = RREG32(mmCGTS_SM_CTRL_REG);
4213                data |= CGTS_SM_CTRL_REG__OVERRIDE_MASK | CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
4214                if (orig != data)
4215                        WREG32(mmCGTS_SM_CTRL_REG, data);
4216
4217                tmp = gfx_v7_0_halt_rlc(adev);
4218
4219                mutex_lock(&adev->grbm_idx_mutex);
4220                gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
4221                WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
4222                WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
4223                data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK | RLC_SERDES_WR_CTRL__MGCG_OVERRIDE_1_MASK;
4224                WREG32(mmRLC_SERDES_WR_CTRL, data);
4225                mutex_unlock(&adev->grbm_idx_mutex);
4226
4227                gfx_v7_0_update_rlc(adev, tmp);
4228        }
4229}
4230
4231static void gfx_v7_0_update_cg(struct amdgpu_device *adev,
4232                               bool enable)
4233{
4234        gfx_v7_0_enable_gui_idle_interrupt(adev, false);
4235        /* order matters! */
4236        if (enable) {
4237                gfx_v7_0_enable_mgcg(adev, true);
4238                gfx_v7_0_enable_cgcg(adev, true);
4239        } else {
4240                gfx_v7_0_enable_cgcg(adev, false);
4241                gfx_v7_0_enable_mgcg(adev, false);
4242        }
4243        gfx_v7_0_enable_gui_idle_interrupt(adev, true);
4244}
4245
4246static void gfx_v7_0_enable_sclk_slowdown_on_pu(struct amdgpu_device *adev,
4247                                                bool enable)
4248{
4249        u32 data, orig;
4250
4251        orig = data = RREG32(mmRLC_PG_CNTL);
4252        if (enable && (adev->pg_flags & AMDGPU_PG_SUPPORT_RLC_SMU_HS))
4253                data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
4254        else
4255                data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
4256        if (orig != data)
4257                WREG32(mmRLC_PG_CNTL, data);
4258}
4259
4260static void gfx_v7_0_enable_sclk_slowdown_on_pd(struct amdgpu_device *adev,
4261                                                bool enable)
4262{
4263        u32 data, orig;
4264
4265        orig = data = RREG32(mmRLC_PG_CNTL);
4266        if (enable && (adev->pg_flags & AMDGPU_PG_SUPPORT_RLC_SMU_HS))
4267                data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
4268        else
4269                data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
4270        if (orig != data)
4271                WREG32(mmRLC_PG_CNTL, data);
4272}
4273
4274static void gfx_v7_0_enable_cp_pg(struct amdgpu_device *adev, bool enable)
4275{
4276        u32 data, orig;
4277
4278        orig = data = RREG32(mmRLC_PG_CNTL);
4279        if (enable && (adev->pg_flags & AMDGPU_PG_SUPPORT_CP))
4280                data &= ~0x8000;
4281        else
4282                data |= 0x8000;
4283        if (orig != data)
4284                WREG32(mmRLC_PG_CNTL, data);
4285}
4286
4287static void gfx_v7_0_enable_gds_pg(struct amdgpu_device *adev, bool enable)
4288{
4289        u32 data, orig;
4290
4291        orig = data = RREG32(mmRLC_PG_CNTL);
4292        if (enable && (adev->pg_flags & AMDGPU_PG_SUPPORT_GDS))
4293                data &= ~0x2000;
4294        else
4295                data |= 0x2000;
4296        if (orig != data)
4297                WREG32(mmRLC_PG_CNTL, data);
4298}
4299
4300static void gfx_v7_0_init_cp_pg_table(struct amdgpu_device *adev)
4301{
4302        const __le32 *fw_data;
4303        volatile u32 *dst_ptr;
4304        int me, i, max_me = 4;
4305        u32 bo_offset = 0;
4306        u32 table_offset, table_size;
4307
4308        if (adev->asic_type == CHIP_KAVERI)
4309                max_me = 5;
4310
4311        if (adev->gfx.rlc.cp_table_ptr == NULL)
4312                return;
4313
4314        /* write the cp table buffer */
4315        dst_ptr = adev->gfx.rlc.cp_table_ptr;
4316        for (me = 0; me < max_me; me++) {
4317                if (me == 0) {
4318                        const struct gfx_firmware_header_v1_0 *hdr =
4319                                (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
4320                        fw_data = (const __le32 *)
4321                                (adev->gfx.ce_fw->data +
4322                                 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4323                        table_offset = le32_to_cpu(hdr->jt_offset);
4324                        table_size = le32_to_cpu(hdr->jt_size);
4325                } else if (me == 1) {
4326                        const struct gfx_firmware_header_v1_0 *hdr =
4327                                (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
4328                        fw_data = (const __le32 *)
4329                                (adev->gfx.pfp_fw->data +
4330                                 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4331                        table_offset = le32_to_cpu(hdr->jt_offset);
4332                        table_size = le32_to_cpu(hdr->jt_size);
4333                } else if (me == 2) {
4334                        const struct gfx_firmware_header_v1_0 *hdr =
4335                                (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
4336                        fw_data = (const __le32 *)
4337                                (adev->gfx.me_fw->data +
4338                                 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4339                        table_offset = le32_to_cpu(hdr->jt_offset);
4340                        table_size = le32_to_cpu(hdr->jt_size);
4341                } else if (me == 3) {
4342                        const struct gfx_firmware_header_v1_0 *hdr =
4343                                (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4344                        fw_data = (const __le32 *)
4345                                (adev->gfx.mec_fw->data +
4346                                 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4347                        table_offset = le32_to_cpu(hdr->jt_offset);
4348                        table_size = le32_to_cpu(hdr->jt_size);
4349                } else {
4350                        const struct gfx_firmware_header_v1_0 *hdr =
4351                                (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4352                        fw_data = (const __le32 *)
4353                                (adev->gfx.mec2_fw->data +
4354                                 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4355                        table_offset = le32_to_cpu(hdr->jt_offset);
4356                        table_size = le32_to_cpu(hdr->jt_size);
4357                }
4358
4359                for (i = 0; i < table_size; i ++) {
4360                        dst_ptr[bo_offset + i] =
4361                                cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
4362                }
4363
4364                bo_offset += table_size;
4365        }
4366}
4367
4368static void gfx_v7_0_enable_gfx_cgpg(struct amdgpu_device *adev,
4369                                     bool enable)
4370{
4371        u32 data, orig;
4372
4373        if (enable && (adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_PG)) {
4374                orig = data = RREG32(mmRLC_PG_CNTL);
4375                data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
4376                if (orig != data)
4377                        WREG32(mmRLC_PG_CNTL, data);
4378
4379                orig = data = RREG32(mmRLC_AUTO_PG_CTRL);
4380                data |= RLC_AUTO_PG_CTRL__AUTO_PG_EN_MASK;
4381                if (orig != data)
4382                        WREG32(mmRLC_AUTO_PG_CTRL, data);
4383        } else {
4384                orig = data = RREG32(mmRLC_PG_CNTL);
4385                data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
4386                if (orig != data)
4387                        WREG32(mmRLC_PG_CNTL, data);
4388
4389                orig = data = RREG32(mmRLC_AUTO_PG_CTRL);
4390                data &= ~RLC_AUTO_PG_CTRL__AUTO_PG_EN_MASK;
4391                if (orig != data)
4392                        WREG32(mmRLC_AUTO_PG_CTRL, data);
4393
4394                data = RREG32(mmDB_RENDER_CONTROL);
4395        }
4396}
4397
4398static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev,
4399                                         u32 se, u32 sh)
4400{
4401        u32 mask = 0, tmp, tmp1;
4402        int i;
4403
4404        gfx_v7_0_select_se_sh(adev, se, sh);
4405        tmp = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
4406        tmp1 = RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
4407        gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
4408
4409        tmp &= 0xffff0000;
4410
4411        tmp |= tmp1;
4412        tmp >>= 16;
4413
4414        for (i = 0; i < adev->gfx.config.max_cu_per_sh; i ++) {
4415                mask <<= 1;
4416                mask |= 1;
4417        }
4418
4419        return (~tmp) & mask;
4420}
4421
4422static void gfx_v7_0_init_ao_cu_mask(struct amdgpu_device *adev)
4423{
4424        uint32_t tmp, active_cu_number;
4425        struct amdgpu_cu_info cu_info;
4426
4427        gfx_v7_0_get_cu_info(adev, &cu_info);
4428        tmp = cu_info.ao_cu_mask;
4429        active_cu_number = cu_info.number;
4430
4431        WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, tmp);
4432
4433        tmp = RREG32(mmRLC_MAX_PG_CU);
4434        tmp &= ~RLC_MAX_PG_CU__MAX_POWERED_UP_CU_MASK;
4435        tmp |= (active_cu_number << RLC_MAX_PG_CU__MAX_POWERED_UP_CU__SHIFT);
4436        WREG32(mmRLC_MAX_PG_CU, tmp);
4437}
4438
4439static void gfx_v7_0_enable_gfx_static_mgpg(struct amdgpu_device *adev,
4440                                            bool enable)
4441{
4442        u32 data, orig;
4443
4444        orig = data = RREG32(mmRLC_PG_CNTL);
4445        if (enable && (adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_SMG))
4446                data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
4447        else
4448                data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
4449        if (orig != data)
4450                WREG32(mmRLC_PG_CNTL, data);
4451}
4452
4453static void gfx_v7_0_enable_gfx_dynamic_mgpg(struct amdgpu_device *adev,
4454                                             bool enable)
4455{
4456        u32 data, orig;
4457
4458        orig = data = RREG32(mmRLC_PG_CNTL);
4459        if (enable && (adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_DMG))
4460                data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
4461        else
4462                data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
4463        if (orig != data)
4464                WREG32(mmRLC_PG_CNTL, data);
4465}
4466
4467#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
4468#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
4469
4470static void gfx_v7_0_init_gfx_cgpg(struct amdgpu_device *adev)
4471{
4472        u32 data, orig;
4473        u32 i;
4474
4475        if (adev->gfx.rlc.cs_data) {
4476                WREG32(mmRLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
4477                WREG32(mmRLC_GPM_SCRATCH_DATA, upper_32_bits(adev->gfx.rlc.clear_state_gpu_addr));
4478                WREG32(mmRLC_GPM_SCRATCH_DATA, lower_32_bits(adev->gfx.rlc.clear_state_gpu_addr));
4479                WREG32(mmRLC_GPM_SCRATCH_DATA, adev->gfx.rlc.clear_state_size);
4480        } else {
4481                WREG32(mmRLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
4482                for (i = 0; i < 3; i++)
4483                        WREG32(mmRLC_GPM_SCRATCH_DATA, 0);
4484        }
4485        if (adev->gfx.rlc.reg_list) {
4486                WREG32(mmRLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
4487                for (i = 0; i < adev->gfx.rlc.reg_list_size; i++)
4488                        WREG32(mmRLC_GPM_SCRATCH_DATA, adev->gfx.rlc.reg_list[i]);
4489        }
4490
4491        orig = data = RREG32(mmRLC_PG_CNTL);
4492        data |= RLC_PG_CNTL__GFX_POWER_GATING_SRC_MASK;
4493        if (orig != data)
4494                WREG32(mmRLC_PG_CNTL, data);
4495
4496        WREG32(mmRLC_SAVE_AND_RESTORE_BASE, adev->gfx.rlc.save_restore_gpu_addr >> 8);
4497        WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4498
4499        data = RREG32(mmCP_RB_WPTR_POLL_CNTL);
4500        data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
4501        data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4502        WREG32(mmCP_RB_WPTR_POLL_CNTL, data);
4503
4504        data = 0x10101010;
4505        WREG32(mmRLC_PG_DELAY, data);
4506
4507        data = RREG32(mmRLC_PG_DELAY_2);
4508        data &= ~0xff;
4509        data |= 0x3;
4510        WREG32(mmRLC_PG_DELAY_2, data);
4511
4512        data = RREG32(mmRLC_AUTO_PG_CTRL);
4513        data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
4514        data |= (0x700 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
4515        WREG32(mmRLC_AUTO_PG_CTRL, data);
4516
4517}
4518
4519static void gfx_v7_0_update_gfx_pg(struct amdgpu_device *adev, bool enable)
4520{
4521        gfx_v7_0_enable_gfx_cgpg(adev, enable);
4522        gfx_v7_0_enable_gfx_static_mgpg(adev, enable);
4523        gfx_v7_0_enable_gfx_dynamic_mgpg(adev, enable);
4524}
4525
4526static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev)
4527{
4528        u32 count = 0;
4529        const struct cs_section_def *sect = NULL;
4530        const struct cs_extent_def *ext = NULL;
4531
4532        if (adev->gfx.rlc.cs_data == NULL)
4533                return 0;
4534
4535        /* begin clear state */
4536        count += 2;
4537        /* context control state */
4538        count += 3;
4539
4540        for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
4541                for (ext = sect->section; ext->extent != NULL; ++ext) {
4542                        if (sect->id == SECT_CONTEXT)
4543                                count += 2 + ext->reg_count;
4544                        else
4545                                return 0;
4546                }
4547        }
4548        /* pa_sc_raster_config/pa_sc_raster_config1 */
4549        count += 4;
4550        /* end clear state */
4551        count += 2;
4552        /* clear state */
4553        count += 2;
4554
4555        return count;
4556}
4557
4558static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev,
4559                                    volatile u32 *buffer)
4560{
4561        u32 count = 0, i;
4562        const struct cs_section_def *sect = NULL;
4563        const struct cs_extent_def *ext = NULL;
4564
4565        if (adev->gfx.rlc.cs_data == NULL)
4566                return;
4567        if (buffer == NULL)
4568                return;
4569
4570        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4571        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4572
4573        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4574        buffer[count++] = cpu_to_le32(0x80000000);
4575        buffer[count++] = cpu_to_le32(0x80000000);
4576
4577        for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
4578                for (ext = sect->section; ext->extent != NULL; ++ext) {
4579                        if (sect->id == SECT_CONTEXT) {
4580                                buffer[count++] =
4581                                        cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
4582                                buffer[count++] = cpu_to_le32(ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4583                                for (i = 0; i < ext->reg_count; i++)
4584                                        buffer[count++] = cpu_to_le32(ext->extent[i]);
4585                        } else {
4586                                return;
4587                        }
4588                }
4589        }
4590
4591        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4592        buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4593        switch (adev->asic_type) {
4594        case CHIP_BONAIRE:
4595                buffer[count++] = cpu_to_le32(0x16000012);
4596                buffer[count++] = cpu_to_le32(0x00000000);
4597                break;
4598        case CHIP_KAVERI:
4599                buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
4600                buffer[count++] = cpu_to_le32(0x00000000);
4601                break;
4602        case CHIP_KABINI:
4603        case CHIP_MULLINS:
4604                buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
4605                buffer[count++] = cpu_to_le32(0x00000000);
4606                break;
4607        case CHIP_HAWAII:
4608                buffer[count++] = cpu_to_le32(0x3a00161a);
4609                buffer[count++] = cpu_to_le32(0x0000002e);
4610                break;
4611        default:
4612                buffer[count++] = cpu_to_le32(0x00000000);
4613                buffer[count++] = cpu_to_le32(0x00000000);
4614                break;
4615        }
4616
4617        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4618        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
4619
4620        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
4621        buffer[count++] = cpu_to_le32(0);
4622}
4623
4624static void gfx_v7_0_init_pg(struct amdgpu_device *adev)
4625{
4626        if (adev->pg_flags & (AMDGPU_PG_SUPPORT_GFX_PG |
4627                              AMDGPU_PG_SUPPORT_GFX_SMG |
4628                              AMDGPU_PG_SUPPORT_GFX_DMG |
4629                              AMDGPU_PG_SUPPORT_CP |
4630                              AMDGPU_PG_SUPPORT_GDS |
4631                              AMDGPU_PG_SUPPORT_RLC_SMU_HS)) {
4632                gfx_v7_0_enable_sclk_slowdown_on_pu(adev, true);
4633                gfx_v7_0_enable_sclk_slowdown_on_pd(adev, true);
4634                if (adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_PG) {
4635                        gfx_v7_0_init_gfx_cgpg(adev);
4636                        gfx_v7_0_enable_cp_pg(adev, true);
4637                        gfx_v7_0_enable_gds_pg(adev, true);
4638                }
4639                gfx_v7_0_init_ao_cu_mask(adev);
4640                gfx_v7_0_update_gfx_pg(adev, true);
4641        }
4642}
4643
4644static void gfx_v7_0_fini_pg(struct amdgpu_device *adev)
4645{
4646        if (adev->pg_flags & (AMDGPU_PG_SUPPORT_GFX_PG |
4647                              AMDGPU_PG_SUPPORT_GFX_SMG |
4648                              AMDGPU_PG_SUPPORT_GFX_DMG |
4649                              AMDGPU_PG_SUPPORT_CP |
4650                              AMDGPU_PG_SUPPORT_GDS |
4651                              AMDGPU_PG_SUPPORT_RLC_SMU_HS)) {
4652                gfx_v7_0_update_gfx_pg(adev, false);
4653                if (adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_PG) {
4654                        gfx_v7_0_enable_cp_pg(adev, false);
4655                        gfx_v7_0_enable_gds_pg(adev, false);
4656                }
4657        }
4658}
4659
4660/**
4661 * gfx_v7_0_get_gpu_clock_counter - return GPU clock counter snapshot
4662 *
4663 * @adev: amdgpu_device pointer
4664 *
4665 * Fetches a GPU clock counter snapshot (SI).
4666 * Returns the 64 bit clock counter snapshot.
4667 */
4668uint64_t gfx_v7_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4669{
4670        uint64_t clock;
4671
4672        mutex_lock(&adev->gfx.gpu_clock_mutex);
4673        WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4674        clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
4675                ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4676        mutex_unlock(&adev->gfx.gpu_clock_mutex);
4677        return clock;
4678}
4679
4680static void gfx_v7_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4681                                          uint32_t vmid,
4682                                          uint32_t gds_base, uint32_t gds_size,
4683                                          uint32_t gws_base, uint32_t gws_size,
4684                                          uint32_t oa_base, uint32_t oa_size)
4685{
4686        gds_base = gds_base >> AMDGPU_GDS_SHIFT;
4687        gds_size = gds_size >> AMDGPU_GDS_SHIFT;
4688
4689        gws_base = gws_base >> AMDGPU_GWS_SHIFT;
4690        gws_size = gws_size >> AMDGPU_GWS_SHIFT;
4691
4692        oa_base = oa_base >> AMDGPU_OA_SHIFT;
4693        oa_size = oa_size >> AMDGPU_OA_SHIFT;
4694
4695        /* GDS Base */
4696        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4697        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4698                                WRITE_DATA_DST_SEL(0)));
4699        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
4700        amdgpu_ring_write(ring, 0);
4701        amdgpu_ring_write(ring, gds_base);
4702
4703        /* GDS Size */
4704        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4705        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4706                                WRITE_DATA_DST_SEL(0)));
4707        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
4708        amdgpu_ring_write(ring, 0);
4709        amdgpu_ring_write(ring, gds_size);
4710
4711        /* GWS */
4712        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4713        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4714                                WRITE_DATA_DST_SEL(0)));
4715        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
4716        amdgpu_ring_write(ring, 0);
4717        amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4718
4719        /* OA */
4720        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4721        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4722                                WRITE_DATA_DST_SEL(0)));
4723        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
4724        amdgpu_ring_write(ring, 0);
4725        amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
4726}
4727
4728static int gfx_v7_0_early_init(void *handle)
4729{
4730        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4731
4732        adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS;
4733        adev->gfx.num_compute_rings = GFX7_NUM_COMPUTE_RINGS;
4734        gfx_v7_0_set_ring_funcs(adev);
4735        gfx_v7_0_set_irq_funcs(adev);
4736        gfx_v7_0_set_gds_init(adev);
4737
4738        return 0;
4739}
4740
4741static int gfx_v7_0_sw_init(void *handle)
4742{
4743        struct amdgpu_ring *ring;
4744        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4745        int i, r;
4746
4747        /* EOP Event */
4748        r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
4749        if (r)
4750                return r;
4751
4752        /* Privileged reg */
4753        r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
4754        if (r)
4755                return r;
4756
4757        /* Privileged inst */
4758        r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
4759        if (r)
4760                return r;
4761
4762        gfx_v7_0_scratch_init(adev);
4763
4764        r = gfx_v7_0_init_microcode(adev);
4765        if (r) {
4766                DRM_ERROR("Failed to load gfx firmware!\n");
4767                return r;
4768        }
4769
4770        r = gfx_v7_0_rlc_init(adev);
4771        if (r) {
4772                DRM_ERROR("Failed to init rlc BOs!\n");
4773                return r;
4774        }
4775
4776        /* allocate mec buffers */
4777        r = gfx_v7_0_mec_init(adev);
4778        if (r) {
4779                DRM_ERROR("Failed to init MEC BOs!\n");
4780                return r;
4781        }
4782
4783        for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4784                ring = &adev->gfx.gfx_ring[i];
4785                ring->ring_obj = NULL;
4786                sprintf(ring->name, "gfx");
4787                r = amdgpu_ring_init(adev, ring, 1024 * 1024,
4788                                     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
4789                                     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
4790                                     AMDGPU_RING_TYPE_GFX);
4791                if (r)
4792                        return r;
4793        }
4794
4795        /* set up the compute queues */
4796        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4797                unsigned irq_type;
4798
4799                /* max 32 queues per MEC */
4800                if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
4801                        DRM_ERROR("Too many (%d) compute rings!\n", i);
4802                        break;
4803                }
4804                ring = &adev->gfx.compute_ring[i];
4805                ring->ring_obj = NULL;
4806                ring->use_doorbell = true;
4807                ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
4808                ring->me = 1; /* first MEC */
4809                ring->pipe = i / 8;
4810                ring->queue = i % 8;
4811                sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue);
4812                irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
4813                /* type-2 packets are deprecated on MEC, use type-3 instead */
4814                r = amdgpu_ring_init(adev, ring, 1024 * 1024,
4815                                     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
4816                                     &adev->gfx.eop_irq, irq_type,
4817                                     AMDGPU_RING_TYPE_COMPUTE);
4818                if (r)
4819                        return r;
4820        }
4821
4822        /* reserve GDS, GWS and OA resource for gfx */
4823        r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
4824                        PAGE_SIZE, true,
4825                        AMDGPU_GEM_DOMAIN_GDS, 0,
4826                        NULL, NULL, &adev->gds.gds_gfx_bo);
4827        if (r)
4828                return r;
4829
4830        r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
4831                PAGE_SIZE, true,
4832                AMDGPU_GEM_DOMAIN_GWS, 0,
4833                NULL, NULL, &adev->gds.gws_gfx_bo);
4834        if (r)
4835                return r;
4836
4837        r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
4838                        PAGE_SIZE, true,
4839                        AMDGPU_GEM_DOMAIN_OA, 0,
4840                        NULL, NULL, &adev->gds.oa_gfx_bo);
4841        if (r)
4842                return r;
4843
4844        return r;
4845}
4846
4847static int gfx_v7_0_sw_fini(void *handle)
4848{
4849        int i;
4850        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4851
4852        amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
4853        amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
4854        amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
4855
4856        for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4857                amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
4858        for (i = 0; i < adev->gfx.num_compute_rings; i++)
4859                amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
4860
4861        gfx_v7_0_cp_compute_fini(adev);
4862        gfx_v7_0_rlc_fini(adev);
4863        gfx_v7_0_mec_fini(adev);
4864
4865        return 0;
4866}
4867
4868static int gfx_v7_0_hw_init(void *handle)
4869{
4870        int r;
4871        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4872
4873        gfx_v7_0_gpu_init(adev);
4874
4875        /* init rlc */
4876        r = gfx_v7_0_rlc_resume(adev);
4877        if (r)
4878                return r;
4879
4880        r = gfx_v7_0_cp_resume(adev);
4881        if (r)
4882                return r;
4883
4884        adev->gfx.ce_ram_size = 0x8000;
4885
4886        return r;
4887}
4888
4889static int gfx_v7_0_hw_fini(void *handle)
4890{
4891        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4892
4893        gfx_v7_0_cp_enable(adev, false);
4894        gfx_v7_0_rlc_stop(adev);
4895        gfx_v7_0_fini_pg(adev);
4896
4897        return 0;
4898}
4899
/*
 * gfx_v7_0_suspend - suspend callback
 *
 * @handle: amdgpu_device pointer, passed as void *
 *
 * Suspend is the same as hardware teardown; returns what
 * gfx_v7_0_hw_fini() returns.
 */
static int gfx_v7_0_suspend(void *handle)
{
	/* hw_fini takes the same opaque handle, so forward it unchanged */
	return gfx_v7_0_hw_fini(handle);
}
4906
/*
 * gfx_v7_0_resume - resume callback
 *
 * @handle: amdgpu_device pointer, passed as void *
 *
 * Resume is the same as hardware bring-up; returns what
 * gfx_v7_0_hw_init() returns.
 */
static int gfx_v7_0_resume(void *handle)
{
	/* hw_init takes the same opaque handle, so forward it unchanged */
	return gfx_v7_0_hw_init(handle);
}
4913
4914static bool gfx_v7_0_is_idle(void *handle)
4915{
4916        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4917
4918        if (RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK)
4919                return false;
4920        else
4921                return true;
4922}
4923
4924static int gfx_v7_0_wait_for_idle(void *handle)
4925{
4926        unsigned i;
4927        u32 tmp;
4928        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4929
4930        for (i = 0; i < adev->usec_timeout; i++) {
4931                /* read MC_STATUS */
4932                tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
4933
4934                if (!tmp)
4935                        return 0;
4936                udelay(1);
4937        }
4938        return -ETIMEDOUT;
4939}
4940
4941static void gfx_v7_0_print_status(void *handle)
4942{
4943        int i;
4944        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4945
4946        dev_info(adev->dev, "GFX 7.x registers\n");
4947        dev_info(adev->dev, "  GRBM_STATUS=0x%08X\n",
4948                RREG32(mmGRBM_STATUS));
4949        dev_info(adev->dev, "  GRBM_STATUS2=0x%08X\n",
4950                RREG32(mmGRBM_STATUS2));
4951        dev_info(adev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4952                RREG32(mmGRBM_STATUS_SE0));
4953        dev_info(adev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4954                RREG32(mmGRBM_STATUS_SE1));
4955        dev_info(adev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4956                RREG32(mmGRBM_STATUS_SE2));
4957        dev_info(adev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4958                RREG32(mmGRBM_STATUS_SE3));
4959        dev_info(adev->dev, "  CP_STAT = 0x%08x\n", RREG32(mmCP_STAT));
4960        dev_info(adev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4961                 RREG32(mmCP_STALLED_STAT1));
4962        dev_info(adev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4963                 RREG32(mmCP_STALLED_STAT2));
4964        dev_info(adev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4965                 RREG32(mmCP_STALLED_STAT3));
4966        dev_info(adev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4967                 RREG32(mmCP_CPF_BUSY_STAT));
4968        dev_info(adev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4969                 RREG32(mmCP_CPF_STALLED_STAT1));
4970        dev_info(adev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS));
4971        dev_info(adev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT));
4972        dev_info(adev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4973                 RREG32(mmCP_CPC_STALLED_STAT1));
4974        dev_info(adev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS));
4975
4976        for (i = 0; i < 32; i++) {
4977                dev_info(adev->dev, "  GB_TILE_MODE%d=0x%08X\n",
4978                         i, RREG32(mmGB_TILE_MODE0 + (i * 4)));
4979        }
4980        for (i = 0; i < 16; i++) {
4981                dev_info(adev->dev, "  GB_MACROTILE_MODE%d=0x%08X\n",
4982                         i, RREG32(mmGB_MACROTILE_MODE0 + (i * 4)));
4983        }
4984        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
4985                dev_info(adev->dev, "  se: %d\n", i);
4986                gfx_v7_0_select_se_sh(adev, i, 0xffffffff);
4987                dev_info(adev->dev, "  PA_SC_RASTER_CONFIG=0x%08X\n",
4988                         RREG32(mmPA_SC_RASTER_CONFIG));
4989                dev_info(adev->dev, "  PA_SC_RASTER_CONFIG_1=0x%08X\n",
4990                         RREG32(mmPA_SC_RASTER_CONFIG_1));
4991        }
4992        gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
4993
4994        dev_info(adev->dev, "  GB_ADDR_CONFIG=0x%08X\n",
4995                 RREG32(mmGB_ADDR_CONFIG));
4996        dev_info(adev->dev, "  HDP_ADDR_CONFIG=0x%08X\n",
4997                 RREG32(mmHDP_ADDR_CONFIG));
4998        dev_info(adev->dev, "  DMIF_ADDR_CALC=0x%08X\n",
4999                 RREG32(mmDMIF_ADDR_CALC));
5000        dev_info(adev->dev, "  SDMA0_TILING_CONFIG=0x%08X\n",
5001                 RREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET));
5002        dev_info(adev->dev, "  SDMA1_TILING_CONFIG=0x%08X\n",
5003                 RREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET));
5004        dev_info(adev->dev, "  UVD_UDEC_ADDR_CONFIG=0x%08X\n",
5005                 RREG32(mmUVD_UDEC_ADDR_CONFIG));
5006        dev_info(adev->dev, "  UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n",
5007                 RREG32(mmUVD_UDEC_DB_ADDR_CONFIG));
5008        dev_info(adev->dev, "  UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n",
5009                 RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG));
5010
5011        dev_info(adev->dev, "  CP_MEQ_THRESHOLDS=0x%08X\n",
5012                 RREG32(mmCP_MEQ_THRESHOLDS));
5013        dev_info(adev->dev, "  SX_DEBUG_1=0x%08X\n",
5014                 RREG32(mmSX_DEBUG_1));
5015        dev_info(adev->dev, "  TA_CNTL_AUX=0x%08X\n",
5016                 RREG32(mmTA_CNTL_AUX));
5017        dev_info(adev->dev, "  SPI_CONFIG_CNTL=0x%08X\n",
5018                 RREG32(mmSPI_CONFIG_CNTL));
5019        dev_info(adev->dev, "  SQ_CONFIG=0x%08X\n",
5020                 RREG32(mmSQ_CONFIG));
5021        dev_info(adev->dev, "  DB_DEBUG=0x%08X\n",
5022                 RREG32(mmDB_DEBUG));
5023        dev_info(adev->dev, "  DB_DEBUG2=0x%08X\n",
5024                 RREG32(mmDB_DEBUG2));
5025        dev_info(adev->dev, "  DB_DEBUG3=0x%08X\n",
5026                 RREG32(mmDB_DEBUG3));
5027        dev_info(adev->dev, "  CB_HW_CONTROL=0x%08X\n",
5028                 RREG32(mmCB_HW_CONTROL));
5029        dev_info(adev->dev, "  SPI_CONFIG_CNTL_1=0x%08X\n",
5030                 RREG32(mmSPI_CONFIG_CNTL_1));
5031        dev_info(adev->dev, "  PA_SC_FIFO_SIZE=0x%08X\n",
5032                 RREG32(mmPA_SC_FIFO_SIZE));
5033        dev_info(adev->dev, "  VGT_NUM_INSTANCES=0x%08X\n",
5034                 RREG32(mmVGT_NUM_INSTANCES));
5035        dev_info(adev->dev, "  CP_PERFMON_CNTL=0x%08X\n",
5036                 RREG32(mmCP_PERFMON_CNTL));
5037        dev_info(adev->dev, "  PA_SC_FORCE_EOV_MAX_CNTS=0x%08X\n",
5038                 RREG32(mmPA_SC_FORCE_EOV_MAX_CNTS));
5039        dev_info(adev->dev, "  VGT_CACHE_INVALIDATION=0x%08X\n",
5040                 RREG32(mmVGT_CACHE_INVALIDATION));
5041        dev_info(adev->dev, "  VGT_GS_VERTEX_REUSE=0x%08X\n",
5042                 RREG32(mmVGT_GS_VERTEX_REUSE));
5043        dev_info(adev->dev, "  PA_SC_LINE_STIPPLE_STATE=0x%08X\n",
5044                 RREG32(mmPA_SC_LINE_STIPPLE_STATE));
5045        dev_info(adev->dev, "  PA_CL_ENHANCE=0x%08X\n",
5046                 RREG32(mmPA_CL_ENHANCE));
5047        dev_info(adev->dev, "  PA_SC_ENHANCE=0x%08X\n",
5048                 RREG32(mmPA_SC_ENHANCE));
5049
5050        dev_info(adev->dev, "  CP_ME_CNTL=0x%08X\n",
5051                 RREG32(mmCP_ME_CNTL));
5052        dev_info(adev->dev, "  CP_MAX_CONTEXT=0x%08X\n",
5053                 RREG32(mmCP_MAX_CONTEXT));
5054        dev_info(adev->dev, "  CP_ENDIAN_SWAP=0x%08X\n",
5055                 RREG32(mmCP_ENDIAN_SWAP));
5056        dev_info(adev->dev, "  CP_DEVICE_ID=0x%08X\n",
5057                 RREG32(mmCP_DEVICE_ID));
5058
5059        dev_info(adev->dev, "  CP_SEM_WAIT_TIMER=0x%08X\n",
5060                 RREG32(mmCP_SEM_WAIT_TIMER));
5061        if (adev->asic_type != CHIP_HAWAII)
5062                dev_info(adev->dev, "  CP_SEM_INCOMPLETE_TIMER_CNTL=0x%08X\n",
5063                         RREG32(mmCP_SEM_INCOMPLETE_TIMER_CNTL));
5064
5065        dev_info(adev->dev, "  CP_RB_WPTR_DELAY=0x%08X\n",
5066                 RREG32(mmCP_RB_WPTR_DELAY));
5067        dev_info(adev->dev, "  CP_RB_VMID=0x%08X\n",
5068                 RREG32(mmCP_RB_VMID));
5069        dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
5070                 RREG32(mmCP_RB0_CNTL));
5071        dev_info(adev->dev, "  CP_RB0_WPTR=0x%08X\n",
5072                 RREG32(mmCP_RB0_WPTR));
5073        dev_info(adev->dev, "  CP_RB0_RPTR_ADDR=0x%08X\n",
5074                 RREG32(mmCP_RB0_RPTR_ADDR));
5075        dev_info(adev->dev, "  CP_RB0_RPTR_ADDR_HI=0x%08X\n",
5076                 RREG32(mmCP_RB0_RPTR_ADDR_HI));
5077        dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
5078                 RREG32(mmCP_RB0_CNTL));
5079        dev_info(adev->dev, "  CP_RB0_BASE=0x%08X\n",
5080                 RREG32(mmCP_RB0_BASE));
5081        dev_info(adev->dev, "  CP_RB0_BASE_HI=0x%08X\n",
5082                 RREG32(mmCP_RB0_BASE_HI));
5083        dev_info(adev->dev, "  CP_MEC_CNTL=0x%08X\n",
5084                 RREG32(mmCP_MEC_CNTL));
5085        dev_info(adev->dev, "  CP_CPF_DEBUG=0x%08X\n",
5086                 RREG32(mmCP_CPF_DEBUG));
5087
5088        dev_info(adev->dev, "  SCRATCH_ADDR=0x%08X\n",
5089                 RREG32(mmSCRATCH_ADDR));
5090        dev_info(adev->dev, "  SCRATCH_UMSK=0x%08X\n",
5091                 RREG32(mmSCRATCH_UMSK));
5092
5093        /* init the pipes */
5094        mutex_lock(&adev->srbm_mutex);
5095        for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
5096                int me = (i < 4) ? 1 : 2;
5097                int pipe = (i < 4) ? i : (i - 4);
5098                int queue;
5099
5100                dev_info(adev->dev, "  me: %d, pipe: %d\n", me, pipe);
5101                cik_srbm_select(adev, me, pipe, 0, 0);
5102                dev_info(adev->dev, "  CP_HPD_EOP_BASE_ADDR=0x%08X\n",
5103                         RREG32(mmCP_HPD_EOP_BASE_ADDR));
5104                dev_info(adev->dev, "  CP_HPD_EOP_BASE_ADDR_HI=0x%08X\n",
5105                         RREG32(mmCP_HPD_EOP_BASE_ADDR_HI));
5106                dev_info(adev->dev, "  CP_HPD_EOP_VMID=0x%08X\n",
5107                         RREG32(mmCP_HPD_EOP_VMID));
5108                dev_info(adev->dev, "  CP_HPD_EOP_CONTROL=0x%08X\n",
5109                         RREG32(mmCP_HPD_EOP_CONTROL));
5110
5111                for (queue = 0; queue < 8; queue++) {
5112                        cik_srbm_select(adev, me, pipe, queue, 0);
5113                        dev_info(adev->dev, "  queue: %d\n", queue);
5114                        dev_info(adev->dev, "  CP_PQ_WPTR_POLL_CNTL=0x%08X\n",
5115                                 RREG32(mmCP_PQ_WPTR_POLL_CNTL));
5116                        dev_info(adev->dev, "  CP_HQD_PQ_DOORBELL_CONTROL=0x%08X\n",
5117                                 RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL));
5118                        dev_info(adev->dev, "  CP_HQD_ACTIVE=0x%08X\n",
5119                                 RREG32(mmCP_HQD_ACTIVE));
5120                        dev_info(adev->dev, "  CP_HQD_DEQUEUE_REQUEST=0x%08X\n",
5121                                 RREG32(mmCP_HQD_DEQUEUE_REQUEST));
5122                        dev_info(adev->dev, "  CP_HQD_PQ_RPTR=0x%08X\n",
5123                                 RREG32(mmCP_HQD_PQ_RPTR));
5124                        dev_info(adev->dev, "  CP_HQD_PQ_WPTR=0x%08X\n",
5125                                 RREG32(mmCP_HQD_PQ_WPTR));
5126                        dev_info(adev->dev, "  CP_HQD_PQ_BASE=0x%08X\n",
5127                                 RREG32(mmCP_HQD_PQ_BASE));
5128                        dev_info(adev->dev, "  CP_HQD_PQ_BASE_HI=0x%08X\n",
5129                                 RREG32(mmCP_HQD_PQ_BASE_HI));
5130                        dev_info(adev->dev, "  CP_HQD_PQ_CONTROL=0x%08X\n",
5131                                 RREG32(mmCP_HQD_PQ_CONTROL));
5132                        dev_info(adev->dev, "  CP_HQD_PQ_WPTR_POLL_ADDR=0x%08X\n",
5133                                 RREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR));
5134                        dev_info(adev->dev, "  CP_HQD_PQ_WPTR_POLL_ADDR_HI=0x%08X\n",
5135                                 RREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI));
5136                        dev_info(adev->dev, "  CP_HQD_PQ_RPTR_REPORT_ADDR=0x%08X\n",
5137                                 RREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR));
5138                        dev_info(adev->dev, "  CP_HQD_PQ_RPTR_REPORT_ADDR_HI=0x%08X\n",
5139                                 RREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI));
5140                        dev_info(adev->dev, "  CP_HQD_PQ_DOORBELL_CONTROL=0x%08X\n",
5141                                 RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL));
5142                        dev_info(adev->dev, "  CP_HQD_PQ_WPTR=0x%08X\n",
5143                                 RREG32(mmCP_HQD_PQ_WPTR));
5144                        dev_info(adev->dev, "  CP_HQD_VMID=0x%08X\n",
5145                                 RREG32(mmCP_HQD_VMID));
5146                        dev_info(adev->dev, "  CP_MQD_BASE_ADDR=0x%08X\n",
5147                                 RREG32(mmCP_MQD_BASE_ADDR));
5148                        dev_info(adev->dev, "  CP_MQD_BASE_ADDR_HI=0x%08X\n",
5149                                 RREG32(mmCP_MQD_BASE_ADDR_HI));
5150                        dev_info(adev->dev, "  CP_MQD_CONTROL=0x%08X\n",
5151                                 RREG32(mmCP_MQD_CONTROL));
5152                }
5153        }
5154        cik_srbm_select(adev, 0, 0, 0, 0);
5155        mutex_unlock(&adev->srbm_mutex);
5156
5157        dev_info(adev->dev, "  CP_INT_CNTL_RING0=0x%08X\n",
5158                 RREG32(mmCP_INT_CNTL_RING0));
5159        dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
5160                 RREG32(mmRLC_LB_CNTL));
5161        dev_info(adev->dev, "  RLC_CNTL=0x%08X\n",
5162                 RREG32(mmRLC_CNTL));
5163        dev_info(adev->dev, "  RLC_CGCG_CGLS_CTRL=0x%08X\n",
5164                 RREG32(mmRLC_CGCG_CGLS_CTRL));
5165        dev_info(adev->dev, "  RLC_LB_CNTR_INIT=0x%08X\n",
5166                 RREG32(mmRLC_LB_CNTR_INIT));
5167        dev_info(adev->dev, "  RLC_LB_CNTR_MAX=0x%08X\n",
5168                 RREG32(mmRLC_LB_CNTR_MAX));
5169        dev_info(adev->dev, "  RLC_LB_INIT_CU_MASK=0x%08X\n",
5170                 RREG32(mmRLC_LB_INIT_CU_MASK));
5171        dev_info(adev->dev, "  RLC_LB_PARAMS=0x%08X\n",
5172                 RREG32(mmRLC_LB_PARAMS));
5173        dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
5174                 RREG32(mmRLC_LB_CNTL));
5175        dev_info(adev->dev, "  RLC_MC_CNTL=0x%08X\n",
5176                 RREG32(mmRLC_MC_CNTL));
5177        dev_info(adev->dev, "  RLC_UCODE_CNTL=0x%08X\n",
5178                 RREG32(mmRLC_UCODE_CNTL));
5179
5180        if (adev->asic_type == CHIP_BONAIRE)
5181                dev_info(adev->dev, "  RLC_DRIVER_CPDMA_STATUS=0x%08X\n",
5182                         RREG32(mmRLC_DRIVER_CPDMA_STATUS));
5183
5184        mutex_lock(&adev->srbm_mutex);
5185        for (i = 0; i < 16; i++) {
5186                cik_srbm_select(adev, 0, 0, 0, i);
5187                dev_info(adev->dev, "  VM %d:\n", i);
5188                dev_info(adev->dev, "  SH_MEM_CONFIG=0x%08X\n",
5189                         RREG32(mmSH_MEM_CONFIG));
5190                dev_info(adev->dev, "  SH_MEM_APE1_BASE=0x%08X\n",
5191                         RREG32(mmSH_MEM_APE1_BASE));
5192                dev_info(adev->dev, "  SH_MEM_APE1_LIMIT=0x%08X\n",
5193                         RREG32(mmSH_MEM_APE1_LIMIT));
5194                dev_info(adev->dev, "  SH_MEM_BASES=0x%08X\n",
5195                         RREG32(mmSH_MEM_BASES));
5196        }
5197        cik_srbm_select(adev, 0, 0, 0, 0);
5198        mutex_unlock(&adev->srbm_mutex);
5199}
5200
5201static int gfx_v7_0_soft_reset(void *handle)
5202{
5203        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5204        u32 tmp;
5205        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5206
5207        /* GRBM_STATUS */
5208        tmp = RREG32(mmGRBM_STATUS);
5209        if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5210                   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5211                   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5212                   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5213                   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
5214                   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK))
5215                grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_CP_MASK |
5216                        GRBM_SOFT_RESET__SOFT_RESET_GFX_MASK;
5217
5218        if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
5219                grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_CP_MASK;
5220                srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_GRBM_MASK;
5221        }
5222
5223        /* GRBM_STATUS2 */
5224        tmp = RREG32(mmGRBM_STATUS2);
5225        if (tmp & GRBM_STATUS2__RLC_BUSY_MASK)
5226                grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_RLC_MASK;
5227
5228        /* SRBM_STATUS */
5229        tmp = RREG32(mmSRBM_STATUS);
5230        if (tmp & SRBM_STATUS__GRBM_RQ_PENDING_MASK)
5231                srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_GRBM_MASK;
5232
5233        if (grbm_soft_reset || srbm_soft_reset) {
5234                gfx_v7_0_print_status((void *)adev);
5235                /* disable CG/PG */
5236                gfx_v7_0_fini_pg(adev);
5237                gfx_v7_0_update_cg(adev, false);
5238
5239                /* stop the rlc */
5240                gfx_v7_0_rlc_stop(adev);
5241
5242                /* Disable GFX parsing/prefetching */
5243                WREG32(mmCP_ME_CNTL, CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK);
5244
5245                /* Disable MEC parsing/prefetching */
5246                WREG32(mmCP_MEC_CNTL, CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK);
5247
5248                if (grbm_soft_reset) {
5249                        tmp = RREG32(mmGRBM_SOFT_RESET);
5250                        tmp |= grbm_soft_reset;
5251                        dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5252                        WREG32(mmGRBM_SOFT_RESET, tmp);
5253                        tmp = RREG32(mmGRBM_SOFT_RESET);
5254
5255                        udelay(50);
5256
5257                        tmp &= ~grbm_soft_reset;
5258                        WREG32(mmGRBM_SOFT_RESET, tmp);
5259                        tmp = RREG32(mmGRBM_SOFT_RESET);
5260                }
5261
5262                if (srbm_soft_reset) {
5263                        tmp = RREG32(mmSRBM_SOFT_RESET);
5264                        tmp |= srbm_soft_reset;
5265                        dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5266                        WREG32(mmSRBM_SOFT_RESET, tmp);
5267                        tmp = RREG32(mmSRBM_SOFT_RESET);
5268
5269                        udelay(50);
5270
5271                        tmp &= ~srbm_soft_reset;
5272                        WREG32(mmSRBM_SOFT_RESET, tmp);
5273                        tmp = RREG32(mmSRBM_SOFT_RESET);
5274                }
5275                /* Wait a little for things to settle down */
5276                udelay(50);
5277                gfx_v7_0_print_status((void *)adev);
5278        }
5279        return 0;
5280}
5281
5282static void gfx_v7_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5283                                                 enum amdgpu_interrupt_state state)
5284{
5285        u32 cp_int_cntl;
5286
5287        switch (state) {
5288        case AMDGPU_IRQ_STATE_DISABLE:
5289                cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5290                cp_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
5291                WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5292                break;
5293        case AMDGPU_IRQ_STATE_ENABLE:
5294                cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5295                cp_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
5296                WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5297                break;
5298        default:
5299                break;
5300        }
5301}
5302
5303static void gfx_v7_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5304                                                     int me, int pipe,
5305                                                     enum amdgpu_interrupt_state state)
5306{
5307        u32 mec_int_cntl, mec_int_cntl_reg;
5308
5309        /*
5310         * amdgpu controls only pipe 0 of MEC1. That's why this function only
5311         * handles the setting of interrupts for this specific pipe. All other
5312         * pipes' interrupts are set by amdkfd.
5313         */
5314
5315        if (me == 1) {
5316                switch (pipe) {
5317                case 0:
5318                        mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
5319                        break;
5320                default:
5321                        DRM_DEBUG("invalid pipe %d\n", pipe);
5322                        return;
5323                }
5324        } else {
5325                DRM_DEBUG("invalid me %d\n", me);
5326                return;
5327        }
5328
5329        switch (state) {
5330        case AMDGPU_IRQ_STATE_DISABLE:
5331                mec_int_cntl = RREG32(mec_int_cntl_reg);
5332                mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
5333                WREG32(mec_int_cntl_reg, mec_int_cntl);
5334                break;
5335        case AMDGPU_IRQ_STATE_ENABLE:
5336                mec_int_cntl = RREG32(mec_int_cntl_reg);
5337                mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
5338                WREG32(mec_int_cntl_reg, mec_int_cntl);
5339                break;
5340        default:
5341                break;
5342        }
5343}
5344
5345static int gfx_v7_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5346                                             struct amdgpu_irq_src *src,
5347                                             unsigned type,
5348                                             enum amdgpu_interrupt_state state)
5349{
5350        u32 cp_int_cntl;
5351
5352        switch (state) {
5353        case AMDGPU_IRQ_STATE_DISABLE:
5354                cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5355                cp_int_cntl &= ~CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE_MASK;
5356                WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5357                break;
5358        case AMDGPU_IRQ_STATE_ENABLE:
5359                cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5360                cp_int_cntl |= CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE_MASK;
5361                WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5362                break;
5363        default:
5364                break;
5365        }
5366
5367        return 0;
5368}
5369
5370static int gfx_v7_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5371                                              struct amdgpu_irq_src *src,
5372                                              unsigned type,
5373                                              enum amdgpu_interrupt_state state)
5374{
5375        u32 cp_int_cntl;
5376
5377        switch (state) {
5378        case AMDGPU_IRQ_STATE_DISABLE:
5379                cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5380                cp_int_cntl &= ~CP_INT_CNTL_RING0__PRIV_INSTR_INT_ENABLE_MASK;
5381                WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5382                break;
5383        case AMDGPU_IRQ_STATE_ENABLE:
5384                cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5385                cp_int_cntl |= CP_INT_CNTL_RING0__PRIV_INSTR_INT_ENABLE_MASK;
5386                WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5387                break;
5388        default:
5389                break;
5390        }
5391
5392        return 0;
5393}
5394
5395static int gfx_v7_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5396                                            struct amdgpu_irq_src *src,
5397                                            unsigned type,
5398                                            enum amdgpu_interrupt_state state)
5399{
5400        switch (type) {
5401        case AMDGPU_CP_IRQ_GFX_EOP:
5402                gfx_v7_0_set_gfx_eop_interrupt_state(adev, state);
5403                break;
5404        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5405                gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5406                break;
5407        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5408                gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5409                break;
5410        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5411                gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5412                break;
5413        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5414                gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5415                break;
5416        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5417                gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5418                break;
5419        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5420                gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5421                break;
5422        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5423                gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5424                break;
5425        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5426                gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5427                break;
5428        default:
5429                break;
5430        }
5431        return 0;
5432}
5433
5434static int gfx_v7_0_eop_irq(struct amdgpu_device *adev,
5435                            struct amdgpu_irq_src *source,
5436                            struct amdgpu_iv_entry *entry)
5437{
5438        u8 me_id, pipe_id;
5439        struct amdgpu_ring *ring;
5440        int i;
5441
5442        DRM_DEBUG("IH: CP EOP\n");
5443        me_id = (entry->ring_id & 0x0c) >> 2;
5444        pipe_id = (entry->ring_id & 0x03) >> 0;
5445        switch (me_id) {
5446        case 0:
5447                amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5448                break;
5449        case 1:
5450        case 2:
5451                for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5452                        ring = &adev->gfx.compute_ring[i];
5453                        if ((ring->me == me_id) & (ring->pipe == pipe_id))
5454                                amdgpu_fence_process(ring);
5455                }
5456                break;
5457        }
5458        return 0;
5459}
5460
5461static int gfx_v7_0_priv_reg_irq(struct amdgpu_device *adev,
5462                                 struct amdgpu_irq_src *source,
5463                                 struct amdgpu_iv_entry *entry)
5464{
5465        DRM_ERROR("Illegal register access in command stream\n");
5466        schedule_work(&adev->reset_work);
5467        return 0;
5468}
5469
5470static int gfx_v7_0_priv_inst_irq(struct amdgpu_device *adev,
5471                                  struct amdgpu_irq_src *source,
5472                                  struct amdgpu_iv_entry *entry)
5473{
5474        DRM_ERROR("Illegal instruction in command stream\n");
5475        // XXX soft reset the gfx block only
5476        schedule_work(&adev->reset_work);
5477        return 0;
5478}
5479
5480static int gfx_v7_0_set_clockgating_state(void *handle,
5481                                          enum amd_clockgating_state state)
5482{
5483        bool gate = false;
5484        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5485
5486        if (state == AMD_CG_STATE_GATE)
5487                gate = true;
5488
5489        gfx_v7_0_enable_gui_idle_interrupt(adev, false);
5490        /* order matters! */
5491        if (gate) {
5492                gfx_v7_0_enable_mgcg(adev, true);
5493                gfx_v7_0_enable_cgcg(adev, true);
5494        } else {
5495                gfx_v7_0_enable_cgcg(adev, false);
5496                gfx_v7_0_enable_mgcg(adev, false);
5497        }
5498        gfx_v7_0_enable_gui_idle_interrupt(adev, true);
5499
5500        return 0;
5501}
5502
5503static int gfx_v7_0_set_powergating_state(void *handle,
5504                                          enum amd_powergating_state state)
5505{
5506        bool gate = false;
5507        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5508
5509        if (state == AMD_PG_STATE_GATE)
5510                gate = true;
5511
5512        if (adev->pg_flags & (AMDGPU_PG_SUPPORT_GFX_PG |
5513                              AMDGPU_PG_SUPPORT_GFX_SMG |
5514                              AMDGPU_PG_SUPPORT_GFX_DMG |
5515                              AMDGPU_PG_SUPPORT_CP |
5516                              AMDGPU_PG_SUPPORT_GDS |
5517                              AMDGPU_PG_SUPPORT_RLC_SMU_HS)) {
5518                gfx_v7_0_update_gfx_pg(adev, gate);
5519                if (adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_PG) {
5520                        gfx_v7_0_enable_cp_pg(adev, gate);
5521                        gfx_v7_0_enable_gds_pg(adev, gate);
5522                }
5523        }
5524
5525        return 0;
5526}
5527
5528const struct amd_ip_funcs gfx_v7_0_ip_funcs = {
5529        .early_init = gfx_v7_0_early_init,
5530        .late_init = NULL,
5531        .sw_init = gfx_v7_0_sw_init,
5532        .sw_fini = gfx_v7_0_sw_fini,
5533        .hw_init = gfx_v7_0_hw_init,
5534        .hw_fini = gfx_v7_0_hw_fini,
5535        .suspend = gfx_v7_0_suspend,
5536        .resume = gfx_v7_0_resume,
5537        .is_idle = gfx_v7_0_is_idle,
5538        .wait_for_idle = gfx_v7_0_wait_for_idle,
5539        .soft_reset = gfx_v7_0_soft_reset,
5540        .print_status = gfx_v7_0_print_status,
5541        .set_clockgating_state = gfx_v7_0_set_clockgating_state,
5542        .set_powergating_state = gfx_v7_0_set_powergating_state,
5543};
5544
5545/**
5546 * gfx_v7_0_ring_is_lockup - check if the 3D engine is locked up
5547 *
5548 * @adev: amdgpu_device pointer
5549 * @ring: amdgpu_ring structure holding ring information
5550 *
5551 * Check if the 3D engine is locked up (CIK).
5552 * Returns true if the engine is locked, false if not.
5553 */
5554static bool gfx_v7_0_ring_is_lockup(struct amdgpu_ring *ring)
5555{
5556        if (gfx_v7_0_is_idle(ring->adev)) {
5557                amdgpu_ring_lockup_update(ring);
5558                return false;
5559        }
5560        return amdgpu_ring_test_lockup(ring);
5561}
5562
5563static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
5564        .get_rptr = gfx_v7_0_ring_get_rptr_gfx,
5565        .get_wptr = gfx_v7_0_ring_get_wptr_gfx,
5566        .set_wptr = gfx_v7_0_ring_set_wptr_gfx,
5567        .parse_cs = NULL,
5568        .emit_ib = gfx_v7_0_ring_emit_ib_gfx,
5569        .emit_fence = gfx_v7_0_ring_emit_fence_gfx,
5570        .emit_semaphore = gfx_v7_0_ring_emit_semaphore,
5571        .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
5572        .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
5573        .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
5574        .test_ring = gfx_v7_0_ring_test_ring,
5575        .test_ib = gfx_v7_0_ring_test_ib,
5576        .is_lockup = gfx_v7_0_ring_is_lockup,
5577        .insert_nop = amdgpu_ring_insert_nop,
5578};
5579
5580static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
5581        .get_rptr = gfx_v7_0_ring_get_rptr_compute,
5582        .get_wptr = gfx_v7_0_ring_get_wptr_compute,
5583        .set_wptr = gfx_v7_0_ring_set_wptr_compute,
5584        .parse_cs = NULL,
5585        .emit_ib = gfx_v7_0_ring_emit_ib_compute,
5586        .emit_fence = gfx_v7_0_ring_emit_fence_compute,
5587        .emit_semaphore = gfx_v7_0_ring_emit_semaphore,
5588        .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
5589        .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
5590        .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
5591        .test_ring = gfx_v7_0_ring_test_ring,
5592        .test_ib = gfx_v7_0_ring_test_ib,
5593        .is_lockup = gfx_v7_0_ring_is_lockup,
5594        .insert_nop = amdgpu_ring_insert_nop,
5595};
5596
5597static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev)
5598{
5599        int i;
5600
5601        for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5602                adev->gfx.gfx_ring[i].funcs = &gfx_v7_0_ring_funcs_gfx;
5603        for (i = 0; i < adev->gfx.num_compute_rings; i++)
5604                adev->gfx.compute_ring[i].funcs = &gfx_v7_0_ring_funcs_compute;
5605}
5606
5607static const struct amdgpu_irq_src_funcs gfx_v7_0_eop_irq_funcs = {
5608        .set = gfx_v7_0_set_eop_interrupt_state,
5609        .process = gfx_v7_0_eop_irq,
5610};
5611
5612static const struct amdgpu_irq_src_funcs gfx_v7_0_priv_reg_irq_funcs = {
5613        .set = gfx_v7_0_set_priv_reg_fault_state,
5614        .process = gfx_v7_0_priv_reg_irq,
5615};
5616
5617static const struct amdgpu_irq_src_funcs gfx_v7_0_priv_inst_irq_funcs = {
5618        .set = gfx_v7_0_set_priv_inst_fault_state,
5619        .process = gfx_v7_0_priv_inst_irq,
5620};
5621
5622static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev)
5623{
5624        adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5625        adev->gfx.eop_irq.funcs = &gfx_v7_0_eop_irq_funcs;
5626
5627        adev->gfx.priv_reg_irq.num_types = 1;
5628        adev->gfx.priv_reg_irq.funcs = &gfx_v7_0_priv_reg_irq_funcs;
5629
5630        adev->gfx.priv_inst_irq.num_types = 1;
5631        adev->gfx.priv_inst_irq.funcs = &gfx_v7_0_priv_inst_irq_funcs;
5632}
5633
5634static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev)
5635{
5636        /* init asci gds info */
5637        adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
5638        adev->gds.gws.total_size = 64;
5639        adev->gds.oa.total_size = 16;
5640
5641        if (adev->gds.mem.total_size == 64 * 1024) {
5642                adev->gds.mem.gfx_partition_size = 4096;
5643                adev->gds.mem.cs_partition_size = 4096;
5644
5645                adev->gds.gws.gfx_partition_size = 4;
5646                adev->gds.gws.cs_partition_size = 4;
5647
5648                adev->gds.oa.gfx_partition_size = 4;
5649                adev->gds.oa.cs_partition_size = 1;
5650        } else {
5651                adev->gds.mem.gfx_partition_size = 1024;
5652                adev->gds.mem.cs_partition_size = 1024;
5653
5654                adev->gds.gws.gfx_partition_size = 16;
5655                adev->gds.gws.cs_partition_size = 16;
5656
5657                adev->gds.oa.gfx_partition_size = 4;
5658                adev->gds.oa.cs_partition_size = 4;
5659        }
5660}
5661
5662
5663int gfx_v7_0_get_cu_info(struct amdgpu_device *adev,
5664                                                                   struct amdgpu_cu_info *cu_info)
5665{
5666        int i, j, k, counter, active_cu_number = 0;
5667        u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
5668
5669        if (!adev || !cu_info)
5670                return -EINVAL;
5671
5672        mutex_lock(&adev->grbm_idx_mutex);
5673        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5674                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5675                        mask = 1;
5676                        ao_bitmap = 0;
5677                        counter = 0;
5678                        bitmap = gfx_v7_0_get_cu_active_bitmap(adev, i, j);
5679                        cu_info->bitmap[i][j] = bitmap;
5680
5681                        for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
5682                                if (bitmap & mask) {
5683                                        if (counter < 2)
5684                                                ao_bitmap |= mask;
5685                                        counter ++;
5686                                }
5687                                mask <<= 1;
5688                        }
5689                        active_cu_number += counter;
5690                        ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5691                }
5692        }
5693
5694        cu_info->number = active_cu_number;
5695        cu_info->ao_cu_mask = ao_cu_mask;
5696        mutex_unlock(&adev->grbm_idx_mutex);
5697        return 0;
5698}
5699