linux/drivers/gpu/drm/radeon/rv770.c
<<
>>
Prefs
   1/*
   2 * Copyright 2008 Advanced Micro Devices, Inc.
   3 * Copyright 2008 Red Hat Inc.
   4 * Copyright 2009 Jerome Glisse.
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a
   7 * copy of this software and associated documentation files (the "Software"),
   8 * to deal in the Software without restriction, including without limitation
   9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10 * and/or sell copies of the Software, and to permit persons to whom the
  11 * Software is furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  22 * OTHER DEALINGS IN THE SOFTWARE.
  23 *
  24 * Authors: Dave Airlie
  25 *          Alex Deucher
  26 *          Jerome Glisse
  27 */
  28#include <linux/firmware.h>
  29#include <linux/platform_device.h>
  30#include <linux/slab.h>
  31#include <drm/drmP.h>
  32#include "radeon.h"
  33#include "radeon_asic.h"
  34#include <drm/radeon_drm.h>
  35#include "rv770d.h"
  36#include "atom.h"
  37#include "avivod.h"
  38
  39#define R700_PFP_UCODE_SIZE 848
  40#define R700_PM4_UCODE_SIZE 1360
  41
  42static void rv770_gpu_init(struct radeon_device *rdev);
  43void rv770_fini(struct radeon_device *rdev);
  44static void rv770_pcie_gen2_enable(struct radeon_device *rdev);
  45
  46u32 rv770_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base)
  47{
  48        struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
  49        u32 tmp = RREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset);
  50        int i;
  51
  52        /* Lock the graphics update lock */
  53        tmp |= AVIVO_D1GRPH_UPDATE_LOCK;
  54        WREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset, tmp);
  55
  56        /* update the scanout addresses */
  57        if (radeon_crtc->crtc_id) {
  58                WREG32(D2GRPH_SECONDARY_SURFACE_ADDRESS_HIGH, upper_32_bits(crtc_base));
  59                WREG32(D2GRPH_PRIMARY_SURFACE_ADDRESS_HIGH, upper_32_bits(crtc_base));
  60        } else {
  61                WREG32(D1GRPH_SECONDARY_SURFACE_ADDRESS_HIGH, upper_32_bits(crtc_base));
  62                WREG32(D1GRPH_PRIMARY_SURFACE_ADDRESS_HIGH, upper_32_bits(crtc_base));
  63        }
  64        WREG32(D1GRPH_SECONDARY_SURFACE_ADDRESS + radeon_crtc->crtc_offset,
  65               (u32)crtc_base);
  66        WREG32(D1GRPH_PRIMARY_SURFACE_ADDRESS + radeon_crtc->crtc_offset,
  67               (u32)crtc_base);
  68
  69        /* Wait for update_pending to go high. */
  70        for (i = 0; i < rdev->usec_timeout; i++) {
  71                if (RREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset) & AVIVO_D1GRPH_SURFACE_UPDATE_PENDING)
  72                        break;
  73                udelay(1);
  74        }
  75        DRM_DEBUG("Update pending now high. Unlocking vupdate_lock.\n");
  76
  77        /* Unlock the lock, so double-buffering can take place inside vblank */
  78        tmp &= ~AVIVO_D1GRPH_UPDATE_LOCK;
  79        WREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset, tmp);
  80
  81        /* Return current update_pending status: */
  82        return RREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset) & AVIVO_D1GRPH_SURFACE_UPDATE_PENDING;
  83}
  84
  85/* get temperature in millidegrees */
  86int rv770_get_temp(struct radeon_device *rdev)
  87{
  88        u32 temp = (RREG32(CG_MULT_THERMAL_STATUS) & ASIC_T_MASK) >>
  89                ASIC_T_SHIFT;
  90        int actual_temp;
  91
  92        if (temp & 0x400)
  93                actual_temp = -256;
  94        else if (temp & 0x200)
  95                actual_temp = 255;
  96        else if (temp & 0x100) {
  97                actual_temp = temp & 0x1ff;
  98                actual_temp |= ~0x1ff;
  99        } else
 100                actual_temp = temp & 0xff;
 101
 102        return (actual_temp * 1000) / 2;
 103}
 104
 105void rv770_pm_misc(struct radeon_device *rdev)
 106{
 107        int req_ps_idx = rdev->pm.requested_power_state_index;
 108        int req_cm_idx = rdev->pm.requested_clock_mode_index;
 109        struct radeon_power_state *ps = &rdev->pm.power_state[req_ps_idx];
 110        struct radeon_voltage *voltage = &ps->clock_info[req_cm_idx].voltage;
 111
 112        if ((voltage->type == VOLTAGE_SW) && voltage->voltage) {
 113                /* 0xff01 is a flag rather then an actual voltage */
 114                if (voltage->voltage == 0xff01)
 115                        return;
 116                if (voltage->voltage != rdev->pm.current_vddc) {
 117                        radeon_atom_set_voltage(rdev, voltage->voltage, SET_VOLTAGE_TYPE_ASIC_VDDC);
 118                        rdev->pm.current_vddc = voltage->voltage;
 119                        DRM_DEBUG("Setting: v: %d\n", voltage->voltage);
 120                }
 121        }
 122}
 123
 124/*
 125 * GART
 126 */
 127static int rv770_pcie_gart_enable(struct radeon_device *rdev)
 128{
 129        u32 tmp;
 130        int r, i;
 131
 132        if (rdev->gart.robj == NULL) {
 133                dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
 134                return -EINVAL;
 135        }
 136        r = radeon_gart_table_vram_pin(rdev);
 137        if (r)
 138                return r;
 139        radeon_gart_restore(rdev);
 140        /* Setup L2 cache */
 141        WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ENABLE_L2_FRAGMENT_PROCESSING |
 142                                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
 143                                EFFECTIVE_L2_QUEUE_SIZE(7));
 144        WREG32(VM_L2_CNTL2, 0);
 145        WREG32(VM_L2_CNTL3, BANK_SELECT(0) | CACHE_UPDATE_MODE(2));
 146        /* Setup TLB control */
 147        tmp = ENABLE_L1_TLB | ENABLE_L1_FRAGMENT_PROCESSING |
 148                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
 149                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU |
 150                EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5);
 151        WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp);
 152        WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp);
 153        WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp);
 154        if (rdev->family == CHIP_RV740)
 155                WREG32(MC_VM_MD_L1_TLB3_CNTL, tmp);
 156        WREG32(MC_VM_MB_L1_TLB0_CNTL, tmp);
 157        WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp);
 158        WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp);
 159        WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp);
 160        WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
 161        WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
 162        WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
 163        WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
 164                                RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);
 165        WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
 166                        (u32)(rdev->dummy_page.addr >> 12));
 167        for (i = 1; i < 7; i++)
 168                WREG32(VM_CONTEXT0_CNTL + (i * 4), 0);
 169
 170        r600_pcie_gart_tlb_flush(rdev);
 171        DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
 172                 (unsigned)(rdev->mc.gtt_size >> 20),
 173                 (unsigned long long)rdev->gart.table_addr);
 174        rdev->gart.ready = true;
 175        return 0;
 176}
 177
 178static void rv770_pcie_gart_disable(struct radeon_device *rdev)
 179{
 180        u32 tmp;
 181        int i;
 182
 183        /* Disable all tables */
 184        for (i = 0; i < 7; i++)
 185                WREG32(VM_CONTEXT0_CNTL + (i * 4), 0);
 186
 187        /* Setup L2 cache */
 188        WREG32(VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING |
 189                                EFFECTIVE_L2_QUEUE_SIZE(7));
 190        WREG32(VM_L2_CNTL2, 0);
 191        WREG32(VM_L2_CNTL3, BANK_SELECT(0) | CACHE_UPDATE_MODE(2));
 192        /* Setup TLB control */
 193        tmp = EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5);
 194        WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp);
 195        WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp);
 196        WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp);
 197        WREG32(MC_VM_MB_L1_TLB0_CNTL, tmp);
 198        WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp);
 199        WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp);
 200        WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp);
 201        radeon_gart_table_vram_unpin(rdev);
 202}
 203
 204static void rv770_pcie_gart_fini(struct radeon_device *rdev)
 205{
 206        radeon_gart_fini(rdev);
 207        rv770_pcie_gart_disable(rdev);
 208        radeon_gart_table_vram_free(rdev);
 209}
 210
 211
 212static void rv770_agp_enable(struct radeon_device *rdev)
 213{
 214        u32 tmp;
 215        int i;
 216
 217        /* Setup L2 cache */
 218        WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ENABLE_L2_FRAGMENT_PROCESSING |
 219                                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
 220                                EFFECTIVE_L2_QUEUE_SIZE(7));
 221        WREG32(VM_L2_CNTL2, 0);
 222        WREG32(VM_L2_CNTL3, BANK_SELECT(0) | CACHE_UPDATE_MODE(2));
 223        /* Setup TLB control */
 224        tmp = ENABLE_L1_TLB | ENABLE_L1_FRAGMENT_PROCESSING |
 225                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
 226                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU |
 227                EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5);
 228        WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp);
 229        WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp);
 230        WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp);
 231        WREG32(MC_VM_MB_L1_TLB0_CNTL, tmp);
 232        WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp);
 233        WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp);
 234        WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp);
 235        for (i = 0; i < 7; i++)
 236                WREG32(VM_CONTEXT0_CNTL + (i * 4), 0);
 237}
 238
 239static void rv770_mc_program(struct radeon_device *rdev)
 240{
 241        struct rv515_mc_save save;
 242        u32 tmp;
 243        int i, j;
 244
 245        /* Initialize HDP */
 246        for (i = 0, j = 0; i < 32; i++, j += 0x18) {
 247                WREG32((0x2c14 + j), 0x00000000);
 248                WREG32((0x2c18 + j), 0x00000000);
 249                WREG32((0x2c1c + j), 0x00000000);
 250                WREG32((0x2c20 + j), 0x00000000);
 251                WREG32((0x2c24 + j), 0x00000000);
 252        }
 253        /* r7xx hw bug.  Read from HDP_DEBUG1 rather
 254         * than writing to HDP_REG_COHERENCY_FLUSH_CNTL
 255         */
 256        tmp = RREG32(HDP_DEBUG1);
 257
 258        rv515_mc_stop(rdev, &save);
 259        if (r600_mc_wait_for_idle(rdev)) {
 260                dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
 261        }
 262        /* Lockout access through VGA aperture*/
 263        WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
 264        /* Update configuration */
 265        if (rdev->flags & RADEON_IS_AGP) {
 266                if (rdev->mc.vram_start < rdev->mc.gtt_start) {
 267                        /* VRAM before AGP */
 268                        WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
 269                                rdev->mc.vram_start >> 12);
 270                        WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
 271                                rdev->mc.gtt_end >> 12);
 272                } else {
 273                        /* VRAM after AGP */
 274                        WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
 275                                rdev->mc.gtt_start >> 12);
 276                        WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
 277                                rdev->mc.vram_end >> 12);
 278                }
 279        } else {
 280                WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
 281                        rdev->mc.vram_start >> 12);
 282                WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
 283                        rdev->mc.vram_end >> 12);
 284        }
 285        WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, rdev->vram_scratch.gpu_addr >> 12);
 286        tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
 287        tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
 288        WREG32(MC_VM_FB_LOCATION, tmp);
 289        WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
 290        WREG32(HDP_NONSURFACE_INFO, (2 << 7));
 291        WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
 292        if (rdev->flags & RADEON_IS_AGP) {
 293                WREG32(MC_VM_AGP_TOP, rdev->mc.gtt_end >> 16);
 294                WREG32(MC_VM_AGP_BOT, rdev->mc.gtt_start >> 16);
 295                WREG32(MC_VM_AGP_BASE, rdev->mc.agp_base >> 22);
 296        } else {
 297                WREG32(MC_VM_AGP_BASE, 0);
 298                WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
 299                WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
 300        }
 301        if (r600_mc_wait_for_idle(rdev)) {
 302                dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
 303        }
 304        rv515_mc_resume(rdev, &save);
 305        /* we need to own VRAM, so turn off the VGA renderer here
 306         * to stop it overwriting our objects */
 307        rv515_vga_render_disable(rdev);
 308}
 309
 310
 311/*
 312 * CP.
 313 */
 314void r700_cp_stop(struct radeon_device *rdev)
 315{
 316        radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
 317        WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
 318        WREG32(SCRATCH_UMSK, 0);
 319        rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
 320}
 321
 322static int rv770_cp_load_microcode(struct radeon_device *rdev)
 323{
 324        const __be32 *fw_data;
 325        int i;
 326
 327        if (!rdev->me_fw || !rdev->pfp_fw)
 328                return -EINVAL;
 329
 330        r700_cp_stop(rdev);
 331        WREG32(CP_RB_CNTL,
 332#ifdef __BIG_ENDIAN
 333               BUF_SWAP_32BIT |
 334#endif
 335               RB_NO_UPDATE | RB_BLKSZ(15) | RB_BUFSZ(3));
 336
 337        /* Reset cp */
 338        WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
 339        RREG32(GRBM_SOFT_RESET);
 340        mdelay(15);
 341        WREG32(GRBM_SOFT_RESET, 0);
 342
 343        fw_data = (const __be32 *)rdev->pfp_fw->data;
 344        WREG32(CP_PFP_UCODE_ADDR, 0);
 345        for (i = 0; i < R700_PFP_UCODE_SIZE; i++)
 346                WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
 347        WREG32(CP_PFP_UCODE_ADDR, 0);
 348
 349        fw_data = (const __be32 *)rdev->me_fw->data;
 350        WREG32(CP_ME_RAM_WADDR, 0);
 351        for (i = 0; i < R700_PM4_UCODE_SIZE; i++)
 352                WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
 353
 354        WREG32(CP_PFP_UCODE_ADDR, 0);
 355        WREG32(CP_ME_RAM_WADDR, 0);
 356        WREG32(CP_ME_RAM_RADDR, 0);
 357        return 0;
 358}
 359
 360void r700_cp_fini(struct radeon_device *rdev)
 361{
 362        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 363        r700_cp_stop(rdev);
 364        radeon_ring_fini(rdev, ring);
 365        radeon_scratch_free(rdev, ring->rptr_save_reg);
 366}
 367
 368/*
 369 * Core functions
 370 */
 371static void rv770_gpu_init(struct radeon_device *rdev)
 372{
 373        int i, j, num_qd_pipes;
 374        u32 ta_aux_cntl;
 375        u32 sx_debug_1;
 376        u32 smx_dc_ctl0;
 377        u32 db_debug3;
 378        u32 num_gs_verts_per_thread;
 379        u32 vgt_gs_per_es;
 380        u32 gs_prim_buffer_depth = 0;
 381        u32 sq_ms_fifo_sizes;
 382        u32 sq_config;
 383        u32 sq_thread_resource_mgmt;
 384        u32 hdp_host_path_cntl;
 385        u32 sq_dyn_gpr_size_simd_ab_0;
 386        u32 gb_tiling_config = 0;
 387        u32 cc_rb_backend_disable = 0;
 388        u32 cc_gc_shader_pipe_config = 0;
 389        u32 mc_arb_ramcfg;
 390        u32 db_debug4, tmp;
 391        u32 inactive_pipes, shader_pipe_config;
 392        u32 disabled_rb_mask;
 393        unsigned active_number;
 394
 395        /* setup chip specs */
 396        rdev->config.rv770.tiling_group_size = 256;
 397        switch (rdev->family) {
 398        case CHIP_RV770:
 399                rdev->config.rv770.max_pipes = 4;
 400                rdev->config.rv770.max_tile_pipes = 8;
 401                rdev->config.rv770.max_simds = 10;
 402                rdev->config.rv770.max_backends = 4;
 403                rdev->config.rv770.max_gprs = 256;
 404                rdev->config.rv770.max_threads = 248;
 405                rdev->config.rv770.max_stack_entries = 512;
 406                rdev->config.rv770.max_hw_contexts = 8;
 407                rdev->config.rv770.max_gs_threads = 16 * 2;
 408                rdev->config.rv770.sx_max_export_size = 128;
 409                rdev->config.rv770.sx_max_export_pos_size = 16;
 410                rdev->config.rv770.sx_max_export_smx_size = 112;
 411                rdev->config.rv770.sq_num_cf_insts = 2;
 412
 413                rdev->config.rv770.sx_num_of_sets = 7;
 414                rdev->config.rv770.sc_prim_fifo_size = 0xF9;
 415                rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30;
 416                rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130;
 417                break;
 418        case CHIP_RV730:
 419                rdev->config.rv770.max_pipes = 2;
 420                rdev->config.rv770.max_tile_pipes = 4;
 421                rdev->config.rv770.max_simds = 8;
 422                rdev->config.rv770.max_backends = 2;
 423                rdev->config.rv770.max_gprs = 128;
 424                rdev->config.rv770.max_threads = 248;
 425                rdev->config.rv770.max_stack_entries = 256;
 426                rdev->config.rv770.max_hw_contexts = 8;
 427                rdev->config.rv770.max_gs_threads = 16 * 2;
 428                rdev->config.rv770.sx_max_export_size = 256;
 429                rdev->config.rv770.sx_max_export_pos_size = 32;
 430                rdev->config.rv770.sx_max_export_smx_size = 224;
 431                rdev->config.rv770.sq_num_cf_insts = 2;
 432
 433                rdev->config.rv770.sx_num_of_sets = 7;
 434                rdev->config.rv770.sc_prim_fifo_size = 0xf9;
 435                rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30;
 436                rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130;
 437                if (rdev->config.rv770.sx_max_export_pos_size > 16) {
 438                        rdev->config.rv770.sx_max_export_pos_size -= 16;
 439                        rdev->config.rv770.sx_max_export_smx_size += 16;
 440                }
 441                break;
 442        case CHIP_RV710:
 443                rdev->config.rv770.max_pipes = 2;
 444                rdev->config.rv770.max_tile_pipes = 2;
 445                rdev->config.rv770.max_simds = 2;
 446                rdev->config.rv770.max_backends = 1;
 447                rdev->config.rv770.max_gprs = 256;
 448                rdev->config.rv770.max_threads = 192;
 449                rdev->config.rv770.max_stack_entries = 256;
 450                rdev->config.rv770.max_hw_contexts = 4;
 451                rdev->config.rv770.max_gs_threads = 8 * 2;
 452                rdev->config.rv770.sx_max_export_size = 128;
 453                rdev->config.rv770.sx_max_export_pos_size = 16;
 454                rdev->config.rv770.sx_max_export_smx_size = 112;
 455                rdev->config.rv770.sq_num_cf_insts = 1;
 456
 457                rdev->config.rv770.sx_num_of_sets = 7;
 458                rdev->config.rv770.sc_prim_fifo_size = 0x40;
 459                rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30;
 460                rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130;
 461                break;
 462        case CHIP_RV740:
 463                rdev->config.rv770.max_pipes = 4;
 464                rdev->config.rv770.max_tile_pipes = 4;
 465                rdev->config.rv770.max_simds = 8;
 466                rdev->config.rv770.max_backends = 4;
 467                rdev->config.rv770.max_gprs = 256;
 468                rdev->config.rv770.max_threads = 248;
 469                rdev->config.rv770.max_stack_entries = 512;
 470                rdev->config.rv770.max_hw_contexts = 8;
 471                rdev->config.rv770.max_gs_threads = 16 * 2;
 472                rdev->config.rv770.sx_max_export_size = 256;
 473                rdev->config.rv770.sx_max_export_pos_size = 32;
 474                rdev->config.rv770.sx_max_export_smx_size = 224;
 475                rdev->config.rv770.sq_num_cf_insts = 2;
 476
 477                rdev->config.rv770.sx_num_of_sets = 7;
 478                rdev->config.rv770.sc_prim_fifo_size = 0x100;
 479                rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30;
 480                rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130;
 481
 482                if (rdev->config.rv770.sx_max_export_pos_size > 16) {
 483                        rdev->config.rv770.sx_max_export_pos_size -= 16;
 484                        rdev->config.rv770.sx_max_export_smx_size += 16;
 485                }
 486                break;
 487        default:
 488                break;
 489        }
 490
 491        /* Initialize HDP */
 492        j = 0;
 493        for (i = 0; i < 32; i++) {
 494                WREG32((0x2c14 + j), 0x00000000);
 495                WREG32((0x2c18 + j), 0x00000000);
 496                WREG32((0x2c1c + j), 0x00000000);
 497                WREG32((0x2c20 + j), 0x00000000);
 498                WREG32((0x2c24 + j), 0x00000000);
 499                j += 0x18;
 500        }
 501
 502        WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
 503
 504        /* setup tiling, simd, pipe config */
 505        mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
 506
 507        shader_pipe_config = RREG32(CC_GC_SHADER_PIPE_CONFIG);
 508        inactive_pipes = (shader_pipe_config & INACTIVE_QD_PIPES_MASK) >> INACTIVE_QD_PIPES_SHIFT;
 509        for (i = 0, tmp = 1, active_number = 0; i < R7XX_MAX_PIPES; i++) {
 510                if (!(inactive_pipes & tmp)) {
 511                        active_number++;
 512                }
 513                tmp <<= 1;
 514        }
 515        if (active_number == 1) {
 516                WREG32(SPI_CONFIG_CNTL, DISABLE_INTERP_1);
 517        } else {
 518                WREG32(SPI_CONFIG_CNTL, 0);
 519        }
 520
 521        cc_rb_backend_disable = RREG32(CC_RB_BACKEND_DISABLE) & 0x00ff0000;
 522        tmp = R7XX_MAX_BACKENDS - r600_count_pipe_bits(cc_rb_backend_disable >> 16);
 523        if (tmp < rdev->config.rv770.max_backends) {
 524                rdev->config.rv770.max_backends = tmp;
 525        }
 526
 527        cc_gc_shader_pipe_config = RREG32(CC_GC_SHADER_PIPE_CONFIG) & 0xffffff00;
 528        tmp = R7XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config >> 8) & R7XX_MAX_PIPES_MASK);
 529        if (tmp < rdev->config.rv770.max_pipes) {
 530                rdev->config.rv770.max_pipes = tmp;
 531        }
 532        tmp = R7XX_MAX_SIMDS - r600_count_pipe_bits((cc_gc_shader_pipe_config >> 16) & R7XX_MAX_SIMDS_MASK);
 533        if (tmp < rdev->config.rv770.max_simds) {
 534                rdev->config.rv770.max_simds = tmp;
 535        }
 536
 537        switch (rdev->config.rv770.max_tile_pipes) {
 538        case 1:
 539        default:
 540                gb_tiling_config = PIPE_TILING(0);
 541                break;
 542        case 2:
 543                gb_tiling_config = PIPE_TILING(1);
 544                break;
 545        case 4:
 546                gb_tiling_config = PIPE_TILING(2);
 547                break;
 548        case 8:
 549                gb_tiling_config = PIPE_TILING(3);
 550                break;
 551        }
 552        rdev->config.rv770.tiling_npipes = rdev->config.rv770.max_tile_pipes;
 553
 554        disabled_rb_mask = (RREG32(CC_RB_BACKEND_DISABLE) >> 16) & R7XX_MAX_BACKENDS_MASK;
 555        tmp = (gb_tiling_config & PIPE_TILING__MASK) >> PIPE_TILING__SHIFT;
 556        tmp = r6xx_remap_render_backend(rdev, tmp, rdev->config.rv770.max_backends,
 557                                        R7XX_MAX_BACKENDS, disabled_rb_mask);
 558        gb_tiling_config |= tmp << 16;
 559        rdev->config.rv770.backend_map = tmp;
 560
 561        if (rdev->family == CHIP_RV770)
 562                gb_tiling_config |= BANK_TILING(1);
 563        else {
 564                if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
 565                        gb_tiling_config |= BANK_TILING(1);
 566                else
 567                        gb_tiling_config |= BANK_TILING(0);
 568        }
 569        rdev->config.rv770.tiling_nbanks = 4 << ((gb_tiling_config >> 4) & 0x3);
 570        gb_tiling_config |= GROUP_SIZE((mc_arb_ramcfg & BURSTLENGTH_MASK) >> BURSTLENGTH_SHIFT);
 571        if (((mc_arb_ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT) > 3) {
 572                gb_tiling_config |= ROW_TILING(3);
 573                gb_tiling_config |= SAMPLE_SPLIT(3);
 574        } else {
 575                gb_tiling_config |=
 576                        ROW_TILING(((mc_arb_ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT));
 577                gb_tiling_config |=
 578                        SAMPLE_SPLIT(((mc_arb_ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT));
 579        }
 580
 581        gb_tiling_config |= BANK_SWAPS(1);
 582        rdev->config.rv770.tile_config = gb_tiling_config;
 583
 584        WREG32(GB_TILING_CONFIG, gb_tiling_config);
 585        WREG32(DCP_TILING_CONFIG, (gb_tiling_config & 0xffff));
 586        WREG32(HDP_TILING_CONFIG, (gb_tiling_config & 0xffff));
 587        WREG32(DMA_TILING_CONFIG, (gb_tiling_config & 0xffff));
 588        WREG32(DMA_TILING_CONFIG2, (gb_tiling_config & 0xffff));
 589
 590        WREG32(CGTS_SYS_TCC_DISABLE, 0);
 591        WREG32(CGTS_TCC_DISABLE, 0);
 592        WREG32(CGTS_USER_SYS_TCC_DISABLE, 0);
 593        WREG32(CGTS_USER_TCC_DISABLE, 0);
 594
 595
 596        num_qd_pipes = R7XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & INACTIVE_QD_PIPES_MASK) >> 8);
 597        WREG32(VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & DEALLOC_DIST_MASK);
 598        WREG32(VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & VTX_REUSE_DEPTH_MASK);
 599
 600        /* set HW defaults for 3D engine */
 601        WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
 602                                     ROQ_IB2_START(0x2b)));
 603
 604        WREG32(CP_MEQ_THRESHOLDS, STQ_SPLIT(0x30));
 605
 606        ta_aux_cntl = RREG32(TA_CNTL_AUX);
 607        WREG32(TA_CNTL_AUX, ta_aux_cntl | DISABLE_CUBE_ANISO);
 608
 609        sx_debug_1 = RREG32(SX_DEBUG_1);
 610        sx_debug_1 |= ENABLE_NEW_SMX_ADDRESS;
 611        WREG32(SX_DEBUG_1, sx_debug_1);
 612
 613        smx_dc_ctl0 = RREG32(SMX_DC_CTL0);
 614        smx_dc_ctl0 &= ~CACHE_DEPTH(0x1ff);
 615        smx_dc_ctl0 |= CACHE_DEPTH((rdev->config.rv770.sx_num_of_sets * 64) - 1);
 616        WREG32(SMX_DC_CTL0, smx_dc_ctl0);
 617
 618        if (rdev->family != CHIP_RV740)
 619                WREG32(SMX_EVENT_CTL, (ES_FLUSH_CTL(4) |
 620                                       GS_FLUSH_CTL(4) |
 621                                       ACK_FLUSH_CTL(3) |
 622                                       SYNC_FLUSH_CTL));
 623
 624        if (rdev->family != CHIP_RV770)
 625                WREG32(SMX_SAR_CTL0, 0x00003f3f);
 626
 627        db_debug3 = RREG32(DB_DEBUG3);
 628        db_debug3 &= ~DB_CLK_OFF_DELAY(0x1f);
 629        switch (rdev->family) {
 630        case CHIP_RV770:
 631        case CHIP_RV740:
 632                db_debug3 |= DB_CLK_OFF_DELAY(0x1f);
 633                break;
 634        case CHIP_RV710:
 635        case CHIP_RV730:
 636        default:
 637                db_debug3 |= DB_CLK_OFF_DELAY(2);
 638                break;
 639        }
 640        WREG32(DB_DEBUG3, db_debug3);
 641
 642        if (rdev->family != CHIP_RV770) {
 643                db_debug4 = RREG32(DB_DEBUG4);
 644                db_debug4 |= DISABLE_TILE_COVERED_FOR_PS_ITER;
 645                WREG32(DB_DEBUG4, db_debug4);
 646        }
 647
 648        WREG32(SX_EXPORT_BUFFER_SIZES, (COLOR_BUFFER_SIZE((rdev->config.rv770.sx_max_export_size / 4) - 1) |
 649                                        POSITION_BUFFER_SIZE((rdev->config.rv770.sx_max_export_pos_size / 4) - 1) |
 650                                        SMX_BUFFER_SIZE((rdev->config.rv770.sx_max_export_smx_size / 4) - 1)));
 651
 652        WREG32(PA_SC_FIFO_SIZE, (SC_PRIM_FIFO_SIZE(rdev->config.rv770.sc_prim_fifo_size) |
 653                                 SC_HIZ_TILE_FIFO_SIZE(rdev->config.rv770.sc_hiz_tile_fifo_size) |
 654                                 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.rv770.sc_earlyz_tile_fifo_fize)));
 655
 656        WREG32(PA_SC_MULTI_CHIP_CNTL, 0);
 657
 658        WREG32(VGT_NUM_INSTANCES, 1);
 659
 660        WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
 661
 662        WREG32(CP_PERFMON_CNTL, 0);
 663
 664        sq_ms_fifo_sizes = (CACHE_FIFO_SIZE(16 * rdev->config.rv770.sq_num_cf_insts) |
 665                            DONE_FIFO_HIWATER(0xe0) |
 666                            ALU_UPDATE_FIFO_HIWATER(0x8));
 667        switch (rdev->family) {
 668        case CHIP_RV770:
 669        case CHIP_RV730:
 670        case CHIP_RV710:
 671                sq_ms_fifo_sizes |= FETCH_FIFO_HIWATER(0x1);
 672                break;
 673        case CHIP_RV740:
 674        default:
 675                sq_ms_fifo_sizes |= FETCH_FIFO_HIWATER(0x4);
 676                break;
 677        }
 678        WREG32(SQ_MS_FIFO_SIZES, sq_ms_fifo_sizes);
 679
 680        /* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT
 681         * should be adjusted as needed by the 2D/3D drivers.  This just sets default values
 682         */
 683        sq_config = RREG32(SQ_CONFIG);
 684        sq_config &= ~(PS_PRIO(3) |
 685                       VS_PRIO(3) |
 686                       GS_PRIO(3) |
 687                       ES_PRIO(3));
 688        sq_config |= (DX9_CONSTS |
 689                      VC_ENABLE |
 690                      EXPORT_SRC_C |
 691                      PS_PRIO(0) |
 692                      VS_PRIO(1) |
 693                      GS_PRIO(2) |
 694                      ES_PRIO(3));
 695        if (rdev->family == CHIP_RV710)
 696                /* no vertex cache */
 697                sq_config &= ~VC_ENABLE;
 698
 699        WREG32(SQ_CONFIG, sq_config);
 700
 701        WREG32(SQ_GPR_RESOURCE_MGMT_1,  (NUM_PS_GPRS((rdev->config.rv770.max_gprs * 24)/64) |
 702                                         NUM_VS_GPRS((rdev->config.rv770.max_gprs * 24)/64) |
 703                                         NUM_CLAUSE_TEMP_GPRS(((rdev->config.rv770.max_gprs * 24)/64)/2)));
 704
 705        WREG32(SQ_GPR_RESOURCE_MGMT_2,  (NUM_GS_GPRS((rdev->config.rv770.max_gprs * 7)/64) |
 706                                         NUM_ES_GPRS((rdev->config.rv770.max_gprs * 7)/64)));
 707
 708        sq_thread_resource_mgmt = (NUM_PS_THREADS((rdev->config.rv770.max_threads * 4)/8) |
 709                                   NUM_VS_THREADS((rdev->config.rv770.max_threads * 2)/8) |
 710                                   NUM_ES_THREADS((rdev->config.rv770.max_threads * 1)/8));
 711        if (((rdev->config.rv770.max_threads * 1) / 8) > rdev->config.rv770.max_gs_threads)
 712                sq_thread_resource_mgmt |= NUM_GS_THREADS(rdev->config.rv770.max_gs_threads);
 713        else
 714                sq_thread_resource_mgmt |= NUM_GS_THREADS((rdev->config.rv770.max_gs_threads * 1)/8);
 715        WREG32(SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt);
 716
 717        WREG32(SQ_STACK_RESOURCE_MGMT_1, (NUM_PS_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4) |
 718                                                     NUM_VS_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4)));
 719
 720        WREG32(SQ_STACK_RESOURCE_MGMT_2, (NUM_GS_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4) |
 721                                                     NUM_ES_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4)));
 722
 723        sq_dyn_gpr_size_simd_ab_0 = (SIMDA_RING0((rdev->config.rv770.max_gprs * 38)/64) |
 724                                     SIMDA_RING1((rdev->config.rv770.max_gprs * 38)/64) |
 725                                     SIMDB_RING0((rdev->config.rv770.max_gprs * 38)/64) |
 726                                     SIMDB_RING1((rdev->config.rv770.max_gprs * 38)/64));
 727
 728        WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_0, sq_dyn_gpr_size_simd_ab_0);
 729        WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_1, sq_dyn_gpr_size_simd_ab_0);
 730        WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_2, sq_dyn_gpr_size_simd_ab_0);
 731        WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_3, sq_dyn_gpr_size_simd_ab_0);
 732        WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_4, sq_dyn_gpr_size_simd_ab_0);
 733        WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_5, sq_dyn_gpr_size_simd_ab_0);
 734        WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_6, sq_dyn_gpr_size_simd_ab_0);
 735        WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_7, sq_dyn_gpr_size_simd_ab_0);
 736
 737        WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
 738                                          FORCE_EOV_MAX_REZ_CNT(255)));
 739
 740        if (rdev->family == CHIP_RV710)
 741                WREG32(VGT_CACHE_INVALIDATION, (CACHE_INVALIDATION(TC_ONLY) |
 742                                                AUTO_INVLD_EN(ES_AND_GS_AUTO)));
 743        else
 744                WREG32(VGT_CACHE_INVALIDATION, (CACHE_INVALIDATION(VC_AND_TC) |
 745                                                AUTO_INVLD_EN(ES_AND_GS_AUTO)));
 746
 747        switch (rdev->family) {
 748        case CHIP_RV770:
 749        case CHIP_RV730:
 750        case CHIP_RV740:
 751                gs_prim_buffer_depth = 384;
 752                break;
 753        case CHIP_RV710:
 754                gs_prim_buffer_depth = 128;
 755                break;
 756        default:
 757                break;
 758        }
 759
 760        num_gs_verts_per_thread = rdev->config.rv770.max_pipes * 16;
 761        vgt_gs_per_es = gs_prim_buffer_depth + num_gs_verts_per_thread;
 762        /* Max value for this is 256 */
 763        if (vgt_gs_per_es > 256)
 764                vgt_gs_per_es = 256;
 765
 766        WREG32(VGT_ES_PER_GS, 128);
 767        WREG32(VGT_GS_PER_ES, vgt_gs_per_es);
 768        WREG32(VGT_GS_PER_VS, 2);
 769
 770        /* more default values. 2D/3D driver should adjust as needed */
 771        WREG32(VGT_GS_VERTEX_REUSE, 16);
 772        WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
 773        WREG32(VGT_STRMOUT_EN, 0);
 774        WREG32(SX_MISC, 0);
 775        WREG32(PA_SC_MODE_CNTL, 0);
 776        WREG32(PA_SC_EDGERULE, 0xaaaaaaaa);
 777        WREG32(PA_SC_AA_CONFIG, 0);
 778        WREG32(PA_SC_CLIPRECT_RULE, 0xffff);
 779        WREG32(PA_SC_LINE_STIPPLE, 0);
 780        WREG32(SPI_INPUT_Z, 0);
 781        WREG32(SPI_PS_IN_CONTROL_0, NUM_INTERP(2));
 782        WREG32(CB_COLOR7_FRAG, 0);
 783
 784        /* clear render buffer base addresses */
 785        WREG32(CB_COLOR0_BASE, 0);
 786        WREG32(CB_COLOR1_BASE, 0);
 787        WREG32(CB_COLOR2_BASE, 0);
 788        WREG32(CB_COLOR3_BASE, 0);
 789        WREG32(CB_COLOR4_BASE, 0);
 790        WREG32(CB_COLOR5_BASE, 0);
 791        WREG32(CB_COLOR6_BASE, 0);
 792        WREG32(CB_COLOR7_BASE, 0);
 793
 794        WREG32(TCP_CNTL, 0);
 795
 796        hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
 797        WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
 798
 799        WREG32(PA_SC_MULTI_CHIP_CNTL, 0);
 800
 801        WREG32(PA_CL_ENHANCE, (CLIP_VTX_REORDER_ENA |
 802                                          NUM_CLIP_SEQ(3)));
 803        WREG32(VC_ENHANCE, 0);
 804}
 805
 806void r700_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc)
 807{
 808        u64 size_bf, size_af;
 809
 810        if (mc->mc_vram_size > 0xE0000000) {
 811                /* leave room for at least 512M GTT */
 812                dev_warn(rdev->dev, "limiting VRAM\n");
 813                mc->real_vram_size = 0xE0000000;
 814                mc->mc_vram_size = 0xE0000000;
 815        }
 816        if (rdev->flags & RADEON_IS_AGP) {
 817                size_bf = mc->gtt_start;
 818                size_af = 0xFFFFFFFF - mc->gtt_end;
 819                if (size_bf > size_af) {
 820                        if (mc->mc_vram_size > size_bf) {
 821                                dev_warn(rdev->dev, "limiting VRAM\n");
 822                                mc->real_vram_size = size_bf;
 823                                mc->mc_vram_size = size_bf;
 824                        }
 825                        mc->vram_start = mc->gtt_start - mc->mc_vram_size;
 826                } else {
 827                        if (mc->mc_vram_size > size_af) {
 828                                dev_warn(rdev->dev, "limiting VRAM\n");
 829                                mc->real_vram_size = size_af;
 830                                mc->mc_vram_size = size_af;
 831                        }
 832                        mc->vram_start = mc->gtt_end + 1;
 833                }
 834                mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
 835                dev_info(rdev->dev, "VRAM: %lluM 0x%08llX - 0x%08llX (%lluM used)\n",
 836                                mc->mc_vram_size >> 20, mc->vram_start,
 837                                mc->vram_end, mc->real_vram_size >> 20);
 838        } else {
 839                radeon_vram_location(rdev, &rdev->mc, 0);
 840                rdev->mc.gtt_base_align = 0;
 841                radeon_gtt_location(rdev, mc);
 842        }
 843}
 844
 845static int rv770_mc_init(struct radeon_device *rdev)
 846{
 847        u32 tmp;
 848        int chansize, numchan;
 849
 850        /* Get VRAM informations */
 851        rdev->mc.vram_is_ddr = true;
 852        tmp = RREG32(MC_ARB_RAMCFG);
 853        if (tmp & CHANSIZE_OVERRIDE) {
 854                chansize = 16;
 855        } else if (tmp & CHANSIZE_MASK) {
 856                chansize = 64;
 857        } else {
 858                chansize = 32;
 859        }
 860        tmp = RREG32(MC_SHARED_CHMAP);
 861        switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
 862        case 0:
 863        default:
 864                numchan = 1;
 865                break;
 866        case 1:
 867                numchan = 2;
 868                break;
 869        case 2:
 870                numchan = 4;
 871                break;
 872        case 3:
 873                numchan = 8;
 874                break;
 875        }
 876        rdev->mc.vram_width = numchan * chansize;
 877        /* Could aper size report 0 ? */
 878        rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
 879        rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
 880        /* Setup GPU memory space */
 881        rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE);
 882        rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE);
 883        rdev->mc.visible_vram_size = rdev->mc.aper_size;
 884        r700_vram_gtt_location(rdev, &rdev->mc);
 885        radeon_update_bandwidth_info(rdev);
 886
 887        return 0;
 888}
 889
 890/**
 891 * rv770_copy_dma - copy pages using the DMA engine
 892 *
 893 * @rdev: radeon_device pointer
 894 * @src_offset: src GPU address
 895 * @dst_offset: dst GPU address
 896 * @num_gpu_pages: number of GPU pages to xfer
 897 * @fence: radeon fence object
 898 *
 899 * Copy GPU paging using the DMA engine (r7xx).
 900 * Used by the radeon ttm implementation to move pages if
 901 * registered as the asic copy callback.
 902 */
 903int rv770_copy_dma(struct radeon_device *rdev,
 904                  uint64_t src_offset, uint64_t dst_offset,
 905                  unsigned num_gpu_pages,
 906                  struct radeon_fence **fence)
 907{
 908        struct radeon_semaphore *sem = NULL;
 909        int ring_index = rdev->asic->copy.dma_ring_index;
 910        struct radeon_ring *ring = &rdev->ring[ring_index];
 911        u32 size_in_dw, cur_size_in_dw;
 912        int i, num_loops;
 913        int r = 0;
 914
 915        r = radeon_semaphore_create(rdev, &sem);
 916        if (r) {
 917                DRM_ERROR("radeon: moving bo (%d).\n", r);
 918                return r;
 919        }
 920
 921        size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
 922        num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFF);
 923        r = radeon_ring_lock(rdev, ring, num_loops * 5 + 8);
 924        if (r) {
 925                DRM_ERROR("radeon: moving bo (%d).\n", r);
 926                radeon_semaphore_free(rdev, &sem, NULL);
 927                return r;
 928        }
 929
 930        if (radeon_fence_need_sync(*fence, ring->idx)) {
 931                radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
 932                                            ring->idx);
 933                radeon_fence_note_sync(*fence, ring->idx);
 934        } else {
 935                radeon_semaphore_free(rdev, &sem, NULL);
 936        }
 937
 938        for (i = 0; i < num_loops; i++) {
 939                cur_size_in_dw = size_in_dw;
 940                if (cur_size_in_dw > 0xFFFF)
 941                        cur_size_in_dw = 0xFFFF;
 942                size_in_dw -= cur_size_in_dw;
 943                radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw));
 944                radeon_ring_write(ring, dst_offset & 0xfffffffc);
 945                radeon_ring_write(ring, src_offset & 0xfffffffc);
 946                radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
 947                radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
 948                src_offset += cur_size_in_dw * 4;
 949                dst_offset += cur_size_in_dw * 4;
 950        }
 951
 952        r = radeon_fence_emit(rdev, fence, ring->idx);
 953        if (r) {
 954                radeon_ring_unlock_undo(rdev, ring);
 955                return r;
 956        }
 957
 958        radeon_ring_unlock_commit(rdev, ring);
 959        radeon_semaphore_free(rdev, &sem, *fence);
 960
 961        return r;
 962}
 963
 964static int rv770_startup(struct radeon_device *rdev)
 965{
 966        struct radeon_ring *ring;
 967        int r;
 968
 969        /* enable pcie gen2 link */
 970        rv770_pcie_gen2_enable(rdev);
 971
 972        if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw) {
 973                r = r600_init_microcode(rdev);
 974                if (r) {
 975                        DRM_ERROR("Failed to load firmware!\n");
 976                        return r;
 977                }
 978        }
 979
 980        r = r600_vram_scratch_init(rdev);
 981        if (r)
 982                return r;
 983
 984        rv770_mc_program(rdev);
 985        if (rdev->flags & RADEON_IS_AGP) {
 986                rv770_agp_enable(rdev);
 987        } else {
 988                r = rv770_pcie_gart_enable(rdev);
 989                if (r)
 990                        return r;
 991        }
 992
 993        rv770_gpu_init(rdev);
 994        r = r600_blit_init(rdev);
 995        if (r) {
 996                r600_blit_fini(rdev);
 997                rdev->asic->copy.copy = NULL;
 998                dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
 999        }
1000
1001        /* allocate wb buffer */
1002        r = radeon_wb_init(rdev);
1003        if (r)
1004                return r;
1005
1006        r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
1007        if (r) {
1008                dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
1009                return r;
1010        }
1011
1012        r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
1013        if (r) {
1014                dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
1015                return r;
1016        }
1017
1018        /* Enable IRQ */
1019        r = r600_irq_init(rdev);
1020        if (r) {
1021                DRM_ERROR("radeon: IH init failed (%d).\n", r);
1022                radeon_irq_kms_fini(rdev);
1023                return r;
1024        }
1025        r600_irq_set(rdev);
1026
1027        ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
1028        r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
1029                             R600_CP_RB_RPTR, R600_CP_RB_WPTR,
1030                             0, 0xfffff, RADEON_CP_PACKET2);
1031        if (r)
1032                return r;
1033
1034        ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
1035        r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
1036                             DMA_RB_RPTR, DMA_RB_WPTR,
1037                             2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
1038        if (r)
1039                return r;
1040
1041        r = rv770_cp_load_microcode(rdev);
1042        if (r)
1043                return r;
1044        r = r600_cp_resume(rdev);
1045        if (r)
1046                return r;
1047
1048        r = r600_dma_resume(rdev);
1049        if (r)
1050                return r;
1051
1052        r = radeon_ib_pool_init(rdev);
1053        if (r) {
1054                dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
1055                return r;
1056        }
1057
1058        r = r600_audio_init(rdev);
1059        if (r) {
1060                DRM_ERROR("radeon: audio init failed\n");
1061                return r;
1062        }
1063
1064        return 0;
1065}
1066
1067int rv770_resume(struct radeon_device *rdev)
1068{
1069        int r;
1070
1071        /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
1072         * posting will perform necessary task to bring back GPU into good
1073         * shape.
1074         */
1075        /* post card */
1076        atom_asic_init(rdev->mode_info.atom_context);
1077
1078        rdev->accel_working = true;
1079        r = rv770_startup(rdev);
1080        if (r) {
1081                DRM_ERROR("r600 startup failed on resume\n");
1082                rdev->accel_working = false;
1083                return r;
1084        }
1085
1086        return r;
1087
1088}
1089
/*
 * rv770_suspend - quiesce the hardware before suspend
 *
 * Calls the audio, CP, DMA, interrupt, writeback and PCIE GART
 * stop/disable helpers, in that order.
 *
 * Always returns 0.
 */
int rv770_suspend(struct radeon_device *rdev)
{
	/* audio first, then the two command engines */
	r600_audio_fini(rdev);
	r700_cp_stop(rdev);
	r600_dma_stop(rdev);

	/* interrupts, writeback and finally the GART */
	r600_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	rv770_pcie_gart_disable(rdev);

	return 0;
}
1101
1102/* Plan is to move initialization in that function and use
1103 * helper function so that radeon_device_init pretty much
1104 * do nothing more than calling asic specific function. This
1105 * should also allow to remove a bunch of callback function
1106 * like vram_info.
1107 */
1108int rv770_init(struct radeon_device *rdev)
1109{
1110        int r;
1111
1112        /* Read BIOS */
1113        if (!radeon_get_bios(rdev)) {
1114                if (ASIC_IS_AVIVO(rdev))
1115                        return -EINVAL;
1116        }
1117        /* Must be an ATOMBIOS */
1118        if (!rdev->is_atom_bios) {
1119                dev_err(rdev->dev, "Expecting atombios for R600 GPU\n");
1120                return -EINVAL;
1121        }
1122        r = radeon_atombios_init(rdev);
1123        if (r)
1124                return r;
1125        /* Post card if necessary */
1126        if (!radeon_card_posted(rdev)) {
1127                if (!rdev->bios) {
1128                        dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
1129                        return -EINVAL;
1130                }
1131                DRM_INFO("GPU not posted. posting now...\n");
1132                atom_asic_init(rdev->mode_info.atom_context);
1133        }
1134        /* Initialize scratch registers */
1135        r600_scratch_init(rdev);
1136        /* Initialize surface registers */
1137        radeon_surface_init(rdev);
1138        /* Initialize clocks */
1139        radeon_get_clock_info(rdev->ddev);
1140        /* Fence driver */
1141        r = radeon_fence_driver_init(rdev);
1142        if (r)
1143                return r;
1144        /* initialize AGP */
1145        if (rdev->flags & RADEON_IS_AGP) {
1146                r = radeon_agp_init(rdev);
1147                if (r)
1148                        radeon_agp_disable(rdev);
1149        }
1150        r = rv770_mc_init(rdev);
1151        if (r)
1152                return r;
1153        /* Memory manager */
1154        r = radeon_bo_init(rdev);
1155        if (r)
1156                return r;
1157
1158        r = radeon_irq_kms_init(rdev);
1159        if (r)
1160                return r;
1161
1162        rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL;
1163        r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024);
1164
1165        rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL;
1166        r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024);
1167
1168        rdev->ih.ring_obj = NULL;
1169        r600_ih_ring_init(rdev, 64 * 1024);
1170
1171        r = r600_pcie_gart_init(rdev);
1172        if (r)
1173                return r;
1174
1175        rdev->accel_working = true;
1176        r = rv770_startup(rdev);
1177        if (r) {
1178                dev_err(rdev->dev, "disabling GPU acceleration\n");
1179                r700_cp_fini(rdev);
1180                r600_dma_fini(rdev);
1181                r600_irq_fini(rdev);
1182                radeon_wb_fini(rdev);
1183                radeon_ib_pool_fini(rdev);
1184                radeon_irq_kms_fini(rdev);
1185                rv770_pcie_gart_fini(rdev);
1186                rdev->accel_working = false;
1187        }
1188
1189        return 0;
1190}
1191
1192void rv770_fini(struct radeon_device *rdev)
1193{
1194        r600_blit_fini(rdev);
1195        r700_cp_fini(rdev);
1196        r600_dma_fini(rdev);
1197        r600_irq_fini(rdev);
1198        radeon_wb_fini(rdev);
1199        radeon_ib_pool_fini(rdev);
1200        radeon_irq_kms_fini(rdev);
1201        rv770_pcie_gart_fini(rdev);
1202        r600_vram_scratch_fini(rdev);
1203        radeon_gem_fini(rdev);
1204        radeon_fence_driver_fini(rdev);
1205        radeon_agp_fini(rdev);
1206        radeon_bo_fini(rdev);
1207        radeon_atombios_fini(rdev);
1208        kfree(rdev->bios);
1209        rdev->bios = NULL;
1210}
1211
1212static void rv770_pcie_gen2_enable(struct radeon_device *rdev)
1213{
1214        u32 link_width_cntl, lanes, speed_cntl, tmp;
1215        u16 link_cntl2;
1216        u32 mask;
1217        int ret;
1218
1219        if (radeon_pcie_gen2 == 0)
1220                return;
1221
1222        if (rdev->flags & RADEON_IS_IGP)
1223                return;
1224
1225        if (!(rdev->flags & RADEON_IS_PCIE))
1226                return;
1227
1228        /* x2 cards have a special sequence */
1229        if (ASIC_IS_X2(rdev))
1230                return;
1231
1232        ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
1233        if (ret != 0)
1234                return;
1235
1236        if (!(mask & DRM_PCIE_SPEED_50))
1237                return;
1238
1239        DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
1240
1241        /* advertise upconfig capability */
1242        link_width_cntl = RREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL);
1243        link_width_cntl &= ~LC_UPCONFIGURE_DIS;
1244        WREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
1245        link_width_cntl = RREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL);
1246        if (link_width_cntl & LC_RENEGOTIATION_SUPPORT) {
1247                lanes = (link_width_cntl & LC_LINK_WIDTH_RD_MASK) >> LC_LINK_WIDTH_RD_SHIFT;
1248                link_width_cntl &= ~(LC_LINK_WIDTH_MASK |
1249                                     LC_RECONFIG_ARC_MISSING_ESCAPE);
1250                link_width_cntl |= lanes | LC_RECONFIG_NOW |
1251                        LC_RENEGOTIATE_EN | LC_UPCONFIGURE_SUPPORT;
1252                WREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
1253        } else {
1254                link_width_cntl |= LC_UPCONFIGURE_DIS;
1255                WREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
1256        }
1257
1258        speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL);
1259        if ((speed_cntl & LC_OTHER_SIDE_EVER_SENT_GEN2) &&
1260            (speed_cntl & LC_OTHER_SIDE_SUPPORTS_GEN2)) {
1261
1262                tmp = RREG32(0x541c);
1263                WREG32(0x541c, tmp | 0x8);
1264                WREG32(MM_CFGREGS_CNTL, MM_WR_TO_CFG_EN);
1265                link_cntl2 = RREG16(0x4088);
1266                link_cntl2 &= ~TARGET_LINK_SPEED_MASK;
1267                link_cntl2 |= 0x2;
1268                WREG16(0x4088, link_cntl2);
1269                WREG32(MM_CFGREGS_CNTL, 0);
1270
1271                speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL);
1272                speed_cntl &= ~LC_TARGET_LINK_SPEED_OVERRIDE_EN;
1273                WREG32_PCIE_P(PCIE_LC_SPEED_CNTL, speed_cntl);
1274
1275                speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL);
1276                speed_cntl |= LC_CLR_FAILED_SPD_CHANGE_CNT;
1277                WREG32_PCIE_P(PCIE_LC_SPEED_CNTL, speed_cntl);
1278
1279                speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL);
1280                speed_cntl &= ~LC_CLR_FAILED_SPD_CHANGE_CNT;
1281                WREG32_PCIE_P(PCIE_LC_SPEED_CNTL, speed_cntl);
1282
1283                speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL);
1284                speed_cntl |= LC_GEN2_EN_STRAP;
1285                WREG32_PCIE_P(PCIE_LC_SPEED_CNTL, speed_cntl);
1286
1287        } else {
1288                link_width_cntl = RREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL);
1289                /* XXX: only disable it if gen1 bridge vendor == 0x111d or 0x1106 */
1290                if (1)
1291                        link_width_cntl |= LC_UPCONFIGURE_DIS;
1292                else
1293                        link_width_cntl &= ~LC_UPCONFIGURE_DIS;
1294                WREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
1295        }
1296}
1297