linux/drivers/gpu/drm/radeon/r100.c
<<
>>
Prefs
   1/*
   2 * Copyright 2008 Advanced Micro Devices, Inc.
   3 * Copyright 2008 Red Hat Inc.
   4 * Copyright 2009 Jerome Glisse.
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a
   7 * copy of this software and associated documentation files (the "Software"),
   8 * to deal in the Software without restriction, including without limitation
   9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10 * and/or sell copies of the Software, and to permit persons to whom the
  11 * Software is furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  22 * OTHER DEALINGS IN THE SOFTWARE.
  23 *
  24 * Authors: Dave Airlie
  25 *          Alex Deucher
  26 *          Jerome Glisse
  27 */
  28
  29#include <linux/firmware.h>
  30#include <linux/module.h>
  31#include <linux/pci.h>
  32#include <linux/seq_file.h>
  33#include <linux/slab.h>
  34
  35#include <drm/drm_debugfs.h>
  36#include <drm/drm_device.h>
  37#include <drm/drm_file.h>
  38#include <drm/drm_fourcc.h>
  39#include <drm/drm_vblank.h>
  40#include <drm/radeon_drm.h>
  41
  42#include "atom.h"
  43#include "r100_reg_safe.h"
  44#include "r100d.h"
  45#include "radeon.h"
  46#include "radeon_asic.h"
  47#include "radeon_reg.h"
  48#include "rn50_reg_safe.h"
  49#include "rs100d.h"
  50#include "rv200d.h"
  51#include "rv250d.h"
  52
   53/* Firmware names: *_cp.bin image paths keyed by chip name, each listed through MODULE_FIRMWARE() below */
   54#define FIRMWARE_R100           "radeon/R100_cp.bin"
   55#define FIRMWARE_R200           "radeon/R200_cp.bin"
   56#define FIRMWARE_R300           "radeon/R300_cp.bin"
   57#define FIRMWARE_R420           "radeon/R420_cp.bin"
   58#define FIRMWARE_RS690          "radeon/RS690_cp.bin"
   59#define FIRMWARE_RS600          "radeon/RS600_cp.bin"
   60#define FIRMWARE_R520           "radeon/R520_cp.bin"
   61
   62MODULE_FIRMWARE(FIRMWARE_R100);
   63MODULE_FIRMWARE(FIRMWARE_R200);
   64MODULE_FIRMWARE(FIRMWARE_R300);
   65MODULE_FIRMWARE(FIRMWARE_R420);
   66MODULE_FIRMWARE(FIRMWARE_RS690);
   67MODULE_FIRMWARE(FIRMWARE_RS600);
   68MODULE_FIRMWARE(FIRMWARE_R520);
  69
  70#include "r100_track.h"
  71
   72/* This file gathers functions specific to:
  73 * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280
  74 * and others in some cases.
  75 */
  76
  77static bool r100_is_in_vblank(struct radeon_device *rdev, int crtc)
  78{
  79        if (crtc == 0) {
  80                if (RREG32(RADEON_CRTC_STATUS) & RADEON_CRTC_VBLANK_CUR)
  81                        return true;
  82                else
  83                        return false;
  84        } else {
  85                if (RREG32(RADEON_CRTC2_STATUS) & RADEON_CRTC2_VBLANK_CUR)
  86                        return true;
  87                else
  88                        return false;
  89        }
  90}
  91
  92static bool r100_is_counter_moving(struct radeon_device *rdev, int crtc)
  93{
  94        u32 vline1, vline2;
  95
  96        if (crtc == 0) {
  97                vline1 = (RREG32(RADEON_CRTC_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
  98                vline2 = (RREG32(RADEON_CRTC_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
  99        } else {
 100                vline1 = (RREG32(RADEON_CRTC2_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
 101                vline2 = (RREG32(RADEON_CRTC2_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
 102        }
 103        if (vline1 != vline2)
 104                return true;
 105        else
 106                return false;
 107}
 108
 109/**
 110 * r100_wait_for_vblank - vblank wait asic callback.
 111 *
 112 * @rdev: radeon_device pointer
 113 * @crtc: crtc to wait for vblank on
 114 *
 115 * Wait for vblank on the requested crtc (r1xx-r4xx).
 116 */
 117void r100_wait_for_vblank(struct radeon_device *rdev, int crtc)
 118{
 119        unsigned i = 0;
 120
 121        if (crtc >= rdev->num_crtc)
 122                return;
 123
 124        if (crtc == 0) {
 125                if (!(RREG32(RADEON_CRTC_GEN_CNTL) & RADEON_CRTC_EN))
 126                        return;
 127        } else {
 128                if (!(RREG32(RADEON_CRTC2_GEN_CNTL) & RADEON_CRTC2_EN))
 129                        return;
 130        }
 131
 132        /* depending on when we hit vblank, we may be close to active; if so,
 133         * wait for another frame.
 134         */
 135        while (r100_is_in_vblank(rdev, crtc)) {
 136                if (i++ % 100 == 0) {
 137                        if (!r100_is_counter_moving(rdev, crtc))
 138                                break;
 139                }
 140        }
 141
 142        while (!r100_is_in_vblank(rdev, crtc)) {
 143                if (i++ % 100 == 0) {
 144                        if (!r100_is_counter_moving(rdev, crtc))
 145                                break;
 146                }
 147        }
 148}
 149
 150/**
 151 * r100_page_flip - pageflip callback.
 152 *
 153 * @rdev: radeon_device pointer
 154 * @crtc_id: crtc to cleanup pageflip on
 155 * @crtc_base: new address of the crtc (GPU MC address)
 156 * @async: asynchronous flip
 157 *
 158 * Does the actual pageflip (r1xx-r4xx).
 159 * During vblank we take the crtc lock and wait for the update_pending
 160 * bit to go high, when it does, we release the lock, and allow the
 161 * double buffered update to take place.
 162 */
 163void r100_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base, bool async)
 164{
 165        struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
 166        u32 tmp = ((u32)crtc_base) | RADEON_CRTC_OFFSET__OFFSET_LOCK;
 167        int i;
 168
 169        /* Lock the graphics update lock */
 170        /* update the scanout addresses */
 171        WREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset, tmp);
 172
 173        /* Wait for update_pending to go high. */
 174        for (i = 0; i < rdev->usec_timeout; i++) {
 175                if (RREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset) & RADEON_CRTC_OFFSET__GUI_TRIG_OFFSET)
 176                        break;
 177                udelay(1);
 178        }
 179        DRM_DEBUG("Update pending now high. Unlocking vupdate_lock.\n");
 180
 181        /* Unlock the lock, so double-buffering can take place inside vblank */
 182        tmp &= ~RADEON_CRTC_OFFSET__OFFSET_LOCK;
 183        WREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset, tmp);
 184
 185}
 186
 187/**
 188 * r100_page_flip_pending - check if page flip is still pending
 189 *
 190 * @rdev: radeon_device pointer
 191 * @crtc_id: crtc to check
 192 *
 193 * Check if the last pagefilp is still pending (r1xx-r4xx).
 194 * Returns the current update pending status.
 195 */
 196bool r100_page_flip_pending(struct radeon_device *rdev, int crtc_id)
 197{
 198        struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
 199
 200        /* Return current update_pending status: */
 201        return !!(RREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset) &
 202                RADEON_CRTC_OFFSET__GUI_TRIG_OFFSET);
 203}
 204
 205/**
 206 * r100_pm_get_dynpm_state - look up dynpm power state callback.
 207 *
 208 * @rdev: radeon_device pointer
 209 *
 210 * Look up the optimal power state based on the
 211 * current state of the GPU (r1xx-r5xx).
 212 * Used for dynpm only.
 213 */
 214void r100_pm_get_dynpm_state(struct radeon_device *rdev)
 215{
 216        int i;
 217        rdev->pm.dynpm_can_upclock = true;
 218        rdev->pm.dynpm_can_downclock = true;
 219
 220        switch (rdev->pm.dynpm_planned_action) {
 221        case DYNPM_ACTION_MINIMUM:
 222                rdev->pm.requested_power_state_index = 0;
 223                rdev->pm.dynpm_can_downclock = false;
 224                break;
 225        case DYNPM_ACTION_DOWNCLOCK:
 226                if (rdev->pm.current_power_state_index == 0) {
 227                        rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
 228                        rdev->pm.dynpm_can_downclock = false;
 229                } else {
 230                        if (rdev->pm.active_crtc_count > 1) {
 231                                for (i = 0; i < rdev->pm.num_power_states; i++) {
 232                                        if (rdev->pm.power_state[i].flags & RADEON_PM_STATE_SINGLE_DISPLAY_ONLY)
 233                                                continue;
 234                                        else if (i >= rdev->pm.current_power_state_index) {
 235                                                rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
 236                                                break;
 237                                        } else {
 238                                                rdev->pm.requested_power_state_index = i;
 239                                                break;
 240                                        }
 241                                }
 242                        } else
 243                                rdev->pm.requested_power_state_index =
 244                                        rdev->pm.current_power_state_index - 1;
 245                }
 246                /* don't use the power state if crtcs are active and no display flag is set */
 247                if ((rdev->pm.active_crtc_count > 0) &&
 248                    (rdev->pm.power_state[rdev->pm.requested_power_state_index].clock_info[0].flags &
 249                     RADEON_PM_MODE_NO_DISPLAY)) {
 250                        rdev->pm.requested_power_state_index++;
 251                }
 252                break;
 253        case DYNPM_ACTION_UPCLOCK:
 254                if (rdev->pm.current_power_state_index == (rdev->pm.num_power_states - 1)) {
 255                        rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
 256                        rdev->pm.dynpm_can_upclock = false;
 257                } else {
 258                        if (rdev->pm.active_crtc_count > 1) {
 259                                for (i = (rdev->pm.num_power_states - 1); i >= 0; i--) {
 260                                        if (rdev->pm.power_state[i].flags & RADEON_PM_STATE_SINGLE_DISPLAY_ONLY)
 261                                                continue;
 262                                        else if (i <= rdev->pm.current_power_state_index) {
 263                                                rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
 264                                                break;
 265                                        } else {
 266                                                rdev->pm.requested_power_state_index = i;
 267                                                break;
 268                                        }
 269                                }
 270                        } else
 271                                rdev->pm.requested_power_state_index =
 272                                        rdev->pm.current_power_state_index + 1;
 273                }
 274                break;
 275        case DYNPM_ACTION_DEFAULT:
 276                rdev->pm.requested_power_state_index = rdev->pm.default_power_state_index;
 277                rdev->pm.dynpm_can_upclock = false;
 278                break;
 279        case DYNPM_ACTION_NONE:
 280        default:
 281                DRM_ERROR("Requested mode for not defined action\n");
 282                return;
 283        }
 284        /* only one clock mode per power state */
 285        rdev->pm.requested_clock_mode_index = 0;
 286
 287        DRM_DEBUG_DRIVER("Requested: e: %d m: %d p: %d\n",
 288                  rdev->pm.power_state[rdev->pm.requested_power_state_index].
 289                  clock_info[rdev->pm.requested_clock_mode_index].sclk,
 290                  rdev->pm.power_state[rdev->pm.requested_power_state_index].
 291                  clock_info[rdev->pm.requested_clock_mode_index].mclk,
 292                  rdev->pm.power_state[rdev->pm.requested_power_state_index].
 293                  pcie_lanes);
 294}
 295
 296/**
 297 * r100_pm_init_profile - Initialize power profiles callback.
 298 *
 299 * @rdev: radeon_device pointer
 300 *
 301 * Initialize the power states used in profile mode
 302 * (r1xx-r3xx).
 303 * Used for profile mode only.
 304 */
 305void r100_pm_init_profile(struct radeon_device *rdev)
 306{
 307        /* default */
 308        rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_off_ps_idx = rdev->pm.default_power_state_index;
 309        rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
 310        rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_off_cm_idx = 0;
 311        rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_on_cm_idx = 0;
 312        /* low sh */
 313        rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_off_ps_idx = 0;
 314        rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_on_ps_idx = 0;
 315        rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_off_cm_idx = 0;
 316        rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_on_cm_idx = 0;
 317        /* mid sh */
 318        rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_off_ps_idx = 0;
 319        rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_on_ps_idx = 0;
 320        rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_off_cm_idx = 0;
 321        rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_on_cm_idx = 0;
 322        /* high sh */
 323        rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_off_ps_idx = 0;
 324        rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
 325        rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_off_cm_idx = 0;
 326        rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_on_cm_idx = 0;
 327        /* low mh */
 328        rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_off_ps_idx = 0;
 329        rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
 330        rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_off_cm_idx = 0;
 331        rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_on_cm_idx = 0;
 332        /* mid mh */
 333        rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_off_ps_idx = 0;
 334        rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
 335        rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_off_cm_idx = 0;
 336        rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_on_cm_idx = 0;
 337        /* high mh */
 338        rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_off_ps_idx = 0;
 339        rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
 340        rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_off_cm_idx = 0;
 341        rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_on_cm_idx = 0;
 342}
 343
 344/**
 345 * r100_pm_misc - set additional pm hw parameters callback.
 346 *
 347 * @rdev: radeon_device pointer
 348 *
 349 * Set non-clock parameters associated with a power state
 350 * (voltage, pcie lanes, etc.) (r1xx-r4xx).
 351 */
 352void r100_pm_misc(struct radeon_device *rdev)
 353{
 354        int requested_index = rdev->pm.requested_power_state_index;
 355        struct radeon_power_state *ps = &rdev->pm.power_state[requested_index];
 356        struct radeon_voltage *voltage = &ps->clock_info[0].voltage;
 357        u32 tmp, sclk_cntl, sclk_cntl2, sclk_more_cntl;
 358
 359        if ((voltage->type == VOLTAGE_GPIO) && (voltage->gpio.valid)) {
 360                if (ps->misc & ATOM_PM_MISCINFO_VOLTAGE_DROP_SUPPORT) {
 361                        tmp = RREG32(voltage->gpio.reg);
 362                        if (voltage->active_high)
 363                                tmp |= voltage->gpio.mask;
 364                        else
 365                                tmp &= ~(voltage->gpio.mask);
 366                        WREG32(voltage->gpio.reg, tmp);
 367                        if (voltage->delay)
 368                                udelay(voltage->delay);
 369                } else {
 370                        tmp = RREG32(voltage->gpio.reg);
 371                        if (voltage->active_high)
 372                                tmp &= ~voltage->gpio.mask;
 373                        else
 374                                tmp |= voltage->gpio.mask;
 375                        WREG32(voltage->gpio.reg, tmp);
 376                        if (voltage->delay)
 377                                udelay(voltage->delay);
 378                }
 379        }
 380
 381        sclk_cntl = RREG32_PLL(SCLK_CNTL);
 382        sclk_cntl2 = RREG32_PLL(SCLK_CNTL2);
 383        sclk_cntl2 &= ~REDUCED_SPEED_SCLK_SEL(3);
 384        sclk_more_cntl = RREG32_PLL(SCLK_MORE_CNTL);
 385        sclk_more_cntl &= ~VOLTAGE_DELAY_SEL(3);
 386        if (ps->misc & ATOM_PM_MISCINFO_ASIC_REDUCED_SPEED_SCLK_EN) {
 387                sclk_more_cntl |= REDUCED_SPEED_SCLK_EN;
 388                if (ps->misc & ATOM_PM_MISCINFO_DYN_CLK_3D_IDLE)
 389                        sclk_cntl2 |= REDUCED_SPEED_SCLK_MODE;
 390                else
 391                        sclk_cntl2 &= ~REDUCED_SPEED_SCLK_MODE;
 392                if (ps->misc & ATOM_PM_MISCINFO_DYNAMIC_CLOCK_DIVIDER_BY_2)
 393                        sclk_cntl2 |= REDUCED_SPEED_SCLK_SEL(0);
 394                else if (ps->misc & ATOM_PM_MISCINFO_DYNAMIC_CLOCK_DIVIDER_BY_4)
 395                        sclk_cntl2 |= REDUCED_SPEED_SCLK_SEL(2);
 396        } else
 397                sclk_more_cntl &= ~REDUCED_SPEED_SCLK_EN;
 398
 399        if (ps->misc & ATOM_PM_MISCINFO_ASIC_DYNAMIC_VOLTAGE_EN) {
 400                sclk_more_cntl |= IO_CG_VOLTAGE_DROP;
 401                if (voltage->delay) {
 402                        sclk_more_cntl |= VOLTAGE_DROP_SYNC;
 403                        switch (voltage->delay) {
 404                        case 33:
 405                                sclk_more_cntl |= VOLTAGE_DELAY_SEL(0);
 406                                break;
 407                        case 66:
 408                                sclk_more_cntl |= VOLTAGE_DELAY_SEL(1);
 409                                break;
 410                        case 99:
 411                                sclk_more_cntl |= VOLTAGE_DELAY_SEL(2);
 412                                break;
 413                        case 132:
 414                                sclk_more_cntl |= VOLTAGE_DELAY_SEL(3);
 415                                break;
 416                        }
 417                } else
 418                        sclk_more_cntl &= ~VOLTAGE_DROP_SYNC;
 419        } else
 420                sclk_more_cntl &= ~IO_CG_VOLTAGE_DROP;
 421
 422        if (ps->misc & ATOM_PM_MISCINFO_DYNAMIC_HDP_BLOCK_EN)
 423                sclk_cntl &= ~FORCE_HDP;
 424        else
 425                sclk_cntl |= FORCE_HDP;
 426
 427        WREG32_PLL(SCLK_CNTL, sclk_cntl);
 428        WREG32_PLL(SCLK_CNTL2, sclk_cntl2);
 429        WREG32_PLL(SCLK_MORE_CNTL, sclk_more_cntl);
 430
 431        /* set pcie lanes */
 432        if ((rdev->flags & RADEON_IS_PCIE) &&
 433            !(rdev->flags & RADEON_IS_IGP) &&
 434            rdev->asic->pm.set_pcie_lanes &&
 435            (ps->pcie_lanes !=
 436             rdev->pm.power_state[rdev->pm.current_power_state_index].pcie_lanes)) {
 437                radeon_set_pcie_lanes(rdev,
 438                                      ps->pcie_lanes);
 439                DRM_DEBUG_DRIVER("Setting: p: %d\n", ps->pcie_lanes);
 440        }
 441}
 442
 443/**
 444 * r100_pm_prepare - pre-power state change callback.
 445 *
 446 * @rdev: radeon_device pointer
 447 *
 448 * Prepare for a power state change (r1xx-r4xx).
 449 */
 450void r100_pm_prepare(struct radeon_device *rdev)
 451{
 452        struct drm_device *ddev = rdev->ddev;
 453        struct drm_crtc *crtc;
 454        struct radeon_crtc *radeon_crtc;
 455        u32 tmp;
 456
 457        /* disable any active CRTCs */
 458        list_for_each_entry(crtc, &ddev->mode_config.crtc_list, head) {
 459                radeon_crtc = to_radeon_crtc(crtc);
 460                if (radeon_crtc->enabled) {
 461                        if (radeon_crtc->crtc_id) {
 462                                tmp = RREG32(RADEON_CRTC2_GEN_CNTL);
 463                                tmp |= RADEON_CRTC2_DISP_REQ_EN_B;
 464                                WREG32(RADEON_CRTC2_GEN_CNTL, tmp);
 465                        } else {
 466                                tmp = RREG32(RADEON_CRTC_GEN_CNTL);
 467                                tmp |= RADEON_CRTC_DISP_REQ_EN_B;
 468                                WREG32(RADEON_CRTC_GEN_CNTL, tmp);
 469                        }
 470                }
 471        }
 472}
 473
 474/**
 475 * r100_pm_finish - post-power state change callback.
 476 *
 477 * @rdev: radeon_device pointer
 478 *
 479 * Clean up after a power state change (r1xx-r4xx).
 480 */
 481void r100_pm_finish(struct radeon_device *rdev)
 482{
 483        struct drm_device *ddev = rdev->ddev;
 484        struct drm_crtc *crtc;
 485        struct radeon_crtc *radeon_crtc;
 486        u32 tmp;
 487
 488        /* enable any active CRTCs */
 489        list_for_each_entry(crtc, &ddev->mode_config.crtc_list, head) {
 490                radeon_crtc = to_radeon_crtc(crtc);
 491                if (radeon_crtc->enabled) {
 492                        if (radeon_crtc->crtc_id) {
 493                                tmp = RREG32(RADEON_CRTC2_GEN_CNTL);
 494                                tmp &= ~RADEON_CRTC2_DISP_REQ_EN_B;
 495                                WREG32(RADEON_CRTC2_GEN_CNTL, tmp);
 496                        } else {
 497                                tmp = RREG32(RADEON_CRTC_GEN_CNTL);
 498                                tmp &= ~RADEON_CRTC_DISP_REQ_EN_B;
 499                                WREG32(RADEON_CRTC_GEN_CNTL, tmp);
 500                        }
 501                }
 502        }
 503}
 504
 505/**
 506 * r100_gui_idle - gui idle callback.
 507 *
 508 * @rdev: radeon_device pointer
 509 *
 510 * Check of the GUI (2D/3D engines) are idle (r1xx-r5xx).
 511 * Returns true if idle, false if not.
 512 */
 513bool r100_gui_idle(struct radeon_device *rdev)
 514{
 515        if (RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_ACTIVE)
 516                return false;
 517        else
 518                return true;
 519}
 520
 521/* hpd for digital panel detect/disconnect */
 522/**
 523 * r100_hpd_sense - hpd sense callback.
 524 *
 525 * @rdev: radeon_device pointer
 526 * @hpd: hpd (hotplug detect) pin
 527 *
 528 * Checks if a digital monitor is connected (r1xx-r4xx).
 529 * Returns true if connected, false if not connected.
 530 */
 531bool r100_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd)
 532{
 533        bool connected = false;
 534
 535        switch (hpd) {
 536        case RADEON_HPD_1:
 537                if (RREG32(RADEON_FP_GEN_CNTL) & RADEON_FP_DETECT_SENSE)
 538                        connected = true;
 539                break;
 540        case RADEON_HPD_2:
 541                if (RREG32(RADEON_FP2_GEN_CNTL) & RADEON_FP2_DETECT_SENSE)
 542                        connected = true;
 543                break;
 544        default:
 545                break;
 546        }
 547        return connected;
 548}
 549
 550/**
 551 * r100_hpd_set_polarity - hpd set polarity callback.
 552 *
 553 * @rdev: radeon_device pointer
 554 * @hpd: hpd (hotplug detect) pin
 555 *
 556 * Set the polarity of the hpd pin (r1xx-r4xx).
 557 */
 558void r100_hpd_set_polarity(struct radeon_device *rdev,
 559                           enum radeon_hpd_id hpd)
 560{
 561        u32 tmp;
 562        bool connected = r100_hpd_sense(rdev, hpd);
 563
 564        switch (hpd) {
 565        case RADEON_HPD_1:
 566                tmp = RREG32(RADEON_FP_GEN_CNTL);
 567                if (connected)
 568                        tmp &= ~RADEON_FP_DETECT_INT_POL;
 569                else
 570                        tmp |= RADEON_FP_DETECT_INT_POL;
 571                WREG32(RADEON_FP_GEN_CNTL, tmp);
 572                break;
 573        case RADEON_HPD_2:
 574                tmp = RREG32(RADEON_FP2_GEN_CNTL);
 575                if (connected)
 576                        tmp &= ~RADEON_FP2_DETECT_INT_POL;
 577                else
 578                        tmp |= RADEON_FP2_DETECT_INT_POL;
 579                WREG32(RADEON_FP2_GEN_CNTL, tmp);
 580                break;
 581        default:
 582                break;
 583        }
 584}
 585
 586/**
 587 * r100_hpd_init - hpd setup callback.
 588 *
 589 * @rdev: radeon_device pointer
 590 *
 591 * Setup the hpd pins used by the card (r1xx-r4xx).
 592 * Set the polarity, and enable the hpd interrupts.
 593 */
 594void r100_hpd_init(struct radeon_device *rdev)
 595{
 596        struct drm_device *dev = rdev->ddev;
 597        struct drm_connector *connector;
 598        unsigned enable = 0;
 599
 600        list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
 601                struct radeon_connector *radeon_connector = to_radeon_connector(connector);
 602                if (radeon_connector->hpd.hpd != RADEON_HPD_NONE)
 603                        enable |= 1 << radeon_connector->hpd.hpd;
 604                radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd);
 605        }
 606        radeon_irq_kms_enable_hpd(rdev, enable);
 607}
 608
 609/**
 610 * r100_hpd_fini - hpd tear down callback.
 611 *
 612 * @rdev: radeon_device pointer
 613 *
 614 * Tear down the hpd pins used by the card (r1xx-r4xx).
 615 * Disable the hpd interrupts.
 616 */
 617void r100_hpd_fini(struct radeon_device *rdev)
 618{
 619        struct drm_device *dev = rdev->ddev;
 620        struct drm_connector *connector;
 621        unsigned disable = 0;
 622
 623        list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
 624                struct radeon_connector *radeon_connector = to_radeon_connector(connector);
 625                if (radeon_connector->hpd.hpd != RADEON_HPD_NONE)
 626                        disable |= 1 << radeon_connector->hpd.hpd;
 627        }
 628        radeon_irq_kms_disable_hpd(rdev, disable);
 629}
 630
 631/*
 632 * PCI GART
 633 */
 634void r100_pci_gart_tlb_flush(struct radeon_device *rdev)
 635{
 636        /* TODO: can we do somethings here ? */
 637        /* It seems hw only cache one entry so we should discard this
 638         * entry otherwise if first GPU GART read hit this entry it
 639         * could end up in wrong address. */
 640}
 641
 642int r100_pci_gart_init(struct radeon_device *rdev)
 643{
 644        int r;
 645
 646        if (rdev->gart.ptr) {
 647                WARN(1, "R100 PCI GART already initialized\n");
 648                return 0;
 649        }
 650        /* Initialize common gart structure */
 651        r = radeon_gart_init(rdev);
 652        if (r)
 653                return r;
 654        rdev->gart.table_size = rdev->gart.num_gpu_pages * 4;
 655        rdev->asic->gart.tlb_flush = &r100_pci_gart_tlb_flush;
 656        rdev->asic->gart.get_page_entry = &r100_pci_gart_get_page_entry;
 657        rdev->asic->gart.set_page = &r100_pci_gart_set_page;
 658        return radeon_gart_table_ram_alloc(rdev);
 659}
 660
 661int r100_pci_gart_enable(struct radeon_device *rdev)
 662{
 663        uint32_t tmp;
 664
 665        /* discard memory request outside of configured range */
 666        tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
 667        WREG32(RADEON_AIC_CNTL, tmp);
 668        /* set address range for PCI address translate */
 669        WREG32(RADEON_AIC_LO_ADDR, rdev->mc.gtt_start);
 670        WREG32(RADEON_AIC_HI_ADDR, rdev->mc.gtt_end);
 671        /* set PCI GART page-table base address */
 672        WREG32(RADEON_AIC_PT_BASE, rdev->gart.table_addr);
 673        tmp = RREG32(RADEON_AIC_CNTL) | RADEON_PCIGART_TRANSLATE_EN;
 674        WREG32(RADEON_AIC_CNTL, tmp);
 675        r100_pci_gart_tlb_flush(rdev);
 676        DRM_INFO("PCI GART of %uM enabled (table at 0x%016llX).\n",
 677                 (unsigned)(rdev->mc.gtt_size >> 20),
 678                 (unsigned long long)rdev->gart.table_addr);
 679        rdev->gart.ready = true;
 680        return 0;
 681}
 682
 683void r100_pci_gart_disable(struct radeon_device *rdev)
 684{
 685        uint32_t tmp;
 686
 687        /* discard memory request outside of configured range */
 688        tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
 689        WREG32(RADEON_AIC_CNTL, tmp & ~RADEON_PCIGART_TRANSLATE_EN);
 690        WREG32(RADEON_AIC_LO_ADDR, 0);
 691        WREG32(RADEON_AIC_HI_ADDR, 0);
 692}
 693
 694uint64_t r100_pci_gart_get_page_entry(uint64_t addr, uint32_t flags)
 695{
 696        return addr;
 697}
 698
 699void r100_pci_gart_set_page(struct radeon_device *rdev, unsigned i,
 700                            uint64_t entry)
 701{
 702        u32 *gtt = rdev->gart.ptr;
 703        gtt[i] = cpu_to_le32(lower_32_bits(entry));
 704}
 705
 706void r100_pci_gart_fini(struct radeon_device *rdev)
 707{
 708        radeon_gart_fini(rdev);
 709        r100_pci_gart_disable(rdev);
 710        radeon_gart_table_ram_free(rdev);
 711}
 712
 713int r100_irq_set(struct radeon_device *rdev)
 714{
 715        uint32_t tmp = 0;
 716
 717        if (!rdev->irq.installed) {
 718                WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
 719                WREG32(R_000040_GEN_INT_CNTL, 0);
 720                return -EINVAL;
 721        }
 722        if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
 723                tmp |= RADEON_SW_INT_ENABLE;
 724        }
 725        if (rdev->irq.crtc_vblank_int[0] ||
 726            atomic_read(&rdev->irq.pflip[0])) {
 727                tmp |= RADEON_CRTC_VBLANK_MASK;
 728        }
 729        if (rdev->irq.crtc_vblank_int[1] ||
 730            atomic_read(&rdev->irq.pflip[1])) {
 731                tmp |= RADEON_CRTC2_VBLANK_MASK;
 732        }
 733        if (rdev->irq.hpd[0]) {
 734                tmp |= RADEON_FP_DETECT_MASK;
 735        }
 736        if (rdev->irq.hpd[1]) {
 737                tmp |= RADEON_FP2_DETECT_MASK;
 738        }
 739        WREG32(RADEON_GEN_INT_CNTL, tmp);
 740
 741        /* read back to post the write */
 742        RREG32(RADEON_GEN_INT_CNTL);
 743
 744        return 0;
 745}
 746
 747void r100_irq_disable(struct radeon_device *rdev)
 748{
 749        u32 tmp;
 750
 751        WREG32(R_000040_GEN_INT_CNTL, 0);
 752        /* Wait and acknowledge irq */
 753        mdelay(1);
 754        tmp = RREG32(R_000044_GEN_INT_STATUS);
 755        WREG32(R_000044_GEN_INT_STATUS, tmp);
 756}
 757
 758static uint32_t r100_irq_ack(struct radeon_device *rdev)
 759{
 760        uint32_t irqs = RREG32(RADEON_GEN_INT_STATUS);
 761        uint32_t irq_mask = RADEON_SW_INT_TEST |
 762                RADEON_CRTC_VBLANK_STAT | RADEON_CRTC2_VBLANK_STAT |
 763                RADEON_FP_DETECT_STAT | RADEON_FP2_DETECT_STAT;
 764
 765        if (irqs) {
 766                WREG32(RADEON_GEN_INT_STATUS, irqs);
 767        }
 768        return irqs & irq_mask;
 769}
 770
 771int r100_irq_process(struct radeon_device *rdev)
 772{
 773        uint32_t status, msi_rearm;
 774        bool queue_hotplug = false;
 775
 776        status = r100_irq_ack(rdev);
 777        if (!status) {
 778                return IRQ_NONE;
 779        }
 780        if (rdev->shutdown) {
 781                return IRQ_NONE;
 782        }
 783        while (status) {
 784                /* SW interrupt */
 785                if (status & RADEON_SW_INT_TEST) {
 786                        radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
 787                }
 788                /* Vertical blank interrupts */
 789                if (status & RADEON_CRTC_VBLANK_STAT) {
 790                        if (rdev->irq.crtc_vblank_int[0]) {
 791                                drm_handle_vblank(rdev->ddev, 0);
 792                                rdev->pm.vblank_sync = true;
 793                                wake_up(&rdev->irq.vblank_queue);
 794                        }
 795                        if (atomic_read(&rdev->irq.pflip[0]))
 796                                radeon_crtc_handle_vblank(rdev, 0);
 797                }
 798                if (status & RADEON_CRTC2_VBLANK_STAT) {
 799                        if (rdev->irq.crtc_vblank_int[1]) {
 800                                drm_handle_vblank(rdev->ddev, 1);
 801                                rdev->pm.vblank_sync = true;
 802                                wake_up(&rdev->irq.vblank_queue);
 803                        }
 804                        if (atomic_read(&rdev->irq.pflip[1]))
 805                                radeon_crtc_handle_vblank(rdev, 1);
 806                }
 807                if (status & RADEON_FP_DETECT_STAT) {
 808                        queue_hotplug = true;
 809                        DRM_DEBUG("HPD1\n");
 810                }
 811                if (status & RADEON_FP2_DETECT_STAT) {
 812                        queue_hotplug = true;
 813                        DRM_DEBUG("HPD2\n");
 814                }
 815                status = r100_irq_ack(rdev);
 816        }
 817        if (queue_hotplug)
 818                schedule_delayed_work(&rdev->hotplug_work, 0);
 819        if (rdev->msi_enabled) {
 820                switch (rdev->family) {
 821                case CHIP_RS400:
 822                case CHIP_RS480:
 823                        msi_rearm = RREG32(RADEON_AIC_CNTL) & ~RS400_MSI_REARM;
 824                        WREG32(RADEON_AIC_CNTL, msi_rearm);
 825                        WREG32(RADEON_AIC_CNTL, msi_rearm | RS400_MSI_REARM);
 826                        break;
 827                default:
 828                        WREG32(RADEON_MSI_REARM_EN, RV370_MSI_REARM_EN);
 829                        break;
 830                }
 831        }
 832        return IRQ_HANDLED;
 833}
 834
 835u32 r100_get_vblank_counter(struct radeon_device *rdev, int crtc)
 836{
 837        if (crtc == 0)
 838                return RREG32(RADEON_CRTC_CRNT_FRAME);
 839        else
 840                return RREG32(RADEON_CRTC2_CRNT_FRAME);
 841}
 842
 843/**
 844 * r100_ring_hdp_flush - flush Host Data Path via the ring buffer
 845 * @rdev: radeon device structure
 846 * @ring: ring buffer struct for emitting packets
 847 */
 848static void r100_ring_hdp_flush(struct radeon_device *rdev, struct radeon_ring *ring)
 849{
 850        radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
 851        radeon_ring_write(ring, rdev->config.r100.hdp_cntl |
 852                                RADEON_HDP_READ_BUFFER_INVALIDATE);
 853        radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
 854        radeon_ring_write(ring, rdev->config.r100.hdp_cntl);
 855}
 856
 857/* Who ever call radeon_fence_emit should call ring_lock and ask
 858 * for enough space (today caller are ib schedule and buffer move) */
 859void r100_fence_ring_emit(struct radeon_device *rdev,
 860                          struct radeon_fence *fence)
 861{
 862        struct radeon_ring *ring = &rdev->ring[fence->ring];
 863
 864        /* We have to make sure that caches are flushed before
 865         * CPU might read something from VRAM. */
 866        radeon_ring_write(ring, PACKET0(RADEON_RB3D_DSTCACHE_CTLSTAT, 0));
 867        radeon_ring_write(ring, RADEON_RB3D_DC_FLUSH_ALL);
 868        radeon_ring_write(ring, PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 0));
 869        radeon_ring_write(ring, RADEON_RB3D_ZC_FLUSH_ALL);
 870        /* Wait until IDLE & CLEAN */
 871        radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0));
 872        radeon_ring_write(ring, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN);
 873        r100_ring_hdp_flush(rdev, ring);
 874        /* Emit fence sequence & fire IRQ */
 875        radeon_ring_write(ring, PACKET0(rdev->fence_drv[fence->ring].scratch_reg, 0));
 876        radeon_ring_write(ring, fence->seq);
 877        radeon_ring_write(ring, PACKET0(RADEON_GEN_INT_STATUS, 0));
 878        radeon_ring_write(ring, RADEON_SW_INT_FIRE);
 879}
 880
 881bool r100_semaphore_ring_emit(struct radeon_device *rdev,
 882                              struct radeon_ring *ring,
 883                              struct radeon_semaphore *semaphore,
 884                              bool emit_wait)
 885{
 886        /* Unused on older asics, since we don't have semaphores or multiple rings */
 887        BUG();
 888        return false;
 889}
 890
 891struct radeon_fence *r100_copy_blit(struct radeon_device *rdev,
 892                                    uint64_t src_offset,
 893                                    uint64_t dst_offset,
 894                                    unsigned num_gpu_pages,
 895                                    struct dma_resv *resv)
 896{
 897        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 898        struct radeon_fence *fence;
 899        uint32_t cur_pages;
 900        uint32_t stride_bytes = RADEON_GPU_PAGE_SIZE;
 901        uint32_t pitch;
 902        uint32_t stride_pixels;
 903        unsigned ndw;
 904        int num_loops;
 905        int r = 0;
 906
 907        /* radeon limited to 16k stride */
 908        stride_bytes &= 0x3fff;
 909        /* radeon pitch is /64 */
 910        pitch = stride_bytes / 64;
 911        stride_pixels = stride_bytes / 4;
 912        num_loops = DIV_ROUND_UP(num_gpu_pages, 8191);
 913
 914        /* Ask for enough room for blit + flush + fence */
 915        ndw = 64 + (10 * num_loops);
 916        r = radeon_ring_lock(rdev, ring, ndw);
 917        if (r) {
 918                DRM_ERROR("radeon: moving bo (%d) asking for %u dw.\n", r, ndw);
 919                return ERR_PTR(-EINVAL);
 920        }
 921        while (num_gpu_pages > 0) {
 922                cur_pages = num_gpu_pages;
 923                if (cur_pages > 8191) {
 924                        cur_pages = 8191;
 925                }
 926                num_gpu_pages -= cur_pages;
 927
 928                /* pages are in Y direction - height
 929                   page width in X direction - width */
 930                radeon_ring_write(ring, PACKET3(PACKET3_BITBLT_MULTI, 8));
 931                radeon_ring_write(ring,
 932                                  RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
 933                                  RADEON_GMC_DST_PITCH_OFFSET_CNTL |
 934                                  RADEON_GMC_SRC_CLIPPING |
 935                                  RADEON_GMC_DST_CLIPPING |
 936                                  RADEON_GMC_BRUSH_NONE |
 937                                  (RADEON_COLOR_FORMAT_ARGB8888 << 8) |
 938                                  RADEON_GMC_SRC_DATATYPE_COLOR |
 939                                  RADEON_ROP3_S |
 940                                  RADEON_DP_SRC_SOURCE_MEMORY |
 941                                  RADEON_GMC_CLR_CMP_CNTL_DIS |
 942                                  RADEON_GMC_WR_MSK_DIS);
 943                radeon_ring_write(ring, (pitch << 22) | (src_offset >> 10));
 944                radeon_ring_write(ring, (pitch << 22) | (dst_offset >> 10));
 945                radeon_ring_write(ring, (0x1fff) | (0x1fff << 16));
 946                radeon_ring_write(ring, 0);
 947                radeon_ring_write(ring, (0x1fff) | (0x1fff << 16));
 948                radeon_ring_write(ring, num_gpu_pages);
 949                radeon_ring_write(ring, num_gpu_pages);
 950                radeon_ring_write(ring, cur_pages | (stride_pixels << 16));
 951        }
 952        radeon_ring_write(ring, PACKET0(RADEON_DSTCACHE_CTLSTAT, 0));
 953        radeon_ring_write(ring, RADEON_RB2D_DC_FLUSH_ALL);
 954        radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0));
 955        radeon_ring_write(ring,
 956                          RADEON_WAIT_2D_IDLECLEAN |
 957                          RADEON_WAIT_HOST_IDLECLEAN |
 958                          RADEON_WAIT_DMA_GUI_IDLE);
 959        r = radeon_fence_emit(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX);
 960        if (r) {
 961                radeon_ring_unlock_undo(rdev, ring);
 962                return ERR_PTR(r);
 963        }
 964        radeon_ring_unlock_commit(rdev, ring, false);
 965        return fence;
 966}
 967
 968static int r100_cp_wait_for_idle(struct radeon_device *rdev)
 969{
 970        unsigned i;
 971        u32 tmp;
 972
 973        for (i = 0; i < rdev->usec_timeout; i++) {
 974                tmp = RREG32(R_000E40_RBBM_STATUS);
 975                if (!G_000E40_CP_CMDSTRM_BUSY(tmp)) {
 976                        return 0;
 977                }
 978                udelay(1);
 979        }
 980        return -1;
 981}
 982
 983void r100_ring_start(struct radeon_device *rdev, struct radeon_ring *ring)
 984{
 985        int r;
 986
 987        r = radeon_ring_lock(rdev, ring, 2);
 988        if (r) {
 989                return;
 990        }
 991        radeon_ring_write(ring, PACKET0(RADEON_ISYNC_CNTL, 0));
 992        radeon_ring_write(ring,
 993                          RADEON_ISYNC_ANY2D_IDLE3D |
 994                          RADEON_ISYNC_ANY3D_IDLE2D |
 995                          RADEON_ISYNC_WAIT_IDLEGUI |
 996                          RADEON_ISYNC_CPSCRATCH_IDLEGUI);
 997        radeon_ring_unlock_commit(rdev, ring, false);
 998}
 999
1000
1001/* Load the microcode for the CP */
1002static int r100_cp_init_microcode(struct radeon_device *rdev)
1003{
1004        const char *fw_name = NULL;
1005        int err;
1006
1007        DRM_DEBUG_KMS("\n");
1008
1009        if ((rdev->family == CHIP_R100) || (rdev->family == CHIP_RV100) ||
1010            (rdev->family == CHIP_RV200) || (rdev->family == CHIP_RS100) ||
1011            (rdev->family == CHIP_RS200)) {
1012                DRM_INFO("Loading R100 Microcode\n");
1013                fw_name = FIRMWARE_R100;
1014        } else if ((rdev->family == CHIP_R200) ||
1015                   (rdev->family == CHIP_RV250) ||
1016                   (rdev->family == CHIP_RV280) ||
1017                   (rdev->family == CHIP_RS300)) {
1018                DRM_INFO("Loading R200 Microcode\n");
1019                fw_name = FIRMWARE_R200;
1020        } else if ((rdev->family == CHIP_R300) ||
1021                   (rdev->family == CHIP_R350) ||
1022                   (rdev->family == CHIP_RV350) ||
1023                   (rdev->family == CHIP_RV380) ||
1024                   (rdev->family == CHIP_RS400) ||
1025                   (rdev->family == CHIP_RS480)) {
1026                DRM_INFO("Loading R300 Microcode\n");
1027                fw_name = FIRMWARE_R300;
1028        } else if ((rdev->family == CHIP_R420) ||
1029                   (rdev->family == CHIP_R423) ||
1030                   (rdev->family == CHIP_RV410)) {
1031                DRM_INFO("Loading R400 Microcode\n");
1032                fw_name = FIRMWARE_R420;
1033        } else if ((rdev->family == CHIP_RS690) ||
1034                   (rdev->family == CHIP_RS740)) {
1035                DRM_INFO("Loading RS690/RS740 Microcode\n");
1036                fw_name = FIRMWARE_RS690;
1037        } else if (rdev->family == CHIP_RS600) {
1038                DRM_INFO("Loading RS600 Microcode\n");
1039                fw_name = FIRMWARE_RS600;
1040        } else if ((rdev->family == CHIP_RV515) ||
1041                   (rdev->family == CHIP_R520) ||
1042                   (rdev->family == CHIP_RV530) ||
1043                   (rdev->family == CHIP_R580) ||
1044                   (rdev->family == CHIP_RV560) ||
1045                   (rdev->family == CHIP_RV570)) {
1046                DRM_INFO("Loading R500 Microcode\n");
1047                fw_name = FIRMWARE_R520;
1048        }
1049
1050        err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1051        if (err) {
1052                pr_err("radeon_cp: Failed to load firmware \"%s\"\n", fw_name);
1053        } else if (rdev->me_fw->size % 8) {
1054                pr_err("radeon_cp: Bogus length %zu in firmware \"%s\"\n",
1055                       rdev->me_fw->size, fw_name);
1056                err = -EINVAL;
1057                release_firmware(rdev->me_fw);
1058                rdev->me_fw = NULL;
1059        }
1060        return err;
1061}
1062
1063u32 r100_gfx_get_rptr(struct radeon_device *rdev,
1064                      struct radeon_ring *ring)
1065{
1066        u32 rptr;
1067
1068        if (rdev->wb.enabled)
1069                rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
1070        else
1071                rptr = RREG32(RADEON_CP_RB_RPTR);
1072
1073        return rptr;
1074}
1075
1076u32 r100_gfx_get_wptr(struct radeon_device *rdev,
1077                      struct radeon_ring *ring)
1078{
1079        return RREG32(RADEON_CP_RB_WPTR);
1080}
1081
1082void r100_gfx_set_wptr(struct radeon_device *rdev,
1083                       struct radeon_ring *ring)
1084{
1085        WREG32(RADEON_CP_RB_WPTR, ring->wptr);
1086        (void)RREG32(RADEON_CP_RB_WPTR);
1087}
1088
1089static void r100_cp_load_microcode(struct radeon_device *rdev)
1090{
1091        const __be32 *fw_data;
1092        int i, size;
1093
1094        if (r100_gui_wait_for_idle(rdev)) {
1095                pr_warn("Failed to wait GUI idle while programming pipes. Bad things might happen.\n");
1096        }
1097
1098        if (rdev->me_fw) {
1099                size = rdev->me_fw->size / 4;
1100                fw_data = (const __be32 *)&rdev->me_fw->data[0];
1101                WREG32(RADEON_CP_ME_RAM_ADDR, 0);
1102                for (i = 0; i < size; i += 2) {
1103                        WREG32(RADEON_CP_ME_RAM_DATAH,
1104                               be32_to_cpup(&fw_data[i]));
1105                        WREG32(RADEON_CP_ME_RAM_DATAL,
1106                               be32_to_cpup(&fw_data[i + 1]));
1107                }
1108        }
1109}
1110
/**
 * r100_cp_init - load the microcode and bring up the CP and its GFX ring
 * @rdev: radeon device structure
 * @ring_size: requested ring size in bytes; recomputed below from
 *             order_base_2(ring_size / 8) before the ring is allocated
 *
 * Requests the CP microcode if it is not loaded yet, uploads it,
 * initialises the GFX ring, programs the ring buffer and writeback
 * registers, enables the CP and runs a ring test.
 *
 * Returns 0 on success, or the error from r100_cp_init_microcode(),
 * radeon_ring_init() or radeon_ring_test(). A failure to register the
 * debugfs file or to obtain the rptr-save scratch register is only
 * logged.
 */
int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	unsigned rb_bufsz;
	unsigned rb_blksz;
	unsigned max_fetch;
	unsigned pre_write_timer;
	unsigned pre_write_limit;
	unsigned indirect2_start;
	unsigned indirect1_start;
	uint32_t tmp;
	int r;

	/* debugfs is optional: report the failure and carry on */
	if (r100_debugfs_cp_init(rdev)) {
		DRM_ERROR("Failed to register debugfs file for CP !\n");
	}
	/* Only request the microcode when no image is loaded yet */
	if (!rdev->me_fw) {
		r = r100_cp_init_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load firmware!\n");
			return r;
		}
	}

	/* Align ring size */
	rb_bufsz = order_base_2(ring_size / 8);
	ring_size = (1 << (rb_bufsz + 1)) * 4;
	r100_cp_load_microcode(rdev);
	r = radeon_ring_init(rdev, ring, ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r) {
		return r;
	}
	/* Each time the cp read 1024 bytes (16 dword/quadword) update
	 * the rptr copy in system ram */
	rb_blksz = 9;
	/* cp will read 128bytes at a time (4 dwords) */
	max_fetch = 1;
	ring->align_mask = 16 - 1;
	/* Write to CP_RB_WPTR will be delayed for pre_write_timer clocks */
	pre_write_timer = 64;
	/* Force CP_RB_WPTR write if written more than one time before the
	 * delay expire
	 */
	pre_write_limit = 0;
	/* Setup the cp cache like this (cache size is 96 dwords) :
	 *	RING		0  to 15
	 *	INDIRECT1	16 to 79
	 *	INDIRECT2	80 to 95
	 * So ring cache size is 16dwords (> (2 * max_fetch = 2 * 4dwords))
	 *    indirect1 cache size is 64dwords (> (2 * max_fetch = 2 * 4dwords))
	 *    indirect2 cache size is 16dwords (> (2 * max_fetch = 2 * 4dwords))
	 * Idea being that most of the gpu cmd will be through indirect1 buffer
	 * so it gets the bigger cache.
	 */
	indirect2_start = 80;
	indirect1_start = 16;
	/* cp setup */
	WREG32(0x718, pre_write_timer | (pre_write_limit << 28));
	tmp = (REG_SET(RADEON_RB_BUFSZ, rb_bufsz) |
	       REG_SET(RADEON_RB_BLKSZ, rb_blksz) |
	       REG_SET(RADEON_MAX_FETCH, max_fetch));
#ifdef __BIG_ENDIAN
	tmp |= RADEON_BUF_SWAP_32BIT;
#endif
	/* Keep RB_NO_UPDATE set while the ring registers are reprogrammed */
	WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_NO_UPDATE);

	/* Set ring address */
	DRM_INFO("radeon: ring at 0x%016lX\n", (unsigned long)ring->gpu_addr);
	WREG32(RADEON_CP_RB_BASE, ring->gpu_addr);
	/* Force read & write ptr to 0 */
	WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA | RADEON_RB_NO_UPDATE);
	WREG32(RADEON_CP_RB_RPTR_WR, 0);
	ring->wptr = 0;
	WREG32(RADEON_CP_RB_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(R_00070C_CP_RB_RPTR_ADDR,
		S_00070C_RB_RPTR_ADDR((rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) >> 2));
	WREG32(R_000774_SCRATCH_ADDR, rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET);

	/* Without writeback, RB_NO_UPDATE stays set in the final RB_CNTL
	 * value and the scratch update mask is cleared */
	if (rdev->wb.enabled)
		WREG32(R_000770_SCRATCH_UMSK, 0xff);
	else {
		tmp |= RADEON_RB_NO_UPDATE;
		WREG32(R_000770_SCRATCH_UMSK, 0);
	}

	WREG32(RADEON_CP_RB_CNTL, tmp);
	udelay(10);
	/* Set cp mode to bus mastering & enable cp*/
	/* NOTE(review): this CSQ_MODE value is replaced two writes further
	 * down by the literal 0x00004D4D - confirm both writes are needed */
	WREG32(RADEON_CP_CSQ_MODE,
	       REG_SET(RADEON_INDIRECT2_START, indirect2_start) |
	       REG_SET(RADEON_INDIRECT1_START, indirect1_start));
	WREG32(RADEON_CP_RB_WPTR_DELAY, 0);
	WREG32(RADEON_CP_CSQ_MODE, 0x00004D4D);
	WREG32(RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIBM_INDBM);

	/* at this point everything should be setup correctly to enable master */
	pci_set_master(rdev->pdev);

	radeon_ring_start(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, ring);
	if (r) {
		DRM_ERROR("radeon: cp isn't working (%d).\n", r);
		return r;
	}
	ring->ready = true;
	/* CP is up: switch the active VRAM size to the real VRAM size */
	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	if (!ring->rptr_save_reg /* not resuming from suspend */
	    && radeon_ring_supports_scratch_reg(rdev, ring)) {
		r = radeon_scratch_get(rdev, &ring->rptr_save_reg);
		if (r) {
			/* Not fatal: continue without an rptr save register */
			DRM_ERROR("failed to get scratch reg for rptr save (%d).\n", r);
			ring->rptr_save_reg = 0;
		}
	}
	return 0;
}
1231
1232void r100_cp_fini(struct radeon_device *rdev)
1233{
1234        if (r100_cp_wait_for_idle(rdev)) {
1235                DRM_ERROR("Wait for CP idle timeout, shutting down CP.\n");
1236        }
1237        /* Disable ring */
1238        r100_cp_disable(rdev);
1239        radeon_scratch_free(rdev, rdev->ring[RADEON_RING_TYPE_GFX_INDEX].rptr_save_reg);
1240        radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
1241        DRM_INFO("radeon: cp finalized\n");
1242}
1243
1244void r100_cp_disable(struct radeon_device *rdev)
1245{
1246        /* Disable ring */
1247        radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1248        rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
1249        WREG32(RADEON_CP_CSQ_MODE, 0);
1250        WREG32(RADEON_CP_CSQ_CNTL, 0);
1251        WREG32(R_000770_SCRATCH_UMSK, 0);
1252        if (r100_gui_wait_for_idle(rdev)) {
1253                pr_warn("Failed to wait GUI idle while programming pipes. Bad things might happen.\n");
1254        }
1255}
1256
1257/*
1258 * CS functions
1259 */
1260int r100_reloc_pitch_offset(struct radeon_cs_parser *p,
1261                            struct radeon_cs_packet *pkt,
1262                            unsigned idx,
1263                            unsigned reg)
1264{
1265        int r;
1266        u32 tile_flags = 0;
1267        u32 tmp;
1268        struct radeon_bo_list *reloc;
1269        u32 value;
1270
1271        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1272        if (r) {
1273                DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1274                          idx, reg);
1275                radeon_cs_dump_packet(p, pkt);
1276                return r;
1277        }
1278
1279        value = radeon_get_ib_value(p, idx);
1280        tmp = value & 0x003fffff;
1281        tmp += (((u32)reloc->gpu_offset) >> 10);
1282
1283        if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1284                if (reloc->tiling_flags & RADEON_TILING_MACRO)
1285                        tile_flags |= RADEON_DST_TILE_MACRO;
1286                if (reloc->tiling_flags & RADEON_TILING_MICRO) {
1287                        if (reg == RADEON_SRC_PITCH_OFFSET) {
1288                                DRM_ERROR("Cannot src blit from microtiled surface\n");
1289                                radeon_cs_dump_packet(p, pkt);
1290                                return -EINVAL;
1291                        }
1292                        tile_flags |= RADEON_DST_TILE_MICRO;
1293                }
1294
1295                tmp |= tile_flags;
1296                p->ib.ptr[idx] = (value & 0x3fc00000) | tmp;
1297        } else
1298                p->ib.ptr[idx] = (value & 0xffc00000) | tmp;
1299        return 0;
1300}
1301
1302int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
1303                             struct radeon_cs_packet *pkt,
1304                             int idx)
1305{
1306        unsigned c, i;
1307        struct radeon_bo_list *reloc;
1308        struct r100_cs_track *track;
1309        int r = 0;
1310        volatile uint32_t *ib;
1311        u32 idx_value;
1312
1313        ib = p->ib.ptr;
1314        track = (struct r100_cs_track *)p->track;
1315        c = radeon_get_ib_value(p, idx++) & 0x1F;
1316        if (c > 16) {
1317            DRM_ERROR("Only 16 vertex buffers are allowed %d\n",
1318                      pkt->opcode);
1319            radeon_cs_dump_packet(p, pkt);
1320            return -EINVAL;
1321        }
1322        track->num_arrays = c;
1323        for (i = 0; i < (c - 1); i+=2, idx+=3) {
1324                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1325                if (r) {
1326                        DRM_ERROR("No reloc for packet3 %d\n",
1327                                  pkt->opcode);
1328                        radeon_cs_dump_packet(p, pkt);
1329                        return r;
1330                }
1331                idx_value = radeon_get_ib_value(p, idx);
1332                ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->gpu_offset);
1333
1334                track->arrays[i + 0].esize = idx_value >> 8;
1335                track->arrays[i + 0].robj = reloc->robj;
1336                track->arrays[i + 0].esize &= 0x7F;
1337                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1338                if (r) {
1339                        DRM_ERROR("No reloc for packet3 %d\n",
1340                                  pkt->opcode);
1341                        radeon_cs_dump_packet(p, pkt);
1342                        return r;
1343                }
1344                ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->gpu_offset);
1345                track->arrays[i + 1].robj = reloc->robj;
1346                track->arrays[i + 1].esize = idx_value >> 24;
1347                track->arrays[i + 1].esize &= 0x7F;
1348        }
1349        if (c & 1) {
1350                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1351                if (r) {
1352                        DRM_ERROR("No reloc for packet3 %d\n",
1353                                          pkt->opcode);
1354                        radeon_cs_dump_packet(p, pkt);
1355                        return r;
1356                }
1357                idx_value = radeon_get_ib_value(p, idx);
1358                ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->gpu_offset);
1359                track->arrays[i + 0].robj = reloc->robj;
1360                track->arrays[i + 0].esize = idx_value >> 8;
1361                track->arrays[i + 0].esize &= 0x7F;
1362        }
1363        return r;
1364}
1365
1366int r100_cs_parse_packet0(struct radeon_cs_parser *p,
1367                          struct radeon_cs_packet *pkt,
1368                          const unsigned *auth, unsigned n,
1369                          radeon_packet0_check_t check)
1370{
1371        unsigned reg;
1372        unsigned i, j, m;
1373        unsigned idx;
1374        int r;
1375
1376        idx = pkt->idx + 1;
1377        reg = pkt->reg;
1378        /* Check that register fall into register range
1379         * determined by the number of entry (n) in the
1380         * safe register bitmap.
1381         */
1382        if (pkt->one_reg_wr) {
1383                if ((reg >> 7) > n) {
1384                        return -EINVAL;
1385                }
1386        } else {
1387                if (((reg + (pkt->count << 2)) >> 7) > n) {
1388                        return -EINVAL;
1389                }
1390        }
1391        for (i = 0; i <= pkt->count; i++, idx++) {
1392                j = (reg >> 7);
1393                m = 1 << ((reg >> 2) & 31);
1394                if (auth[j] & m) {
1395                        r = check(p, pkt, idx, reg);
1396                        if (r) {
1397                                return r;
1398                        }
1399                }
1400                if (pkt->one_reg_wr) {
1401                        if (!(auth[j] & m)) {
1402                                break;
1403                        }
1404                } else {
1405                        reg += 4;
1406                }
1407        }
1408        return 0;
1409}
1410
/**
 * r100_cs_packet_parse_vline() - parse userspace VLINE packet
 * @p:		parser structure holding parsing context.
 *
 * Userspace sends a special sequence for VLINE waits.
 * PACKET0 - VLINE_START_END + value
 * PACKET0 - WAIT_UNTIL + value
 * RELOC (P3) - crtc_id in reloc.
 *
 * This function parses this and relocates the VLINE START END
 * and WAIT UNTIL packets to the correct crtc.
 * It also detects a switched off crtc and nulls out the
 * wait in that case.
 *
 * Return: 0 on success, -EINVAL when the sequence is malformed or the
 * VLINE register is not a known one, -ENOENT when the crtc cannot be
 * found, or the error returned by radeon_cs_packet_parse().
 */
int r100_cs_packet_parse_vline(struct radeon_cs_parser *p)
{
	struct drm_crtc *crtc;
	struct radeon_crtc *radeon_crtc;
	struct radeon_cs_packet p3reloc, waitreloc;
	int crtc_id;
	int r;
	uint32_t header, h_idx, reg;
	volatile uint32_t *ib;

	ib = p->ib.ptr;

	/* parse the wait until */
	r = radeon_cs_packet_parse(p, &waitreloc, p->idx);
	if (r)
		return r;

	/* check its a wait until and only 1 count */
	if (waitreloc.reg != RADEON_WAIT_UNTIL ||
	    waitreloc.count != 0) {
		DRM_ERROR("vline wait had illegal wait until segment\n");
		return -EINVAL;
	}

	if (radeon_get_ib_value(p, waitreloc.idx + 1) != RADEON_WAIT_CRTC_VLINE) {
		DRM_ERROR("vline wait had illegal wait until\n");
		return -EINVAL;
	}

	/* jump over the NOP */
	r = radeon_cs_packet_parse(p, &p3reloc, p->idx + waitreloc.count + 2);
	if (r)
		return r;

	/* The VLINE PACKET0 header sits two dwords before the WAIT_UNTIL
	 * packet that p->idx pointed at on entry */
	h_idx = p->idx - 2;
	/* Consume both the WAIT_UNTIL and the reloc packet */
	p->idx += waitreloc.count + 2;
	p->idx += p3reloc.count + 2;

	header = radeon_get_ib_value(p, h_idx);
	/* h_idx + 5 is the payload dword of the reloc packet (crtc id) */
	crtc_id = radeon_get_ib_value(p, h_idx + 5);
	reg = R100_CP_PACKET0_GET_REG(header);
	crtc = drm_crtc_find(p->rdev->ddev, p->filp, crtc_id);
	if (!crtc) {
		DRM_ERROR("cannot find crtc %d\n", crtc_id);
		return -ENOENT;
	}
	radeon_crtc = to_radeon_crtc(crtc);
	/* From here on crtc_id is the driver's crtc index */
	crtc_id = radeon_crtc->crtc_id;

	if (!crtc->enabled) {
		/* if the CRTC isn't enabled - we need to nop out the wait until */
		ib[h_idx + 2] = PACKET2(0);
		ib[h_idx + 3] = PACKET2(0);
	} else if (crtc_id == 1) {
		/* Second crtc: retarget the header at the CRTC2/D2 register
		 * and set the crtc select bit in the WAIT_UNTIL value */
		switch (reg) {
		case AVIVO_D1MODE_VLINE_START_END:
			header &= ~R300_CP_PACKET0_REG_MASK;
			header |= AVIVO_D2MODE_VLINE_START_END >> 2;
			break;
		case RADEON_CRTC_GUI_TRIG_VLINE:
			header &= ~R300_CP_PACKET0_REG_MASK;
			header |= RADEON_CRTC2_GUI_TRIG_VLINE >> 2;
			break;
		default:
			DRM_ERROR("unknown crtc reloc\n");
			return -EINVAL;
		}
		ib[h_idx] = header;
		ib[h_idx + 3] |= RADEON_ENG_DISPLAY_SELECT_CRTC1;
	}

	return 0;
}
1498
1499static int r100_get_vtx_size(uint32_t vtx_fmt)
1500{
1501        int vtx_size;
1502        vtx_size = 2;
1503        /* ordered according to bits in spec */
1504        if (vtx_fmt & RADEON_SE_VTX_FMT_W0)
1505                vtx_size++;
1506        if (vtx_fmt & RADEON_SE_VTX_FMT_FPCOLOR)
1507                vtx_size += 3;
1508        if (vtx_fmt & RADEON_SE_VTX_FMT_FPALPHA)
1509                vtx_size++;
1510        if (vtx_fmt & RADEON_SE_VTX_FMT_PKCOLOR)
1511                vtx_size++;
1512        if (vtx_fmt & RADEON_SE_VTX_FMT_FPSPEC)
1513                vtx_size += 3;
1514        if (vtx_fmt & RADEON_SE_VTX_FMT_FPFOG)
1515                vtx_size++;
1516        if (vtx_fmt & RADEON_SE_VTX_FMT_PKSPEC)
1517                vtx_size++;
1518        if (vtx_fmt & RADEON_SE_VTX_FMT_ST0)
1519                vtx_size += 2;
1520        if (vtx_fmt & RADEON_SE_VTX_FMT_ST1)
1521                vtx_size += 2;
1522        if (vtx_fmt & RADEON_SE_VTX_FMT_Q1)
1523                vtx_size++;
1524        if (vtx_fmt & RADEON_SE_VTX_FMT_ST2)
1525                vtx_size += 2;
1526        if (vtx_fmt & RADEON_SE_VTX_FMT_Q2)
1527                vtx_size++;
1528        if (vtx_fmt & RADEON_SE_VTX_FMT_ST3)
1529                vtx_size += 2;
1530        if (vtx_fmt & RADEON_SE_VTX_FMT_Q3)
1531                vtx_size++;
1532        if (vtx_fmt & RADEON_SE_VTX_FMT_Q0)
1533                vtx_size++;
1534        /* blend weight */
1535        if (vtx_fmt & (0x7 << 15))
1536                vtx_size += (vtx_fmt >> 15) & 0x7;
1537        if (vtx_fmt & RADEON_SE_VTX_FMT_N0)
1538                vtx_size += 3;
1539        if (vtx_fmt & RADEON_SE_VTX_FMT_XY1)
1540                vtx_size += 2;
1541        if (vtx_fmt & RADEON_SE_VTX_FMT_Z1)
1542                vtx_size++;
1543        if (vtx_fmt & RADEON_SE_VTX_FMT_W1)
1544                vtx_size++;
1545        if (vtx_fmt & RADEON_SE_VTX_FMT_N1)
1546                vtx_size++;
1547        if (vtx_fmt & RADEON_SE_VTX_FMT_Z)
1548                vtx_size++;
1549        return vtx_size;
1550}
1551
1552static int r100_packet0_check(struct radeon_cs_parser *p,
1553                              struct radeon_cs_packet *pkt,
1554                              unsigned idx, unsigned reg)
1555{
1556        struct radeon_bo_list *reloc;
1557        struct r100_cs_track *track;
1558        volatile uint32_t *ib;
1559        uint32_t tmp;
1560        int r;
1561        int i, face;
1562        u32 tile_flags = 0;
1563        u32 idx_value;
1564
1565        ib = p->ib.ptr;
1566        track = (struct r100_cs_track *)p->track;
1567
1568        idx_value = radeon_get_ib_value(p, idx);
1569
1570        switch (reg) {
1571        case RADEON_CRTC_GUI_TRIG_VLINE:
1572                r = r100_cs_packet_parse_vline(p);
1573                if (r) {
1574                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1575                                  idx, reg);
1576                        radeon_cs_dump_packet(p, pkt);
1577                        return r;
1578                }
1579                break;
1580                /* FIXME: only allow PACKET3 blit? easier to check for out of
1581                 * range access */
1582        case RADEON_DST_PITCH_OFFSET:
1583        case RADEON_SRC_PITCH_OFFSET:
1584                r = r100_reloc_pitch_offset(p, pkt, idx, reg);
1585                if (r)
1586                        return r;
1587                break;
1588        case RADEON_RB3D_DEPTHOFFSET:
1589                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1590                if (r) {
1591                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1592                                  idx, reg);
1593                        radeon_cs_dump_packet(p, pkt);
1594                        return r;
1595                }
1596                track->zb.robj = reloc->robj;
1597                track->zb.offset = idx_value;
1598                track->zb_dirty = true;
1599                ib[idx] = idx_value + ((u32)reloc->gpu_offset);
1600                break;
1601        case RADEON_RB3D_COLOROFFSET:
1602                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1603                if (r) {
1604                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1605                                  idx, reg);
1606                        radeon_cs_dump_packet(p, pkt);
1607                        return r;
1608                }
1609                track->cb[0].robj = reloc->robj;
1610                track->cb[0].offset = idx_value;
1611                track->cb_dirty = true;
1612                ib[idx] = idx_value + ((u32)reloc->gpu_offset);
1613                break;
1614        case RADEON_PP_TXOFFSET_0:
1615        case RADEON_PP_TXOFFSET_1:
1616        case RADEON_PP_TXOFFSET_2:
1617                i = (reg - RADEON_PP_TXOFFSET_0) / 24;
1618                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1619                if (r) {
1620                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1621                                  idx, reg);
1622                        radeon_cs_dump_packet(p, pkt);
1623                        return r;
1624                }
1625                if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1626                        if (reloc->tiling_flags & RADEON_TILING_MACRO)
1627                                tile_flags |= RADEON_TXO_MACRO_TILE;
1628                        if (reloc->tiling_flags & RADEON_TILING_MICRO)
1629                                tile_flags |= RADEON_TXO_MICRO_TILE_X2;
1630
1631                        tmp = idx_value & ~(0x7 << 2);
1632                        tmp |= tile_flags;
1633                        ib[idx] = tmp + ((u32)reloc->gpu_offset);
1634                } else
1635                        ib[idx] = idx_value + ((u32)reloc->gpu_offset);
1636                track->textures[i].robj = reloc->robj;
1637                track->tex_dirty = true;
1638                break;
1639        case RADEON_PP_CUBIC_OFFSET_T0_0:
1640        case RADEON_PP_CUBIC_OFFSET_T0_1:
1641        case RADEON_PP_CUBIC_OFFSET_T0_2:
1642        case RADEON_PP_CUBIC_OFFSET_T0_3:
1643        case RADEON_PP_CUBIC_OFFSET_T0_4:
1644                i = (reg - RADEON_PP_CUBIC_OFFSET_T0_0) / 4;
1645                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1646                if (r) {
1647                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1648                                  idx, reg);
1649                        radeon_cs_dump_packet(p, pkt);
1650                        return r;
1651                }
1652                track->textures[0].cube_info[i].offset = idx_value;
1653                ib[idx] = idx_value + ((u32)reloc->gpu_offset);
1654                track->textures[0].cube_info[i].robj = reloc->robj;
1655                track->tex_dirty = true;
1656                break;
1657        case RADEON_PP_CUBIC_OFFSET_T1_0:
1658        case RADEON_PP_CUBIC_OFFSET_T1_1:
1659        case RADEON_PP_CUBIC_OFFSET_T1_2:
1660        case RADEON_PP_CUBIC_OFFSET_T1_3:
1661        case RADEON_PP_CUBIC_OFFSET_T1_4:
1662                i = (reg - RADEON_PP_CUBIC_OFFSET_T1_0) / 4;
1663                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1664                if (r) {
1665                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1666                                  idx, reg);
1667                        radeon_cs_dump_packet(p, pkt);
1668                        return r;
1669                }
1670                track->textures[1].cube_info[i].offset = idx_value;
1671                ib[idx] = idx_value + ((u32)reloc->gpu_offset);
1672                track->textures[1].cube_info[i].robj = reloc->robj;
1673                track->tex_dirty = true;
1674                break;
1675        case RADEON_PP_CUBIC_OFFSET_T2_0:
1676        case RADEON_PP_CUBIC_OFFSET_T2_1:
1677        case RADEON_PP_CUBIC_OFFSET_T2_2:
1678        case RADEON_PP_CUBIC_OFFSET_T2_3:
1679        case RADEON_PP_CUBIC_OFFSET_T2_4:
1680                i = (reg - RADEON_PP_CUBIC_OFFSET_T2_0) / 4;
1681                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1682                if (r) {
1683                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1684                                  idx, reg);
1685                        radeon_cs_dump_packet(p, pkt);
1686                        return r;
1687                }
1688                track->textures[2].cube_info[i].offset = idx_value;
1689                ib[idx] = idx_value + ((u32)reloc->gpu_offset);
1690                track->textures[2].cube_info[i].robj = reloc->robj;
1691                track->tex_dirty = true;
1692                break;
1693        case RADEON_RE_WIDTH_HEIGHT:
1694                track->maxy = ((idx_value >> 16) & 0x7FF);
1695                track->cb_dirty = true;
1696                track->zb_dirty = true;
1697                break;
1698        case RADEON_RB3D_COLORPITCH:
1699                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1700                if (r) {
1701                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1702                                  idx, reg);
1703                        radeon_cs_dump_packet(p, pkt);
1704                        return r;
1705                }
1706                if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1707                        if (reloc->tiling_flags & RADEON_TILING_MACRO)
1708                                tile_flags |= RADEON_COLOR_TILE_ENABLE;
1709                        if (reloc->tiling_flags & RADEON_TILING_MICRO)
1710                                tile_flags |= RADEON_COLOR_MICROTILE_ENABLE;
1711
1712                        tmp = idx_value & ~(0x7 << 16);
1713                        tmp |= tile_flags;
1714                        ib[idx] = tmp;
1715                } else
1716                        ib[idx] = idx_value;
1717
1718                track->cb[0].pitch = idx_value & RADEON_COLORPITCH_MASK;
1719                track->cb_dirty = true;
1720                break;
1721        case RADEON_RB3D_DEPTHPITCH:
1722                track->zb.pitch = idx_value & RADEON_DEPTHPITCH_MASK;
1723                track->zb_dirty = true;
1724                break;
1725        case RADEON_RB3D_CNTL:
1726                switch ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f) {
1727                case 7:
1728                case 8:
1729                case 9:
1730                case 11:
1731                case 12:
1732                        track->cb[0].cpp = 1;
1733                        break;
1734                case 3:
1735                case 4:
1736                case 15:
1737                        track->cb[0].cpp = 2;
1738                        break;
1739                case 6:
1740                        track->cb[0].cpp = 4;
1741                        break;
1742                default:
1743                        DRM_ERROR("Invalid color buffer format (%d) !\n",
1744                                  ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f));
1745                        return -EINVAL;
1746                }
1747                track->z_enabled = !!(idx_value & RADEON_Z_ENABLE);
1748                track->cb_dirty = true;
1749                track->zb_dirty = true;
1750                break;
1751        case RADEON_RB3D_ZSTENCILCNTL:
1752                switch (idx_value & 0xf) {
1753                case 0:
1754                        track->zb.cpp = 2;
1755                        break;
1756                case 2:
1757                case 3:
1758                case 4:
1759                case 5:
1760                case 9:
1761                case 11:
1762                        track->zb.cpp = 4;
1763                        break;
1764                default:
1765                        break;
1766                }
1767                track->zb_dirty = true;
1768                break;
1769        case RADEON_RB3D_ZPASS_ADDR:
1770                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1771                if (r) {
1772                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1773                                  idx, reg);
1774                        radeon_cs_dump_packet(p, pkt);
1775                        return r;
1776                }
1777                ib[idx] = idx_value + ((u32)reloc->gpu_offset);
1778                break;
1779        case RADEON_PP_CNTL:
1780                {
1781                        uint32_t temp = idx_value >> 4;
1782                        for (i = 0; i < track->num_texture; i++)
1783                                track->textures[i].enabled = !!(temp & (1 << i));
1784                        track->tex_dirty = true;
1785                }
1786                break;
1787        case RADEON_SE_VF_CNTL:
1788                track->vap_vf_cntl = idx_value;
1789                break;
1790        case RADEON_SE_VTX_FMT:
1791                track->vtx_size = r100_get_vtx_size(idx_value);
1792                break;
1793        case RADEON_PP_TEX_SIZE_0:
1794        case RADEON_PP_TEX_SIZE_1:
1795        case RADEON_PP_TEX_SIZE_2:
1796                i = (reg - RADEON_PP_TEX_SIZE_0) / 8;
1797                track->textures[i].width = (idx_value & RADEON_TEX_USIZE_MASK) + 1;
1798                track->textures[i].height = ((idx_value & RADEON_TEX_VSIZE_MASK) >> RADEON_TEX_VSIZE_SHIFT) + 1;
1799                track->tex_dirty = true;
1800                break;
1801        case RADEON_PP_TEX_PITCH_0:
1802        case RADEON_PP_TEX_PITCH_1:
1803        case RADEON_PP_TEX_PITCH_2:
1804                i = (reg - RADEON_PP_TEX_PITCH_0) / 8;
1805                track->textures[i].pitch = idx_value + 32;
1806                track->tex_dirty = true;
1807                break;
1808        case RADEON_PP_TXFILTER_0:
1809        case RADEON_PP_TXFILTER_1:
1810        case RADEON_PP_TXFILTER_2:
1811                i = (reg - RADEON_PP_TXFILTER_0) / 24;
1812                track->textures[i].num_levels = ((idx_value & RADEON_MAX_MIP_LEVEL_MASK)
1813                                                 >> RADEON_MAX_MIP_LEVEL_SHIFT);
1814                tmp = (idx_value >> 23) & 0x7;
1815                if (tmp == 2 || tmp == 6)
1816                        track->textures[i].roundup_w = false;
1817                tmp = (idx_value >> 27) & 0x7;
1818                if (tmp == 2 || tmp == 6)
1819                        track->textures[i].roundup_h = false;
1820                track->tex_dirty = true;
1821                break;
1822        case RADEON_PP_TXFORMAT_0:
1823        case RADEON_PP_TXFORMAT_1:
1824        case RADEON_PP_TXFORMAT_2:
1825                i = (reg - RADEON_PP_TXFORMAT_0) / 24;
1826                if (idx_value & RADEON_TXFORMAT_NON_POWER2) {
1827                        track->textures[i].use_pitch = true;
1828                } else {
1829                        track->textures[i].use_pitch = false;
1830                        track->textures[i].width = 1 << ((idx_value & RADEON_TXFORMAT_WIDTH_MASK) >> RADEON_TXFORMAT_WIDTH_SHIFT);
1831                        track->textures[i].height = 1 << ((idx_value & RADEON_TXFORMAT_HEIGHT_MASK) >> RADEON_TXFORMAT_HEIGHT_SHIFT);
1832                }
1833                if (idx_value & RADEON_TXFORMAT_CUBIC_MAP_ENABLE)
1834                        track->textures[i].tex_coord_type = 2;
1835                switch ((idx_value & RADEON_TXFORMAT_FORMAT_MASK)) {
1836                case RADEON_TXFORMAT_I8:
1837                case RADEON_TXFORMAT_RGB332:
1838                case RADEON_TXFORMAT_Y8:
1839                        track->textures[i].cpp = 1;
1840                        track->textures[i].compress_format = R100_TRACK_COMP_NONE;
1841                        break;
1842                case RADEON_TXFORMAT_AI88:
1843                case RADEON_TXFORMAT_ARGB1555:
1844                case RADEON_TXFORMAT_RGB565:
1845                case RADEON_TXFORMAT_ARGB4444:
1846                case RADEON_TXFORMAT_VYUY422:
1847                case RADEON_TXFORMAT_YVYU422:
1848                case RADEON_TXFORMAT_SHADOW16:
1849                case RADEON_TXFORMAT_LDUDV655:
1850                case RADEON_TXFORMAT_DUDV88:
1851                        track->textures[i].cpp = 2;
1852                        track->textures[i].compress_format = R100_TRACK_COMP_NONE;
1853                        break;
1854                case RADEON_TXFORMAT_ARGB8888:
1855                case RADEON_TXFORMAT_RGBA8888:
1856                case RADEON_TXFORMAT_SHADOW32:
1857                case RADEON_TXFORMAT_LDUDUV8888:
1858                        track->textures[i].cpp = 4;
1859                        track->textures[i].compress_format = R100_TRACK_COMP_NONE;
1860                        break;
1861                case RADEON_TXFORMAT_DXT1:
1862                        track->textures[i].cpp = 1;
1863                        track->textures[i].compress_format = R100_TRACK_COMP_DXT1;
1864                        break;
1865                case RADEON_TXFORMAT_DXT23:
1866                case RADEON_TXFORMAT_DXT45:
1867                        track->textures[i].cpp = 1;
1868                        track->textures[i].compress_format = R100_TRACK_COMP_DXT35;
1869                        break;
1870                }
1871                track->textures[i].cube_info[4].width = 1 << ((idx_value >> 16) & 0xf);
1872                track->textures[i].cube_info[4].height = 1 << ((idx_value >> 20) & 0xf);
1873                track->tex_dirty = true;
1874                break;
1875        case RADEON_PP_CUBIC_FACES_0:
1876        case RADEON_PP_CUBIC_FACES_1:
1877        case RADEON_PP_CUBIC_FACES_2:
1878                tmp = idx_value;
1879                i = (reg - RADEON_PP_CUBIC_FACES_0) / 4;
1880                for (face = 0; face < 4; face++) {
1881                        track->textures[i].cube_info[face].width = 1 << ((tmp >> (face * 8)) & 0xf);
1882                        track->textures[i].cube_info[face].height = 1 << ((tmp >> ((face * 8) + 4)) & 0xf);
1883                }
1884                track->tex_dirty = true;
1885                break;
1886        default:
1887                pr_err("Forbidden register 0x%04X in cs at %d\n", reg, idx);
1888                return -EINVAL;
1889        }
1890        return 0;
1891}
1892
1893int r100_cs_track_check_pkt3_indx_buffer(struct radeon_cs_parser *p,
1894                                         struct radeon_cs_packet *pkt,
1895                                         struct radeon_bo *robj)
1896{
1897        unsigned idx;
1898        u32 value;
1899        idx = pkt->idx + 1;
1900        value = radeon_get_ib_value(p, idx + 2);
1901        if ((value + 1) > radeon_bo_size(robj)) {
1902                DRM_ERROR("[drm] Buffer too small for PACKET3 INDX_BUFFER "
1903                          "(need %u have %lu) !\n",
1904                          value + 1,
1905                          radeon_bo_size(robj));
1906                return -EINVAL;
1907        }
1908        return 0;
1909}
1910
1911static int r100_packet3_check(struct radeon_cs_parser *p,
1912                              struct radeon_cs_packet *pkt)
1913{
1914        struct radeon_bo_list *reloc;
1915        struct r100_cs_track *track;
1916        unsigned idx;
1917        volatile uint32_t *ib;
1918        int r;
1919
1920        ib = p->ib.ptr;
1921        idx = pkt->idx + 1;
1922        track = (struct r100_cs_track *)p->track;
1923        switch (pkt->opcode) {
1924        case PACKET3_3D_LOAD_VBPNTR:
1925                r = r100_packet3_load_vbpntr(p, pkt, idx);
1926                if (r)
1927                        return r;
1928                break;
1929        case PACKET3_INDX_BUFFER:
1930                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1931                if (r) {
1932                        DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
1933                        radeon_cs_dump_packet(p, pkt);
1934                        return r;
1935                }
1936                ib[idx+1] = radeon_get_ib_value(p, idx+1) + ((u32)reloc->gpu_offset);
1937                r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj);
1938                if (r) {
1939                        return r;
1940                }
1941                break;
1942        case 0x23:
1943                /* 3D_RNDR_GEN_INDX_PRIM on r100/r200 */
1944                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1945                if (r) {
1946                        DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
1947                        radeon_cs_dump_packet(p, pkt);
1948                        return r;
1949                }
1950                ib[idx] = radeon_get_ib_value(p, idx) + ((u32)reloc->gpu_offset);
1951                track->num_arrays = 1;
1952                track->vtx_size = r100_get_vtx_size(radeon_get_ib_value(p, idx + 2));
1953
1954                track->arrays[0].robj = reloc->robj;
1955                track->arrays[0].esize = track->vtx_size;
1956
1957                track->max_indx = radeon_get_ib_value(p, idx+1);
1958
1959                track->vap_vf_cntl = radeon_get_ib_value(p, idx+3);
1960                track->immd_dwords = pkt->count - 1;
1961                r = r100_cs_track_check(p->rdev, track);
1962                if (r)
1963                        return r;
1964                break;
1965        case PACKET3_3D_DRAW_IMMD:
1966                if (((radeon_get_ib_value(p, idx + 1) >> 4) & 0x3) != 3) {
1967                        DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
1968                        return -EINVAL;
1969                }
1970                track->vtx_size = r100_get_vtx_size(radeon_get_ib_value(p, idx + 0));
1971                track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
1972                track->immd_dwords = pkt->count - 1;
1973                r = r100_cs_track_check(p->rdev, track);
1974                if (r)
1975                        return r;
1976                break;
1977                /* triggers drawing using in-packet vertex data */
1978        case PACKET3_3D_DRAW_IMMD_2:
1979                if (((radeon_get_ib_value(p, idx) >> 4) & 0x3) != 3) {
1980                        DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
1981                        return -EINVAL;
1982                }
1983                track->vap_vf_cntl = radeon_get_ib_value(p, idx);
1984                track->immd_dwords = pkt->count;
1985                r = r100_cs_track_check(p->rdev, track);
1986                if (r)
1987                        return r;
1988                break;
1989                /* triggers drawing using in-packet vertex data */
1990        case PACKET3_3D_DRAW_VBUF_2:
1991                track->vap_vf_cntl = radeon_get_ib_value(p, idx);
1992                r = r100_cs_track_check(p->rdev, track);
1993                if (r)
1994                        return r;
1995                break;
1996                /* triggers drawing of vertex buffers setup elsewhere */
1997        case PACKET3_3D_DRAW_INDX_2:
1998                track->vap_vf_cntl = radeon_get_ib_value(p, idx);
1999                r = r100_cs_track_check(p->rdev, track);
2000                if (r)
2001                        return r;
2002                break;
2003                /* triggers drawing using indices to vertex buffer */
2004        case PACKET3_3D_DRAW_VBUF:
2005                track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
2006                r = r100_cs_track_check(p->rdev, track);
2007                if (r)
2008                        return r;
2009                break;
2010                /* triggers drawing of vertex buffers setup elsewhere */
2011        case PACKET3_3D_DRAW_INDX:
2012                track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
2013                r = r100_cs_track_check(p->rdev, track);
2014                if (r)
2015                        return r;
2016                break;
2017                /* triggers drawing using indices to vertex buffer */
2018        case PACKET3_3D_CLEAR_HIZ:
2019        case PACKET3_3D_CLEAR_ZMASK:
2020                if (p->rdev->hyperz_filp != p->filp)
2021                        return -EINVAL;
2022                break;
2023        case PACKET3_NOP:
2024                break;
2025        default:
2026                DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
2027                return -EINVAL;
2028        }
2029        return 0;
2030}
2031
2032int r100_cs_parse(struct radeon_cs_parser *p)
2033{
2034        struct radeon_cs_packet pkt;
2035        struct r100_cs_track *track;
2036        int r;
2037
2038        track = kzalloc(sizeof(*track), GFP_KERNEL);
2039        if (!track)
2040                return -ENOMEM;
2041        r100_cs_track_clear(p->rdev, track);
2042        p->track = track;
2043        do {
2044                r = radeon_cs_packet_parse(p, &pkt, p->idx);
2045                if (r) {
2046                        return r;
2047                }
2048                p->idx += pkt.count + 2;
2049                switch (pkt.type) {
2050                case RADEON_PACKET_TYPE0:
2051                        if (p->rdev->family >= CHIP_R200)
2052                                r = r100_cs_parse_packet0(p, &pkt,
2053                                        p->rdev->config.r100.reg_safe_bm,
2054                                        p->rdev->config.r100.reg_safe_bm_size,
2055                                        &r200_packet0_check);
2056                        else
2057                                r = r100_cs_parse_packet0(p, &pkt,
2058                                        p->rdev->config.r100.reg_safe_bm,
2059                                        p->rdev->config.r100.reg_safe_bm_size,
2060                                        &r100_packet0_check);
2061                        break;
2062                case RADEON_PACKET_TYPE2:
2063                        break;
2064                case RADEON_PACKET_TYPE3:
2065                        r = r100_packet3_check(p, &pkt);
2066                        break;
2067                default:
2068                        DRM_ERROR("Unknown packet type %d !\n",
2069                                  pkt.type);
2070                        return -EINVAL;
2071                }
2072                if (r)
2073                        return r;
2074        } while (p->idx < p->chunk_ib->length_dw);
2075        return 0;
2076}
2077
2078static void r100_cs_track_texture_print(struct r100_cs_track_texture *t)
2079{
2080        DRM_ERROR("pitch                      %d\n", t->pitch);
2081        DRM_ERROR("use_pitch                  %d\n", t->use_pitch);
2082        DRM_ERROR("width                      %d\n", t->width);
2083        DRM_ERROR("width_11                   %d\n", t->width_11);
2084        DRM_ERROR("height                     %d\n", t->height);
2085        DRM_ERROR("height_11                  %d\n", t->height_11);
2086        DRM_ERROR("num levels                 %d\n", t->num_levels);
2087        DRM_ERROR("depth                      %d\n", t->txdepth);
2088        DRM_ERROR("bpp                        %d\n", t->cpp);
2089        DRM_ERROR("coordinate type            %d\n", t->tex_coord_type);
2090        DRM_ERROR("width round to power of 2  %d\n", t->roundup_w);
2091        DRM_ERROR("height round to power of 2 %d\n", t->roundup_h);
2092        DRM_ERROR("compress format            %d\n", t->compress_format);
2093}
2094
2095static int r100_track_compress_size(int compress_format, int w, int h)
2096{
2097        int block_width, block_height, block_bytes;
2098        int wblocks, hblocks;
2099        int min_wblocks;
2100        int sz;
2101
2102        block_width = 4;
2103        block_height = 4;
2104
2105        switch (compress_format) {
2106        case R100_TRACK_COMP_DXT1:
2107                block_bytes = 8;
2108                min_wblocks = 4;
2109                break;
2110        default:
2111        case R100_TRACK_COMP_DXT35:
2112                block_bytes = 16;
2113                min_wblocks = 2;
2114                break;
2115        }
2116
2117        hblocks = (h + block_height - 1) / block_height;
2118        wblocks = (w + block_width - 1) / block_width;
2119        if (wblocks < min_wblocks)
2120                wblocks = min_wblocks;
2121        sz = wblocks * hblocks * block_bytes;
2122        return sz;
2123}
2124
2125static int r100_cs_track_cube(struct radeon_device *rdev,
2126                              struct r100_cs_track *track, unsigned idx)
2127{
2128        unsigned face, w, h;
2129        struct radeon_bo *cube_robj;
2130        unsigned long size;
2131        unsigned compress_format = track->textures[idx].compress_format;
2132
2133        for (face = 0; face < 5; face++) {
2134                cube_robj = track->textures[idx].cube_info[face].robj;
2135                w = track->textures[idx].cube_info[face].width;
2136                h = track->textures[idx].cube_info[face].height;
2137
2138                if (compress_format) {
2139                        size = r100_track_compress_size(compress_format, w, h);
2140                } else
2141                        size = w * h;
2142                size *= track->textures[idx].cpp;
2143
2144                size += track->textures[idx].cube_info[face].offset;
2145
2146                if (size > radeon_bo_size(cube_robj)) {
2147                        DRM_ERROR("Cube texture offset greater than object size %lu %lu\n",
2148                                  size, radeon_bo_size(cube_robj));
2149                        r100_cs_track_texture_print(&track->textures[idx]);
2150                        return -1;
2151                }
2152        }
2153        return 0;
2154}
2155
2156static int r100_cs_track_texture_check(struct radeon_device *rdev,
2157                                       struct r100_cs_track *track)
2158{
2159        struct radeon_bo *robj;
2160        unsigned long size;
2161        unsigned u, i, w, h, d;
2162        int ret;
2163
2164        for (u = 0; u < track->num_texture; u++) {
2165                if (!track->textures[u].enabled)
2166                        continue;
2167                if (track->textures[u].lookup_disable)
2168                        continue;
2169                robj = track->textures[u].robj;
2170                if (robj == NULL) {
2171                        DRM_ERROR("No texture bound to unit %u\n", u);
2172                        return -EINVAL;
2173                }
2174                size = 0;
2175                for (i = 0; i <= track->textures[u].num_levels; i++) {
2176                        if (track->textures[u].use_pitch) {
2177                                if (rdev->family < CHIP_R300)
2178                                        w = (track->textures[u].pitch / track->textures[u].cpp) / (1 << i);
2179                                else
2180                                        w = track->textures[u].pitch / (1 << i);
2181                        } else {
2182                                w = track->textures[u].width;
2183                                if (rdev->family >= CHIP_RV515)
2184                                        w |= track->textures[u].width_11;
2185                                w = w / (1 << i);
2186                                if (track->textures[u].roundup_w)
2187                                        w = roundup_pow_of_two(w);
2188                        }
2189                        h = track->textures[u].height;
2190                        if (rdev->family >= CHIP_RV515)
2191                                h |= track->textures[u].height_11;
2192                        h = h / (1 << i);
2193                        if (track->textures[u].roundup_h)
2194                                h = roundup_pow_of_two(h);
2195                        if (track->textures[u].tex_coord_type == 1) {
2196                                d = (1 << track->textures[u].txdepth) / (1 << i);
2197                                if (!d)
2198                                        d = 1;
2199                        } else {
2200                                d = 1;
2201                        }
2202                        if (track->textures[u].compress_format) {
2203
2204                                size += r100_track_compress_size(track->textures[u].compress_format, w, h) * d;
2205                                /* compressed textures are block based */
2206                        } else
2207                                size += w * h * d;
2208                }
2209                size *= track->textures[u].cpp;
2210
2211                switch (track->textures[u].tex_coord_type) {
2212                case 0:
2213                case 1:
2214                        break;
2215                case 2:
2216                        if (track->separate_cube) {
2217                                ret = r100_cs_track_cube(rdev, track, u);
2218                                if (ret)
2219                                        return ret;
2220                        } else
2221                                size *= 6;
2222                        break;
2223                default:
2224                        DRM_ERROR("Invalid texture coordinate type %u for unit "
2225                                  "%u\n", track->textures[u].tex_coord_type, u);
2226                        return -EINVAL;
2227                }
2228                if (size > radeon_bo_size(robj)) {
2229                        DRM_ERROR("Texture of unit %u needs %lu bytes but is "
2230                                  "%lu\n", u, size, radeon_bo_size(robj));
2231                        r100_cs_track_texture_print(&track->textures[u]);
2232                        return -EINVAL;
2233                }
2234        }
2235        return 0;
2236}
2237
2238int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
2239{
2240        unsigned i;
2241        unsigned long size;
2242        unsigned prim_walk;
2243        unsigned nverts;
2244        unsigned num_cb = track->cb_dirty ? track->num_cb : 0;
2245
2246        if (num_cb && !track->zb_cb_clear && !track->color_channel_mask &&
2247            !track->blend_read_enable)
2248                num_cb = 0;
2249
2250        for (i = 0; i < num_cb; i++) {
2251                if (track->cb[i].robj == NULL) {
2252                        DRM_ERROR("[drm] No buffer for color buffer %d !\n", i);
2253                        return -EINVAL;
2254                }
2255                size = track->cb[i].pitch * track->cb[i].cpp * track->maxy;
2256                size += track->cb[i].offset;
2257                if (size > radeon_bo_size(track->cb[i].robj)) {
2258                        DRM_ERROR("[drm] Buffer too small for color buffer %d "
2259                                  "(need %lu have %lu) !\n", i, size,
2260                                  radeon_bo_size(track->cb[i].robj));
2261                        DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n",
2262                                  i, track->cb[i].pitch, track->cb[i].cpp,
2263                                  track->cb[i].offset, track->maxy);
2264                        return -EINVAL;
2265                }
2266        }
2267        track->cb_dirty = false;
2268
2269        if (track->zb_dirty && track->z_enabled) {
2270                if (track->zb.robj == NULL) {
2271                        DRM_ERROR("[drm] No buffer for z buffer !\n");
2272                        return -EINVAL;
2273                }
2274                size = track->zb.pitch * track->zb.cpp * track->maxy;
2275                size += track->zb.offset;
2276                if (size > radeon_bo_size(track->zb.robj)) {
2277                        DRM_ERROR("[drm] Buffer too small for z buffer "
2278                                  "(need %lu have %lu) !\n", size,
2279                                  radeon_bo_size(track->zb.robj));
2280                        DRM_ERROR("[drm] zbuffer (%u %u %u %u)\n",
2281                                  track->zb.pitch, track->zb.cpp,
2282                                  track->zb.offset, track->maxy);
2283                        return -EINVAL;
2284                }
2285        }
2286        track->zb_dirty = false;
2287
2288        if (track->aa_dirty && track->aaresolve) {
2289                if (track->aa.robj == NULL) {
2290                        DRM_ERROR("[drm] No buffer for AA resolve buffer %d !\n", i);
2291                        return -EINVAL;
2292                }
2293                /* I believe the format comes from colorbuffer0. */
2294                size = track->aa.pitch * track->cb[0].cpp * track->maxy;
2295                size += track->aa.offset;
2296                if (size > radeon_bo_size(track->aa.robj)) {
2297                        DRM_ERROR("[drm] Buffer too small for AA resolve buffer %d "
2298                                  "(need %lu have %lu) !\n", i, size,
2299                                  radeon_bo_size(track->aa.robj));
2300                        DRM_ERROR("[drm] AA resolve buffer %d (%u %u %u %u)\n",
2301                                  i, track->aa.pitch, track->cb[0].cpp,
2302                                  track->aa.offset, track->maxy);
2303                        return -EINVAL;
2304                }
2305        }
2306        track->aa_dirty = false;
2307
2308        prim_walk = (track->vap_vf_cntl >> 4) & 0x3;
2309        if (track->vap_vf_cntl & (1 << 14)) {
2310                nverts = track->vap_alt_nverts;
2311        } else {
2312                nverts = (track->vap_vf_cntl >> 16) & 0xFFFF;
2313        }
2314        switch (prim_walk) {
2315        case 1:
2316                for (i = 0; i < track->num_arrays; i++) {
2317                        size = track->arrays[i].esize * track->max_indx * 4;
2318                        if (track->arrays[i].robj == NULL) {
2319                                DRM_ERROR("(PW %u) Vertex array %u no buffer "
2320                                          "bound\n", prim_walk, i);
2321                                return -EINVAL;
2322                        }
2323                        if (size > radeon_bo_size(track->arrays[i].robj)) {
2324                                dev_err(rdev->dev, "(PW %u) Vertex array %u "
2325                                        "need %lu dwords have %lu dwords\n",
2326                                        prim_walk, i, size >> 2,
2327                                        radeon_bo_size(track->arrays[i].robj)
2328                                        >> 2);
2329                                DRM_ERROR("Max indices %u\n", track->max_indx);
2330                                return -EINVAL;
2331                        }
2332                }
2333                break;
2334        case 2:
2335                for (i = 0; i < track->num_arrays; i++) {
2336                        size = track->arrays[i].esize * (nverts - 1) * 4;
2337                        if (track->arrays[i].robj == NULL) {
2338                                DRM_ERROR("(PW %u) Vertex array %u no buffer "
2339                                          "bound\n", prim_walk, i);
2340                                return -EINVAL;
2341                        }
2342                        if (size > radeon_bo_size(track->arrays[i].robj)) {
2343                                dev_err(rdev->dev, "(PW %u) Vertex array %u "
2344                                        "need %lu dwords have %lu dwords\n",
2345                                        prim_walk, i, size >> 2,
2346                                        radeon_bo_size(track->arrays[i].robj)
2347                                        >> 2);
2348                                return -EINVAL;
2349                        }
2350                }
2351                break;
2352        case 3:
2353                size = track->vtx_size * nverts;
2354                if (size != track->immd_dwords) {
2355                        DRM_ERROR("IMMD draw %u dwors but needs %lu dwords\n",
2356                                  track->immd_dwords, size);
2357                        DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n",
2358                                  nverts, track->vtx_size);
2359                        return -EINVAL;
2360                }
2361                break;
2362        default:
2363                DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n",
2364                          prim_walk);
2365                return -EINVAL;
2366        }
2367
2368        if (track->tex_dirty) {
2369                track->tex_dirty = false;
2370                return r100_cs_track_texture_check(rdev, track);
2371        }
2372        return 0;
2373}
2374
2375void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track)
2376{
2377        unsigned i, face;
2378
2379        track->cb_dirty = true;
2380        track->zb_dirty = true;
2381        track->tex_dirty = true;
2382        track->aa_dirty = true;
2383
2384        if (rdev->family < CHIP_R300) {
2385                track->num_cb = 1;
2386                if (rdev->family <= CHIP_RS200)
2387                        track->num_texture = 3;
2388                else
2389                        track->num_texture = 6;
2390                track->maxy = 2048;
2391                track->separate_cube = true;
2392        } else {
2393                track->num_cb = 4;
2394                track->num_texture = 16;
2395                track->maxy = 4096;
2396                track->separate_cube = false;
2397                track->aaresolve = false;
2398                track->aa.robj = NULL;
2399        }
2400
2401        for (i = 0; i < track->num_cb; i++) {
2402                track->cb[i].robj = NULL;
2403                track->cb[i].pitch = 8192;
2404                track->cb[i].cpp = 16;
2405                track->cb[i].offset = 0;
2406        }
2407        track->z_enabled = true;
2408        track->zb.robj = NULL;
2409        track->zb.pitch = 8192;
2410        track->zb.cpp = 4;
2411        track->zb.offset = 0;
2412        track->vtx_size = 0x7F;
2413        track->immd_dwords = 0xFFFFFFFFUL;
2414        track->num_arrays = 11;
2415        track->max_indx = 0x00FFFFFFUL;
2416        for (i = 0; i < track->num_arrays; i++) {
2417                track->arrays[i].robj = NULL;
2418                track->arrays[i].esize = 0x7F;
2419        }
2420        for (i = 0; i < track->num_texture; i++) {
2421                track->textures[i].compress_format = R100_TRACK_COMP_NONE;
2422                track->textures[i].pitch = 16536;
2423                track->textures[i].width = 16536;
2424                track->textures[i].height = 16536;
2425                track->textures[i].width_11 = 1 << 11;
2426                track->textures[i].height_11 = 1 << 11;
2427                track->textures[i].num_levels = 12;
2428                if (rdev->family <= CHIP_RS200) {
2429                        track->textures[i].tex_coord_type = 0;
2430                        track->textures[i].txdepth = 0;
2431                } else {
2432                        track->textures[i].txdepth = 16;
2433                        track->textures[i].tex_coord_type = 1;
2434                }
2435                track->textures[i].cpp = 64;
2436                track->textures[i].robj = NULL;
2437                /* CS IB emission code makes sure texture unit are disabled */
2438                track->textures[i].enabled = false;
2439                track->textures[i].lookup_disable = false;
2440                track->textures[i].roundup_w = true;
2441                track->textures[i].roundup_h = true;
2442                if (track->separate_cube)
2443                        for (face = 0; face < 5; face++) {
2444                                track->textures[i].cube_info[face].robj = NULL;
2445                                track->textures[i].cube_info[face].width = 16536;
2446                                track->textures[i].cube_info[face].height = 16536;
2447                                track->textures[i].cube_info[face].offset = 0;
2448                        }
2449        }
2450}
2451
2452/*
2453 * Global GPU functions
2454 */
2455static void r100_errata(struct radeon_device *rdev)
2456{
2457        rdev->pll_errata = 0;
2458
2459        if (rdev->family == CHIP_RV200 || rdev->family == CHIP_RS200) {
2460                rdev->pll_errata |= CHIP_ERRATA_PLL_DUMMYREADS;
2461        }
2462
2463        if (rdev->family == CHIP_RV100 ||
2464            rdev->family == CHIP_RS100 ||
2465            rdev->family == CHIP_RS200) {
2466                rdev->pll_errata |= CHIP_ERRATA_PLL_DELAY;
2467        }
2468}
2469
2470static int r100_rbbm_fifo_wait_for_entry(struct radeon_device *rdev, unsigned n)
2471{
2472        unsigned i;
2473        uint32_t tmp;
2474
2475        for (i = 0; i < rdev->usec_timeout; i++) {
2476                tmp = RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK;
2477                if (tmp >= n) {
2478                        return 0;
2479                }
2480                udelay(1);
2481        }
2482        return -1;
2483}
2484
2485int r100_gui_wait_for_idle(struct radeon_device *rdev)
2486{
2487        unsigned i;
2488        uint32_t tmp;
2489
2490        if (r100_rbbm_fifo_wait_for_entry(rdev, 64)) {
2491                pr_warn("radeon: wait for empty RBBM fifo failed! Bad things might happen.\n");
2492        }
2493        for (i = 0; i < rdev->usec_timeout; i++) {
2494                tmp = RREG32(RADEON_RBBM_STATUS);
2495                if (!(tmp & RADEON_RBBM_ACTIVE)) {
2496                        return 0;
2497                }
2498                udelay(1);
2499        }
2500        return -1;
2501}
2502
2503int r100_mc_wait_for_idle(struct radeon_device *rdev)
2504{
2505        unsigned i;
2506        uint32_t tmp;
2507
2508        for (i = 0; i < rdev->usec_timeout; i++) {
2509                /* read MC_STATUS */
2510                tmp = RREG32(RADEON_MC_STATUS);
2511                if (tmp & RADEON_MC_IDLE) {
2512                        return 0;
2513                }
2514                udelay(1);
2515        }
2516        return -1;
2517}
2518
2519bool r100_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
2520{
2521        u32 rbbm_status;
2522
2523        rbbm_status = RREG32(R_000E40_RBBM_STATUS);
2524        if (!G_000E40_GUI_ACTIVE(rbbm_status)) {
2525                radeon_ring_lockup_update(rdev, ring);
2526                return false;
2527        }
2528        return radeon_ring_test_lockup(rdev, ring);
2529}
2530
2531/* required on r1xx, r2xx, r300, r(v)350, r420/r481, rs400/rs480 */
2532void r100_enable_bm(struct radeon_device *rdev)
2533{
2534        uint32_t tmp;
2535        /* Enable bus mastering */
2536        tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS;
2537        WREG32(RADEON_BUS_CNTL, tmp);
2538}
2539
2540void r100_bm_disable(struct radeon_device *rdev)
2541{
2542        u32 tmp;
2543
2544        /* disable bus mastering */
2545        tmp = RREG32(R_000030_BUS_CNTL);
2546        WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000044);
2547        mdelay(1);
2548        WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000042);
2549        mdelay(1);
2550        WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000040);
2551        tmp = RREG32(RADEON_BUS_CNTL);
2552        mdelay(1);
2553        pci_clear_master(rdev->pdev);
2554        mdelay(1);
2555}
2556
2557int r100_asic_reset(struct radeon_device *rdev, bool hard)
2558{
2559        struct r100_mc_save save;
2560        u32 status, tmp;
2561        int ret = 0;
2562
2563        status = RREG32(R_000E40_RBBM_STATUS);
2564        if (!G_000E40_GUI_ACTIVE(status)) {
2565                return 0;
2566        }
2567        r100_mc_stop(rdev, &save);
2568        status = RREG32(R_000E40_RBBM_STATUS);
2569        dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
2570        /* stop CP */
2571        WREG32(RADEON_CP_CSQ_CNTL, 0);
2572        tmp = RREG32(RADEON_CP_RB_CNTL);
2573        WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA);
2574        WREG32(RADEON_CP_RB_RPTR_WR, 0);
2575        WREG32(RADEON_CP_RB_WPTR, 0);
2576        WREG32(RADEON_CP_RB_CNTL, tmp);
2577        /* save PCI state */
2578        pci_save_state(rdev->pdev);
2579        /* disable bus mastering */
2580        r100_bm_disable(rdev);
2581        WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_SE(1) |
2582                                        S_0000F0_SOFT_RESET_RE(1) |
2583                                        S_0000F0_SOFT_RESET_PP(1) |
2584                                        S_0000F0_SOFT_RESET_RB(1));
2585        RREG32(R_0000F0_RBBM_SOFT_RESET);
2586        mdelay(500);
2587        WREG32(R_0000F0_RBBM_SOFT_RESET, 0);
2588        mdelay(1);
2589        status = RREG32(R_000E40_RBBM_STATUS);
2590        dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
2591        /* reset CP */
2592        WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_CP(1));
2593        RREG32(R_0000F0_RBBM_SOFT_RESET);
2594        mdelay(500);
2595        WREG32(R_0000F0_RBBM_SOFT_RESET, 0);
2596        mdelay(1);
2597        status = RREG32(R_000E40_RBBM_STATUS);
2598        dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
2599        /* restore PCI & busmastering */
2600        pci_restore_state(rdev->pdev);
2601        r100_enable_bm(rdev);
2602        /* Check if GPU is idle */
2603        if (G_000E40_SE_BUSY(status) || G_000E40_RE_BUSY(status) ||
2604                G_000E40_TAM_BUSY(status) || G_000E40_PB_BUSY(status)) {
2605                dev_err(rdev->dev, "failed to reset GPU\n");
2606                ret = -1;
2607        } else
2608                dev_info(rdev->dev, "GPU reset succeed\n");
2609        r100_mc_resume(rdev, &save);
2610        return ret;
2611}
2612
2613void r100_set_common_regs(struct radeon_device *rdev)
2614{
2615        bool force_dac2 = false;
2616        u32 tmp;
2617
2618        /* set these so they don't interfere with anything */
2619        WREG32(RADEON_OV0_SCALE_CNTL, 0);
2620        WREG32(RADEON_SUBPIC_CNTL, 0);
2621        WREG32(RADEON_VIPH_CONTROL, 0);
2622        WREG32(RADEON_I2C_CNTL_1, 0);
2623        WREG32(RADEON_DVI_I2C_CNTL_1, 0);
2624        WREG32(RADEON_CAP0_TRIG_CNTL, 0);
2625        WREG32(RADEON_CAP1_TRIG_CNTL, 0);
2626
2627        /* always set up dac2 on rn50 and some rv100 as lots
2628         * of servers seem to wire it up to a VGA port but
2629         * don't report it in the bios connector
2630         * table.
2631         */
2632        switch (rdev->pdev->device) {
2633                /* RN50 */
2634        case 0x515e:
2635        case 0x5969:
2636                force_dac2 = true;
2637                break;
2638                /* RV100*/
2639        case 0x5159:
2640        case 0x515a:
2641                /* DELL triple head servers */
2642                if ((rdev->pdev->subsystem_vendor == 0x1028 /* DELL */) &&
2643                    ((rdev->pdev->subsystem_device == 0x016c) ||
2644                     (rdev->pdev->subsystem_device == 0x016d) ||
2645                     (rdev->pdev->subsystem_device == 0x016e) ||
2646                     (rdev->pdev->subsystem_device == 0x016f) ||
2647                     (rdev->pdev->subsystem_device == 0x0170) ||
2648                     (rdev->pdev->subsystem_device == 0x017d) ||
2649                     (rdev->pdev->subsystem_device == 0x017e) ||
2650                     (rdev->pdev->subsystem_device == 0x0183) ||
2651                     (rdev->pdev->subsystem_device == 0x018a) ||
2652                     (rdev->pdev->subsystem_device == 0x019a)))
2653                        force_dac2 = true;
2654                break;
2655        }
2656
2657        if (force_dac2) {
2658                u32 disp_hw_debug = RREG32(RADEON_DISP_HW_DEBUG);
2659                u32 tv_dac_cntl = RREG32(RADEON_TV_DAC_CNTL);
2660                u32 dac2_cntl = RREG32(RADEON_DAC_CNTL2);
2661
2662                /* For CRT on DAC2, don't turn it on if BIOS didn't
2663                   enable it, even it's detected.
2664                */
2665
2666                /* force it to crtc0 */
2667                dac2_cntl &= ~RADEON_DAC2_DAC_CLK_SEL;
2668                dac2_cntl |= RADEON_DAC2_DAC2_CLK_SEL;
2669                disp_hw_debug |= RADEON_CRT2_DISP1_SEL;
2670
2671                /* set up the TV DAC */
2672                tv_dac_cntl &= ~(RADEON_TV_DAC_PEDESTAL |
2673                                 RADEON_TV_DAC_STD_MASK |
2674                                 RADEON_TV_DAC_RDACPD |
2675                                 RADEON_TV_DAC_GDACPD |
2676                                 RADEON_TV_DAC_BDACPD |
2677                                 RADEON_TV_DAC_BGADJ_MASK |
2678                                 RADEON_TV_DAC_DACADJ_MASK);
2679                tv_dac_cntl |= (RADEON_TV_DAC_NBLANK |
2680                                RADEON_TV_DAC_NHOLD |
2681                                RADEON_TV_DAC_STD_PS2 |
2682                                (0x58 << 16));
2683
2684                WREG32(RADEON_TV_DAC_CNTL, tv_dac_cntl);
2685                WREG32(RADEON_DISP_HW_DEBUG, disp_hw_debug);
2686                WREG32(RADEON_DAC_CNTL2, dac2_cntl);
2687        }
2688
2689        /* switch PM block to ACPI mode */
2690        tmp = RREG32_PLL(RADEON_PLL_PWRMGT_CNTL);
2691        tmp &= ~RADEON_PM_MODE_SEL;
2692        WREG32_PLL(RADEON_PLL_PWRMGT_CNTL, tmp);
2693
2694}
2695
2696/*
2697 * VRAM info
2698 */
2699static void r100_vram_get_type(struct radeon_device *rdev)
2700{
2701        uint32_t tmp;
2702
2703        rdev->mc.vram_is_ddr = false;
2704        if (rdev->flags & RADEON_IS_IGP)
2705                rdev->mc.vram_is_ddr = true;
2706        else if (RREG32(RADEON_MEM_SDRAM_MODE_REG) & RADEON_MEM_CFG_TYPE_DDR)
2707                rdev->mc.vram_is_ddr = true;
2708        if ((rdev->family == CHIP_RV100) ||
2709            (rdev->family == CHIP_RS100) ||
2710            (rdev->family == CHIP_RS200)) {
2711                tmp = RREG32(RADEON_MEM_CNTL);
2712                if (tmp & RV100_HALF_MODE) {
2713                        rdev->mc.vram_width = 32;
2714                } else {
2715                        rdev->mc.vram_width = 64;
2716                }
2717                if (rdev->flags & RADEON_SINGLE_CRTC) {
2718                        rdev->mc.vram_width /= 4;
2719                        rdev->mc.vram_is_ddr = true;
2720                }
2721        } else if (rdev->family <= CHIP_RV280) {
2722                tmp = RREG32(RADEON_MEM_CNTL);
2723                if (tmp & RADEON_MEM_NUM_CHANNELS_MASK) {
2724                        rdev->mc.vram_width = 128;
2725                } else {
2726                        rdev->mc.vram_width = 64;
2727                }
2728        } else {
2729                /* newer IGPs */
2730                rdev->mc.vram_width = 128;
2731        }
2732}
2733
2734static u32 r100_get_accessible_vram(struct radeon_device *rdev)
2735{
2736        u32 aper_size;
2737        u8 byte;
2738
2739        aper_size = RREG32(RADEON_CONFIG_APER_SIZE);
2740
2741        /* Set HDP_APER_CNTL only on cards that are known not to be broken,
2742         * that is has the 2nd generation multifunction PCI interface
2743         */
2744        if (rdev->family == CHIP_RV280 ||
2745            rdev->family >= CHIP_RV350) {
2746                WREG32_P(RADEON_HOST_PATH_CNTL, RADEON_HDP_APER_CNTL,
2747                       ~RADEON_HDP_APER_CNTL);
2748                DRM_INFO("Generation 2 PCI interface, using max accessible memory\n");
2749                return aper_size * 2;
2750        }
2751
2752        /* Older cards have all sorts of funny issues to deal with. First
2753         * check if it's a multifunction card by reading the PCI config
2754         * header type... Limit those to one aperture size
2755         */
2756        pci_read_config_byte(rdev->pdev, 0xe, &byte);
2757        if (byte & 0x80) {
2758                DRM_INFO("Generation 1 PCI interface in multifunction mode\n");
2759                DRM_INFO("Limiting VRAM to one aperture\n");
2760                return aper_size;
2761        }
2762
2763        /* Single function older card. We read HDP_APER_CNTL to see how the BIOS
2764         * have set it up. We don't write this as it's broken on some ASICs but
2765         * we expect the BIOS to have done the right thing (might be too optimistic...)
2766         */
2767        if (RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL)
2768                return aper_size * 2;
2769        return aper_size;
2770}
2771
2772void r100_vram_init_sizes(struct radeon_device *rdev)
2773{
2774        u64 config_aper_size;
2775
2776        /* work out accessible VRAM */
2777        rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
2778        rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
2779        rdev->mc.visible_vram_size = r100_get_accessible_vram(rdev);
2780        /* FIXME we don't use the second aperture yet when we could use it */
2781        if (rdev->mc.visible_vram_size > rdev->mc.aper_size)
2782                rdev->mc.visible_vram_size = rdev->mc.aper_size;
2783        config_aper_size = RREG32(RADEON_CONFIG_APER_SIZE);
2784        if (rdev->flags & RADEON_IS_IGP) {
2785                uint32_t tom;
2786                /* read NB_TOM to get the amount of ram stolen for the GPU */
2787                tom = RREG32(RADEON_NB_TOM);
2788                rdev->mc.real_vram_size = (((tom >> 16) - (tom & 0xffff) + 1) << 16);
2789                WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
2790                rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
2791        } else {
2792                rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
2793                /* Some production boards of m6 will report 0
2794                 * if it's 8 MB
2795                 */
2796                if (rdev->mc.real_vram_size == 0) {
2797                        rdev->mc.real_vram_size = 8192 * 1024;
2798                        WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
2799                }
2800                /* Fix for RN50, M6, M7 with 8/16/32(??) MBs of VRAM -
2801                 * Novell bug 204882 + along with lots of ubuntu ones
2802                 */
2803                if (rdev->mc.aper_size > config_aper_size)
2804                        config_aper_size = rdev->mc.aper_size;
2805
2806                if (config_aper_size > rdev->mc.real_vram_size)
2807                        rdev->mc.mc_vram_size = config_aper_size;
2808                else
2809                        rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
2810        }
2811}
2812
2813void r100_vga_set_state(struct radeon_device *rdev, bool state)
2814{
2815        uint32_t temp;
2816
2817        temp = RREG32(RADEON_CONFIG_CNTL);
2818        if (!state) {
2819                temp &= ~RADEON_CFG_VGA_RAM_EN;
2820                temp |= RADEON_CFG_VGA_IO_DIS;
2821        } else {
2822                temp &= ~RADEON_CFG_VGA_IO_DIS;
2823        }
2824        WREG32(RADEON_CONFIG_CNTL, temp);
2825}
2826
2827static void r100_mc_init(struct radeon_device *rdev)
2828{
2829        u64 base;
2830
2831        r100_vram_get_type(rdev);
2832        r100_vram_init_sizes(rdev);
2833        base = rdev->mc.aper_base;
2834        if (rdev->flags & RADEON_IS_IGP)
2835                base = (RREG32(RADEON_NB_TOM) & 0xffff) << 16;
2836        radeon_vram_location(rdev, &rdev->mc, base);
2837        rdev->mc.gtt_base_align = 0;
2838        if (!(rdev->flags & RADEON_IS_AGP))
2839                radeon_gtt_location(rdev, &rdev->mc);
2840        radeon_update_bandwidth_info(rdev);
2841}
2842
2843
2844/*
2845 * Indirect registers accessor
2846 */
2847void r100_pll_errata_after_index(struct radeon_device *rdev)
2848{
2849        if (rdev->pll_errata & CHIP_ERRATA_PLL_DUMMYREADS) {
2850                (void)RREG32(RADEON_CLOCK_CNTL_DATA);
2851                (void)RREG32(RADEON_CRTC_GEN_CNTL);
2852        }
2853}
2854
2855static void r100_pll_errata_after_data(struct radeon_device *rdev)
2856{
2857        /* This workarounds is necessary on RV100, RS100 and RS200 chips
2858         * or the chip could hang on a subsequent access
2859         */
2860        if (rdev->pll_errata & CHIP_ERRATA_PLL_DELAY) {
2861                mdelay(5);
2862        }
2863
2864        /* This function is required to workaround a hardware bug in some (all?)
2865         * revisions of the R300.  This workaround should be called after every
2866         * CLOCK_CNTL_INDEX register access.  If not, register reads afterward
2867         * may not be correct.
2868         */
2869        if (rdev->pll_errata & CHIP_ERRATA_R300_CG) {
2870                uint32_t save, tmp;
2871
2872                save = RREG32(RADEON_CLOCK_CNTL_INDEX);
2873                tmp = save & ~(0x3f | RADEON_PLL_WR_EN);
2874                WREG32(RADEON_CLOCK_CNTL_INDEX, tmp);
2875                tmp = RREG32(RADEON_CLOCK_CNTL_DATA);
2876                WREG32(RADEON_CLOCK_CNTL_INDEX, save);
2877        }
2878}
2879
2880uint32_t r100_pll_rreg(struct radeon_device *rdev, uint32_t reg)
2881{
2882        unsigned long flags;
2883        uint32_t data;
2884
2885        spin_lock_irqsave(&rdev->pll_idx_lock, flags);
2886        WREG8(RADEON_CLOCK_CNTL_INDEX, reg & 0x3f);
2887        r100_pll_errata_after_index(rdev);
2888        data = RREG32(RADEON_CLOCK_CNTL_DATA);
2889        r100_pll_errata_after_data(rdev);
2890        spin_unlock_irqrestore(&rdev->pll_idx_lock, flags);
2891        return data;
2892}
2893
2894void r100_pll_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
2895{
2896        unsigned long flags;
2897
2898        spin_lock_irqsave(&rdev->pll_idx_lock, flags);
2899        WREG8(RADEON_CLOCK_CNTL_INDEX, ((reg & 0x3f) | RADEON_PLL_WR_EN));
2900        r100_pll_errata_after_index(rdev);
2901        WREG32(RADEON_CLOCK_CNTL_DATA, v);
2902        r100_pll_errata_after_data(rdev);
2903        spin_unlock_irqrestore(&rdev->pll_idx_lock, flags);
2904}
2905
2906static void r100_set_safe_registers(struct radeon_device *rdev)
2907{
2908        if (ASIC_IS_RN50(rdev)) {
2909                rdev->config.r100.reg_safe_bm = rn50_reg_safe_bm;
2910                rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(rn50_reg_safe_bm);
2911        } else if (rdev->family < CHIP_R200) {
2912                rdev->config.r100.reg_safe_bm = r100_reg_safe_bm;
2913                rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(r100_reg_safe_bm);
2914        } else {
2915                r200_set_safe_registers(rdev);
2916        }
2917}
2918
2919/*
2920 * Debugfs info
2921 */
2922#if defined(CONFIG_DEBUG_FS)
2923static int r100_debugfs_rbbm_info(struct seq_file *m, void *data)
2924{
2925        struct drm_info_node *node = (struct drm_info_node *) m->private;
2926        struct drm_device *dev = node->minor->dev;
2927        struct radeon_device *rdev = dev->dev_private;
2928        uint32_t reg, value;
2929        unsigned i;
2930
2931        seq_printf(m, "RBBM_STATUS 0x%08x\n", RREG32(RADEON_RBBM_STATUS));
2932        seq_printf(m, "RBBM_CMDFIFO_STAT 0x%08x\n", RREG32(0xE7C));
2933        seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
2934        for (i = 0; i < 64; i++) {
2935                WREG32(RADEON_RBBM_CMDFIFO_ADDR, i | 0x100);
2936                reg = (RREG32(RADEON_RBBM_CMDFIFO_DATA) - 1) >> 2;
2937                WREG32(RADEON_RBBM_CMDFIFO_ADDR, i);
2938                value = RREG32(RADEON_RBBM_CMDFIFO_DATA);
2939                seq_printf(m, "[0x%03X] 0x%04X=0x%08X\n", i, reg, value);
2940        }
2941        return 0;
2942}
2943
2944static int r100_debugfs_cp_ring_info(struct seq_file *m, void *data)
2945{
2946        struct drm_info_node *node = (struct drm_info_node *) m->private;
2947        struct drm_device *dev = node->minor->dev;
2948        struct radeon_device *rdev = dev->dev_private;
2949        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2950        uint32_t rdp, wdp;
2951        unsigned count, i, j;
2952
2953        radeon_ring_free_size(rdev, ring);
2954        rdp = RREG32(RADEON_CP_RB_RPTR);
2955        wdp = RREG32(RADEON_CP_RB_WPTR);
2956        count = (rdp + ring->ring_size - wdp) & ring->ptr_mask;
2957        seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
2958        seq_printf(m, "CP_RB_WPTR 0x%08x\n", wdp);
2959        seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp);
2960        seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw);
2961        seq_printf(m, "%u dwords in ring\n", count);
2962        if (ring->ready) {
2963                for (j = 0; j <= count; j++) {
2964                        i = (rdp + j) & ring->ptr_mask;
2965                        seq_printf(m, "r[%04d]=0x%08x\n", i, ring->ring[i]);
2966                }
2967        }
2968        return 0;
2969}
2970
2971
2972static int r100_debugfs_cp_csq_fifo(struct seq_file *m, void *data)
2973{
2974        struct drm_info_node *node = (struct drm_info_node *) m->private;
2975        struct drm_device *dev = node->minor->dev;
2976        struct radeon_device *rdev = dev->dev_private;
2977        uint32_t csq_stat, csq2_stat, tmp;
2978        unsigned r_rptr, r_wptr, ib1_rptr, ib1_wptr, ib2_rptr, ib2_wptr;
2979        unsigned i;
2980
2981        seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
2982        seq_printf(m, "CP_CSQ_MODE 0x%08x\n", RREG32(RADEON_CP_CSQ_MODE));
2983        csq_stat = RREG32(RADEON_CP_CSQ_STAT);
2984        csq2_stat = RREG32(RADEON_CP_CSQ2_STAT);
2985        r_rptr = (csq_stat >> 0) & 0x3ff;
2986        r_wptr = (csq_stat >> 10) & 0x3ff;
2987        ib1_rptr = (csq_stat >> 20) & 0x3ff;
2988        ib1_wptr = (csq2_stat >> 0) & 0x3ff;
2989        ib2_rptr = (csq2_stat >> 10) & 0x3ff;
2990        ib2_wptr = (csq2_stat >> 20) & 0x3ff;
2991        seq_printf(m, "CP_CSQ_STAT 0x%08x\n", csq_stat);
2992        seq_printf(m, "CP_CSQ2_STAT 0x%08x\n", csq2_stat);
2993        seq_printf(m, "Ring rptr %u\n", r_rptr);
2994        seq_printf(m, "Ring wptr %u\n", r_wptr);
2995        seq_printf(m, "Indirect1 rptr %u\n", ib1_rptr);
2996        seq_printf(m, "Indirect1 wptr %u\n", ib1_wptr);
2997        seq_printf(m, "Indirect2 rptr %u\n", ib2_rptr);
2998        seq_printf(m, "Indirect2 wptr %u\n", ib2_wptr);
2999        /* FIXME: 0, 128, 640 depends on fifo setup see cp_init_kms
3000         * 128 = indirect1_start * 8 & 640 = indirect2_start * 8 */
3001        seq_printf(m, "Ring fifo:\n");
3002        for (i = 0; i < 256; i++) {
3003                WREG32(RADEON_CP_CSQ_ADDR, i << 2);
3004                tmp = RREG32(RADEON_CP_CSQ_DATA);
3005                seq_printf(m, "rfifo[%04d]=0x%08X\n", i, tmp);
3006        }
3007        seq_printf(m, "Indirect1 fifo:\n");
3008        for (i = 256; i <= 512; i++) {
3009                WREG32(RADEON_CP_CSQ_ADDR, i << 2);
3010                tmp = RREG32(RADEON_CP_CSQ_DATA);
3011                seq_printf(m, "ib1fifo[%04d]=0x%08X\n", i, tmp);
3012        }
3013        seq_printf(m, "Indirect2 fifo:\n");
3014        for (i = 640; i < ib1_wptr; i++) {
3015                WREG32(RADEON_CP_CSQ_ADDR, i << 2);
3016                tmp = RREG32(RADEON_CP_CSQ_DATA);
3017                seq_printf(m, "ib2fifo[%04d]=0x%08X\n", i, tmp);
3018        }
3019        return 0;
3020}
3021
3022static int r100_debugfs_mc_info(struct seq_file *m, void *data)
3023{
3024        struct drm_info_node *node = (struct drm_info_node *) m->private;
3025        struct drm_device *dev = node->minor->dev;
3026        struct radeon_device *rdev = dev->dev_private;
3027        uint32_t tmp;
3028
3029        tmp = RREG32(RADEON_CONFIG_MEMSIZE);
3030        seq_printf(m, "CONFIG_MEMSIZE 0x%08x\n", tmp);
3031        tmp = RREG32(RADEON_MC_FB_LOCATION);
3032        seq_printf(m, "MC_FB_LOCATION 0x%08x\n", tmp);
3033        tmp = RREG32(RADEON_BUS_CNTL);
3034        seq_printf(m, "BUS_CNTL 0x%08x\n", tmp);
3035        tmp = RREG32(RADEON_MC_AGP_LOCATION);
3036        seq_printf(m, "MC_AGP_LOCATION 0x%08x\n", tmp);
3037        tmp = RREG32(RADEON_AGP_BASE);
3038        seq_printf(m, "AGP_BASE 0x%08x\n", tmp);
3039        tmp = RREG32(RADEON_HOST_PATH_CNTL);
3040        seq_printf(m, "HOST_PATH_CNTL 0x%08x\n", tmp);
3041        tmp = RREG32(0x01D0);
3042        seq_printf(m, "AIC_CTRL 0x%08x\n", tmp);
3043        tmp = RREG32(RADEON_AIC_LO_ADDR);
3044        seq_printf(m, "AIC_LO_ADDR 0x%08x\n", tmp);
3045        tmp = RREG32(RADEON_AIC_HI_ADDR);
3046        seq_printf(m, "AIC_HI_ADDR 0x%08x\n", tmp);
3047        tmp = RREG32(0x01E4);
3048        seq_printf(m, "AIC_TLB_ADDR 0x%08x\n", tmp);
3049        return 0;
3050}
3051
3052static struct drm_info_list r100_debugfs_rbbm_list[] = {
3053        {"r100_rbbm_info", r100_debugfs_rbbm_info, 0, NULL},
3054};
3055
3056static struct drm_info_list r100_debugfs_cp_list[] = {
3057        {"r100_cp_ring_info", r100_debugfs_cp_ring_info, 0, NULL},
3058        {"r100_cp_csq_fifo", r100_debugfs_cp_csq_fifo, 0, NULL},
3059};
3060
3061static struct drm_info_list r100_debugfs_mc_info_list[] = {
3062        {"r100_mc_info", r100_debugfs_mc_info, 0, NULL},
3063};
3064#endif
3065
/*
 * Register the RBBM debugfs file(s) for @rdev.
 *
 * Returns the result of radeon_debugfs_add_files() when CONFIG_DEBUG_FS is
 * enabled, and 0 otherwise.
 */
int r100_debugfs_rbbm_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
        /* Take the entry count from the table so the two cannot drift apart. */
        return radeon_debugfs_add_files(rdev, r100_debugfs_rbbm_list,
                                        ARRAY_SIZE(r100_debugfs_rbbm_list));
#else
        return 0;
#endif
}
3074
/*
 * Register the CP debugfs files (ring info and CSQ FIFO dump) for @rdev.
 *
 * Returns the result of radeon_debugfs_add_files() when CONFIG_DEBUG_FS is
 * enabled, and 0 otherwise.
 */
int r100_debugfs_cp_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
        /* Take the entry count from the table so the two cannot drift apart. */
        return radeon_debugfs_add_files(rdev, r100_debugfs_cp_list,
                                        ARRAY_SIZE(r100_debugfs_cp_list));
#else
        return 0;
#endif
}
3083
/*
 * Register the memory-controller debugfs file(s) for @rdev.
 *
 * Returns the result of radeon_debugfs_add_files() when CONFIG_DEBUG_FS is
 * enabled, and 0 otherwise.
 */
int r100_debugfs_mc_info_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
        /* Take the entry count from the table so the two cannot drift apart. */
        return radeon_debugfs_add_files(rdev, r100_debugfs_mc_info_list,
                                        ARRAY_SIZE(r100_debugfs_mc_info_list));
#else
        return 0;
#endif
}
3092
3093int r100_set_surface_reg(struct radeon_device *rdev, int reg,
3094                         uint32_t tiling_flags, uint32_t pitch,
3095                         uint32_t offset, uint32_t obj_size)
3096{
3097        int surf_index = reg * 16;
3098        int flags = 0;
3099
3100        if (rdev->family <= CHIP_RS200) {
3101                if ((tiling_flags & (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
3102                                 == (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
3103                        flags |= RADEON_SURF_TILE_COLOR_BOTH;
3104                if (tiling_flags & RADEON_TILING_MACRO)
3105                        flags |= RADEON_SURF_TILE_COLOR_MACRO;
3106                /* setting pitch to 0 disables tiling */
3107                if ((tiling_flags & (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
3108                                == 0)
3109                        pitch = 0;
3110        } else if (rdev->family <= CHIP_RV280) {
3111                if (tiling_flags & (RADEON_TILING_MACRO))
3112                        flags |= R200_SURF_TILE_COLOR_MACRO;
3113                if (tiling_flags & RADEON_TILING_MICRO)
3114                        flags |= R200_SURF_TILE_COLOR_MICRO;
3115        } else {
3116                if (tiling_flags & RADEON_TILING_MACRO)
3117                        flags |= R300_SURF_TILE_MACRO;
3118                if (tiling_flags & RADEON_TILING_MICRO)
3119                        flags |= R300_SURF_TILE_MICRO;
3120        }
3121
3122        if (tiling_flags & RADEON_TILING_SWAP_16BIT)
3123                flags |= RADEON_SURF_AP0_SWP_16BPP | RADEON_SURF_AP1_SWP_16BPP;
3124        if (tiling_flags & RADEON_TILING_SWAP_32BIT)
3125                flags |= RADEON_SURF_AP0_SWP_32BPP | RADEON_SURF_AP1_SWP_32BPP;
3126
3127        /* r100/r200 divide by 16 */
3128        if (rdev->family < CHIP_R300)
3129                flags |= pitch / 16;
3130        else
3131                flags |= pitch / 8;
3132
3133
3134        DRM_DEBUG_KMS("writing surface %d %d %x %x\n", reg, flags, offset, offset+obj_size-1);
3135        WREG32(RADEON_SURFACE0_INFO + surf_index, flags);
3136        WREG32(RADEON_SURFACE0_LOWER_BOUND + surf_index, offset);
3137        WREG32(RADEON_SURFACE0_UPPER_BOUND + surf_index, offset + obj_size - 1);
3138        return 0;
3139}
3140
3141void r100_clear_surface_reg(struct radeon_device *rdev, int reg)
3142{
3143        int surf_index = reg * 16;
3144        WREG32(RADEON_SURFACE0_INFO + surf_index, 0);
3145}
3146
/*
 * r100_bandwidth_update - reprogram the display FIFO request levels and
 * critical points for the currently enabled CRTCs.
 *
 * Does nothing until mode_config has been initialized.  Otherwise it:
 *  - calls radeon_update_display_priority();
 *  - records the mode and bytes per pixel (cpp[0] of the primary plane's
 *    framebuffer) of each enabled CRTC; CRTC1 is only examined when
 *    RADEON_SINGLE_CRTC is not set;
 *  - on R300-class parts with disp_priority == 2, rewrites the DISP0R/DISP1R
 *    init latency fields of R300_MC_INIT_MISC_LAT_TIMER;
 *  - compares the summed (pixel clock / 1000) * bytes-per-pixel of the active
 *    CRTCs against the derated memory bandwidth and logs an error when the
 *    display needs at least as much as is available;
 *  - decodes memory timings from RADEON_MEM_TIMING_CNTL and
 *    RADEON_MEM_SDRAM_MODE_REG (plus R300 memory-controller registers) and
 *    derives a display latency from them;
 *  - programs stop/start request levels and the critical point in
 *    RADEON_GRPH_BUFFER_CNTL (CRTC0) and RADEON_GRPH2_BUFFER_CNTL (CRTC1);
 *  - stores lb_vblank_lead_lines for each active CRTC.
 *
 * All arithmetic uses the fixed20_12 type and the dfixed_* helpers, which
 * are defined outside this function.
 */
3147void r100_bandwidth_update(struct radeon_device *rdev)
3148{
3149        fixed20_12 trcd_ff, trp_ff, tras_ff, trbs_ff, tcas_ff;
3150        fixed20_12 sclk_ff, mclk_ff, sclk_eff_ff, sclk_delay_ff;
3151        fixed20_12 peak_disp_bw, mem_bw, pix_clk, pix_clk2, temp_ff;
3152        fixed20_12 crit_point_ff = {0};
3153        uint32_t temp, data, mem_trcd, mem_trp, mem_tras;
        /*
         * CAS latency lookup tables, indexed by the 3-bit field taken from
         * bits 20-22 of RADEON_MEM_SDRAM_MODE_REG.  This first table has
         * only seven initializers, so element 7 is implicitly zero.
         */
3154        fixed20_12 memtcas_ff[8] = {
3155                dfixed_init(1),
3156                dfixed_init(2),
3157                dfixed_init(3),
3158                dfixed_init(0),
3159                dfixed_init_half(1),
3160                dfixed_init_half(2),
3161                dfixed_init(0),
3162        };
3163        fixed20_12 memtcas_rs480_ff[8] = {
3164                dfixed_init(0),
3165                dfixed_init(1),
3166                dfixed_init(2),
3167                dfixed_init(3),
3168                dfixed_init(0),
3169                dfixed_init_half(1),
3170                dfixed_init_half(2),
3171                dfixed_init_half(3),
3172        };
3173        fixed20_12 memtcas2_ff[8] = {
3174                dfixed_init(0),
3175                dfixed_init(1),
3176                dfixed_init(2),
3177                dfixed_init(3),
3178                dfixed_init(4),
3179                dfixed_init(5),
3180                dfixed_init(6),
3181                dfixed_init(7),
3182        };
        /*
         * Trbs lookup tables, indexed by the RBS position field read from
         * the R300 memory-controller registers further down.
         * NOTE(review): the index is the masked register value with no
         * shift or range check - presumably the mask keeps it within 0..7;
         * confirm against the R300_MEM_RBS_POSITION_* definitions.
         */
3183        fixed20_12 memtrbs[8] = {
3184                dfixed_init(1),
3185                dfixed_init_half(1),
3186                dfixed_init(2),
3187                dfixed_init_half(2),
3188                dfixed_init(3),
3189                dfixed_init_half(3),
3190                dfixed_init(4),
3191                dfixed_init_half(4)
3192        };
3193        fixed20_12 memtrbs_r4xx[8] = {
3194                dfixed_init(4),
3195                dfixed_init(5),
3196                dfixed_init(6),
3197                dfixed_init(7),
3198                dfixed_init(8),
3199                dfixed_init(9),
3200                dfixed_init(10),
3201                dfixed_init(11)
3202        };
3203        fixed20_12 min_mem_eff;
3204        fixed20_12 mc_latency_sclk, mc_latency_mclk, k1;
3205        fixed20_12 cur_latency_mclk, cur_latency_sclk;
3206        fixed20_12 disp_latency, disp_latency_overhead, disp_drain_rate = {0},
3207                disp_drain_rate2, read_return_rate;
3208        fixed20_12 time_disp1_drop_priority;
3209        int c;
3210        int cur_size = 16;       /* in octawords */
3211        int critical_point = 0, critical_point2;
3212/*      uint32_t read_return_rate, time_disp1_drop_priority; */
3213        int stop_req, max_stop_req;
        /* mode1/mode2 stay NULL for a CRTC that is not enabled. */
3214        struct drm_display_mode *mode1 = NULL;
3215        struct drm_display_mode *mode2 = NULL;
3216        uint32_t pixel_bytes1 = 0;
3217        uint32_t pixel_bytes2 = 0;
3218
3219        /* Guess line buffer size to be 8192 pixels */
3220        u32 lb_size = 8192;
3221
3222        if (!rdev->mode_info.mode_config_initialized)
3223                return;
3224
3225        radeon_update_display_priority(rdev);
3226
        /*
         * Collect mode and bytes per pixel for each enabled CRTC.
         * NOTE(review): primary->fb is dereferenced without a NULL check;
         * this assumes an enabled CRTC always has a framebuffer - confirm.
         */
3227        if (rdev->mode_info.crtcs[0]->base.enabled) {
3228                const struct drm_framebuffer *fb =
3229                        rdev->mode_info.crtcs[0]->base.primary->fb;
3230
3231                mode1 = &rdev->mode_info.crtcs[0]->base.mode;
3232                pixel_bytes1 = fb->format->cpp[0];
3233        }
3234        if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
3235                if (rdev->mode_info.crtcs[1]->base.enabled) {
3236                        const struct drm_framebuffer *fb =
3237                                rdev->mode_info.crtcs[1]->base.primary->fb;
3238
3239                        mode2 = &rdev->mode_info.crtcs[1]->base.mode;
3240                        pixel_bytes2 = fb->format->cpp[0];
3241                }
3242        }
3243
        /* Memory efficiency derating factor; presumably 0.8 - confirm in the dfixed_const_8 definition. */
3244        min_mem_eff.full = dfixed_const_8(0);
3245        /* get modes */
3246        if ((rdev->disp_priority == 2) && ASIC_IS_R300(rdev)) {
3247                uint32_t mc_init_misc_lat_timer = RREG32(R300_MC_INIT_MISC_LAT_TIMER);
3248                mc_init_misc_lat_timer &= ~(R300_MC_DISP1R_INIT_LAT_MASK << R300_MC_DISP1R_INIT_LAT_SHIFT);
3249                mc_init_misc_lat_timer &= ~(R300_MC_DISP0R_INIT_LAT_MASK << R300_MC_DISP0R_INIT_LAT_SHIFT);
3250                /* check crtc enables */
3251                if (mode2)
3252                        mc_init_misc_lat_timer |= (1 << R300_MC_DISP1R_INIT_LAT_SHIFT);
3253                if (mode1)
3254                        mc_init_misc_lat_timer |= (1 << R300_MC_DISP0R_INIT_LAT_SHIFT);
3255                WREG32(R300_MC_INIT_MISC_LAT_TIMER, mc_init_misc_lat_timer);
3256        }
3257
3258        /*
3259         * determine if there is enough bw for current mode
3260         */
3261        sclk_ff = rdev->pm.sclk;
3262        mclk_ff = rdev->pm.mclk;
3263
        /* Bytes per memory clock: bus width in bytes, doubled for DDR. */
3264        temp = (rdev->mc.vram_width / 8) * (rdev->mc.vram_is_ddr ? 2 : 1);
3265        temp_ff.full = dfixed_const(temp);
3266        mem_bw.full = dfixed_mul(mclk_ff, temp_ff);
3267
3268        pix_clk.full = 0;
3269        pix_clk2.full = 0;
3270        peak_disp_bw.full = 0;
        /* Each active CRTC adds (mode clock / 1000) * bytes per pixel. */
3271        if (mode1) {
3272                temp_ff.full = dfixed_const(1000);
3273                pix_clk.full = dfixed_const(mode1->clock); /* convert to fixed point */
3274                pix_clk.full = dfixed_div(pix_clk, temp_ff);
3275                temp_ff.full = dfixed_const(pixel_bytes1);
3276                peak_disp_bw.full += dfixed_mul(pix_clk, temp_ff);
3277        }
3278        if (mode2) {
3279                temp_ff.full = dfixed_const(1000);
3280                pix_clk2.full = dfixed_const(mode2->clock); /* convert to fixed point */
3281                pix_clk2.full = dfixed_div(pix_clk2, temp_ff);
3282                temp_ff.full = dfixed_const(pixel_bytes2);
3283                peak_disp_bw.full += dfixed_mul(pix_clk2, temp_ff);
3284        }
3285
        /* Only a warning is logged; programming continues regardless. */
3286        mem_bw.full = dfixed_mul(mem_bw, min_mem_eff);
3287        if (peak_disp_bw.full >= mem_bw.full) {
3288                DRM_ERROR("You may not have enough display bandwidth for current mode\n"
3289                          "If you have flickering problem, try to lower resolution, refresh rate, or color depth\n");
3290        }
3291
3292        /*  Decode tRCD/tRP/tRAS from RADEON_MEM_TIMING_CNTL; the field layout differs per family. */
3293        temp = RREG32(RADEON_MEM_TIMING_CNTL);
3294        if ((rdev->family == CHIP_RV100) || (rdev->flags & RADEON_IS_IGP)) { /* RV100, M6, IGPs */
3295                mem_trcd = ((temp >> 2) & 0x3) + 1;
3296                mem_trp  = ((temp & 0x3)) + 1;
3297                mem_tras = ((temp & 0x70) >> 4) + 1;
3298        } else if (rdev->family == CHIP_R300 ||
3299                   rdev->family == CHIP_R350) { /* r300, r350 */
3300                mem_trcd = (temp & 0x7) + 1;
3301                mem_trp = ((temp >> 8) & 0x7) + 1;
3302                mem_tras = ((temp >> 11) & 0xf) + 4;
3303        } else if (rdev->family == CHIP_RV350 ||
3304                   rdev->family == CHIP_RV380) {
3305                /* rv3x0 */
3306                mem_trcd = (temp & 0x7) + 3;
3307                mem_trp = ((temp >> 8) & 0x7) + 3;
3308                mem_tras = ((temp >> 11) & 0xf) + 6;
3309        } else if (rdev->family == CHIP_R420 ||
3310                   rdev->family == CHIP_R423 ||
3311                   rdev->family == CHIP_RV410) {
3312                /* r4xx: values are clamped to the field maximum after the offset is added */
3313                mem_trcd = (temp & 0xf) + 3;
3314                if (mem_trcd > 15)
3315                        mem_trcd = 15;
3316                mem_trp = ((temp >> 8) & 0xf) + 3;
3317                if (mem_trp > 15)
3318                        mem_trp = 15;
3319                mem_tras = ((temp >> 12) & 0x1f) + 6;
3320                if (mem_tras > 31)
3321                        mem_tras = 31;
3322        } else { /* RV200, R200 */
3323                mem_trcd = (temp & 0x7) + 1;
3324                mem_trp = ((temp >> 8) & 0x7) + 1;
3325                mem_tras = ((temp >> 12) & 0xf) + 4;
3326        }
3327        /* convert to FF */
3328        trcd_ff.full = dfixed_const(mem_trcd);
3329        trp_ff.full = dfixed_const(mem_trp);
3330        tras_ff.full = dfixed_const(mem_tras);
3331
3332        /* Get the CAS latency index from bits 20-22 of MEM_SDRAM_MODE_REG and look it up. */
3333        temp = RREG32(RADEON_MEM_SDRAM_MODE_REG);
3334        data = (temp & (7 << 20)) >> 20;
3335        if ((rdev->family == CHIP_RV100) || rdev->flags & RADEON_IS_IGP) {
3336                if (rdev->family == CHIP_RS480) /* don't think rs400 */
3337                        tcas_ff = memtcas_rs480_ff[data];
3338                else
3339                        tcas_ff = memtcas_ff[data];
3340        } else
3341                tcas_ff = memtcas2_ff[data];
3342
3343        if (rdev->family == CHIP_RS400 ||
3344            rdev->family == CHIP_RS480) {
3345                /* extra cas latency stored in bits 23-25 0-4 clocks */
3346                data = (temp >> 23) & 0x7;
3347                if (data < 5)
3348                        tcas_ff.full += dfixed_const(data);
3349        }
3350
3351        if (ASIC_IS_R300(rdev) && !(rdev->flags & RADEON_IS_IGP)) {
3352                /* on the R300, Tcas is included in Trbs.
3353                 */
3354                temp = RREG32(RADEON_MEM_CNTL);
3355                data = (R300_MEM_NUM_CHANNELS_MASK & temp);
3356                if (data == 1) {
3357                        if (R300_MEM_USE_CD_CH_ONLY & temp) {
                                /* Indirect MC access: select the CD read-control register, then read it back via IND_DATA. */
3358                                temp = RREG32(R300_MC_IND_INDEX);
3359                                temp &= ~R300_MC_IND_ADDR_MASK;
3360                                temp |= R300_MC_READ_CNTL_CD_mcind;
3361                                WREG32(R300_MC_IND_INDEX, temp);
3362                                temp = RREG32(R300_MC_IND_DATA);
3363                                data = (R300_MEM_RBS_POSITION_C_MASK & temp);
3364                        } else {
3365                                temp = RREG32(R300_MC_READ_CNTL_AB);
3366                                data = (R300_MEM_RBS_POSITION_A_MASK & temp);
3367                        }
3368                } else {
3369                        temp = RREG32(R300_MC_READ_CNTL_AB);
3370                        data = (R300_MEM_RBS_POSITION_A_MASK & temp);
3371                }
3372                if (rdev->family == CHIP_RV410 ||
3373                    rdev->family == CHIP_R420 ||
3374                    rdev->family == CHIP_R423)
3375                        trbs_ff = memtrbs_r4xx[data];
3376                else
3377                        trbs_ff = memtrbs[data];
3378                tcas_ff.full += trbs_ff.full;
3379        }
3380
        /* Effective engine clock: sclk, reduced on AGP by agpmode times a fixed factor. */
3381        sclk_eff_ff.full = sclk_ff.full;
3382
3383        if (rdev->flags & RADEON_IS_AGP) {
3384                fixed20_12 agpmode_ff;
3385                agpmode_ff.full = dfixed_const(radeon_agpmode);
3386                temp_ff.full = dfixed_const_666(16);
3387                sclk_eff_ff.full -= dfixed_mul(agpmode_ff, temp_ff);
3388        }
3389        /* TODO PCIE lanes may affect this - agpmode == 16?? */
3390
        /* Fixed per-family memory-controller delay, expressed in engine clocks. */
3391        if (ASIC_IS_R300(rdev)) {
3392                sclk_delay_ff.full = dfixed_const(250);
3393        } else {
3394                if ((rdev->family == CHIP_RV100) ||
3395                    rdev->flags & RADEON_IS_IGP) {
3396                        if (rdev->mc.vram_is_ddr)
3397                                sclk_delay_ff.full = dfixed_const(41);
3398                        else
3399                                sclk_delay_ff.full = dfixed_const(33);
3400                } else {
3401                        if (rdev->mc.vram_width == 128)
3402                                sclk_delay_ff.full = dfixed_const(57);
3403                        else
3404                                sclk_delay_ff.full = dfixed_const(41);
3405                }
3406        }
3407
3408        mc_latency_sclk.full = dfixed_div(sclk_delay_ff, sclk_eff_ff);
3409
3410        if (rdev->mc.vram_is_ddr) {
3411                if (rdev->mc.vram_width == 32) {
3412                        k1.full = dfixed_const(40);
3413                        c  = 3;
3414                } else {
3415                        k1.full = dfixed_const(20);
3416                        c  = 1;
3417                }
3418        } else {
3419                k1.full = dfixed_const(40);
3420                c  = 3;
3421        }
3422
        /* Memory-clock latency: 2*tRCD + c*tCAS + 4*tRAS + 4*tRP + k1, in mclk cycles. */
3423        temp_ff.full = dfixed_const(2);
3424        mc_latency_mclk.full = dfixed_mul(trcd_ff, temp_ff);
3425        temp_ff.full = dfixed_const(c);
3426        mc_latency_mclk.full += dfixed_mul(tcas_ff, temp_ff);
3427        temp_ff.full = dfixed_const(4);
3428        mc_latency_mclk.full += dfixed_mul(tras_ff, temp_ff);
3429        mc_latency_mclk.full += dfixed_mul(trp_ff, temp_ff);
3430        mc_latency_mclk.full += k1.full;
3431
        /* Convert cycles to time; temp_ff still holds 4 for the sclk term added below. */
3432        mc_latency_mclk.full = dfixed_div(mc_latency_mclk, mclk_ff);
3433        mc_latency_mclk.full += dfixed_div(temp_ff, sclk_eff_ff);
3434
3435        /*
3436          HW cursor time assuming worst case of full size colour cursor.
3437        */
3438        temp_ff.full = dfixed_const((2 * (cur_size - (rdev->mc.vram_is_ddr + 1))));
3439        temp_ff.full += trcd_ff.full;
3440        if (temp_ff.full < tras_ff.full)
3441                temp_ff.full = tras_ff.full;
3442        cur_latency_mclk.full = dfixed_div(temp_ff, mclk_ff);
3443
3444        temp_ff.full = dfixed_const(cur_size);
3445        cur_latency_sclk.full = dfixed_div(temp_ff, sclk_eff_ff);
3446        /*
3447          Find the total latency for the display data.
3448        */
3449        disp_latency_overhead.full = dfixed_const(8);
3450        disp_latency_overhead.full = dfixed_div(disp_latency_overhead, sclk_ff);
3451        mc_latency_mclk.full += disp_latency_overhead.full + cur_latency_mclk.full;
3452        mc_latency_sclk.full += disp_latency_overhead.full + cur_latency_sclk.full;
3453
        /* The display latency is the larger of the mclk- and sclk-based estimates. */
3454        if (mc_latency_mclk.full > mc_latency_sclk.full)
3455                disp_latency.full = mc_latency_mclk.full;
3456        else
3457                disp_latency.full = mc_latency_sclk.full;
3458
3459        /* setup Max GRPH_STOP_REQ default value */
3460        if (ASIC_IS_RV100(rdev))
3461                max_stop_req = 0x5c;
3462        else
3463                max_stop_req = 0x7c;
3464
3465        if (mode1) {
3466                /*  CRTC1
3467                    Set GRPH_BUFFER_CNTL register using h/w defined optimal values.
3468                    GRPH_STOP_REQ <= MIN[ 0x7C, (CRTC_H_DISP + 1) * (bit depth) / 0x10 ]
3469                */
3470                stop_req = mode1->hdisplay * pixel_bytes1 / 16;
3471
3472                if (stop_req > max_stop_req)
3473                        stop_req = max_stop_req;
3474
3475                /*
3476                  Find the drain rate of the display buffer.
3477                */
                /* NOTE(review): 16/pixel_bytes1 is an integer division and the result is used as a divisor - assumes 1 <= pixel_bytes1 <= 16; confirm. */
3478                temp_ff.full = dfixed_const((16/pixel_bytes1));
3479                disp_drain_rate.full = dfixed_div(pix_clk, temp_ff);
3480
3481                /*
3482                  Find the critical point of the display buffer.
3483                */
3484                crit_point_ff.full = dfixed_mul(disp_drain_rate, disp_latency);
                /* presumably adds 0.5 so the truncation below rounds to nearest - confirm in the dfixed_const_half definition */
3485                crit_point_ff.full += dfixed_const_half(0);
3486
3487                critical_point = dfixed_trunc(crit_point_ff);
3488
3489                if (rdev->disp_priority == 2) {
3490                        critical_point = 0;
3491                }
3492
3493                /*
3494                  The critical point should never be above max_stop_req-4.  Setting
3495                  GRPH_CRITICAL_CNTL = 0 will thus force high priority all the time.
3496                */
3497                if (max_stop_req - critical_point < 4)
3498                        critical_point = 0;
3499
3500                if (critical_point == 0 && mode2 && rdev->family == CHIP_R300) {
3501                        /* some R300 cards have problem with this set to 0, when CRTC2 is enabled.*/
3502                        critical_point = 0x10;
3503                }
3504
                /* Read-modify-write GRPH_BUFFER_CNTL: stop level, start level (lowered by 0x10 on R350), buffer size, critical point. */
3505                temp = RREG32(RADEON_GRPH_BUFFER_CNTL);
3506                temp &= ~(RADEON_GRPH_STOP_REQ_MASK);
3507                temp |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT);
3508                temp &= ~(RADEON_GRPH_START_REQ_MASK);
3509                if ((rdev->family == CHIP_R350) &&
3510                    (stop_req > 0x15)) {
3511                        stop_req -= 0x10;
3512                }
3513                temp |= (stop_req << RADEON_GRPH_START_REQ_SHIFT);
3514                temp |= RADEON_GRPH_BUFFER_SIZE;
3515                temp &= ~(RADEON_GRPH_CRITICAL_CNTL   |
3516                          RADEON_GRPH_CRITICAL_AT_SOF |
3517                          RADEON_GRPH_STOP_CNTL);
3518                /*
3519                  Write the result into the register.
3520                */
3521                WREG32(RADEON_GRPH_BUFFER_CNTL, ((temp & ~RADEON_GRPH_CRITICAL_POINT_MASK) |
3522                                                       (critical_point << RADEON_GRPH_CRITICAL_POINT_SHIFT)));
3523
                /* Disabled experiment, kept for reference; none of it is compiled. */
3524#if 0
3525                if ((rdev->family == CHIP_RS400) ||
3526                    (rdev->family == CHIP_RS480)) {
3527                        /* attempt to program RS400 disp regs correctly ??? */
3528                        temp = RREG32(RS400_DISP1_REG_CNTL);
3529                        temp &= ~(RS400_DISP1_START_REQ_LEVEL_MASK |
3530                                  RS400_DISP1_STOP_REQ_LEVEL_MASK);
3531                        WREG32(RS400_DISP1_REQ_CNTL1, (temp |
3532                                                       (critical_point << RS400_DISP1_START_REQ_LEVEL_SHIFT) |
3533                                                       (critical_point << RS400_DISP1_STOP_REQ_LEVEL_SHIFT)));
3534                        temp = RREG32(RS400_DMIF_MEM_CNTL1);
3535                        temp &= ~(RS400_DISP1_CRITICAL_POINT_START_MASK |
3536                                  RS400_DISP1_CRITICAL_POINT_STOP_MASK);
3537                        WREG32(RS400_DMIF_MEM_CNTL1, (temp |
3538                                                      (critical_point << RS400_DISP1_CRITICAL_POINT_START_SHIFT) |
3539                                                      (critical_point << RS400_DISP1_CRITICAL_POINT_STOP_SHIFT)));
3540                }
3541#endif
3542
                /* Only the value read back after the write is logged; the "from" value is commented out. */
3543                DRM_DEBUG_KMS("GRPH_BUFFER_CNTL from to %x\n",
3544                          /*      (unsigned int)info->SavedReg->grph_buffer_cntl, */
3545                          (unsigned int)RREG32(RADEON_GRPH_BUFFER_CNTL));
3546        }
3547
        /* Same programming for the second CRTC, through GRPH2_BUFFER_CNTL. */
3548        if (mode2) {
3549                u32 grph2_cntl;
3550                stop_req = mode2->hdisplay * pixel_bytes2 / 16;
3551
3552                if (stop_req > max_stop_req)
3553                        stop_req = max_stop_req;
3554
3555                /*
3556                  Find the drain rate of the display buffer.
3557                */
3558                temp_ff.full = dfixed_const((16/pixel_bytes2));
3559                disp_drain_rate2.full = dfixed_div(pix_clk2, temp_ff);
3560
3561                grph2_cntl = RREG32(RADEON_GRPH2_BUFFER_CNTL);
3562                grph2_cntl &= ~(RADEON_GRPH_STOP_REQ_MASK);
3563                grph2_cntl |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT);
3564                grph2_cntl &= ~(RADEON_GRPH_START_REQ_MASK);
3565                if ((rdev->family == CHIP_R350) &&
3566                    (stop_req > 0x15)) {
3567                        stop_req -= 0x10;
3568                }
3569                grph2_cntl |= (stop_req << RADEON_GRPH_START_REQ_SHIFT);
3570                grph2_cntl |= RADEON_GRPH_BUFFER_SIZE;
3571                grph2_cntl &= ~(RADEON_GRPH_CRITICAL_CNTL   |
3572                          RADEON_GRPH_CRITICAL_AT_SOF |
3573                          RADEON_GRPH_STOP_CNTL);
3574
3575                if ((rdev->family == CHIP_RS100) ||
3576                    (rdev->family == CHIP_RS200))
3577                        critical_point2 = 0;
3578                else {
                        /*
                         * NOTE(review): by operator precedence this is
                         * ((vram_width * vram_is_ddr) + 1) / 128, which is 0
                         * whenever vram_is_ddr is 0.  Other expressions in
                         * this function use (vram_is_ddr + 1) as a DDR
                         * multiplier - confirm whether
                         * vram_width * (vram_is_ddr + 1) / 128 was intended.
                         */
3579                        temp = (rdev->mc.vram_width * rdev->mc.vram_is_ddr + 1)/128;
3580                        temp_ff.full = dfixed_const(temp);
3581                        temp_ff.full = dfixed_mul(mclk_ff, temp_ff);
3582                        if (sclk_ff.full < temp_ff.full)
3583                                temp_ff.full = sclk_ff.full;
3584
3585                        read_return_rate.full = temp_ff.full;
3586
3587                        if (mode1) {
                                /* NOTE(review): the divisor (read_return_rate - disp_drain_rate) is not checked for zero or unsigned wrap-around. */
3588                                temp_ff.full = read_return_rate.full - disp_drain_rate.full;
3589                                time_disp1_drop_priority.full = dfixed_div(crit_point_ff, temp_ff);
3590                        } else {
3591                                time_disp1_drop_priority.full = 0;
3592                        }
                        /* disp_latency is counted twice here, around the time CRTC1 takes to drop priority. */
3593                        crit_point_ff.full = disp_latency.full + time_disp1_drop_priority.full + disp_latency.full;
3594                        crit_point_ff.full = dfixed_mul(crit_point_ff, disp_drain_rate2);
3595                        crit_point_ff.full += dfixed_const_half(0);
3596
3597                        critical_point2 = dfixed_trunc(crit_point_ff);
3598
3599                        if (rdev->disp_priority == 2) {
3600                                critical_point2 = 0;
3601                        }
3602
3603                        if (max_stop_req - critical_point2 < 4)
3604                                critical_point2 = 0;
3605
3606                }
3607
3608                if (critical_point2 == 0 && rdev->family == CHIP_R300) {
3609                        /* some R300 cards have problem with this set to 0 */
3610                        critical_point2 = 0x10;
3611                }
3612
3613                WREG32(RADEON_GRPH2_BUFFER_CNTL, ((grph2_cntl & ~RADEON_GRPH_CRITICAL_POINT_MASK) |
3614                                                  (critical_point2 << RADEON_GRPH_CRITICAL_POINT_SHIFT)));
3615
3616                if ((rdev->family == CHIP_RS400) ||
3617                    (rdev->family == CHIP_RS480)) {
3618#if 0
3619                        /* attempt to program RS400 disp2 regs correctly ??? */
3620                        temp = RREG32(RS400_DISP2_REQ_CNTL1);
3621                        temp &= ~(RS400_DISP2_START_REQ_LEVEL_MASK |
3622                                  RS400_DISP2_STOP_REQ_LEVEL_MASK);
3623                        WREG32(RS400_DISP2_REQ_CNTL1, (temp |
3624                                                       (critical_point2 << RS400_DISP1_START_REQ_LEVEL_SHIFT) |
3625                                                       (critical_point2 << RS400_DISP1_STOP_REQ_LEVEL_SHIFT)));
3626                        temp = RREG32(RS400_DISP2_REQ_CNTL2);
3627                        temp &= ~(RS400_DISP2_CRITICAL_POINT_START_MASK |
3628                                  RS400_DISP2_CRITICAL_POINT_STOP_MASK);
3629                        WREG32(RS400_DISP2_REQ_CNTL2, (temp |
3630                                                       (critical_point2 << RS400_DISP2_CRITICAL_POINT_START_SHIFT) |
3631                                                       (critical_point2 << RS400_DISP2_CRITICAL_POINT_STOP_SHIFT)));
3632#endif
                        /* The computed values are not used on RS400/RS480; fixed register values are written instead. */
3633                        WREG32(RS400_DISP2_REQ_CNTL1, 0x105DC1CC);
3634                        WREG32(RS400_DISP2_REQ_CNTL2, 0x2749D000);
3635                        WREG32(RS400_DMIF_MEM_CNTL1,  0x29CA71DC);
3636                        WREG32(RS400_DISP1_REQ_CNTL1, 0x28FBC3AC);
3637                }
3638
3639                DRM_DEBUG_KMS("GRPH2_BUFFER_CNTL from to %x\n",
3640                          (unsigned int)RREG32(RADEON_GRPH2_BUFFER_CNTL));
3641        }
3642
3643        /* Save number of lines the linebuffer leads before the scanout */
3644        if (mode1)
3645            rdev->mode_info.crtcs[0]->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode1->crtc_hdisplay);
3646
3647        if (mode2)
3648            rdev->mode_info.crtcs[1]->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode2->crtc_hdisplay);
3649}
3650
3651int r100_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3652{
3653        uint32_t scratch;
3654        uint32_t tmp = 0;
3655        unsigned i;
3656        int r;
3657
3658        r = radeon_scratch_get(rdev, &scratch);
3659        if (r) {
3660                DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3661                return r;
3662        }
3663        WREG32(scratch, 0xCAFEDEAD);
3664        r = radeon_ring_lock(rdev, ring, 2);
3665        if (r) {
3666                DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3667                radeon_scratch_free(rdev, scratch);
3668                return r;
3669        }
3670        radeon_ring_write(ring, PACKET0(scratch, 0));
3671        radeon_ring_write(ring, 0xDEADBEEF);
3672        radeon_ring_unlock_commit(rdev, ring, false);
3673        for (i = 0; i < rdev->usec_timeout; i++) {
3674                tmp = RREG32(scratch);
3675                if (tmp == 0xDEADBEEF) {
3676                        break;
3677                }
3678                udelay(1);
3679        }
3680        if (i < rdev->usec_timeout) {
3681                DRM_INFO("ring test succeeded in %d usecs\n", i);
3682        } else {
3683                DRM_ERROR("radeon: ring test failed (scratch(0x%04X)=0x%08X)\n",
3684                          scratch, tmp);
3685                r = -EINVAL;
3686        }
3687        radeon_scratch_free(rdev, scratch);
3688        return r;
3689}
3690
3691void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3692{
3693        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3694
3695        if (ring->rptr_save_reg) {
3696                u32 next_rptr = ring->wptr + 2 + 3;
3697                radeon_ring_write(ring, PACKET0(ring->rptr_save_reg, 0));
3698                radeon_ring_write(ring, next_rptr);
3699        }
3700
3701        radeon_ring_write(ring, PACKET0(RADEON_CP_IB_BASE, 1));
3702        radeon_ring_write(ring, ib->gpu_addr);
3703        radeon_ring_write(ring, ib->length_dw);
3704}
3705
3706int r100_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3707{
3708        struct radeon_ib ib;
3709        uint32_t scratch;
3710        uint32_t tmp = 0;
3711        unsigned i;
3712        int r;
3713
3714        r = radeon_scratch_get(rdev, &scratch);
3715        if (r) {
3716                DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3717                return r;
3718        }
3719        WREG32(scratch, 0xCAFEDEAD);
3720        r = radeon_ib_get(rdev, RADEON_RING_TYPE_GFX_INDEX, &ib, NULL, 256);
3721        if (r) {
3722                DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3723                goto free_scratch;
3724        }
3725        ib.ptr[0] = PACKET0(scratch, 0);
3726        ib.ptr[1] = 0xDEADBEEF;
3727        ib.ptr[2] = PACKET2(0);
3728        ib.ptr[3] = PACKET2(0);
3729        ib.ptr[4] = PACKET2(0);
3730        ib.ptr[5] = PACKET2(0);
3731        ib.ptr[6] = PACKET2(0);
3732        ib.ptr[7] = PACKET2(0);
3733        ib.length_dw = 8;
3734        r = radeon_ib_schedule(rdev, &ib, NULL, false);
3735        if (r) {
3736                DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3737                goto free_ib;
3738        }
3739        r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3740                RADEON_USEC_IB_TEST_TIMEOUT));
3741        if (r < 0) {
3742                DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3743                goto free_ib;
3744        } else if (r == 0) {
3745                DRM_ERROR("radeon: fence wait timed out.\n");
3746                r = -ETIMEDOUT;
3747                goto free_ib;
3748        }
3749        r = 0;
3750        for (i = 0; i < rdev->usec_timeout; i++) {
3751                tmp = RREG32(scratch);
3752                if (tmp == 0xDEADBEEF) {
3753                        break;
3754                }
3755                udelay(1);
3756        }
3757        if (i < rdev->usec_timeout) {
3758                DRM_INFO("ib test succeeded in %u usecs\n", i);
3759        } else {
3760                DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3761                          scratch, tmp);
3762                r = -EINVAL;
3763        }
3764free_ib:
3765        radeon_ib_free(rdev, &ib);
3766free_scratch:
3767        radeon_scratch_free(rdev, scratch);
3768        return r;
3769}
3770
3771void r100_mc_stop(struct radeon_device *rdev, struct r100_mc_save *save)
3772{
3773        /* Shutdown CP we shouldn't need to do that but better be safe than
3774         * sorry
3775         */
3776        rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3777        WREG32(R_000740_CP_CSQ_CNTL, 0);
3778
3779        /* Save few CRTC registers */
3780        save->GENMO_WT = RREG8(R_0003C2_GENMO_WT);
3781        save->CRTC_EXT_CNTL = RREG32(R_000054_CRTC_EXT_CNTL);
3782        save->CRTC_GEN_CNTL = RREG32(R_000050_CRTC_GEN_CNTL);
3783        save->CUR_OFFSET = RREG32(R_000260_CUR_OFFSET);
3784        if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
3785                save->CRTC2_GEN_CNTL = RREG32(R_0003F8_CRTC2_GEN_CNTL);
3786                save->CUR2_OFFSET = RREG32(R_000360_CUR2_OFFSET);
3787        }
3788
3789        /* Disable VGA aperture access */
3790        WREG8(R_0003C2_GENMO_WT, C_0003C2_VGA_RAM_EN & save->GENMO_WT);
3791        /* Disable cursor, overlay, crtc */
3792        WREG32(R_000260_CUR_OFFSET, save->CUR_OFFSET | S_000260_CUR_LOCK(1));
3793        WREG32(R_000054_CRTC_EXT_CNTL, save->CRTC_EXT_CNTL |
3794                                        S_000054_CRTC_DISPLAY_DIS(1));
3795        WREG32(R_000050_CRTC_GEN_CNTL,
3796                        (C_000050_CRTC_CUR_EN & save->CRTC_GEN_CNTL) |
3797                        S_000050_CRTC_DISP_REQ_EN_B(1));
3798        WREG32(R_000420_OV0_SCALE_CNTL,
3799                C_000420_OV0_OVERLAY_EN & RREG32(R_000420_OV0_SCALE_CNTL));
3800        WREG32(R_000260_CUR_OFFSET, C_000260_CUR_LOCK & save->CUR_OFFSET);
3801        if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
3802                WREG32(R_000360_CUR2_OFFSET, save->CUR2_OFFSET |
3803                                                S_000360_CUR2_LOCK(1));
3804                WREG32(R_0003F8_CRTC2_GEN_CNTL,
3805                        (C_0003F8_CRTC2_CUR_EN & save->CRTC2_GEN_CNTL) |
3806                        S_0003F8_CRTC2_DISPLAY_DIS(1) |
3807                        S_0003F8_CRTC2_DISP_REQ_EN_B(1));
3808                WREG32(R_000360_CUR2_OFFSET,
3809                        C_000360_CUR2_LOCK & save->CUR2_OFFSET);
3810        }
3811}
3812
3813void r100_mc_resume(struct radeon_device *rdev, struct r100_mc_save *save)
3814{
3815        /* Update base address for crtc */
3816        WREG32(R_00023C_DISPLAY_BASE_ADDR, rdev->mc.vram_start);
3817        if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
3818                WREG32(R_00033C_CRTC2_DISPLAY_BASE_ADDR, rdev->mc.vram_start);
3819        }
3820        /* Restore CRTC registers */
3821        WREG8(R_0003C2_GENMO_WT, save->GENMO_WT);
3822        WREG32(R_000054_CRTC_EXT_CNTL, save->CRTC_EXT_CNTL);
3823        WREG32(R_000050_CRTC_GEN_CNTL, save->CRTC_GEN_CNTL);
3824        if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
3825                WREG32(R_0003F8_CRTC2_GEN_CNTL, save->CRTC2_GEN_CNTL);
3826        }
3827}
3828
3829void r100_vga_render_disable(struct radeon_device *rdev)
3830{
3831        u32 tmp;
3832
3833        tmp = RREG8(R_0003C2_GENMO_WT);
3834        WREG8(R_0003C2_GENMO_WT, C_0003C2_VGA_RAM_EN & tmp);
3835}
3836
3837static void r100_debugfs(struct radeon_device *rdev)
3838{
3839        int r;
3840
3841        r = r100_debugfs_mc_info_init(rdev);
3842        if (r)
3843                dev_warn(rdev->dev, "Failed to create r100_mc debugfs file.\n");
3844}
3845
3846static void r100_mc_program(struct radeon_device *rdev)
3847{
3848        struct r100_mc_save save;
3849
3850        /* Stops all mc clients */
3851        r100_mc_stop(rdev, &save);
3852        if (rdev->flags & RADEON_IS_AGP) {
3853                WREG32(R_00014C_MC_AGP_LOCATION,
3854                        S_00014C_MC_AGP_START(rdev->mc.gtt_start >> 16) |
3855                        S_00014C_MC_AGP_TOP(rdev->mc.gtt_end >> 16));
3856                WREG32(R_000170_AGP_BASE, lower_32_bits(rdev->mc.agp_base));
3857                if (rdev->family > CHIP_RV200)
3858                        WREG32(R_00015C_AGP_BASE_2,
3859                                upper_32_bits(rdev->mc.agp_base) & 0xff);
3860        } else {
3861                WREG32(R_00014C_MC_AGP_LOCATION, 0x0FFFFFFF);
3862                WREG32(R_000170_AGP_BASE, 0);
3863                if (rdev->family > CHIP_RV200)
3864                        WREG32(R_00015C_AGP_BASE_2, 0);
3865        }
3866        /* Wait for mc idle */
3867        if (r100_mc_wait_for_idle(rdev))
3868                dev_warn(rdev->dev, "Wait for MC idle timeout.\n");
3869        /* Program MC, should be a 32bits limited address space */
3870        WREG32(R_000148_MC_FB_LOCATION,
3871                S_000148_MC_FB_START(rdev->mc.vram_start >> 16) |
3872                S_000148_MC_FB_TOP(rdev->mc.vram_end >> 16));
3873        r100_mc_resume(rdev, &save);
3874}
3875
3876static void r100_clock_startup(struct radeon_device *rdev)
3877{
3878        u32 tmp;
3879
3880        if (radeon_dynclks != -1 && radeon_dynclks)
3881                radeon_legacy_set_clock_gating(rdev, 1);
3882        /* We need to force on some of the block */
3883        tmp = RREG32_PLL(R_00000D_SCLK_CNTL);
3884        tmp |= S_00000D_FORCE_CP(1) | S_00000D_FORCE_VIP(1);
3885        if ((rdev->family == CHIP_RV250) || (rdev->family == CHIP_RV280))
3886                tmp |= S_00000D_FORCE_DISP1(1) | S_00000D_FORCE_DISP2(1);
3887        WREG32_PLL(R_00000D_SCLK_CNTL, tmp);
3888}
3889
3890static int r100_startup(struct radeon_device *rdev)
3891{
3892        int r;
3893
3894        /* set common regs */
3895        r100_set_common_regs(rdev);
3896        /* program mc */
3897        r100_mc_program(rdev);
3898        /* Resume clock */
3899        r100_clock_startup(rdev);
3900        /* Initialize GART (initialize after TTM so we can allocate
3901         * memory through TTM but finalize after TTM) */
3902        r100_enable_bm(rdev);
3903        if (rdev->flags & RADEON_IS_PCI) {
3904                r = r100_pci_gart_enable(rdev);
3905                if (r)
3906                        return r;
3907        }
3908
3909        /* allocate wb buffer */
3910        r = radeon_wb_init(rdev);
3911        if (r)
3912                return r;
3913
3914        r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
3915        if (r) {
3916                dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
3917                return r;
3918        }
3919
3920        /* Enable IRQ */
3921        if (!rdev->irq.installed) {
3922                r = radeon_irq_kms_init(rdev);
3923                if (r)
3924                        return r;
3925        }
3926
3927        r100_irq_set(rdev);
3928        rdev->config.r100.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
3929        /* 1M ring buffer */
3930        r = r100_cp_init(rdev, 1024 * 1024);
3931        if (r) {
3932                dev_err(rdev->dev, "failed initializing CP (%d).\n", r);
3933                return r;
3934        }
3935
3936        r = radeon_ib_pool_init(rdev);
3937        if (r) {
3938                dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
3939                return r;
3940        }
3941
3942        return 0;
3943}
3944
3945int r100_resume(struct radeon_device *rdev)
3946{
3947        int r;
3948
3949        /* Make sur GART are not working */
3950        if (rdev->flags & RADEON_IS_PCI)
3951                r100_pci_gart_disable(rdev);
3952        /* Resume clock before doing reset */
3953        r100_clock_startup(rdev);
3954        /* Reset gpu before posting otherwise ATOM will enter infinite loop */
3955        if (radeon_asic_reset(rdev)) {
3956                dev_warn(rdev->dev, "GPU reset failed ! (0xE40=0x%08X, 0x7C0=0x%08X)\n",
3957                        RREG32(R_000E40_RBBM_STATUS),
3958                        RREG32(R_0007C0_CP_STAT));
3959        }
3960        /* post */
3961        radeon_combios_asic_init(rdev->ddev);
3962        /* Resume clock after posting */
3963        r100_clock_startup(rdev);
3964        /* Initialize surface registers */
3965        radeon_surface_init(rdev);
3966
3967        rdev->accel_working = true;
3968        r = r100_startup(rdev);
3969        if (r) {
3970                rdev->accel_working = false;
3971        }
3972        return r;
3973}
3974
3975int r100_suspend(struct radeon_device *rdev)
3976{
3977        radeon_pm_suspend(rdev);
3978        r100_cp_disable(rdev);
3979        radeon_wb_disable(rdev);
3980        r100_irq_disable(rdev);
3981        if (rdev->flags & RADEON_IS_PCI)
3982                r100_pci_gart_disable(rdev);
3983        return 0;
3984}
3985
3986void r100_fini(struct radeon_device *rdev)
3987{
3988        radeon_pm_fini(rdev);
3989        r100_cp_fini(rdev);
3990        radeon_wb_fini(rdev);
3991        radeon_ib_pool_fini(rdev);
3992        radeon_gem_fini(rdev);
3993        if (rdev->flags & RADEON_IS_PCI)
3994                r100_pci_gart_fini(rdev);
3995        radeon_agp_fini(rdev);
3996        radeon_irq_kms_fini(rdev);
3997        radeon_fence_driver_fini(rdev);
3998        radeon_bo_fini(rdev);
3999        radeon_atombios_fini(rdev);
4000        kfree(rdev->bios);
4001        rdev->bios = NULL;
4002}
4003
4004/*
4005 * Due to how kexec works, it can leave the hw fully initialised when it
4006 * boots the new kernel. However doing our init sequence with the CP and
4007 * WB stuff setup causes GPU hangs on the RN50 at least. So at startup
4008 * do some quick sanity checks and restore sane values to avoid this
4009 * problem.
4010 */
4011void r100_restore_sanity(struct radeon_device *rdev)
4012{
4013        u32 tmp;
4014
4015        tmp = RREG32(RADEON_CP_CSQ_CNTL);
4016        if (tmp) {
4017                WREG32(RADEON_CP_CSQ_CNTL, 0);
4018        }
4019        tmp = RREG32(RADEON_CP_RB_CNTL);
4020        if (tmp) {
4021                WREG32(RADEON_CP_RB_CNTL, 0);
4022        }
4023        tmp = RREG32(RADEON_SCRATCH_UMSK);
4024        if (tmp) {
4025                WREG32(RADEON_SCRATCH_UMSK, 0);
4026        }
4027}
4028
4029int r100_init(struct radeon_device *rdev)
4030{
4031        int r;
4032
4033        /* Register debugfs file specific to this group of asics */
4034        r100_debugfs(rdev);
4035        /* Disable VGA */
4036        r100_vga_render_disable(rdev);
4037        /* Initialize scratch registers */
4038        radeon_scratch_init(rdev);
4039        /* Initialize surface registers */
4040        radeon_surface_init(rdev);
4041        /* sanity check some register to avoid hangs like after kexec */
4042        r100_restore_sanity(rdev);
4043        /* TODO: disable VGA need to use VGA request */
4044        /* BIOS*/
4045        if (!radeon_get_bios(rdev)) {
4046                if (ASIC_IS_AVIVO(rdev))
4047                        return -EINVAL;
4048        }
4049        if (rdev->is_atom_bios) {
4050                dev_err(rdev->dev, "Expecting combios for RS400/RS480 GPU\n");
4051                return -EINVAL;
4052        } else {
4053                r = radeon_combios_init(rdev);
4054                if (r)
4055                        return r;
4056        }
4057        /* Reset gpu before posting otherwise ATOM will enter infinite loop */
4058        if (radeon_asic_reset(rdev)) {
4059                dev_warn(rdev->dev,
4060                        "GPU reset failed ! (0xE40=0x%08X, 0x7C0=0x%08X)\n",
4061                        RREG32(R_000E40_RBBM_STATUS),
4062                        RREG32(R_0007C0_CP_STAT));
4063        }
4064        /* check if cards are posted or not */
4065        if (radeon_boot_test_post_card(rdev) == false)
4066                return -EINVAL;
4067        /* Set asic errata */
4068        r100_errata(rdev);
4069        /* Initialize clocks */
4070        radeon_get_clock_info(rdev->ddev);
4071        /* initialize AGP */
4072        if (rdev->flags & RADEON_IS_AGP) {
4073                r = radeon_agp_init(rdev);
4074                if (r) {
4075                        radeon_agp_disable(rdev);
4076                }
4077        }
4078        /* initialize VRAM */
4079        r100_mc_init(rdev);
4080        /* Fence driver */
4081        r = radeon_fence_driver_init(rdev);
4082        if (r)
4083                return r;
4084        /* Memory manager */
4085        r = radeon_bo_init(rdev);
4086        if (r)
4087                return r;
4088        if (rdev->flags & RADEON_IS_PCI) {
4089                r = r100_pci_gart_init(rdev);
4090                if (r)
4091                        return r;
4092        }
4093        r100_set_safe_registers(rdev);
4094
4095        /* Initialize power management */
4096        radeon_pm_init(rdev);
4097
4098        rdev->accel_working = true;
4099        r = r100_startup(rdev);
4100        if (r) {
4101                /* Somethings want wront with the accel init stop accel */
4102                dev_err(rdev->dev, "Disabling GPU acceleration\n");
4103                r100_cp_fini(rdev);
4104                radeon_wb_fini(rdev);
4105                radeon_ib_pool_fini(rdev);
4106                radeon_irq_kms_fini(rdev);
4107                if (rdev->flags & RADEON_IS_PCI)
4108                        r100_pci_gart_fini(rdev);
4109                rdev->accel_working = false;
4110        }
4111        return 0;
4112}
4113
4114uint32_t r100_mm_rreg_slow(struct radeon_device *rdev, uint32_t reg)
4115{
4116        unsigned long flags;
4117        uint32_t ret;
4118
4119        spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
4120        writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
4121        ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
4122        spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
4123        return ret;
4124}
4125
4126void r100_mm_wreg_slow(struct radeon_device *rdev, uint32_t reg, uint32_t v)
4127{
4128        unsigned long flags;
4129
4130        spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
4131        writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
4132        writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
4133        spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
4134}
4135
4136u32 r100_io_rreg(struct radeon_device *rdev, u32 reg)
4137{
4138        if (reg < rdev->rio_mem_size)
4139                return ioread32(rdev->rio_mem + reg);
4140        else {
4141                iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
4142                return ioread32(rdev->rio_mem + RADEON_MM_DATA);
4143        }
4144}
4145
4146void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v)
4147{
4148        if (reg < rdev->rio_mem_size)
4149                iowrite32(v, rdev->rio_mem + reg);
4150        else {
4151                iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
4152                iowrite32(v, rdev->rio_mem + RADEON_MM_DATA);
4153        }
4154}
4155