linux/drivers/gpu/drm/radeon/r100.c
<<
>>
Prefs
   1/*
   2 * Copyright 2008 Advanced Micro Devices, Inc.
   3 * Copyright 2008 Red Hat Inc.
   4 * Copyright 2009 Jerome Glisse.
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a
   7 * copy of this software and associated documentation files (the "Software"),
   8 * to deal in the Software without restriction, including without limitation
   9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10 * and/or sell copies of the Software, and to permit persons to whom the
  11 * Software is furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  22 * OTHER DEALINGS IN THE SOFTWARE.
  23 *
  24 * Authors: Dave Airlie
  25 *          Alex Deucher
  26 *          Jerome Glisse
  27 */
  28#include <linux/seq_file.h>
  29#include <linux/slab.h>
  30#include <drm/drmP.h>
  31#include <drm/radeon_drm.h>
  32#include "radeon_reg.h"
  33#include "radeon.h"
  34#include "radeon_asic.h"
  35#include "r100d.h"
  36#include "rs100d.h"
  37#include "rv200d.h"
  38#include "rv250d.h"
  39#include "atom.h"
  40
  41#include <linux/firmware.h>
  42#include <linux/module.h>
  43
  44#include "r100_reg_safe.h"
  45#include "rn50_reg_safe.h"
  46
  47/* Firmware file names; each one is also passed to MODULE_FIRMWARE() below */
  48#define FIRMWARE_R100           "radeon/R100_cp.bin"
  49#define FIRMWARE_R200           "radeon/R200_cp.bin"
  50#define FIRMWARE_R300           "radeon/R300_cp.bin"
  51#define FIRMWARE_R420           "radeon/R420_cp.bin"
  52#define FIRMWARE_RS690          "radeon/RS690_cp.bin"
  53#define FIRMWARE_RS600          "radeon/RS600_cp.bin"
  54#define FIRMWARE_R520           "radeon/R520_cp.bin"
  55
  56MODULE_FIRMWARE(FIRMWARE_R100);
  57MODULE_FIRMWARE(FIRMWARE_R200);
  58MODULE_FIRMWARE(FIRMWARE_R300);
  59MODULE_FIRMWARE(FIRMWARE_R420);
  60MODULE_FIRMWARE(FIRMWARE_RS690);
  61MODULE_FIRMWARE(FIRMWARE_RS600);
  62MODULE_FIRMWARE(FIRMWARE_R520);
  63
  64#include "r100_track.h"
  65
  66/* This file gathers functions specific to:
  67 * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280
  68 * and others in some cases.
  69 */
  70
  71static bool r100_is_in_vblank(struct radeon_device *rdev, int crtc)
  72{
  73        if (crtc == 0) {
  74                if (RREG32(RADEON_CRTC_STATUS) & RADEON_CRTC_VBLANK_CUR)
  75                        return true;
  76                else
  77                        return false;
  78        } else {
  79                if (RREG32(RADEON_CRTC2_STATUS) & RADEON_CRTC2_VBLANK_CUR)
  80                        return true;
  81                else
  82                        return false;
  83        }
  84}
  85
  86static bool r100_is_counter_moving(struct radeon_device *rdev, int crtc)
  87{
  88        u32 vline1, vline2;
  89
  90        if (crtc == 0) {
  91                vline1 = (RREG32(RADEON_CRTC_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
  92                vline2 = (RREG32(RADEON_CRTC_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
  93        } else {
  94                vline1 = (RREG32(RADEON_CRTC2_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
  95                vline2 = (RREG32(RADEON_CRTC2_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
  96        }
  97        if (vline1 != vline2)
  98                return true;
  99        else
 100                return false;
 101}
 102
 103/**
 104 * r100_wait_for_vblank - vblank wait asic callback.
 105 *
 106 * @rdev: radeon_device pointer
 107 * @crtc: crtc to wait for vblank on
 108 *
 109 * Wait for vblank on the requested crtc (r1xx-r4xx).
 110 */
 111void r100_wait_for_vblank(struct radeon_device *rdev, int crtc)
 112{
 113        unsigned i = 0;
 114
 115        if (crtc >= rdev->num_crtc)
 116                return;
 117
 118        if (crtc == 0) {
 119                if (!(RREG32(RADEON_CRTC_GEN_CNTL) & RADEON_CRTC_EN))
 120                        return;
 121        } else {
 122                if (!(RREG32(RADEON_CRTC2_GEN_CNTL) & RADEON_CRTC2_EN))
 123                        return;
 124        }
 125
 126        /* depending on when we hit vblank, we may be close to active; if so,
 127         * wait for another frame.
 128         */
 129        while (r100_is_in_vblank(rdev, crtc)) {
 130                if (i++ % 100 == 0) {
 131                        if (!r100_is_counter_moving(rdev, crtc))
 132                                break;
 133                }
 134        }
 135
 136        while (!r100_is_in_vblank(rdev, crtc)) {
 137                if (i++ % 100 == 0) {
 138                        if (!r100_is_counter_moving(rdev, crtc))
 139                                break;
 140                }
 141        }
 142}
 143
 144/**
 145 * r100_page_flip - pageflip callback.
 146 *
 147 * @rdev: radeon_device pointer
 148 * @crtc_id: crtc to cleanup pageflip on
 149 * @crtc_base: new address of the crtc (GPU MC address)
 150 *
 151 * Does the actual pageflip (r1xx-r4xx).
 152 * During vblank we take the crtc lock and wait for the update_pending
 153 * bit to go high, when it does, we release the lock, and allow the
 154 * double buffered update to take place.
 155 */
 156void r100_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base, bool async)
 157{
 158        struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
 159        u32 tmp = ((u32)crtc_base) | RADEON_CRTC_OFFSET__OFFSET_LOCK;
 160        int i;
 161
 162        /* Lock the graphics update lock */
 163        /* update the scanout addresses */
 164        WREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset, tmp);
 165
 166        /* Wait for update_pending to go high. */
 167        for (i = 0; i < rdev->usec_timeout; i++) {
 168                if (RREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset) & RADEON_CRTC_OFFSET__GUI_TRIG_OFFSET)
 169                        break;
 170                udelay(1);
 171        }
 172        DRM_DEBUG("Update pending now high. Unlocking vupdate_lock.\n");
 173
 174        /* Unlock the lock, so double-buffering can take place inside vblank */
 175        tmp &= ~RADEON_CRTC_OFFSET__OFFSET_LOCK;
 176        WREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset, tmp);
 177
 178}
 179
 180/**
 181 * r100_page_flip_pending - check if page flip is still pending
 182 *
 183 * @rdev: radeon_device pointer
 184 * @crtc_id: crtc to check
 185 *
 186 * Check if the last pagefilp is still pending (r1xx-r4xx).
 187 * Returns the current update pending status.
 188 */
 189bool r100_page_flip_pending(struct radeon_device *rdev, int crtc_id)
 190{
 191        struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
 192
 193        /* Return current update_pending status: */
 194        return !!(RREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset) &
 195                RADEON_CRTC_OFFSET__GUI_TRIG_OFFSET);
 196}
 197
 198/**
 199 * r100_pm_get_dynpm_state - look up dynpm power state callback.
 200 *
 201 * @rdev: radeon_device pointer
 202 *
 203 * Look up the optimal power state based on the
 204 * current state of the GPU (r1xx-r5xx).
 205 * Used for dynpm only.
 206 */
 207void r100_pm_get_dynpm_state(struct radeon_device *rdev)
 208{
 209        int i;
 210        rdev->pm.dynpm_can_upclock = true;
 211        rdev->pm.dynpm_can_downclock = true;
 212
 213        switch (rdev->pm.dynpm_planned_action) {
 214        case DYNPM_ACTION_MINIMUM:
 215                rdev->pm.requested_power_state_index = 0;
 216                rdev->pm.dynpm_can_downclock = false;
 217                break;
 218        case DYNPM_ACTION_DOWNCLOCK:
 219                if (rdev->pm.current_power_state_index == 0) {
 220                        rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
 221                        rdev->pm.dynpm_can_downclock = false;
 222                } else {
 223                        if (rdev->pm.active_crtc_count > 1) {
 224                                for (i = 0; i < rdev->pm.num_power_states; i++) {
 225                                        if (rdev->pm.power_state[i].flags & RADEON_PM_STATE_SINGLE_DISPLAY_ONLY)
 226                                                continue;
 227                                        else if (i >= rdev->pm.current_power_state_index) {
 228                                                rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
 229                                                break;
 230                                        } else {
 231                                                rdev->pm.requested_power_state_index = i;
 232                                                break;
 233                                        }
 234                                }
 235                        } else
 236                                rdev->pm.requested_power_state_index =
 237                                        rdev->pm.current_power_state_index - 1;
 238                }
 239                /* don't use the power state if crtcs are active and no display flag is set */
 240                if ((rdev->pm.active_crtc_count > 0) &&
 241                    (rdev->pm.power_state[rdev->pm.requested_power_state_index].clock_info[0].flags &
 242                     RADEON_PM_MODE_NO_DISPLAY)) {
 243                        rdev->pm.requested_power_state_index++;
 244                }
 245                break;
 246        case DYNPM_ACTION_UPCLOCK:
 247                if (rdev->pm.current_power_state_index == (rdev->pm.num_power_states - 1)) {
 248                        rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
 249                        rdev->pm.dynpm_can_upclock = false;
 250                } else {
 251                        if (rdev->pm.active_crtc_count > 1) {
 252                                for (i = (rdev->pm.num_power_states - 1); i >= 0; i--) {
 253                                        if (rdev->pm.power_state[i].flags & RADEON_PM_STATE_SINGLE_DISPLAY_ONLY)
 254                                                continue;
 255                                        else if (i <= rdev->pm.current_power_state_index) {
 256                                                rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
 257                                                break;
 258                                        } else {
 259                                                rdev->pm.requested_power_state_index = i;
 260                                                break;
 261                                        }
 262                                }
 263                        } else
 264                                rdev->pm.requested_power_state_index =
 265                                        rdev->pm.current_power_state_index + 1;
 266                }
 267                break;
 268        case DYNPM_ACTION_DEFAULT:
 269                rdev->pm.requested_power_state_index = rdev->pm.default_power_state_index;
 270                rdev->pm.dynpm_can_upclock = false;
 271                break;
 272        case DYNPM_ACTION_NONE:
 273        default:
 274                DRM_ERROR("Requested mode for not defined action\n");
 275                return;
 276        }
 277        /* only one clock mode per power state */
 278        rdev->pm.requested_clock_mode_index = 0;
 279
 280        DRM_DEBUG_DRIVER("Requested: e: %d m: %d p: %d\n",
 281                  rdev->pm.power_state[rdev->pm.requested_power_state_index].
 282                  clock_info[rdev->pm.requested_clock_mode_index].sclk,
 283                  rdev->pm.power_state[rdev->pm.requested_power_state_index].
 284                  clock_info[rdev->pm.requested_clock_mode_index].mclk,
 285                  rdev->pm.power_state[rdev->pm.requested_power_state_index].
 286                  pcie_lanes);
 287}
 288
 289/**
 290 * r100_pm_init_profile - Initialize power profiles callback.
 291 *
 292 * @rdev: radeon_device pointer
 293 *
 294 * Initialize the power states used in profile mode
 295 * (r1xx-r3xx).
 296 * Used for profile mode only.
 297 */
 298void r100_pm_init_profile(struct radeon_device *rdev)
 299{
 300        /* default */
 301        rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_off_ps_idx = rdev->pm.default_power_state_index;
 302        rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
 303        rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_off_cm_idx = 0;
 304        rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_on_cm_idx = 0;
 305        /* low sh */
 306        rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_off_ps_idx = 0;
 307        rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_on_ps_idx = 0;
 308        rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_off_cm_idx = 0;
 309        rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_on_cm_idx = 0;
 310        /* mid sh */
 311        rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_off_ps_idx = 0;
 312        rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_on_ps_idx = 0;
 313        rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_off_cm_idx = 0;
 314        rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_on_cm_idx = 0;
 315        /* high sh */
 316        rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_off_ps_idx = 0;
 317        rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
 318        rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_off_cm_idx = 0;
 319        rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_on_cm_idx = 0;
 320        /* low mh */
 321        rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_off_ps_idx = 0;
 322        rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
 323        rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_off_cm_idx = 0;
 324        rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_on_cm_idx = 0;
 325        /* mid mh */
 326        rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_off_ps_idx = 0;
 327        rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
 328        rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_off_cm_idx = 0;
 329        rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_on_cm_idx = 0;
 330        /* high mh */
 331        rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_off_ps_idx = 0;
 332        rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
 333        rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_off_cm_idx = 0;
 334        rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_on_cm_idx = 0;
 335}
 336
 337/**
 338 * r100_pm_misc - set additional pm hw parameters callback.
 339 *
 340 * @rdev: radeon_device pointer
 341 *
 342 * Set non-clock parameters associated with a power state
 343 * (voltage, pcie lanes, etc.) (r1xx-r4xx).
 344 */
 345void r100_pm_misc(struct radeon_device *rdev)
 346{
 347        int requested_index = rdev->pm.requested_power_state_index;
 348        struct radeon_power_state *ps = &rdev->pm.power_state[requested_index];
 349        struct radeon_voltage *voltage = &ps->clock_info[0].voltage;
 350        u32 tmp, sclk_cntl, sclk_cntl2, sclk_more_cntl;
 351
 352        if ((voltage->type == VOLTAGE_GPIO) && (voltage->gpio.valid)) {
 353                if (ps->misc & ATOM_PM_MISCINFO_VOLTAGE_DROP_SUPPORT) {
 354                        tmp = RREG32(voltage->gpio.reg);
 355                        if (voltage->active_high)
 356                                tmp |= voltage->gpio.mask;
 357                        else
 358                                tmp &= ~(voltage->gpio.mask);
 359                        WREG32(voltage->gpio.reg, tmp);
 360                        if (voltage->delay)
 361                                udelay(voltage->delay);
 362                } else {
 363                        tmp = RREG32(voltage->gpio.reg);
 364                        if (voltage->active_high)
 365                                tmp &= ~voltage->gpio.mask;
 366                        else
 367                                tmp |= voltage->gpio.mask;
 368                        WREG32(voltage->gpio.reg, tmp);
 369                        if (voltage->delay)
 370                                udelay(voltage->delay);
 371                }
 372        }
 373
 374        sclk_cntl = RREG32_PLL(SCLK_CNTL);
 375        sclk_cntl2 = RREG32_PLL(SCLK_CNTL2);
 376        sclk_cntl2 &= ~REDUCED_SPEED_SCLK_SEL(3);
 377        sclk_more_cntl = RREG32_PLL(SCLK_MORE_CNTL);
 378        sclk_more_cntl &= ~VOLTAGE_DELAY_SEL(3);
 379        if (ps->misc & ATOM_PM_MISCINFO_ASIC_REDUCED_SPEED_SCLK_EN) {
 380                sclk_more_cntl |= REDUCED_SPEED_SCLK_EN;
 381                if (ps->misc & ATOM_PM_MISCINFO_DYN_CLK_3D_IDLE)
 382                        sclk_cntl2 |= REDUCED_SPEED_SCLK_MODE;
 383                else
 384                        sclk_cntl2 &= ~REDUCED_SPEED_SCLK_MODE;
 385                if (ps->misc & ATOM_PM_MISCINFO_DYNAMIC_CLOCK_DIVIDER_BY_2)
 386                        sclk_cntl2 |= REDUCED_SPEED_SCLK_SEL(0);
 387                else if (ps->misc & ATOM_PM_MISCINFO_DYNAMIC_CLOCK_DIVIDER_BY_4)
 388                        sclk_cntl2 |= REDUCED_SPEED_SCLK_SEL(2);
 389        } else
 390                sclk_more_cntl &= ~REDUCED_SPEED_SCLK_EN;
 391
 392        if (ps->misc & ATOM_PM_MISCINFO_ASIC_DYNAMIC_VOLTAGE_EN) {
 393                sclk_more_cntl |= IO_CG_VOLTAGE_DROP;
 394                if (voltage->delay) {
 395                        sclk_more_cntl |= VOLTAGE_DROP_SYNC;
 396                        switch (voltage->delay) {
 397                        case 33:
 398                                sclk_more_cntl |= VOLTAGE_DELAY_SEL(0);
 399                                break;
 400                        case 66:
 401                                sclk_more_cntl |= VOLTAGE_DELAY_SEL(1);
 402                                break;
 403                        case 99:
 404                                sclk_more_cntl |= VOLTAGE_DELAY_SEL(2);
 405                                break;
 406                        case 132:
 407                                sclk_more_cntl |= VOLTAGE_DELAY_SEL(3);
 408                                break;
 409                        }
 410                } else
 411                        sclk_more_cntl &= ~VOLTAGE_DROP_SYNC;
 412        } else
 413                sclk_more_cntl &= ~IO_CG_VOLTAGE_DROP;
 414
 415        if (ps->misc & ATOM_PM_MISCINFO_DYNAMIC_HDP_BLOCK_EN)
 416                sclk_cntl &= ~FORCE_HDP;
 417        else
 418                sclk_cntl |= FORCE_HDP;
 419
 420        WREG32_PLL(SCLK_CNTL, sclk_cntl);
 421        WREG32_PLL(SCLK_CNTL2, sclk_cntl2);
 422        WREG32_PLL(SCLK_MORE_CNTL, sclk_more_cntl);
 423
 424        /* set pcie lanes */
 425        if ((rdev->flags & RADEON_IS_PCIE) &&
 426            !(rdev->flags & RADEON_IS_IGP) &&
 427            rdev->asic->pm.set_pcie_lanes &&
 428            (ps->pcie_lanes !=
 429             rdev->pm.power_state[rdev->pm.current_power_state_index].pcie_lanes)) {
 430                radeon_set_pcie_lanes(rdev,
 431                                      ps->pcie_lanes);
 432                DRM_DEBUG_DRIVER("Setting: p: %d\n", ps->pcie_lanes);
 433        }
 434}
 435
 436/**
 437 * r100_pm_prepare - pre-power state change callback.
 438 *
 439 * @rdev: radeon_device pointer
 440 *
 441 * Prepare for a power state change (r1xx-r4xx).
 442 */
 443void r100_pm_prepare(struct radeon_device *rdev)
 444{
 445        struct drm_device *ddev = rdev->ddev;
 446        struct drm_crtc *crtc;
 447        struct radeon_crtc *radeon_crtc;
 448        u32 tmp;
 449
 450        /* disable any active CRTCs */
 451        list_for_each_entry(crtc, &ddev->mode_config.crtc_list, head) {
 452                radeon_crtc = to_radeon_crtc(crtc);
 453                if (radeon_crtc->enabled) {
 454                        if (radeon_crtc->crtc_id) {
 455                                tmp = RREG32(RADEON_CRTC2_GEN_CNTL);
 456                                tmp |= RADEON_CRTC2_DISP_REQ_EN_B;
 457                                WREG32(RADEON_CRTC2_GEN_CNTL, tmp);
 458                        } else {
 459                                tmp = RREG32(RADEON_CRTC_GEN_CNTL);
 460                                tmp |= RADEON_CRTC_DISP_REQ_EN_B;
 461                                WREG32(RADEON_CRTC_GEN_CNTL, tmp);
 462                        }
 463                }
 464        }
 465}
 466
 467/**
 468 * r100_pm_finish - post-power state change callback.
 469 *
 470 * @rdev: radeon_device pointer
 471 *
 472 * Clean up after a power state change (r1xx-r4xx).
 473 */
 474void r100_pm_finish(struct radeon_device *rdev)
 475{
 476        struct drm_device *ddev = rdev->ddev;
 477        struct drm_crtc *crtc;
 478        struct radeon_crtc *radeon_crtc;
 479        u32 tmp;
 480
 481        /* enable any active CRTCs */
 482        list_for_each_entry(crtc, &ddev->mode_config.crtc_list, head) {
 483                radeon_crtc = to_radeon_crtc(crtc);
 484                if (radeon_crtc->enabled) {
 485                        if (radeon_crtc->crtc_id) {
 486                                tmp = RREG32(RADEON_CRTC2_GEN_CNTL);
 487                                tmp &= ~RADEON_CRTC2_DISP_REQ_EN_B;
 488                                WREG32(RADEON_CRTC2_GEN_CNTL, tmp);
 489                        } else {
 490                                tmp = RREG32(RADEON_CRTC_GEN_CNTL);
 491                                tmp &= ~RADEON_CRTC_DISP_REQ_EN_B;
 492                                WREG32(RADEON_CRTC_GEN_CNTL, tmp);
 493                        }
 494                }
 495        }
 496}
 497
 498/**
 499 * r100_gui_idle - gui idle callback.
 500 *
 501 * @rdev: radeon_device pointer
 502 *
 503 * Check of the GUI (2D/3D engines) are idle (r1xx-r5xx).
 504 * Returns true if idle, false if not.
 505 */
 506bool r100_gui_idle(struct radeon_device *rdev)
 507{
 508        if (RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_ACTIVE)
 509                return false;
 510        else
 511                return true;
 512}
 513
 514/* hpd for digital panel detect/disconnect */
 515/**
 516 * r100_hpd_sense - hpd sense callback.
 517 *
 518 * @rdev: radeon_device pointer
 519 * @hpd: hpd (hotplug detect) pin
 520 *
 521 * Checks if a digital monitor is connected (r1xx-r4xx).
 522 * Returns true if connected, false if not connected.
 523 */
 524bool r100_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd)
 525{
 526        bool connected = false;
 527
 528        switch (hpd) {
 529        case RADEON_HPD_1:
 530                if (RREG32(RADEON_FP_GEN_CNTL) & RADEON_FP_DETECT_SENSE)
 531                        connected = true;
 532                break;
 533        case RADEON_HPD_2:
 534                if (RREG32(RADEON_FP2_GEN_CNTL) & RADEON_FP2_DETECT_SENSE)
 535                        connected = true;
 536                break;
 537        default:
 538                break;
 539        }
 540        return connected;
 541}
 542
 543/**
 544 * r100_hpd_set_polarity - hpd set polarity callback.
 545 *
 546 * @rdev: radeon_device pointer
 547 * @hpd: hpd (hotplug detect) pin
 548 *
 549 * Set the polarity of the hpd pin (r1xx-r4xx).
 550 */
 551void r100_hpd_set_polarity(struct radeon_device *rdev,
 552                           enum radeon_hpd_id hpd)
 553{
 554        u32 tmp;
 555        bool connected = r100_hpd_sense(rdev, hpd);
 556
 557        switch (hpd) {
 558        case RADEON_HPD_1:
 559                tmp = RREG32(RADEON_FP_GEN_CNTL);
 560                if (connected)
 561                        tmp &= ~RADEON_FP_DETECT_INT_POL;
 562                else
 563                        tmp |= RADEON_FP_DETECT_INT_POL;
 564                WREG32(RADEON_FP_GEN_CNTL, tmp);
 565                break;
 566        case RADEON_HPD_2:
 567                tmp = RREG32(RADEON_FP2_GEN_CNTL);
 568                if (connected)
 569                        tmp &= ~RADEON_FP2_DETECT_INT_POL;
 570                else
 571                        tmp |= RADEON_FP2_DETECT_INT_POL;
 572                WREG32(RADEON_FP2_GEN_CNTL, tmp);
 573                break;
 574        default:
 575                break;
 576        }
 577}
 578
 579/**
 580 * r100_hpd_init - hpd setup callback.
 581 *
 582 * @rdev: radeon_device pointer
 583 *
 584 * Setup the hpd pins used by the card (r1xx-r4xx).
 585 * Set the polarity, and enable the hpd interrupts.
 586 */
 587void r100_hpd_init(struct radeon_device *rdev)
 588{
 589        struct drm_device *dev = rdev->ddev;
 590        struct drm_connector *connector;
 591        unsigned enable = 0;
 592
 593        list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
 594                struct radeon_connector *radeon_connector = to_radeon_connector(connector);
 595                if (radeon_connector->hpd.hpd != RADEON_HPD_NONE)
 596                        enable |= 1 << radeon_connector->hpd.hpd;
 597                radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd);
 598        }
 599        radeon_irq_kms_enable_hpd(rdev, enable);
 600}
 601
 602/**
 603 * r100_hpd_fini - hpd tear down callback.
 604 *
 605 * @rdev: radeon_device pointer
 606 *
 607 * Tear down the hpd pins used by the card (r1xx-r4xx).
 608 * Disable the hpd interrupts.
 609 */
 610void r100_hpd_fini(struct radeon_device *rdev)
 611{
 612        struct drm_device *dev = rdev->ddev;
 613        struct drm_connector *connector;
 614        unsigned disable = 0;
 615
 616        list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
 617                struct radeon_connector *radeon_connector = to_radeon_connector(connector);
 618                if (radeon_connector->hpd.hpd != RADEON_HPD_NONE)
 619                        disable |= 1 << radeon_connector->hpd.hpd;
 620        }
 621        radeon_irq_kms_disable_hpd(rdev, disable);
 622}
 623
 624/*
 625 * PCI GART
 626 */
 627void r100_pci_gart_tlb_flush(struct radeon_device *rdev)
 628{
 629        /* TODO: can we do somethings here ? */
 630        /* It seems hw only cache one entry so we should discard this
 631         * entry otherwise if first GPU GART read hit this entry it
 632         * could end up in wrong address. */
 633}
 634
 635int r100_pci_gart_init(struct radeon_device *rdev)
 636{
 637        int r;
 638
 639        if (rdev->gart.ptr) {
 640                WARN(1, "R100 PCI GART already initialized\n");
 641                return 0;
 642        }
 643        /* Initialize common gart structure */
 644        r = radeon_gart_init(rdev);
 645        if (r)
 646                return r;
 647        rdev->gart.table_size = rdev->gart.num_gpu_pages * 4;
 648        rdev->asic->gart.tlb_flush = &r100_pci_gart_tlb_flush;
 649        rdev->asic->gart.get_page_entry = &r100_pci_gart_get_page_entry;
 650        rdev->asic->gart.set_page = &r100_pci_gart_set_page;
 651        return radeon_gart_table_ram_alloc(rdev);
 652}
 653
 654int r100_pci_gart_enable(struct radeon_device *rdev)
 655{
 656        uint32_t tmp;
 657
 658        /* discard memory request outside of configured range */
 659        tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
 660        WREG32(RADEON_AIC_CNTL, tmp);
 661        /* set address range for PCI address translate */
 662        WREG32(RADEON_AIC_LO_ADDR, rdev->mc.gtt_start);
 663        WREG32(RADEON_AIC_HI_ADDR, rdev->mc.gtt_end);
 664        /* set PCI GART page-table base address */
 665        WREG32(RADEON_AIC_PT_BASE, rdev->gart.table_addr);
 666        tmp = RREG32(RADEON_AIC_CNTL) | RADEON_PCIGART_TRANSLATE_EN;
 667        WREG32(RADEON_AIC_CNTL, tmp);
 668        r100_pci_gart_tlb_flush(rdev);
 669        DRM_INFO("PCI GART of %uM enabled (table at 0x%016llX).\n",
 670                 (unsigned)(rdev->mc.gtt_size >> 20),
 671                 (unsigned long long)rdev->gart.table_addr);
 672        rdev->gart.ready = true;
 673        return 0;
 674}
 675
 676void r100_pci_gart_disable(struct radeon_device *rdev)
 677{
 678        uint32_t tmp;
 679
 680        /* discard memory request outside of configured range */
 681        tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
 682        WREG32(RADEON_AIC_CNTL, tmp & ~RADEON_PCIGART_TRANSLATE_EN);
 683        WREG32(RADEON_AIC_LO_ADDR, 0);
 684        WREG32(RADEON_AIC_HI_ADDR, 0);
 685}
 686
 687uint64_t r100_pci_gart_get_page_entry(uint64_t addr, uint32_t flags)
 688{
 689        return addr;
 690}
 691
 692void r100_pci_gart_set_page(struct radeon_device *rdev, unsigned i,
 693                            uint64_t entry)
 694{
 695        u32 *gtt = rdev->gart.ptr;
 696        gtt[i] = cpu_to_le32(lower_32_bits(entry));
 697}
 698
 699void r100_pci_gart_fini(struct radeon_device *rdev)
 700{
 701        radeon_gart_fini(rdev);
 702        r100_pci_gart_disable(rdev);
 703        radeon_gart_table_ram_free(rdev);
 704}
 705
 706int r100_irq_set(struct radeon_device *rdev)
 707{
 708        uint32_t tmp = 0;
 709
 710        if (!rdev->irq.installed) {
 711                WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
 712                WREG32(R_000040_GEN_INT_CNTL, 0);
 713                return -EINVAL;
 714        }
 715        if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
 716                tmp |= RADEON_SW_INT_ENABLE;
 717        }
 718        if (rdev->irq.crtc_vblank_int[0] ||
 719            atomic_read(&rdev->irq.pflip[0])) {
 720                tmp |= RADEON_CRTC_VBLANK_MASK;
 721        }
 722        if (rdev->irq.crtc_vblank_int[1] ||
 723            atomic_read(&rdev->irq.pflip[1])) {
 724                tmp |= RADEON_CRTC2_VBLANK_MASK;
 725        }
 726        if (rdev->irq.hpd[0]) {
 727                tmp |= RADEON_FP_DETECT_MASK;
 728        }
 729        if (rdev->irq.hpd[1]) {
 730                tmp |= RADEON_FP2_DETECT_MASK;
 731        }
 732        WREG32(RADEON_GEN_INT_CNTL, tmp);
 733
 734        /* read back to post the write */
 735        RREG32(RADEON_GEN_INT_CNTL);
 736
 737        return 0;
 738}
 739
 740void r100_irq_disable(struct radeon_device *rdev)
 741{
 742        u32 tmp;
 743
 744        WREG32(R_000040_GEN_INT_CNTL, 0);
 745        /* Wait and acknowledge irq */
 746        mdelay(1);
 747        tmp = RREG32(R_000044_GEN_INT_STATUS);
 748        WREG32(R_000044_GEN_INT_STATUS, tmp);
 749}
 750
 751static uint32_t r100_irq_ack(struct radeon_device *rdev)
 752{
 753        uint32_t irqs = RREG32(RADEON_GEN_INT_STATUS);
 754        uint32_t irq_mask = RADEON_SW_INT_TEST |
 755                RADEON_CRTC_VBLANK_STAT | RADEON_CRTC2_VBLANK_STAT |
 756                RADEON_FP_DETECT_STAT | RADEON_FP2_DETECT_STAT;
 757
 758        if (irqs) {
 759                WREG32(RADEON_GEN_INT_STATUS, irqs);
 760        }
 761        return irqs & irq_mask;
 762}
 763
 764int r100_irq_process(struct radeon_device *rdev)
 765{
 766        uint32_t status, msi_rearm;
 767        bool queue_hotplug = false;
 768
 769        status = r100_irq_ack(rdev);
 770        if (!status) {
 771                return IRQ_NONE;
 772        }
 773        if (rdev->shutdown) {
 774                return IRQ_NONE;
 775        }
 776        while (status) {
 777                /* SW interrupt */
 778                if (status & RADEON_SW_INT_TEST) {
 779                        radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
 780                }
 781                /* Vertical blank interrupts */
 782                if (status & RADEON_CRTC_VBLANK_STAT) {
 783                        if (rdev->irq.crtc_vblank_int[0]) {
 784                                drm_handle_vblank(rdev->ddev, 0);
 785                                rdev->pm.vblank_sync = true;
 786                                wake_up(&rdev->irq.vblank_queue);
 787                        }
 788                        if (atomic_read(&rdev->irq.pflip[0]))
 789                                radeon_crtc_handle_vblank(rdev, 0);
 790                }
 791                if (status & RADEON_CRTC2_VBLANK_STAT) {
 792                        if (rdev->irq.crtc_vblank_int[1]) {
 793                                drm_handle_vblank(rdev->ddev, 1);
 794                                rdev->pm.vblank_sync = true;
 795                                wake_up(&rdev->irq.vblank_queue);
 796                        }
 797                        if (atomic_read(&rdev->irq.pflip[1]))
 798                                radeon_crtc_handle_vblank(rdev, 1);
 799                }
 800                if (status & RADEON_FP_DETECT_STAT) {
 801                        queue_hotplug = true;
 802                        DRM_DEBUG("HPD1\n");
 803                }
 804                if (status & RADEON_FP2_DETECT_STAT) {
 805                        queue_hotplug = true;
 806                        DRM_DEBUG("HPD2\n");
 807                }
 808                status = r100_irq_ack(rdev);
 809        }
 810        if (queue_hotplug)
 811                schedule_delayed_work(&rdev->hotplug_work, 0);
 812        if (rdev->msi_enabled) {
 813                switch (rdev->family) {
 814                case CHIP_RS400:
 815                case CHIP_RS480:
 816                        msi_rearm = RREG32(RADEON_AIC_CNTL) & ~RS400_MSI_REARM;
 817                        WREG32(RADEON_AIC_CNTL, msi_rearm);
 818                        WREG32(RADEON_AIC_CNTL, msi_rearm | RS400_MSI_REARM);
 819                        break;
 820                default:
 821                        WREG32(RADEON_MSI_REARM_EN, RV370_MSI_REARM_EN);
 822                        break;
 823                }
 824        }
 825        return IRQ_HANDLED;
 826}
 827
 828u32 r100_get_vblank_counter(struct radeon_device *rdev, int crtc)
 829{
 830        if (crtc == 0)
 831                return RREG32(RADEON_CRTC_CRNT_FRAME);
 832        else
 833                return RREG32(RADEON_CRTC2_CRNT_FRAME);
 834}
 835
 836/**
 837 * r100_ring_hdp_flush - flush Host Data Path via the ring buffer
 838 * rdev: radeon device structure
 839 * ring: ring buffer struct for emitting packets
 840 */
 841static void r100_ring_hdp_flush(struct radeon_device *rdev, struct radeon_ring *ring)
 842{
 843        radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
 844        radeon_ring_write(ring, rdev->config.r100.hdp_cntl |
 845                                RADEON_HDP_READ_BUFFER_INVALIDATE);
 846        radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
 847        radeon_ring_write(ring, rdev->config.r100.hdp_cntl);
 848}
 849
 850/* Who ever call radeon_fence_emit should call ring_lock and ask
 851 * for enough space (today caller are ib schedule and buffer move) */
 852void r100_fence_ring_emit(struct radeon_device *rdev,
 853                          struct radeon_fence *fence)
 854{
 855        struct radeon_ring *ring = &rdev->ring[fence->ring];
 856
 857        /* We have to make sure that caches are flushed before
 858         * CPU might read something from VRAM. */
 859        radeon_ring_write(ring, PACKET0(RADEON_RB3D_DSTCACHE_CTLSTAT, 0));
 860        radeon_ring_write(ring, RADEON_RB3D_DC_FLUSH_ALL);
 861        radeon_ring_write(ring, PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 0));
 862        radeon_ring_write(ring, RADEON_RB3D_ZC_FLUSH_ALL);
 863        /* Wait until IDLE & CLEAN */
 864        radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0));
 865        radeon_ring_write(ring, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN);
 866        r100_ring_hdp_flush(rdev, ring);
 867        /* Emit fence sequence & fire IRQ */
 868        radeon_ring_write(ring, PACKET0(rdev->fence_drv[fence->ring].scratch_reg, 0));
 869        radeon_ring_write(ring, fence->seq);
 870        radeon_ring_write(ring, PACKET0(RADEON_GEN_INT_STATUS, 0));
 871        radeon_ring_write(ring, RADEON_SW_INT_FIRE);
 872}
 873
 874bool r100_semaphore_ring_emit(struct radeon_device *rdev,
 875                              struct radeon_ring *ring,
 876                              struct radeon_semaphore *semaphore,
 877                              bool emit_wait)
 878{
 879        /* Unused on older asics, since we don't have semaphores or multiple rings */
 880        BUG();
 881        return false;
 882}
 883
 884struct radeon_fence *r100_copy_blit(struct radeon_device *rdev,
 885                                    uint64_t src_offset,
 886                                    uint64_t dst_offset,
 887                                    unsigned num_gpu_pages,
 888                                    struct reservation_object *resv)
 889{
 890        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 891        struct radeon_fence *fence;
 892        uint32_t cur_pages;
 893        uint32_t stride_bytes = RADEON_GPU_PAGE_SIZE;
 894        uint32_t pitch;
 895        uint32_t stride_pixels;
 896        unsigned ndw;
 897        int num_loops;
 898        int r = 0;
 899
 900        /* radeon limited to 16k stride */
 901        stride_bytes &= 0x3fff;
 902        /* radeon pitch is /64 */
 903        pitch = stride_bytes / 64;
 904        stride_pixels = stride_bytes / 4;
 905        num_loops = DIV_ROUND_UP(num_gpu_pages, 8191);
 906
 907        /* Ask for enough room for blit + flush + fence */
 908        ndw = 64 + (10 * num_loops);
 909        r = radeon_ring_lock(rdev, ring, ndw);
 910        if (r) {
 911                DRM_ERROR("radeon: moving bo (%d) asking for %u dw.\n", r, ndw);
 912                return ERR_PTR(-EINVAL);
 913        }
 914        while (num_gpu_pages > 0) {
 915                cur_pages = num_gpu_pages;
 916                if (cur_pages > 8191) {
 917                        cur_pages = 8191;
 918                }
 919                num_gpu_pages -= cur_pages;
 920
 921                /* pages are in Y direction - height
 922                   page width in X direction - width */
 923                radeon_ring_write(ring, PACKET3(PACKET3_BITBLT_MULTI, 8));
 924                radeon_ring_write(ring,
 925                                  RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
 926                                  RADEON_GMC_DST_PITCH_OFFSET_CNTL |
 927                                  RADEON_GMC_SRC_CLIPPING |
 928                                  RADEON_GMC_DST_CLIPPING |
 929                                  RADEON_GMC_BRUSH_NONE |
 930                                  (RADEON_COLOR_FORMAT_ARGB8888 << 8) |
 931                                  RADEON_GMC_SRC_DATATYPE_COLOR |
 932                                  RADEON_ROP3_S |
 933                                  RADEON_DP_SRC_SOURCE_MEMORY |
 934                                  RADEON_GMC_CLR_CMP_CNTL_DIS |
 935                                  RADEON_GMC_WR_MSK_DIS);
 936                radeon_ring_write(ring, (pitch << 22) | (src_offset >> 10));
 937                radeon_ring_write(ring, (pitch << 22) | (dst_offset >> 10));
 938                radeon_ring_write(ring, (0x1fff) | (0x1fff << 16));
 939                radeon_ring_write(ring, 0);
 940                radeon_ring_write(ring, (0x1fff) | (0x1fff << 16));
 941                radeon_ring_write(ring, num_gpu_pages);
 942                radeon_ring_write(ring, num_gpu_pages);
 943                radeon_ring_write(ring, cur_pages | (stride_pixels << 16));
 944        }
 945        radeon_ring_write(ring, PACKET0(RADEON_DSTCACHE_CTLSTAT, 0));
 946        radeon_ring_write(ring, RADEON_RB2D_DC_FLUSH_ALL);
 947        radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0));
 948        radeon_ring_write(ring,
 949                          RADEON_WAIT_2D_IDLECLEAN |
 950                          RADEON_WAIT_HOST_IDLECLEAN |
 951                          RADEON_WAIT_DMA_GUI_IDLE);
 952        r = radeon_fence_emit(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX);
 953        if (r) {
 954                radeon_ring_unlock_undo(rdev, ring);
 955                return ERR_PTR(r);
 956        }
 957        radeon_ring_unlock_commit(rdev, ring, false);
 958        return fence;
 959}
 960
 961static int r100_cp_wait_for_idle(struct radeon_device *rdev)
 962{
 963        unsigned i;
 964        u32 tmp;
 965
 966        for (i = 0; i < rdev->usec_timeout; i++) {
 967                tmp = RREG32(R_000E40_RBBM_STATUS);
 968                if (!G_000E40_CP_CMDSTRM_BUSY(tmp)) {
 969                        return 0;
 970                }
 971                udelay(1);
 972        }
 973        return -1;
 974}
 975
 976void r100_ring_start(struct radeon_device *rdev, struct radeon_ring *ring)
 977{
 978        int r;
 979
 980        r = radeon_ring_lock(rdev, ring, 2);
 981        if (r) {
 982                return;
 983        }
 984        radeon_ring_write(ring, PACKET0(RADEON_ISYNC_CNTL, 0));
 985        radeon_ring_write(ring,
 986                          RADEON_ISYNC_ANY2D_IDLE3D |
 987                          RADEON_ISYNC_ANY3D_IDLE2D |
 988                          RADEON_ISYNC_WAIT_IDLEGUI |
 989                          RADEON_ISYNC_CPSCRATCH_IDLEGUI);
 990        radeon_ring_unlock_commit(rdev, ring, false);
 991}
 992
 993
 994/* Load the microcode for the CP */
 995static int r100_cp_init_microcode(struct radeon_device *rdev)
 996{
 997        const char *fw_name = NULL;
 998        int err;
 999
1000        DRM_DEBUG_KMS("\n");
1001
1002        if ((rdev->family == CHIP_R100) || (rdev->family == CHIP_RV100) ||
1003            (rdev->family == CHIP_RV200) || (rdev->family == CHIP_RS100) ||
1004            (rdev->family == CHIP_RS200)) {
1005                DRM_INFO("Loading R100 Microcode\n");
1006                fw_name = FIRMWARE_R100;
1007        } else if ((rdev->family == CHIP_R200) ||
1008                   (rdev->family == CHIP_RV250) ||
1009                   (rdev->family == CHIP_RV280) ||
1010                   (rdev->family == CHIP_RS300)) {
1011                DRM_INFO("Loading R200 Microcode\n");
1012                fw_name = FIRMWARE_R200;
1013        } else if ((rdev->family == CHIP_R300) ||
1014                   (rdev->family == CHIP_R350) ||
1015                   (rdev->family == CHIP_RV350) ||
1016                   (rdev->family == CHIP_RV380) ||
1017                   (rdev->family == CHIP_RS400) ||
1018                   (rdev->family == CHIP_RS480)) {
1019                DRM_INFO("Loading R300 Microcode\n");
1020                fw_name = FIRMWARE_R300;
1021        } else if ((rdev->family == CHIP_R420) ||
1022                   (rdev->family == CHIP_R423) ||
1023                   (rdev->family == CHIP_RV410)) {
1024                DRM_INFO("Loading R400 Microcode\n");
1025                fw_name = FIRMWARE_R420;
1026        } else if ((rdev->family == CHIP_RS690) ||
1027                   (rdev->family == CHIP_RS740)) {
1028                DRM_INFO("Loading RS690/RS740 Microcode\n");
1029                fw_name = FIRMWARE_RS690;
1030        } else if (rdev->family == CHIP_RS600) {
1031                DRM_INFO("Loading RS600 Microcode\n");
1032                fw_name = FIRMWARE_RS600;
1033        } else if ((rdev->family == CHIP_RV515) ||
1034                   (rdev->family == CHIP_R520) ||
1035                   (rdev->family == CHIP_RV530) ||
1036                   (rdev->family == CHIP_R580) ||
1037                   (rdev->family == CHIP_RV560) ||
1038                   (rdev->family == CHIP_RV570)) {
1039                DRM_INFO("Loading R500 Microcode\n");
1040                fw_name = FIRMWARE_R520;
1041        }
1042
1043        err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1044        if (err) {
1045                pr_err("radeon_cp: Failed to load firmware \"%s\"\n", fw_name);
1046        } else if (rdev->me_fw->size % 8) {
1047                pr_err("radeon_cp: Bogus length %zu in firmware \"%s\"\n",
1048                       rdev->me_fw->size, fw_name);
1049                err = -EINVAL;
1050                release_firmware(rdev->me_fw);
1051                rdev->me_fw = NULL;
1052        }
1053        return err;
1054}
1055
1056u32 r100_gfx_get_rptr(struct radeon_device *rdev,
1057                      struct radeon_ring *ring)
1058{
1059        u32 rptr;
1060
1061        if (rdev->wb.enabled)
1062                rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
1063        else
1064                rptr = RREG32(RADEON_CP_RB_RPTR);
1065
1066        return rptr;
1067}
1068
1069u32 r100_gfx_get_wptr(struct radeon_device *rdev,
1070                      struct radeon_ring *ring)
1071{
1072        return RREG32(RADEON_CP_RB_WPTR);
1073}
1074
1075void r100_gfx_set_wptr(struct radeon_device *rdev,
1076                       struct radeon_ring *ring)
1077{
1078        WREG32(RADEON_CP_RB_WPTR, ring->wptr);
1079        (void)RREG32(RADEON_CP_RB_WPTR);
1080}
1081
1082static void r100_cp_load_microcode(struct radeon_device *rdev)
1083{
1084        const __be32 *fw_data;
1085        int i, size;
1086
1087        if (r100_gui_wait_for_idle(rdev)) {
1088                pr_warn("Failed to wait GUI idle while programming pipes. Bad things might happen.\n");
1089        }
1090
1091        if (rdev->me_fw) {
1092                size = rdev->me_fw->size / 4;
1093                fw_data = (const __be32 *)&rdev->me_fw->data[0];
1094                WREG32(RADEON_CP_ME_RAM_ADDR, 0);
1095                for (i = 0; i < size; i += 2) {
1096                        WREG32(RADEON_CP_ME_RAM_DATAH,
1097                               be32_to_cpup(&fw_data[i]));
1098                        WREG32(RADEON_CP_ME_RAM_DATAL,
1099                               be32_to_cpup(&fw_data[i + 1]));
1100                }
1101        }
1102}
1103
1104int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
1105{
1106        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
1107        unsigned rb_bufsz;
1108        unsigned rb_blksz;
1109        unsigned max_fetch;
1110        unsigned pre_write_timer;
1111        unsigned pre_write_limit;
1112        unsigned indirect2_start;
1113        unsigned indirect1_start;
1114        uint32_t tmp;
1115        int r;
1116
1117        if (r100_debugfs_cp_init(rdev)) {
1118                DRM_ERROR("Failed to register debugfs file for CP !\n");
1119        }
1120        if (!rdev->me_fw) {
1121                r = r100_cp_init_microcode(rdev);
1122                if (r) {
1123                        DRM_ERROR("Failed to load firmware!\n");
1124                        return r;
1125                }
1126        }
1127
1128        /* Align ring size */
1129        rb_bufsz = order_base_2(ring_size / 8);
1130        ring_size = (1 << (rb_bufsz + 1)) * 4;
1131        r100_cp_load_microcode(rdev);
1132        r = radeon_ring_init(rdev, ring, ring_size, RADEON_WB_CP_RPTR_OFFSET,
1133                             RADEON_CP_PACKET2);
1134        if (r) {
1135                return r;
1136        }
1137        /* Each time the cp read 1024 bytes (16 dword/quadword) update
1138         * the rptr copy in system ram */
1139        rb_blksz = 9;
1140        /* cp will read 128bytes at a time (4 dwords) */
1141        max_fetch = 1;
1142        ring->align_mask = 16 - 1;
1143        /* Write to CP_RB_WPTR will be delayed for pre_write_timer clocks */
1144        pre_write_timer = 64;
1145        /* Force CP_RB_WPTR write if written more than one time before the
1146         * delay expire
1147         */
1148        pre_write_limit = 0;
1149        /* Setup the cp cache like this (cache size is 96 dwords) :
1150         *      RING            0  to 15
1151         *      INDIRECT1       16 to 79
1152         *      INDIRECT2       80 to 95
1153         * So ring cache size is 16dwords (> (2 * max_fetch = 2 * 4dwords))
1154         *    indirect1 cache size is 64dwords (> (2 * max_fetch = 2 * 4dwords))
1155         *    indirect2 cache size is 16dwords (> (2 * max_fetch = 2 * 4dwords))
1156         * Idea being that most of the gpu cmd will be through indirect1 buffer
1157         * so it gets the bigger cache.
1158         */
1159        indirect2_start = 80;
1160        indirect1_start = 16;
1161        /* cp setup */
1162        WREG32(0x718, pre_write_timer | (pre_write_limit << 28));
1163        tmp = (REG_SET(RADEON_RB_BUFSZ, rb_bufsz) |
1164               REG_SET(RADEON_RB_BLKSZ, rb_blksz) |
1165               REG_SET(RADEON_MAX_FETCH, max_fetch));
1166#ifdef __BIG_ENDIAN
1167        tmp |= RADEON_BUF_SWAP_32BIT;
1168#endif
1169        WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_NO_UPDATE);
1170
1171        /* Set ring address */
1172        DRM_INFO("radeon: ring at 0x%016lX\n", (unsigned long)ring->gpu_addr);
1173        WREG32(RADEON_CP_RB_BASE, ring->gpu_addr);
1174        /* Force read & write ptr to 0 */
1175        WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA | RADEON_RB_NO_UPDATE);
1176        WREG32(RADEON_CP_RB_RPTR_WR, 0);
1177        ring->wptr = 0;
1178        WREG32(RADEON_CP_RB_WPTR, ring->wptr);
1179
1180        /* set the wb address whether it's enabled or not */
1181        WREG32(R_00070C_CP_RB_RPTR_ADDR,
1182                S_00070C_RB_RPTR_ADDR((rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) >> 2));
1183        WREG32(R_000774_SCRATCH_ADDR, rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET);
1184
1185        if (rdev->wb.enabled)
1186                WREG32(R_000770_SCRATCH_UMSK, 0xff);
1187        else {
1188                tmp |= RADEON_RB_NO_UPDATE;
1189                WREG32(R_000770_SCRATCH_UMSK, 0);
1190        }
1191
1192        WREG32(RADEON_CP_RB_CNTL, tmp);
1193        udelay(10);
1194        /* Set cp mode to bus mastering & enable cp*/
1195        WREG32(RADEON_CP_CSQ_MODE,
1196               REG_SET(RADEON_INDIRECT2_START, indirect2_start) |
1197               REG_SET(RADEON_INDIRECT1_START, indirect1_start));
1198        WREG32(RADEON_CP_RB_WPTR_DELAY, 0);
1199        WREG32(RADEON_CP_CSQ_MODE, 0x00004D4D);
1200        WREG32(RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIBM_INDBM);
1201
1202        /* at this point everything should be setup correctly to enable master */
1203        pci_set_master(rdev->pdev);
1204
1205        radeon_ring_start(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
1206        r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, ring);
1207        if (r) {
1208                DRM_ERROR("radeon: cp isn't working (%d).\n", r);
1209                return r;
1210        }
1211        ring->ready = true;
1212        radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
1213
1214        if (!ring->rptr_save_reg /* not resuming from suspend */
1215            && radeon_ring_supports_scratch_reg(rdev, ring)) {
1216                r = radeon_scratch_get(rdev, &ring->rptr_save_reg);
1217                if (r) {
1218                        DRM_ERROR("failed to get scratch reg for rptr save (%d).\n", r);
1219                        ring->rptr_save_reg = 0;
1220                }
1221        }
1222        return 0;
1223}
1224
1225void r100_cp_fini(struct radeon_device *rdev)
1226{
1227        if (r100_cp_wait_for_idle(rdev)) {
1228                DRM_ERROR("Wait for CP idle timeout, shutting down CP.\n");
1229        }
1230        /* Disable ring */
1231        r100_cp_disable(rdev);
1232        radeon_scratch_free(rdev, rdev->ring[RADEON_RING_TYPE_GFX_INDEX].rptr_save_reg);
1233        radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
1234        DRM_INFO("radeon: cp finalized\n");
1235}
1236
1237void r100_cp_disable(struct radeon_device *rdev)
1238{
1239        /* Disable ring */
1240        radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1241        rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
1242        WREG32(RADEON_CP_CSQ_MODE, 0);
1243        WREG32(RADEON_CP_CSQ_CNTL, 0);
1244        WREG32(R_000770_SCRATCH_UMSK, 0);
1245        if (r100_gui_wait_for_idle(rdev)) {
1246                pr_warn("Failed to wait GUI idle while programming pipes. Bad things might happen.\n");
1247        }
1248}
1249
1250/*
1251 * CS functions
1252 */
1253int r100_reloc_pitch_offset(struct radeon_cs_parser *p,
1254                            struct radeon_cs_packet *pkt,
1255                            unsigned idx,
1256                            unsigned reg)
1257{
1258        int r;
1259        u32 tile_flags = 0;
1260        u32 tmp;
1261        struct radeon_bo_list *reloc;
1262        u32 value;
1263
1264        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1265        if (r) {
1266                DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1267                          idx, reg);
1268                radeon_cs_dump_packet(p, pkt);
1269                return r;
1270        }
1271
1272        value = radeon_get_ib_value(p, idx);
1273        tmp = value & 0x003fffff;
1274        tmp += (((u32)reloc->gpu_offset) >> 10);
1275
1276        if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1277                if (reloc->tiling_flags & RADEON_TILING_MACRO)
1278                        tile_flags |= RADEON_DST_TILE_MACRO;
1279                if (reloc->tiling_flags & RADEON_TILING_MICRO) {
1280                        if (reg == RADEON_SRC_PITCH_OFFSET) {
1281                                DRM_ERROR("Cannot src blit from microtiled surface\n");
1282                                radeon_cs_dump_packet(p, pkt);
1283                                return -EINVAL;
1284                        }
1285                        tile_flags |= RADEON_DST_TILE_MICRO;
1286                }
1287
1288                tmp |= tile_flags;
1289                p->ib.ptr[idx] = (value & 0x3fc00000) | tmp;
1290        } else
1291                p->ib.ptr[idx] = (value & 0xffc00000) | tmp;
1292        return 0;
1293}
1294
1295int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
1296                             struct radeon_cs_packet *pkt,
1297                             int idx)
1298{
1299        unsigned c, i;
1300        struct radeon_bo_list *reloc;
1301        struct r100_cs_track *track;
1302        int r = 0;
1303        volatile uint32_t *ib;
1304        u32 idx_value;
1305
1306        ib = p->ib.ptr;
1307        track = (struct r100_cs_track *)p->track;
1308        c = radeon_get_ib_value(p, idx++) & 0x1F;
1309        if (c > 16) {
1310            DRM_ERROR("Only 16 vertex buffers are allowed %d\n",
1311                      pkt->opcode);
1312            radeon_cs_dump_packet(p, pkt);
1313            return -EINVAL;
1314        }
1315        track->num_arrays = c;
1316        for (i = 0; i < (c - 1); i+=2, idx+=3) {
1317                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1318                if (r) {
1319                        DRM_ERROR("No reloc for packet3 %d\n",
1320                                  pkt->opcode);
1321                        radeon_cs_dump_packet(p, pkt);
1322                        return r;
1323                }
1324                idx_value = radeon_get_ib_value(p, idx);
1325                ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->gpu_offset);
1326
1327                track->arrays[i + 0].esize = idx_value >> 8;
1328                track->arrays[i + 0].robj = reloc->robj;
1329                track->arrays[i + 0].esize &= 0x7F;
1330                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1331                if (r) {
1332                        DRM_ERROR("No reloc for packet3 %d\n",
1333                                  pkt->opcode);
1334                        radeon_cs_dump_packet(p, pkt);
1335                        return r;
1336                }
1337                ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->gpu_offset);
1338                track->arrays[i + 1].robj = reloc->robj;
1339                track->arrays[i + 1].esize = idx_value >> 24;
1340                track->arrays[i + 1].esize &= 0x7F;
1341        }
1342        if (c & 1) {
1343                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1344                if (r) {
1345                        DRM_ERROR("No reloc for packet3 %d\n",
1346                                          pkt->opcode);
1347                        radeon_cs_dump_packet(p, pkt);
1348                        return r;
1349                }
1350                idx_value = radeon_get_ib_value(p, idx);
1351                ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->gpu_offset);
1352                track->arrays[i + 0].robj = reloc->robj;
1353                track->arrays[i + 0].esize = idx_value >> 8;
1354                track->arrays[i + 0].esize &= 0x7F;
1355        }
1356        return r;
1357}
1358
1359int r100_cs_parse_packet0(struct radeon_cs_parser *p,
1360                          struct radeon_cs_packet *pkt,
1361                          const unsigned *auth, unsigned n,
1362                          radeon_packet0_check_t check)
1363{
1364        unsigned reg;
1365        unsigned i, j, m;
1366        unsigned idx;
1367        int r;
1368
1369        idx = pkt->idx + 1;
1370        reg = pkt->reg;
1371        /* Check that register fall into register range
1372         * determined by the number of entry (n) in the
1373         * safe register bitmap.
1374         */
1375        if (pkt->one_reg_wr) {
1376                if ((reg >> 7) > n) {
1377                        return -EINVAL;
1378                }
1379        } else {
1380                if (((reg + (pkt->count << 2)) >> 7) > n) {
1381                        return -EINVAL;
1382                }
1383        }
1384        for (i = 0; i <= pkt->count; i++, idx++) {
1385                j = (reg >> 7);
1386                m = 1 << ((reg >> 2) & 31);
1387                if (auth[j] & m) {
1388                        r = check(p, pkt, idx, reg);
1389                        if (r) {
1390                                return r;
1391                        }
1392                }
1393                if (pkt->one_reg_wr) {
1394                        if (!(auth[j] & m)) {
1395                                break;
1396                        }
1397                } else {
1398                        reg += 4;
1399                }
1400        }
1401        return 0;
1402}
1403
1404/**
1405 * r100_cs_packet_next_vline() - parse userspace VLINE packet
1406 * @parser:             parser structure holding parsing context.
1407 *
1408 * Userspace sends a special sequence for VLINE waits.
1409 * PACKET0 - VLINE_START_END + value
1410 * PACKET0 - WAIT_UNTIL +_value
1411 * RELOC (P3) - crtc_id in reloc.
1412 *
1413 * This function parses this and relocates the VLINE START END
1414 * and WAIT UNTIL packets to the correct crtc.
1415 * It also detects a switched off crtc and nulls out the
1416 * wait in that case.
1417 */
1418int r100_cs_packet_parse_vline(struct radeon_cs_parser *p)
1419{
1420        struct drm_crtc *crtc;
1421        struct radeon_crtc *radeon_crtc;
1422        struct radeon_cs_packet p3reloc, waitreloc;
1423        int crtc_id;
1424        int r;
1425        uint32_t header, h_idx, reg;
1426        volatile uint32_t *ib;
1427
1428        ib = p->ib.ptr;
1429
1430        /* parse the wait until */
1431        r = radeon_cs_packet_parse(p, &waitreloc, p->idx);
1432        if (r)
1433                return r;
1434
1435        /* check its a wait until and only 1 count */
1436        if (waitreloc.reg != RADEON_WAIT_UNTIL ||
1437            waitreloc.count != 0) {
1438                DRM_ERROR("vline wait had illegal wait until segment\n");
1439                return -EINVAL;
1440        }
1441
1442        if (radeon_get_ib_value(p, waitreloc.idx + 1) != RADEON_WAIT_CRTC_VLINE) {
1443                DRM_ERROR("vline wait had illegal wait until\n");
1444                return -EINVAL;
1445        }
1446
1447        /* jump over the NOP */
1448        r = radeon_cs_packet_parse(p, &p3reloc, p->idx + waitreloc.count + 2);
1449        if (r)
1450                return r;
1451
1452        h_idx = p->idx - 2;
1453        p->idx += waitreloc.count + 2;
1454        p->idx += p3reloc.count + 2;
1455
1456        header = radeon_get_ib_value(p, h_idx);
1457        crtc_id = radeon_get_ib_value(p, h_idx + 5);
1458        reg = R100_CP_PACKET0_GET_REG(header);
1459        crtc = drm_crtc_find(p->rdev->ddev, crtc_id);
1460        if (!crtc) {
1461                DRM_ERROR("cannot find crtc %d\n", crtc_id);
1462                return -ENOENT;
1463        }
1464        radeon_crtc = to_radeon_crtc(crtc);
1465        crtc_id = radeon_crtc->crtc_id;
1466
1467        if (!crtc->enabled) {
1468                /* if the CRTC isn't enabled - we need to nop out the wait until */
1469                ib[h_idx + 2] = PACKET2(0);
1470                ib[h_idx + 3] = PACKET2(0);
1471        } else if (crtc_id == 1) {
1472                switch (reg) {
1473                case AVIVO_D1MODE_VLINE_START_END:
1474                        header &= ~R300_CP_PACKET0_REG_MASK;
1475                        header |= AVIVO_D2MODE_VLINE_START_END >> 2;
1476                        break;
1477                case RADEON_CRTC_GUI_TRIG_VLINE:
1478                        header &= ~R300_CP_PACKET0_REG_MASK;
1479                        header |= RADEON_CRTC2_GUI_TRIG_VLINE >> 2;
1480                        break;
1481                default:
1482                        DRM_ERROR("unknown crtc reloc\n");
1483                        return -EINVAL;
1484                }
1485                ib[h_idx] = header;
1486                ib[h_idx + 3] |= RADEON_ENG_DISPLAY_SELECT_CRTC1;
1487        }
1488
1489        return 0;
1490}
1491
1492static int r100_get_vtx_size(uint32_t vtx_fmt)
1493{
1494        int vtx_size;
1495        vtx_size = 2;
1496        /* ordered according to bits in spec */
1497        if (vtx_fmt & RADEON_SE_VTX_FMT_W0)
1498                vtx_size++;
1499        if (vtx_fmt & RADEON_SE_VTX_FMT_FPCOLOR)
1500                vtx_size += 3;
1501        if (vtx_fmt & RADEON_SE_VTX_FMT_FPALPHA)
1502                vtx_size++;
1503        if (vtx_fmt & RADEON_SE_VTX_FMT_PKCOLOR)
1504                vtx_size++;
1505        if (vtx_fmt & RADEON_SE_VTX_FMT_FPSPEC)
1506                vtx_size += 3;
1507        if (vtx_fmt & RADEON_SE_VTX_FMT_FPFOG)
1508                vtx_size++;
1509        if (vtx_fmt & RADEON_SE_VTX_FMT_PKSPEC)
1510                vtx_size++;
1511        if (vtx_fmt & RADEON_SE_VTX_FMT_ST0)
1512                vtx_size += 2;
1513        if (vtx_fmt & RADEON_SE_VTX_FMT_ST1)
1514                vtx_size += 2;
1515        if (vtx_fmt & RADEON_SE_VTX_FMT_Q1)
1516                vtx_size++;
1517        if (vtx_fmt & RADEON_SE_VTX_FMT_ST2)
1518                vtx_size += 2;
1519        if (vtx_fmt & RADEON_SE_VTX_FMT_Q2)
1520                vtx_size++;
1521        if (vtx_fmt & RADEON_SE_VTX_FMT_ST3)
1522                vtx_size += 2;
1523        if (vtx_fmt & RADEON_SE_VTX_FMT_Q3)
1524                vtx_size++;
1525        if (vtx_fmt & RADEON_SE_VTX_FMT_Q0)
1526                vtx_size++;
1527        /* blend weight */
1528        if (vtx_fmt & (0x7 << 15))
1529                vtx_size += (vtx_fmt >> 15) & 0x7;
1530        if (vtx_fmt & RADEON_SE_VTX_FMT_N0)
1531                vtx_size += 3;
1532        if (vtx_fmt & RADEON_SE_VTX_FMT_XY1)
1533                vtx_size += 2;
1534        if (vtx_fmt & RADEON_SE_VTX_FMT_Z1)
1535                vtx_size++;
1536        if (vtx_fmt & RADEON_SE_VTX_FMT_W1)
1537                vtx_size++;
1538        if (vtx_fmt & RADEON_SE_VTX_FMT_N1)
1539                vtx_size++;
1540        if (vtx_fmt & RADEON_SE_VTX_FMT_Z)
1541                vtx_size++;
1542        return vtx_size;
1543}
1544
1545static int r100_packet0_check(struct radeon_cs_parser *p,
1546                              struct radeon_cs_packet *pkt,
1547                              unsigned idx, unsigned reg)
1548{
1549        struct radeon_bo_list *reloc;
1550        struct r100_cs_track *track;
1551        volatile uint32_t *ib;
1552        uint32_t tmp;
1553        int r;
1554        int i, face;
1555        u32 tile_flags = 0;
1556        u32 idx_value;
1557
1558        ib = p->ib.ptr;
1559        track = (struct r100_cs_track *)p->track;
1560
1561        idx_value = radeon_get_ib_value(p, idx);
1562
1563        switch (reg) {
1564        case RADEON_CRTC_GUI_TRIG_VLINE:
1565                r = r100_cs_packet_parse_vline(p);
1566                if (r) {
1567                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1568                                  idx, reg);
1569                        radeon_cs_dump_packet(p, pkt);
1570                        return r;
1571                }
1572                break;
1573                /* FIXME: only allow PACKET3 blit? easier to check for out of
1574                 * range access */
1575        case RADEON_DST_PITCH_OFFSET:
1576        case RADEON_SRC_PITCH_OFFSET:
1577                r = r100_reloc_pitch_offset(p, pkt, idx, reg);
1578                if (r)
1579                        return r;
1580                break;
1581        case RADEON_RB3D_DEPTHOFFSET:
1582                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1583                if (r) {
1584                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1585                                  idx, reg);
1586                        radeon_cs_dump_packet(p, pkt);
1587                        return r;
1588                }
1589                track->zb.robj = reloc->robj;
1590                track->zb.offset = idx_value;
1591                track->zb_dirty = true;
1592                ib[idx] = idx_value + ((u32)reloc->gpu_offset);
1593                break;
1594        case RADEON_RB3D_COLOROFFSET:
1595                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1596                if (r) {
1597                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1598                                  idx, reg);
1599                        radeon_cs_dump_packet(p, pkt);
1600                        return r;
1601                }
1602                track->cb[0].robj = reloc->robj;
1603                track->cb[0].offset = idx_value;
1604                track->cb_dirty = true;
1605                ib[idx] = idx_value + ((u32)reloc->gpu_offset);
1606                break;
1607        case RADEON_PP_TXOFFSET_0:
1608        case RADEON_PP_TXOFFSET_1:
1609        case RADEON_PP_TXOFFSET_2:
1610                i = (reg - RADEON_PP_TXOFFSET_0) / 24;
1611                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1612                if (r) {
1613                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1614                                  idx, reg);
1615                        radeon_cs_dump_packet(p, pkt);
1616                        return r;
1617                }
1618                if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1619                        if (reloc->tiling_flags & RADEON_TILING_MACRO)
1620                                tile_flags |= RADEON_TXO_MACRO_TILE;
1621                        if (reloc->tiling_flags & RADEON_TILING_MICRO)
1622                                tile_flags |= RADEON_TXO_MICRO_TILE_X2;
1623
1624                        tmp = idx_value & ~(0x7 << 2);
1625                        tmp |= tile_flags;
1626                        ib[idx] = tmp + ((u32)reloc->gpu_offset);
1627                } else
1628                        ib[idx] = idx_value + ((u32)reloc->gpu_offset);
1629                track->textures[i].robj = reloc->robj;
1630                track->tex_dirty = true;
1631                break;
1632        case RADEON_PP_CUBIC_OFFSET_T0_0:
1633        case RADEON_PP_CUBIC_OFFSET_T0_1:
1634        case RADEON_PP_CUBIC_OFFSET_T0_2:
1635        case RADEON_PP_CUBIC_OFFSET_T0_3:
1636        case RADEON_PP_CUBIC_OFFSET_T0_4:
1637                i = (reg - RADEON_PP_CUBIC_OFFSET_T0_0) / 4;
1638                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1639                if (r) {
1640                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1641                                  idx, reg);
1642                        radeon_cs_dump_packet(p, pkt);
1643                        return r;
1644                }
1645                track->textures[0].cube_info[i].offset = idx_value;
1646                ib[idx] = idx_value + ((u32)reloc->gpu_offset);
1647                track->textures[0].cube_info[i].robj = reloc->robj;
1648                track->tex_dirty = true;
1649                break;
1650        case RADEON_PP_CUBIC_OFFSET_T1_0:
1651        case RADEON_PP_CUBIC_OFFSET_T1_1:
1652        case RADEON_PP_CUBIC_OFFSET_T1_2:
1653        case RADEON_PP_CUBIC_OFFSET_T1_3:
1654        case RADEON_PP_CUBIC_OFFSET_T1_4:
1655                i = (reg - RADEON_PP_CUBIC_OFFSET_T1_0) / 4;
1656                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1657                if (r) {
1658                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1659                                  idx, reg);
1660                        radeon_cs_dump_packet(p, pkt);
1661                        return r;
1662                }
1663                track->textures[1].cube_info[i].offset = idx_value;
1664                ib[idx] = idx_value + ((u32)reloc->gpu_offset);
1665                track->textures[1].cube_info[i].robj = reloc->robj;
1666                track->tex_dirty = true;
1667                break;
1668        case RADEON_PP_CUBIC_OFFSET_T2_0:
1669        case RADEON_PP_CUBIC_OFFSET_T2_1:
1670        case RADEON_PP_CUBIC_OFFSET_T2_2:
1671        case RADEON_PP_CUBIC_OFFSET_T2_3:
1672        case RADEON_PP_CUBIC_OFFSET_T2_4:
1673                i = (reg - RADEON_PP_CUBIC_OFFSET_T2_0) / 4;
1674                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1675                if (r) {
1676                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1677                                  idx, reg);
1678                        radeon_cs_dump_packet(p, pkt);
1679                        return r;
1680                }
1681                track->textures[2].cube_info[i].offset = idx_value;
1682                ib[idx] = idx_value + ((u32)reloc->gpu_offset);
1683                track->textures[2].cube_info[i].robj = reloc->robj;
1684                track->tex_dirty = true;
1685                break;
1686        case RADEON_RE_WIDTH_HEIGHT:
1687                track->maxy = ((idx_value >> 16) & 0x7FF);
1688                track->cb_dirty = true;
1689                track->zb_dirty = true;
1690                break;
1691        case RADEON_RB3D_COLORPITCH:
1692                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1693                if (r) {
1694                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1695                                  idx, reg);
1696                        radeon_cs_dump_packet(p, pkt);
1697                        return r;
1698                }
1699                if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1700                        if (reloc->tiling_flags & RADEON_TILING_MACRO)
1701                                tile_flags |= RADEON_COLOR_TILE_ENABLE;
1702                        if (reloc->tiling_flags & RADEON_TILING_MICRO)
1703                                tile_flags |= RADEON_COLOR_MICROTILE_ENABLE;
1704
1705                        tmp = idx_value & ~(0x7 << 16);
1706                        tmp |= tile_flags;
1707                        ib[idx] = tmp;
1708                } else
1709                        ib[idx] = idx_value;
1710
1711                track->cb[0].pitch = idx_value & RADEON_COLORPITCH_MASK;
1712                track->cb_dirty = true;
1713                break;
1714        case RADEON_RB3D_DEPTHPITCH:
1715                track->zb.pitch = idx_value & RADEON_DEPTHPITCH_MASK;
1716                track->zb_dirty = true;
1717                break;
1718        case RADEON_RB3D_CNTL:
1719                switch ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f) {
1720                case 7:
1721                case 8:
1722                case 9:
1723                case 11:
1724                case 12:
1725                        track->cb[0].cpp = 1;
1726                        break;
1727                case 3:
1728                case 4:
1729                case 15:
1730                        track->cb[0].cpp = 2;
1731                        break;
1732                case 6:
1733                        track->cb[0].cpp = 4;
1734                        break;
1735                default:
1736                        DRM_ERROR("Invalid color buffer format (%d) !\n",
1737                                  ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f));
1738                        return -EINVAL;
1739                }
1740                track->z_enabled = !!(idx_value & RADEON_Z_ENABLE);
1741                track->cb_dirty = true;
1742                track->zb_dirty = true;
1743                break;
1744        case RADEON_RB3D_ZSTENCILCNTL:
1745                switch (idx_value & 0xf) {
1746                case 0:
1747                        track->zb.cpp = 2;
1748                        break;
1749                case 2:
1750                case 3:
1751                case 4:
1752                case 5:
1753                case 9:
1754                case 11:
1755                        track->zb.cpp = 4;
1756                        break;
1757                default:
1758                        break;
1759                }
1760                track->zb_dirty = true;
1761                break;
1762        case RADEON_RB3D_ZPASS_ADDR:
1763                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1764                if (r) {
1765                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1766                                  idx, reg);
1767                        radeon_cs_dump_packet(p, pkt);
1768                        return r;
1769                }
1770                ib[idx] = idx_value + ((u32)reloc->gpu_offset);
1771                break;
1772        case RADEON_PP_CNTL:
1773                {
1774                        uint32_t temp = idx_value >> 4;
1775                        for (i = 0; i < track->num_texture; i++)
1776                                track->textures[i].enabled = !!(temp & (1 << i));
1777                        track->tex_dirty = true;
1778                }
1779                break;
1780        case RADEON_SE_VF_CNTL:
1781                track->vap_vf_cntl = idx_value;
1782                break;
1783        case RADEON_SE_VTX_FMT:
1784                track->vtx_size = r100_get_vtx_size(idx_value);
1785                break;
1786        case RADEON_PP_TEX_SIZE_0:
1787        case RADEON_PP_TEX_SIZE_1:
1788        case RADEON_PP_TEX_SIZE_2:
1789                i = (reg - RADEON_PP_TEX_SIZE_0) / 8;
1790                track->textures[i].width = (idx_value & RADEON_TEX_USIZE_MASK) + 1;
1791                track->textures[i].height = ((idx_value & RADEON_TEX_VSIZE_MASK) >> RADEON_TEX_VSIZE_SHIFT) + 1;
1792                track->tex_dirty = true;
1793                break;
1794        case RADEON_PP_TEX_PITCH_0:
1795        case RADEON_PP_TEX_PITCH_1:
1796        case RADEON_PP_TEX_PITCH_2:
1797                i = (reg - RADEON_PP_TEX_PITCH_0) / 8;
1798                track->textures[i].pitch = idx_value + 32;
1799                track->tex_dirty = true;
1800                break;
1801        case RADEON_PP_TXFILTER_0:
1802        case RADEON_PP_TXFILTER_1:
1803        case RADEON_PP_TXFILTER_2:
1804                i = (reg - RADEON_PP_TXFILTER_0) / 24;
1805                track->textures[i].num_levels = ((idx_value & RADEON_MAX_MIP_LEVEL_MASK)
1806                                                 >> RADEON_MAX_MIP_LEVEL_SHIFT);
1807                tmp = (idx_value >> 23) & 0x7;
1808                if (tmp == 2 || tmp == 6)
1809                        track->textures[i].roundup_w = false;
1810                tmp = (idx_value >> 27) & 0x7;
1811                if (tmp == 2 || tmp == 6)
1812                        track->textures[i].roundup_h = false;
1813                track->tex_dirty = true;
1814                break;
1815        case RADEON_PP_TXFORMAT_0:
1816        case RADEON_PP_TXFORMAT_1:
1817        case RADEON_PP_TXFORMAT_2:
1818                i = (reg - RADEON_PP_TXFORMAT_0) / 24;
1819                if (idx_value & RADEON_TXFORMAT_NON_POWER2) {
1820                        track->textures[i].use_pitch = 1;
1821                } else {
1822                        track->textures[i].use_pitch = 0;
1823                        track->textures[i].width = 1 << ((idx_value >> RADEON_TXFORMAT_WIDTH_SHIFT) & RADEON_TXFORMAT_WIDTH_MASK);
1824                        track->textures[i].height = 1 << ((idx_value >> RADEON_TXFORMAT_HEIGHT_SHIFT) & RADEON_TXFORMAT_HEIGHT_MASK);
1825                }
1826                if (idx_value & RADEON_TXFORMAT_CUBIC_MAP_ENABLE)
1827                        track->textures[i].tex_coord_type = 2;
1828                switch ((idx_value & RADEON_TXFORMAT_FORMAT_MASK)) {
1829                case RADEON_TXFORMAT_I8:
1830                case RADEON_TXFORMAT_RGB332:
1831                case RADEON_TXFORMAT_Y8:
1832                        track->textures[i].cpp = 1;
1833                        track->textures[i].compress_format = R100_TRACK_COMP_NONE;
1834                        break;
1835                case RADEON_TXFORMAT_AI88:
1836                case RADEON_TXFORMAT_ARGB1555:
1837                case RADEON_TXFORMAT_RGB565:
1838                case RADEON_TXFORMAT_ARGB4444:
1839                case RADEON_TXFORMAT_VYUY422:
1840                case RADEON_TXFORMAT_YVYU422:
1841                case RADEON_TXFORMAT_SHADOW16:
1842                case RADEON_TXFORMAT_LDUDV655:
1843                case RADEON_TXFORMAT_DUDV88:
1844                        track->textures[i].cpp = 2;
1845                        track->textures[i].compress_format = R100_TRACK_COMP_NONE;
1846                        break;
1847                case RADEON_TXFORMAT_ARGB8888:
1848                case RADEON_TXFORMAT_RGBA8888:
1849                case RADEON_TXFORMAT_SHADOW32:
1850                case RADEON_TXFORMAT_LDUDUV8888:
1851                        track->textures[i].cpp = 4;
1852                        track->textures[i].compress_format = R100_TRACK_COMP_NONE;
1853                        break;
1854                case RADEON_TXFORMAT_DXT1:
1855                        track->textures[i].cpp = 1;
1856                        track->textures[i].compress_format = R100_TRACK_COMP_DXT1;
1857                        break;
1858                case RADEON_TXFORMAT_DXT23:
1859                case RADEON_TXFORMAT_DXT45:
1860                        track->textures[i].cpp = 1;
1861                        track->textures[i].compress_format = R100_TRACK_COMP_DXT35;
1862                        break;
1863                }
1864                track->textures[i].cube_info[4].width = 1 << ((idx_value >> 16) & 0xf);
1865                track->textures[i].cube_info[4].height = 1 << ((idx_value >> 20) & 0xf);
1866                track->tex_dirty = true;
1867                break;
1868        case RADEON_PP_CUBIC_FACES_0:
1869        case RADEON_PP_CUBIC_FACES_1:
1870        case RADEON_PP_CUBIC_FACES_2:
1871                tmp = idx_value;
1872                i = (reg - RADEON_PP_CUBIC_FACES_0) / 4;
1873                for (face = 0; face < 4; face++) {
1874                        track->textures[i].cube_info[face].width = 1 << ((tmp >> (face * 8)) & 0xf);
1875                        track->textures[i].cube_info[face].height = 1 << ((tmp >> ((face * 8) + 4)) & 0xf);
1876                }
1877                track->tex_dirty = true;
1878                break;
1879        default:
1880                pr_err("Forbidden register 0x%04X in cs at %d\n", reg, idx);
1881                return -EINVAL;
1882        }
1883        return 0;
1884}
1885
1886int r100_cs_track_check_pkt3_indx_buffer(struct radeon_cs_parser *p,
1887                                         struct radeon_cs_packet *pkt,
1888                                         struct radeon_bo *robj)
1889{
1890        unsigned idx;
1891        u32 value;
1892        idx = pkt->idx + 1;
1893        value = radeon_get_ib_value(p, idx + 2);
1894        if ((value + 1) > radeon_bo_size(robj)) {
1895                DRM_ERROR("[drm] Buffer too small for PACKET3 INDX_BUFFER "
1896                          "(need %u have %lu) !\n",
1897                          value + 1,
1898                          radeon_bo_size(robj));
1899                return -EINVAL;
1900        }
1901        return 0;
1902}
1903
1904static int r100_packet3_check(struct radeon_cs_parser *p,
1905                              struct radeon_cs_packet *pkt)
1906{
1907        struct radeon_bo_list *reloc;
1908        struct r100_cs_track *track;
1909        unsigned idx;
1910        volatile uint32_t *ib;
1911        int r;
1912
1913        ib = p->ib.ptr;
1914        idx = pkt->idx + 1;
1915        track = (struct r100_cs_track *)p->track;
1916        switch (pkt->opcode) {
1917        case PACKET3_3D_LOAD_VBPNTR:
1918                r = r100_packet3_load_vbpntr(p, pkt, idx);
1919                if (r)
1920                        return r;
1921                break;
1922        case PACKET3_INDX_BUFFER:
1923                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1924                if (r) {
1925                        DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
1926                        radeon_cs_dump_packet(p, pkt);
1927                        return r;
1928                }
1929                ib[idx+1] = radeon_get_ib_value(p, idx+1) + ((u32)reloc->gpu_offset);
1930                r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj);
1931                if (r) {
1932                        return r;
1933                }
1934                break;
1935        case 0x23:
1936                /* 3D_RNDR_GEN_INDX_PRIM on r100/r200 */
1937                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1938                if (r) {
1939                        DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
1940                        radeon_cs_dump_packet(p, pkt);
1941                        return r;
1942                }
1943                ib[idx] = radeon_get_ib_value(p, idx) + ((u32)reloc->gpu_offset);
1944                track->num_arrays = 1;
1945                track->vtx_size = r100_get_vtx_size(radeon_get_ib_value(p, idx + 2));
1946
1947                track->arrays[0].robj = reloc->robj;
1948                track->arrays[0].esize = track->vtx_size;
1949
1950                track->max_indx = radeon_get_ib_value(p, idx+1);
1951
1952                track->vap_vf_cntl = radeon_get_ib_value(p, idx+3);
1953                track->immd_dwords = pkt->count - 1;
1954                r = r100_cs_track_check(p->rdev, track);
1955                if (r)
1956                        return r;
1957                break;
1958        case PACKET3_3D_DRAW_IMMD:
1959                if (((radeon_get_ib_value(p, idx + 1) >> 4) & 0x3) != 3) {
1960                        DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
1961                        return -EINVAL;
1962                }
1963                track->vtx_size = r100_get_vtx_size(radeon_get_ib_value(p, idx + 0));
1964                track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
1965                track->immd_dwords = pkt->count - 1;
1966                r = r100_cs_track_check(p->rdev, track);
1967                if (r)
1968                        return r;
1969                break;
1970                /* triggers drawing using in-packet vertex data */
1971        case PACKET3_3D_DRAW_IMMD_2:
1972                if (((radeon_get_ib_value(p, idx) >> 4) & 0x3) != 3) {
1973                        DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
1974                        return -EINVAL;
1975                }
1976                track->vap_vf_cntl = radeon_get_ib_value(p, idx);
1977                track->immd_dwords = pkt->count;
1978                r = r100_cs_track_check(p->rdev, track);
1979                if (r)
1980                        return r;
1981                break;
1982                /* triggers drawing using in-packet vertex data */
1983        case PACKET3_3D_DRAW_VBUF_2:
1984                track->vap_vf_cntl = radeon_get_ib_value(p, idx);
1985                r = r100_cs_track_check(p->rdev, track);
1986                if (r)
1987                        return r;
1988                break;
1989                /* triggers drawing of vertex buffers setup elsewhere */
1990        case PACKET3_3D_DRAW_INDX_2:
1991                track->vap_vf_cntl = radeon_get_ib_value(p, idx);
1992                r = r100_cs_track_check(p->rdev, track);
1993                if (r)
1994                        return r;
1995                break;
1996                /* triggers drawing using indices to vertex buffer */
1997        case PACKET3_3D_DRAW_VBUF:
1998                track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
1999                r = r100_cs_track_check(p->rdev, track);
2000                if (r)
2001                        return r;
2002                break;
2003                /* triggers drawing of vertex buffers setup elsewhere */
2004        case PACKET3_3D_DRAW_INDX:
2005                track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
2006                r = r100_cs_track_check(p->rdev, track);
2007                if (r)
2008                        return r;
2009                break;
2010                /* triggers drawing using indices to vertex buffer */
2011        case PACKET3_3D_CLEAR_HIZ:
2012        case PACKET3_3D_CLEAR_ZMASK:
2013                if (p->rdev->hyperz_filp != p->filp)
2014                        return -EINVAL;
2015                break;
2016        case PACKET3_NOP:
2017                break;
2018        default:
2019                DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
2020                return -EINVAL;
2021        }
2022        return 0;
2023}
2024
2025int r100_cs_parse(struct radeon_cs_parser *p)
2026{
2027        struct radeon_cs_packet pkt;
2028        struct r100_cs_track *track;
2029        int r;
2030
2031        track = kzalloc(sizeof(*track), GFP_KERNEL);
2032        if (!track)
2033                return -ENOMEM;
2034        r100_cs_track_clear(p->rdev, track);
2035        p->track = track;
2036        do {
2037                r = radeon_cs_packet_parse(p, &pkt, p->idx);
2038                if (r) {
2039                        return r;
2040                }
2041                p->idx += pkt.count + 2;
2042                switch (pkt.type) {
2043                case RADEON_PACKET_TYPE0:
2044                        if (p->rdev->family >= CHIP_R200)
2045                                r = r100_cs_parse_packet0(p, &pkt,
2046                                        p->rdev->config.r100.reg_safe_bm,
2047                                        p->rdev->config.r100.reg_safe_bm_size,
2048                                        &r200_packet0_check);
2049                        else
2050                                r = r100_cs_parse_packet0(p, &pkt,
2051                                        p->rdev->config.r100.reg_safe_bm,
2052                                        p->rdev->config.r100.reg_safe_bm_size,
2053                                        &r100_packet0_check);
2054                        break;
2055                case RADEON_PACKET_TYPE2:
2056                        break;
2057                case RADEON_PACKET_TYPE3:
2058                        r = r100_packet3_check(p, &pkt);
2059                        break;
2060                default:
2061                        DRM_ERROR("Unknown packet type %d !\n",
2062                                  pkt.type);
2063                        return -EINVAL;
2064                }
2065                if (r)
2066                        return r;
2067        } while (p->idx < p->chunk_ib->length_dw);
2068        return 0;
2069}
2070
/*
 * Dump the tracked state of one texture unit to the log via DRM_ERROR,
 * one field per line.  Called by the cube and texture size checks in this
 * file right after they report a buffer that is too small, so the values
 * that led to the computed size are visible next to the error.
 *
 * @t: tracked texture state to print; only read, never modified.
 */
2071static void r100_cs_track_texture_print(struct r100_cs_track_texture *t)
2072{
2073        DRM_ERROR("pitch                      %d\n", t->pitch);
2074        DRM_ERROR("use_pitch                  %d\n", t->use_pitch);
2075        DRM_ERROR("width                      %d\n", t->width);
2076        DRM_ERROR("width_11                   %d\n", t->width_11);
2077        DRM_ERROR("height                     %d\n", t->height);
2078        DRM_ERROR("height_11                  %d\n", t->height_11);
2079        DRM_ERROR("num levels                 %d\n", t->num_levels);
2080        DRM_ERROR("depth                      %d\n", t->txdepth);
        /* the label says "bpp" but the field printed is cpp */
2081        DRM_ERROR("bpp                        %d\n", t->cpp);
2082        DRM_ERROR("coordinate type            %d\n", t->tex_coord_type);
2083        DRM_ERROR("width round to power of 2  %d\n", t->roundup_w);
2084        DRM_ERROR("height round to power of 2 %d\n", t->roundup_h);
2085        DRM_ERROR("compress format            %d\n", t->compress_format);
2086}
2087
2088static int r100_track_compress_size(int compress_format, int w, int h)
2089{
2090        int block_width, block_height, block_bytes;
2091        int wblocks, hblocks;
2092        int min_wblocks;
2093        int sz;
2094
2095        block_width = 4;
2096        block_height = 4;
2097
2098        switch (compress_format) {
2099        case R100_TRACK_COMP_DXT1:
2100                block_bytes = 8;
2101                min_wblocks = 4;
2102                break;
2103        default:
2104        case R100_TRACK_COMP_DXT35:
2105                block_bytes = 16;
2106                min_wblocks = 2;
2107                break;
2108        }
2109
2110        hblocks = (h + block_height - 1) / block_height;
2111        wblocks = (w + block_width - 1) / block_width;
2112        if (wblocks < min_wblocks)
2113                wblocks = min_wblocks;
2114        sz = wblocks * hblocks * block_bytes;
2115        return sz;
2116}
2117
2118static int r100_cs_track_cube(struct radeon_device *rdev,
2119                              struct r100_cs_track *track, unsigned idx)
2120{
2121        unsigned face, w, h;
2122        struct radeon_bo *cube_robj;
2123        unsigned long size;
2124        unsigned compress_format = track->textures[idx].compress_format;
2125
2126        for (face = 0; face < 5; face++) {
2127                cube_robj = track->textures[idx].cube_info[face].robj;
2128                w = track->textures[idx].cube_info[face].width;
2129                h = track->textures[idx].cube_info[face].height;
2130
2131                if (compress_format) {
2132                        size = r100_track_compress_size(compress_format, w, h);
2133                } else
2134                        size = w * h;
2135                size *= track->textures[idx].cpp;
2136
2137                size += track->textures[idx].cube_info[face].offset;
2138
2139                if (size > radeon_bo_size(cube_robj)) {
2140                        DRM_ERROR("Cube texture offset greater than object size %lu %lu\n",
2141                                  size, radeon_bo_size(cube_robj));
2142                        r100_cs_track_texture_print(&track->textures[idx]);
2143                        return -1;
2144                }
2145        }
2146        return 0;
2147}
2148
2149static int r100_cs_track_texture_check(struct radeon_device *rdev,
2150                                       struct r100_cs_track *track)
2151{
2152        struct radeon_bo *robj;
2153        unsigned long size;
2154        unsigned u, i, w, h, d;
2155        int ret;
2156
2157        for (u = 0; u < track->num_texture; u++) {
2158                if (!track->textures[u].enabled)
2159                        continue;
2160                if (track->textures[u].lookup_disable)
2161                        continue;
2162                robj = track->textures[u].robj;
2163                if (robj == NULL) {
2164                        DRM_ERROR("No texture bound to unit %u\n", u);
2165                        return -EINVAL;
2166                }
2167                size = 0;
2168                for (i = 0; i <= track->textures[u].num_levels; i++) {
2169                        if (track->textures[u].use_pitch) {
2170                                if (rdev->family < CHIP_R300)
2171                                        w = (track->textures[u].pitch / track->textures[u].cpp) / (1 << i);
2172                                else
2173                                        w = track->textures[u].pitch / (1 << i);
2174                        } else {
2175                                w = track->textures[u].width;
2176                                if (rdev->family >= CHIP_RV515)
2177                                        w |= track->textures[u].width_11;
2178                                w = w / (1 << i);
2179                                if (track->textures[u].roundup_w)
2180                                        w = roundup_pow_of_two(w);
2181                        }
2182                        h = track->textures[u].height;
2183                        if (rdev->family >= CHIP_RV515)
2184                                h |= track->textures[u].height_11;
2185                        h = h / (1 << i);
2186                        if (track->textures[u].roundup_h)
2187                                h = roundup_pow_of_two(h);
2188                        if (track->textures[u].tex_coord_type == 1) {
2189                                d = (1 << track->textures[u].txdepth) / (1 << i);
2190                                if (!d)
2191                                        d = 1;
2192                        } else {
2193                                d = 1;
2194                        }
2195                        if (track->textures[u].compress_format) {
2196
2197                                size += r100_track_compress_size(track->textures[u].compress_format, w, h) * d;
2198                                /* compressed textures are block based */
2199                        } else
2200                                size += w * h * d;
2201                }
2202                size *= track->textures[u].cpp;
2203
2204                switch (track->textures[u].tex_coord_type) {
2205                case 0:
2206                case 1:
2207                        break;
2208                case 2:
2209                        if (track->separate_cube) {
2210                                ret = r100_cs_track_cube(rdev, track, u);
2211                                if (ret)
2212                                        return ret;
2213                        } else
2214                                size *= 6;
2215                        break;
2216                default:
2217                        DRM_ERROR("Invalid texture coordinate type %u for unit "
2218                                  "%u\n", track->textures[u].tex_coord_type, u);
2219                        return -EINVAL;
2220                }
2221                if (size > radeon_bo_size(robj)) {
2222                        DRM_ERROR("Texture of unit %u needs %lu bytes but is "
2223                                  "%lu\n", u, size, radeon_bo_size(robj));
2224                        r100_cs_track_texture_print(&track->textures[u]);
2225                        return -EINVAL;
2226                }
2227        }
2228        return 0;
2229}
2230
2231int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
2232{
2233        unsigned i;
2234        unsigned long size;
2235        unsigned prim_walk;
2236        unsigned nverts;
2237        unsigned num_cb = track->cb_dirty ? track->num_cb : 0;
2238
2239        if (num_cb && !track->zb_cb_clear && !track->color_channel_mask &&
2240            !track->blend_read_enable)
2241                num_cb = 0;
2242
2243        for (i = 0; i < num_cb; i++) {
2244                if (track->cb[i].robj == NULL) {
2245                        DRM_ERROR("[drm] No buffer for color buffer %d !\n", i);
2246                        return -EINVAL;
2247                }
2248                size = track->cb[i].pitch * track->cb[i].cpp * track->maxy;
2249                size += track->cb[i].offset;
2250                if (size > radeon_bo_size(track->cb[i].robj)) {
2251                        DRM_ERROR("[drm] Buffer too small for color buffer %d "
2252                                  "(need %lu have %lu) !\n", i, size,
2253                                  radeon_bo_size(track->cb[i].robj));
2254                        DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n",
2255                                  i, track->cb[i].pitch, track->cb[i].cpp,
2256                                  track->cb[i].offset, track->maxy);
2257                        return -EINVAL;
2258                }
2259        }
2260        track->cb_dirty = false;
2261
2262        if (track->zb_dirty && track->z_enabled) {
2263                if (track->zb.robj == NULL) {
2264                        DRM_ERROR("[drm] No buffer for z buffer !\n");
2265                        return -EINVAL;
2266                }
2267                size = track->zb.pitch * track->zb.cpp * track->maxy;
2268                size += track->zb.offset;
2269                if (size > radeon_bo_size(track->zb.robj)) {
2270                        DRM_ERROR("[drm] Buffer too small for z buffer "
2271                                  "(need %lu have %lu) !\n", size,
2272                                  radeon_bo_size(track->zb.robj));
2273                        DRM_ERROR("[drm] zbuffer (%u %u %u %u)\n",
2274                                  track->zb.pitch, track->zb.cpp,
2275                                  track->zb.offset, track->maxy);
2276                        return -EINVAL;
2277                }
2278        }
2279        track->zb_dirty = false;
2280
2281        if (track->aa_dirty && track->aaresolve) {
2282                if (track->aa.robj == NULL) {
2283                        DRM_ERROR("[drm] No buffer for AA resolve buffer %d !\n", i);
2284                        return -EINVAL;
2285                }
2286                /* I believe the format comes from colorbuffer0. */
2287                size = track->aa.pitch * track->cb[0].cpp * track->maxy;
2288                size += track->aa.offset;
2289                if (size > radeon_bo_size(track->aa.robj)) {
2290                        DRM_ERROR("[drm] Buffer too small for AA resolve buffer %d "
2291                                  "(need %lu have %lu) !\n", i, size,
2292                                  radeon_bo_size(track->aa.robj));
2293                        DRM_ERROR("[drm] AA resolve buffer %d (%u %u %u %u)\n",
2294                                  i, track->aa.pitch, track->cb[0].cpp,
2295                                  track->aa.offset, track->maxy);
2296                        return -EINVAL;
2297                }
2298        }
2299        track->aa_dirty = false;
2300
2301        prim_walk = (track->vap_vf_cntl >> 4) & 0x3;
2302        if (track->vap_vf_cntl & (1 << 14)) {
2303                nverts = track->vap_alt_nverts;
2304        } else {
2305                nverts = (track->vap_vf_cntl >> 16) & 0xFFFF;
2306        }
2307        switch (prim_walk) {
2308        case 1:
2309                for (i = 0; i < track->num_arrays; i++) {
2310                        size = track->arrays[i].esize * track->max_indx * 4;
2311                        if (track->arrays[i].robj == NULL) {
2312                                DRM_ERROR("(PW %u) Vertex array %u no buffer "
2313                                          "bound\n", prim_walk, i);
2314                                return -EINVAL;
2315                        }
2316                        if (size > radeon_bo_size(track->arrays[i].robj)) {
2317                                dev_err(rdev->dev, "(PW %u) Vertex array %u "
2318                                        "need %lu dwords have %lu dwords\n",
2319                                        prim_walk, i, size >> 2,
2320                                        radeon_bo_size(track->arrays[i].robj)
2321                                        >> 2);
2322                                DRM_ERROR("Max indices %u\n", track->max_indx);
2323                                return -EINVAL;
2324                        }
2325                }
2326                break;
2327        case 2:
2328                for (i = 0; i < track->num_arrays; i++) {
2329                        size = track->arrays[i].esize * (nverts - 1) * 4;
2330                        if (track->arrays[i].robj == NULL) {
2331                                DRM_ERROR("(PW %u) Vertex array %u no buffer "
2332                                          "bound\n", prim_walk, i);
2333                                return -EINVAL;
2334                        }
2335                        if (size > radeon_bo_size(track->arrays[i].robj)) {
2336                                dev_err(rdev->dev, "(PW %u) Vertex array %u "
2337                                        "need %lu dwords have %lu dwords\n",
2338                                        prim_walk, i, size >> 2,
2339                                        radeon_bo_size(track->arrays[i].robj)
2340                                        >> 2);
2341                                return -EINVAL;
2342                        }
2343                }
2344                break;
2345        case 3:
2346                size = track->vtx_size * nverts;
2347                if (size != track->immd_dwords) {
2348                        DRM_ERROR("IMMD draw %u dwors but needs %lu dwords\n",
2349                                  track->immd_dwords, size);
2350                        DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n",
2351                                  nverts, track->vtx_size);
2352                        return -EINVAL;
2353                }
2354                break;
2355        default:
2356                DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n",
2357                          prim_walk);
2358                return -EINVAL;
2359        }
2360
2361        if (track->tex_dirty) {
2362                track->tex_dirty = false;
2363                return r100_cs_track_texture_check(rdev, track);
2364        }
2365        return 0;
2366}
2367
2368void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track)
2369{
2370        unsigned i, face;
2371
2372        track->cb_dirty = true;
2373        track->zb_dirty = true;
2374        track->tex_dirty = true;
2375        track->aa_dirty = true;
2376
2377        if (rdev->family < CHIP_R300) {
2378                track->num_cb = 1;
2379                if (rdev->family <= CHIP_RS200)
2380                        track->num_texture = 3;
2381                else
2382                        track->num_texture = 6;
2383                track->maxy = 2048;
2384                track->separate_cube = 1;
2385        } else {
2386                track->num_cb = 4;
2387                track->num_texture = 16;
2388                track->maxy = 4096;
2389                track->separate_cube = 0;
2390                track->aaresolve = false;
2391                track->aa.robj = NULL;
2392        }
2393
2394        for (i = 0; i < track->num_cb; i++) {
2395                track->cb[i].robj = NULL;
2396                track->cb[i].pitch = 8192;
2397                track->cb[i].cpp = 16;
2398                track->cb[i].offset = 0;
2399        }
2400        track->z_enabled = true;
2401        track->zb.robj = NULL;
2402        track->zb.pitch = 8192;
2403        track->zb.cpp = 4;
2404        track->zb.offset = 0;
2405        track->vtx_size = 0x7F;
2406        track->immd_dwords = 0xFFFFFFFFUL;
2407        track->num_arrays = 11;
2408        track->max_indx = 0x00FFFFFFUL;
2409        for (i = 0; i < track->num_arrays; i++) {
2410                track->arrays[i].robj = NULL;
2411                track->arrays[i].esize = 0x7F;
2412        }
2413        for (i = 0; i < track->num_texture; i++) {
2414                track->textures[i].compress_format = R100_TRACK_COMP_NONE;
2415                track->textures[i].pitch = 16536;
2416                track->textures[i].width = 16536;
2417                track->textures[i].height = 16536;
2418                track->textures[i].width_11 = 1 << 11;
2419                track->textures[i].height_11 = 1 << 11;
2420                track->textures[i].num_levels = 12;
2421                if (rdev->family <= CHIP_RS200) {
2422                        track->textures[i].tex_coord_type = 0;
2423                        track->textures[i].txdepth = 0;
2424                } else {
2425                        track->textures[i].txdepth = 16;
2426                        track->textures[i].tex_coord_type = 1;
2427                }
2428                track->textures[i].cpp = 64;
2429                track->textures[i].robj = NULL;
2430                /* CS IB emission code makes sure texture unit are disabled */
2431                track->textures[i].enabled = false;
2432                track->textures[i].lookup_disable = false;
2433                track->textures[i].roundup_w = true;
2434                track->textures[i].roundup_h = true;
2435                if (track->separate_cube)
2436                        for (face = 0; face < 5; face++) {
2437                                track->textures[i].cube_info[face].robj = NULL;
2438                                track->textures[i].cube_info[face].width = 16536;
2439                                track->textures[i].cube_info[face].height = 16536;
2440                                track->textures[i].cube_info[face].offset = 0;
2441                        }
2442        }
2443}
2444
2445/*
2446 * Global GPU functions
2447 */
2448static void r100_errata(struct radeon_device *rdev)
2449{
2450        rdev->pll_errata = 0;
2451
2452        if (rdev->family == CHIP_RV200 || rdev->family == CHIP_RS200) {
2453                rdev->pll_errata |= CHIP_ERRATA_PLL_DUMMYREADS;
2454        }
2455
2456        if (rdev->family == CHIP_RV100 ||
2457            rdev->family == CHIP_RS100 ||
2458            rdev->family == CHIP_RS200) {
2459                rdev->pll_errata |= CHIP_ERRATA_PLL_DELAY;
2460        }
2461}
2462
2463static int r100_rbbm_fifo_wait_for_entry(struct radeon_device *rdev, unsigned n)
2464{
2465        unsigned i;
2466        uint32_t tmp;
2467
2468        for (i = 0; i < rdev->usec_timeout; i++) {
2469                tmp = RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK;
2470                if (tmp >= n) {
2471                        return 0;
2472                }
2473                DRM_UDELAY(1);
2474        }
2475        return -1;
2476}
2477
2478int r100_gui_wait_for_idle(struct radeon_device *rdev)
2479{
2480        unsigned i;
2481        uint32_t tmp;
2482
2483        if (r100_rbbm_fifo_wait_for_entry(rdev, 64)) {
2484                pr_warn("radeon: wait for empty RBBM fifo failed! Bad things might happen.\n");
2485        }
2486        for (i = 0; i < rdev->usec_timeout; i++) {
2487                tmp = RREG32(RADEON_RBBM_STATUS);
2488                if (!(tmp & RADEON_RBBM_ACTIVE)) {
2489                        return 0;
2490                }
2491                DRM_UDELAY(1);
2492        }
2493        return -1;
2494}
2495
2496int r100_mc_wait_for_idle(struct radeon_device *rdev)
2497{
2498        unsigned i;
2499        uint32_t tmp;
2500
2501        for (i = 0; i < rdev->usec_timeout; i++) {
2502                /* read MC_STATUS */
2503                tmp = RREG32(RADEON_MC_STATUS);
2504                if (tmp & RADEON_MC_IDLE) {
2505                        return 0;
2506                }
2507                DRM_UDELAY(1);
2508        }
2509        return -1;
2510}
2511
2512bool r100_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
2513{
2514        u32 rbbm_status;
2515
2516        rbbm_status = RREG32(R_000E40_RBBM_STATUS);
2517        if (!G_000E40_GUI_ACTIVE(rbbm_status)) {
2518                radeon_ring_lockup_update(rdev, ring);
2519                return false;
2520        }
2521        return radeon_ring_test_lockup(rdev, ring);
2522}
2523
2524/* required on r1xx, r2xx, r300, r(v)350, r420/r481, rs400/rs480 */
2525void r100_enable_bm(struct radeon_device *rdev)
2526{
2527        uint32_t tmp;
2528        /* Enable bus mastering */
2529        tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS;
2530        WREG32(RADEON_BUS_CNTL, tmp);
2531}
2532
2533void r100_bm_disable(struct radeon_device *rdev)
2534{
2535        u32 tmp;
2536
2537        /* disable bus mastering */
2538        tmp = RREG32(R_000030_BUS_CNTL);
2539        WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000044);
2540        mdelay(1);
2541        WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000042);
2542        mdelay(1);
2543        WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000040);
2544        tmp = RREG32(RADEON_BUS_CNTL);
2545        mdelay(1);
2546        pci_clear_master(rdev->pdev);
2547        mdelay(1);
2548}
2549
/*
 * Soft-reset the GPU.
 *
 * @rdev: radeon device to reset
 * @hard: not used by this implementation
 *
 * Does nothing and returns 0 when RBBM_STATUS does not report the GUI as
 * active.  Otherwise the memory controller clients are stopped, the CP is
 * halted and its ring pointers zeroed, bus mastering is disabled, and two
 * soft resets are pulsed through RBBM_SOFT_RESET: first SE/RE/PP/RB, then
 * the CP.  PCI state is saved before and restored after the resets.
 *
 * Returns 0 when the SE, RE, TAM and PB busy bits are all clear in the
 * RBBM_STATUS value sampled after the CP reset, -1 otherwise.  The memory
 * controller state is resumed in both cases.
 */
int r100_asic_reset(struct radeon_device *rdev, bool hard)
{
        struct r100_mc_save save;
        u32 status, tmp;
        int ret = 0;

        /* Nothing to reset if the GUI is not active. */
        status = RREG32(R_000E40_RBBM_STATUS);
        if (!G_000E40_GUI_ACTIVE(status)) {
                return 0;
        }
        r100_mc_stop(rdev, &save);
        status = RREG32(R_000E40_RBBM_STATUS);
        dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
        /* stop CP */
        WREG32(RADEON_CP_CSQ_CNTL, 0);
        /*
         * Temporarily set RB_RPTR_WR_ENA so the read pointer can be written,
         * zero both ring pointers, then put CP_RB_CNTL back as it was.
         */
        tmp = RREG32(RADEON_CP_RB_CNTL);
        WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA);
        WREG32(RADEON_CP_RB_RPTR_WR, 0);
        WREG32(RADEON_CP_RB_WPTR, 0);
        WREG32(RADEON_CP_RB_CNTL, tmp);
        /* save PCI state */
        pci_save_state(rdev->pdev);
        /* disable bus mastering */
        r100_bm_disable(rdev);
        /* reset SE, RE, PP and RB: assert, read back, hold 500 ms, release */
        WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_SE(1) |
                                        S_0000F0_SOFT_RESET_RE(1) |
                                        S_0000F0_SOFT_RESET_PP(1) |
                                        S_0000F0_SOFT_RESET_RB(1));
        RREG32(R_0000F0_RBBM_SOFT_RESET);
        mdelay(500);
        WREG32(R_0000F0_RBBM_SOFT_RESET, 0);
        mdelay(1);
        status = RREG32(R_000E40_RBBM_STATUS);
        dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
        /* reset CP */
        WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_CP(1));
        RREG32(R_0000F0_RBBM_SOFT_RESET);
        mdelay(500);
        WREG32(R_0000F0_RBBM_SOFT_RESET, 0);
        mdelay(1);
        /* This sample is the one the success check below looks at. */
        status = RREG32(R_000E40_RBBM_STATUS);
        dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
        /* restore PCI & busmastering */
        pci_restore_state(rdev->pdev);
        r100_enable_bm(rdev);
        /* Check if GPU is idle */
        if (G_000E40_SE_BUSY(status) || G_000E40_RE_BUSY(status) ||
                G_000E40_TAM_BUSY(status) || G_000E40_PB_BUSY(status)) {
                dev_err(rdev->dev, "failed to reset GPU\n");
                ret = -1;
        } else
                dev_info(rdev->dev, "GPU reset succeed\n");
        /* Resume the memory controller whether or not the reset worked. */
        r100_mc_resume(rdev, &save);
        return ret;
}
2605
2606void r100_set_common_regs(struct radeon_device *rdev)
2607{
2608        struct drm_device *dev = rdev->ddev;
2609        bool force_dac2 = false;
2610        u32 tmp;
2611
2612        /* set these so they don't interfere with anything */
2613        WREG32(RADEON_OV0_SCALE_CNTL, 0);
2614        WREG32(RADEON_SUBPIC_CNTL, 0);
2615        WREG32(RADEON_VIPH_CONTROL, 0);
2616        WREG32(RADEON_I2C_CNTL_1, 0);
2617        WREG32(RADEON_DVI_I2C_CNTL_1, 0);
2618        WREG32(RADEON_CAP0_TRIG_CNTL, 0);
2619        WREG32(RADEON_CAP1_TRIG_CNTL, 0);
2620
2621        /* always set up dac2 on rn50 and some rv100 as lots
2622         * of servers seem to wire it up to a VGA port but
2623         * don't report it in the bios connector
2624         * table.
2625         */
2626        switch (dev->pdev->device) {
2627                /* RN50 */
2628        case 0x515e:
2629        case 0x5969:
2630                force_dac2 = true;
2631                break;
2632                /* RV100*/
2633        case 0x5159:
2634        case 0x515a:
2635                /* DELL triple head servers */
2636                if ((dev->pdev->subsystem_vendor == 0x1028 /* DELL */) &&
2637                    ((dev->pdev->subsystem_device == 0x016c) ||
2638                     (dev->pdev->subsystem_device == 0x016d) ||
2639                     (dev->pdev->subsystem_device == 0x016e) ||
2640                     (dev->pdev->subsystem_device == 0x016f) ||
2641                     (dev->pdev->subsystem_device == 0x0170) ||
2642                     (dev->pdev->subsystem_device == 0x017d) ||
2643                     (dev->pdev->subsystem_device == 0x017e) ||
2644                     (dev->pdev->subsystem_device == 0x0183) ||
2645                     (dev->pdev->subsystem_device == 0x018a) ||
2646                     (dev->pdev->subsystem_device == 0x019a)))
2647                        force_dac2 = true;
2648                break;
2649        }
2650
2651        if (force_dac2) {
2652                u32 disp_hw_debug = RREG32(RADEON_DISP_HW_DEBUG);
2653                u32 tv_dac_cntl = RREG32(RADEON_TV_DAC_CNTL);
2654                u32 dac2_cntl = RREG32(RADEON_DAC_CNTL2);
2655
2656                /* For CRT on DAC2, don't turn it on if BIOS didn't
2657                   enable it, even it's detected.
2658                */
2659
2660                /* force it to crtc0 */
2661                dac2_cntl &= ~RADEON_DAC2_DAC_CLK_SEL;
2662                dac2_cntl |= RADEON_DAC2_DAC2_CLK_SEL;
2663                disp_hw_debug |= RADEON_CRT2_DISP1_SEL;
2664
2665                /* set up the TV DAC */
2666                tv_dac_cntl &= ~(RADEON_TV_DAC_PEDESTAL |
2667                                 RADEON_TV_DAC_STD_MASK |
2668                                 RADEON_TV_DAC_RDACPD |
2669                                 RADEON_TV_DAC_GDACPD |
2670                                 RADEON_TV_DAC_BDACPD |
2671                                 RADEON_TV_DAC_BGADJ_MASK |
2672                                 RADEON_TV_DAC_DACADJ_MASK);
2673                tv_dac_cntl |= (RADEON_TV_DAC_NBLANK |
2674                                RADEON_TV_DAC_NHOLD |
2675                                RADEON_TV_DAC_STD_PS2 |
2676                                (0x58 << 16));
2677
2678                WREG32(RADEON_TV_DAC_CNTL, tv_dac_cntl);
2679                WREG32(RADEON_DISP_HW_DEBUG, disp_hw_debug);
2680                WREG32(RADEON_DAC_CNTL2, dac2_cntl);
2681        }
2682
2683        /* switch PM block to ACPI mode */
2684        tmp = RREG32_PLL(RADEON_PLL_PWRMGT_CNTL);
2685        tmp &= ~RADEON_PM_MODE_SEL;
2686        WREG32_PLL(RADEON_PLL_PWRMGT_CNTL, tmp);
2687
2688}
2689
2690/*
2691 * VRAM info
2692 */
2693static void r100_vram_get_type(struct radeon_device *rdev)
2694{
2695        uint32_t tmp;
2696
2697        rdev->mc.vram_is_ddr = false;
2698        if (rdev->flags & RADEON_IS_IGP)
2699                rdev->mc.vram_is_ddr = true;
2700        else if (RREG32(RADEON_MEM_SDRAM_MODE_REG) & RADEON_MEM_CFG_TYPE_DDR)
2701                rdev->mc.vram_is_ddr = true;
2702        if ((rdev->family == CHIP_RV100) ||
2703            (rdev->family == CHIP_RS100) ||
2704            (rdev->family == CHIP_RS200)) {
2705                tmp = RREG32(RADEON_MEM_CNTL);
2706                if (tmp & RV100_HALF_MODE) {
2707                        rdev->mc.vram_width = 32;
2708                } else {
2709                        rdev->mc.vram_width = 64;
2710                }
2711                if (rdev->flags & RADEON_SINGLE_CRTC) {
2712                        rdev->mc.vram_width /= 4;
2713                        rdev->mc.vram_is_ddr = true;
2714                }
2715        } else if (rdev->family <= CHIP_RV280) {
2716                tmp = RREG32(RADEON_MEM_CNTL);
2717                if (tmp & RADEON_MEM_NUM_CHANNELS_MASK) {
2718                        rdev->mc.vram_width = 128;
2719                } else {
2720                        rdev->mc.vram_width = 64;
2721                }
2722        } else {
2723                /* newer IGPs */
2724                rdev->mc.vram_width = 128;
2725        }
2726}
2727
2728static u32 r100_get_accessible_vram(struct radeon_device *rdev)
2729{
2730        u32 aper_size;
2731        u8 byte;
2732
2733        aper_size = RREG32(RADEON_CONFIG_APER_SIZE);
2734
2735        /* Set HDP_APER_CNTL only on cards that are known not to be broken,
2736         * that is has the 2nd generation multifunction PCI interface
2737         */
2738        if (rdev->family == CHIP_RV280 ||
2739            rdev->family >= CHIP_RV350) {
2740                WREG32_P(RADEON_HOST_PATH_CNTL, RADEON_HDP_APER_CNTL,
2741                       ~RADEON_HDP_APER_CNTL);
2742                DRM_INFO("Generation 2 PCI interface, using max accessible memory\n");
2743                return aper_size * 2;
2744        }
2745
2746        /* Older cards have all sorts of funny issues to deal with. First
2747         * check if it's a multifunction card by reading the PCI config
2748         * header type... Limit those to one aperture size
2749         */
2750        pci_read_config_byte(rdev->pdev, 0xe, &byte);
2751        if (byte & 0x80) {
2752                DRM_INFO("Generation 1 PCI interface in multifunction mode\n");
2753                DRM_INFO("Limiting VRAM to one aperture\n");
2754                return aper_size;
2755        }
2756
2757        /* Single function older card. We read HDP_APER_CNTL to see how the BIOS
2758         * have set it up. We don't write this as it's broken on some ASICs but
2759         * we expect the BIOS to have done the right thing (might be too optimistic...)
2760         */
2761        if (RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL)
2762                return aper_size * 2;
2763        return aper_size;
2764}
2765
2766void r100_vram_init_sizes(struct radeon_device *rdev)
2767{
2768        u64 config_aper_size;
2769
2770        /* work out accessible VRAM */
2771        rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
2772        rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
2773        rdev->mc.visible_vram_size = r100_get_accessible_vram(rdev);
2774        /* FIXME we don't use the second aperture yet when we could use it */
2775        if (rdev->mc.visible_vram_size > rdev->mc.aper_size)
2776                rdev->mc.visible_vram_size = rdev->mc.aper_size;
2777        config_aper_size = RREG32(RADEON_CONFIG_APER_SIZE);
2778        if (rdev->flags & RADEON_IS_IGP) {
2779                uint32_t tom;
2780                /* read NB_TOM to get the amount of ram stolen for the GPU */
2781                tom = RREG32(RADEON_NB_TOM);
2782                rdev->mc.real_vram_size = (((tom >> 16) - (tom & 0xffff) + 1) << 16);
2783                WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
2784                rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
2785        } else {
2786                rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
2787                /* Some production boards of m6 will report 0
2788                 * if it's 8 MB
2789                 */
2790                if (rdev->mc.real_vram_size == 0) {
2791                        rdev->mc.real_vram_size = 8192 * 1024;
2792                        WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
2793                }
2794                /* Fix for RN50, M6, M7 with 8/16/32(??) MBs of VRAM - 
2795                 * Novell bug 204882 + along with lots of ubuntu ones
2796                 */
2797                if (rdev->mc.aper_size > config_aper_size)
2798                        config_aper_size = rdev->mc.aper_size;
2799
2800                if (config_aper_size > rdev->mc.real_vram_size)
2801                        rdev->mc.mc_vram_size = config_aper_size;
2802                else
2803                        rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
2804        }
2805}
2806
2807void r100_vga_set_state(struct radeon_device *rdev, bool state)
2808{
2809        uint32_t temp;
2810
2811        temp = RREG32(RADEON_CONFIG_CNTL);
2812        if (state == false) {
2813                temp &= ~RADEON_CFG_VGA_RAM_EN;
2814                temp |= RADEON_CFG_VGA_IO_DIS;
2815        } else {
2816                temp &= ~RADEON_CFG_VGA_IO_DIS;
2817        }
2818        WREG32(RADEON_CONFIG_CNTL, temp);
2819}
2820
2821static void r100_mc_init(struct radeon_device *rdev)
2822{
2823        u64 base;
2824
2825        r100_vram_get_type(rdev);
2826        r100_vram_init_sizes(rdev);
2827        base = rdev->mc.aper_base;
2828        if (rdev->flags & RADEON_IS_IGP)
2829                base = (RREG32(RADEON_NB_TOM) & 0xffff) << 16;
2830        radeon_vram_location(rdev, &rdev->mc, base);
2831        rdev->mc.gtt_base_align = 0;
2832        if (!(rdev->flags & RADEON_IS_AGP))
2833                radeon_gtt_location(rdev, &rdev->mc);
2834        radeon_update_bandwidth_info(rdev);
2835}
2836
2837
2838/*
2839 * Indirect registers accessor
2840 */
2841void r100_pll_errata_after_index(struct radeon_device *rdev)
2842{
2843        if (rdev->pll_errata & CHIP_ERRATA_PLL_DUMMYREADS) {
2844                (void)RREG32(RADEON_CLOCK_CNTL_DATA);
2845                (void)RREG32(RADEON_CRTC_GEN_CNTL);
2846        }
2847}
2848
2849static void r100_pll_errata_after_data(struct radeon_device *rdev)
2850{
2851        /* This workarounds is necessary on RV100, RS100 and RS200 chips
2852         * or the chip could hang on a subsequent access
2853         */
2854        if (rdev->pll_errata & CHIP_ERRATA_PLL_DELAY) {
2855                mdelay(5);
2856        }
2857
2858        /* This function is required to workaround a hardware bug in some (all?)
2859         * revisions of the R300.  This workaround should be called after every
2860         * CLOCK_CNTL_INDEX register access.  If not, register reads afterward
2861         * may not be correct.
2862         */
2863        if (rdev->pll_errata & CHIP_ERRATA_R300_CG) {
2864                uint32_t save, tmp;
2865
2866                save = RREG32(RADEON_CLOCK_CNTL_INDEX);
2867                tmp = save & ~(0x3f | RADEON_PLL_WR_EN);
2868                WREG32(RADEON_CLOCK_CNTL_INDEX, tmp);
2869                tmp = RREG32(RADEON_CLOCK_CNTL_DATA);
2870                WREG32(RADEON_CLOCK_CNTL_INDEX, save);
2871        }
2872}
2873
2874uint32_t r100_pll_rreg(struct radeon_device *rdev, uint32_t reg)
2875{
2876        unsigned long flags;
2877        uint32_t data;
2878
2879        spin_lock_irqsave(&rdev->pll_idx_lock, flags);
2880        WREG8(RADEON_CLOCK_CNTL_INDEX, reg & 0x3f);
2881        r100_pll_errata_after_index(rdev);
2882        data = RREG32(RADEON_CLOCK_CNTL_DATA);
2883        r100_pll_errata_after_data(rdev);
2884        spin_unlock_irqrestore(&rdev->pll_idx_lock, flags);
2885        return data;
2886}
2887
2888void r100_pll_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
2889{
2890        unsigned long flags;
2891
2892        spin_lock_irqsave(&rdev->pll_idx_lock, flags);
2893        WREG8(RADEON_CLOCK_CNTL_INDEX, ((reg & 0x3f) | RADEON_PLL_WR_EN));
2894        r100_pll_errata_after_index(rdev);
2895        WREG32(RADEON_CLOCK_CNTL_DATA, v);
2896        r100_pll_errata_after_data(rdev);
2897        spin_unlock_irqrestore(&rdev->pll_idx_lock, flags);
2898}
2899
2900static void r100_set_safe_registers(struct radeon_device *rdev)
2901{
2902        if (ASIC_IS_RN50(rdev)) {
2903                rdev->config.r100.reg_safe_bm = rn50_reg_safe_bm;
2904                rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(rn50_reg_safe_bm);
2905        } else if (rdev->family < CHIP_R200) {
2906                rdev->config.r100.reg_safe_bm = r100_reg_safe_bm;
2907                rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(r100_reg_safe_bm);
2908        } else {
2909                r200_set_safe_registers(rdev);
2910        }
2911}
2912
2913/*
2914 * Debugfs info
2915 */
2916#if defined(CONFIG_DEBUG_FS)
2917static int r100_debugfs_rbbm_info(struct seq_file *m, void *data)
2918{
2919        struct drm_info_node *node = (struct drm_info_node *) m->private;
2920        struct drm_device *dev = node->minor->dev;
2921        struct radeon_device *rdev = dev->dev_private;
2922        uint32_t reg, value;
2923        unsigned i;
2924
2925        seq_printf(m, "RBBM_STATUS 0x%08x\n", RREG32(RADEON_RBBM_STATUS));
2926        seq_printf(m, "RBBM_CMDFIFO_STAT 0x%08x\n", RREG32(0xE7C));
2927        seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
2928        for (i = 0; i < 64; i++) {
2929                WREG32(RADEON_RBBM_CMDFIFO_ADDR, i | 0x100);
2930                reg = (RREG32(RADEON_RBBM_CMDFIFO_DATA) - 1) >> 2;
2931                WREG32(RADEON_RBBM_CMDFIFO_ADDR, i);
2932                value = RREG32(RADEON_RBBM_CMDFIFO_DATA);
2933                seq_printf(m, "[0x%03X] 0x%04X=0x%08X\n", i, reg, value);
2934        }
2935        return 0;
2936}
2937
2938static int r100_debugfs_cp_ring_info(struct seq_file *m, void *data)
2939{
2940        struct drm_info_node *node = (struct drm_info_node *) m->private;
2941        struct drm_device *dev = node->minor->dev;
2942        struct radeon_device *rdev = dev->dev_private;
2943        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2944        uint32_t rdp, wdp;
2945        unsigned count, i, j;
2946
2947        radeon_ring_free_size(rdev, ring);
2948        rdp = RREG32(RADEON_CP_RB_RPTR);
2949        wdp = RREG32(RADEON_CP_RB_WPTR);
2950        count = (rdp + ring->ring_size - wdp) & ring->ptr_mask;
2951        seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
2952        seq_printf(m, "CP_RB_WPTR 0x%08x\n", wdp);
2953        seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp);
2954        seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw);
2955        seq_printf(m, "%u dwords in ring\n", count);
2956        if (ring->ready) {
2957                for (j = 0; j <= count; j++) {
2958                        i = (rdp + j) & ring->ptr_mask;
2959                        seq_printf(m, "r[%04d]=0x%08x\n", i, ring->ring[i]);
2960                }
2961        }
2962        return 0;
2963}
2964
2965
2966static int r100_debugfs_cp_csq_fifo(struct seq_file *m, void *data)
2967{
2968        struct drm_info_node *node = (struct drm_info_node *) m->private;
2969        struct drm_device *dev = node->minor->dev;
2970        struct radeon_device *rdev = dev->dev_private;
2971        uint32_t csq_stat, csq2_stat, tmp;
2972        unsigned r_rptr, r_wptr, ib1_rptr, ib1_wptr, ib2_rptr, ib2_wptr;
2973        unsigned i;
2974
2975        seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
2976        seq_printf(m, "CP_CSQ_MODE 0x%08x\n", RREG32(RADEON_CP_CSQ_MODE));
2977        csq_stat = RREG32(RADEON_CP_CSQ_STAT);
2978        csq2_stat = RREG32(RADEON_CP_CSQ2_STAT);
2979        r_rptr = (csq_stat >> 0) & 0x3ff;
2980        r_wptr = (csq_stat >> 10) & 0x3ff;
2981        ib1_rptr = (csq_stat >> 20) & 0x3ff;
2982        ib1_wptr = (csq2_stat >> 0) & 0x3ff;
2983        ib2_rptr = (csq2_stat >> 10) & 0x3ff;
2984        ib2_wptr = (csq2_stat >> 20) & 0x3ff;
2985        seq_printf(m, "CP_CSQ_STAT 0x%08x\n", csq_stat);
2986        seq_printf(m, "CP_CSQ2_STAT 0x%08x\n", csq2_stat);
2987        seq_printf(m, "Ring rptr %u\n", r_rptr);
2988        seq_printf(m, "Ring wptr %u\n", r_wptr);
2989        seq_printf(m, "Indirect1 rptr %u\n", ib1_rptr);
2990        seq_printf(m, "Indirect1 wptr %u\n", ib1_wptr);
2991        seq_printf(m, "Indirect2 rptr %u\n", ib2_rptr);
2992        seq_printf(m, "Indirect2 wptr %u\n", ib2_wptr);
2993        /* FIXME: 0, 128, 640 depends on fifo setup see cp_init_kms
2994         * 128 = indirect1_start * 8 & 640 = indirect2_start * 8 */
2995        seq_printf(m, "Ring fifo:\n");
2996        for (i = 0; i < 256; i++) {
2997                WREG32(RADEON_CP_CSQ_ADDR, i << 2);
2998                tmp = RREG32(RADEON_CP_CSQ_DATA);
2999                seq_printf(m, "rfifo[%04d]=0x%08X\n", i, tmp);
3000        }
3001        seq_printf(m, "Indirect1 fifo:\n");
3002        for (i = 256; i <= 512; i++) {
3003                WREG32(RADEON_CP_CSQ_ADDR, i << 2);
3004                tmp = RREG32(RADEON_CP_CSQ_DATA);
3005                seq_printf(m, "ib1fifo[%04d]=0x%08X\n", i, tmp);
3006        }
3007        seq_printf(m, "Indirect2 fifo:\n");
3008        for (i = 640; i < ib1_wptr; i++) {
3009                WREG32(RADEON_CP_CSQ_ADDR, i << 2);
3010                tmp = RREG32(RADEON_CP_CSQ_DATA);
3011                seq_printf(m, "ib2fifo[%04d]=0x%08X\n", i, tmp);
3012        }
3013        return 0;
3014}
3015
3016static int r100_debugfs_mc_info(struct seq_file *m, void *data)
3017{
3018        struct drm_info_node *node = (struct drm_info_node *) m->private;
3019        struct drm_device *dev = node->minor->dev;
3020        struct radeon_device *rdev = dev->dev_private;
3021        uint32_t tmp;
3022
3023        tmp = RREG32(RADEON_CONFIG_MEMSIZE);
3024        seq_printf(m, "CONFIG_MEMSIZE 0x%08x\n", tmp);
3025        tmp = RREG32(RADEON_MC_FB_LOCATION);
3026        seq_printf(m, "MC_FB_LOCATION 0x%08x\n", tmp);
3027        tmp = RREG32(RADEON_BUS_CNTL);
3028        seq_printf(m, "BUS_CNTL 0x%08x\n", tmp);
3029        tmp = RREG32(RADEON_MC_AGP_LOCATION);
3030        seq_printf(m, "MC_AGP_LOCATION 0x%08x\n", tmp);
3031        tmp = RREG32(RADEON_AGP_BASE);
3032        seq_printf(m, "AGP_BASE 0x%08x\n", tmp);
3033        tmp = RREG32(RADEON_HOST_PATH_CNTL);
3034        seq_printf(m, "HOST_PATH_CNTL 0x%08x\n", tmp);
3035        tmp = RREG32(0x01D0);
3036        seq_printf(m, "AIC_CTRL 0x%08x\n", tmp);
3037        tmp = RREG32(RADEON_AIC_LO_ADDR);
3038        seq_printf(m, "AIC_LO_ADDR 0x%08x\n", tmp);
3039        tmp = RREG32(RADEON_AIC_HI_ADDR);
3040        seq_printf(m, "AIC_HI_ADDR 0x%08x\n", tmp);
3041        tmp = RREG32(0x01E4);
3042        seq_printf(m, "AIC_TLB_ADDR 0x%08x\n", tmp);
3043        return 0;
3044}
3045
3046static struct drm_info_list r100_debugfs_rbbm_list[] = {
3047        {"r100_rbbm_info", r100_debugfs_rbbm_info, 0, NULL},
3048};
3049
3050static struct drm_info_list r100_debugfs_cp_list[] = {
3051        {"r100_cp_ring_info", r100_debugfs_cp_ring_info, 0, NULL},
3052        {"r100_cp_csq_fifo", r100_debugfs_cp_csq_fifo, 0, NULL},
3053};
3054
3055static struct drm_info_list r100_debugfs_mc_info_list[] = {
3056        {"r100_mc_info", r100_debugfs_mc_info, 0, NULL},
3057};
3058#endif
3059
/*
 * Register the RBBM debugfs file(s).
 *
 * Returns the result of radeon_debugfs_add_files(), or 0 when the kernel is
 * built without CONFIG_DEBUG_FS.
 */
int r100_debugfs_rbbm_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	/* Derive the entry count from the table so the two cannot drift. */
	return radeon_debugfs_add_files(rdev, r100_debugfs_rbbm_list,
					ARRAY_SIZE(r100_debugfs_rbbm_list));
#else
	return 0;
#endif
}
3068
/*
 * Register the CP debugfs files (ring info and CSQ FIFO dump).
 *
 * Returns the result of radeon_debugfs_add_files(), or 0 when the kernel is
 * built without CONFIG_DEBUG_FS.
 */
int r100_debugfs_cp_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	/* Derive the entry count from the table so the two cannot drift. */
	return radeon_debugfs_add_files(rdev, r100_debugfs_cp_list,
					ARRAY_SIZE(r100_debugfs_cp_list));
#else
	return 0;
#endif
}
3077
/*
 * Register the memory controller debugfs file.
 *
 * Returns the result of radeon_debugfs_add_files(), or 0 when the kernel is
 * built without CONFIG_DEBUG_FS.
 */
int r100_debugfs_mc_info_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	/* Derive the entry count from the table so the two cannot drift. */
	return radeon_debugfs_add_files(rdev, r100_debugfs_mc_info_list,
					ARRAY_SIZE(r100_debugfs_mc_info_list));
#else
	return 0;
#endif
}
3086
3087int r100_set_surface_reg(struct radeon_device *rdev, int reg,
3088                         uint32_t tiling_flags, uint32_t pitch,
3089                         uint32_t offset, uint32_t obj_size)
3090{
3091        int surf_index = reg * 16;
3092        int flags = 0;
3093
3094        if (rdev->family <= CHIP_RS200) {
3095                if ((tiling_flags & (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
3096                                 == (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
3097                        flags |= RADEON_SURF_TILE_COLOR_BOTH;
3098                if (tiling_flags & RADEON_TILING_MACRO)
3099                        flags |= RADEON_SURF_TILE_COLOR_MACRO;
3100                /* setting pitch to 0 disables tiling */
3101                if ((tiling_flags & (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
3102                                == 0)
3103                        pitch = 0;
3104        } else if (rdev->family <= CHIP_RV280) {
3105                if (tiling_flags & (RADEON_TILING_MACRO))
3106                        flags |= R200_SURF_TILE_COLOR_MACRO;
3107                if (tiling_flags & RADEON_TILING_MICRO)
3108                        flags |= R200_SURF_TILE_COLOR_MICRO;
3109        } else {
3110                if (tiling_flags & RADEON_TILING_MACRO)
3111                        flags |= R300_SURF_TILE_MACRO;
3112                if (tiling_flags & RADEON_TILING_MICRO)
3113                        flags |= R300_SURF_TILE_MICRO;
3114        }
3115
3116        if (tiling_flags & RADEON_TILING_SWAP_16BIT)
3117                flags |= RADEON_SURF_AP0_SWP_16BPP | RADEON_SURF_AP1_SWP_16BPP;
3118        if (tiling_flags & RADEON_TILING_SWAP_32BIT)
3119                flags |= RADEON_SURF_AP0_SWP_32BPP | RADEON_SURF_AP1_SWP_32BPP;
3120
3121        /* r100/r200 divide by 16 */
3122        if (rdev->family < CHIP_R300)
3123                flags |= pitch / 16;
3124        else
3125                flags |= pitch / 8;
3126
3127
3128        DRM_DEBUG_KMS("writing surface %d %d %x %x\n", reg, flags, offset, offset+obj_size-1);
3129        WREG32(RADEON_SURFACE0_INFO + surf_index, flags);
3130        WREG32(RADEON_SURFACE0_LOWER_BOUND + surf_index, offset);
3131        WREG32(RADEON_SURFACE0_UPPER_BOUND + surf_index, offset + obj_size - 1);
3132        return 0;
3133}
3134
3135void r100_clear_surface_reg(struct radeon_device *rdev, int reg)
3136{
3137        int surf_index = reg * 16;
3138        WREG32(RADEON_SURFACE0_INFO + surf_index, 0);
3139}
3140
3141void r100_bandwidth_update(struct radeon_device *rdev)
3142{
3143        fixed20_12 trcd_ff, trp_ff, tras_ff, trbs_ff, tcas_ff;
3144        fixed20_12 sclk_ff, mclk_ff, sclk_eff_ff, sclk_delay_ff;
3145        fixed20_12 peak_disp_bw, mem_bw, pix_clk, pix_clk2, temp_ff;
3146        fixed20_12 crit_point_ff = {0};
3147        uint32_t temp, data, mem_trcd, mem_trp, mem_tras;
3148        fixed20_12 memtcas_ff[8] = {
3149                dfixed_init(1),
3150                dfixed_init(2),
3151                dfixed_init(3),
3152                dfixed_init(0),
3153                dfixed_init_half(1),
3154                dfixed_init_half(2),
3155                dfixed_init(0),
3156        };
3157        fixed20_12 memtcas_rs480_ff[8] = {
3158                dfixed_init(0),
3159                dfixed_init(1),
3160                dfixed_init(2),
3161                dfixed_init(3),
3162                dfixed_init(0),
3163                dfixed_init_half(1),
3164                dfixed_init_half(2),
3165                dfixed_init_half(3),
3166        };
3167        fixed20_12 memtcas2_ff[8] = {
3168                dfixed_init(0),
3169                dfixed_init(1),
3170                dfixed_init(2),
3171                dfixed_init(3),
3172                dfixed_init(4),
3173                dfixed_init(5),
3174                dfixed_init(6),
3175                dfixed_init(7),
3176        };
3177        fixed20_12 memtrbs[8] = {
3178                dfixed_init(1),
3179                dfixed_init_half(1),
3180                dfixed_init(2),
3181                dfixed_init_half(2),
3182                dfixed_init(3),
3183                dfixed_init_half(3),
3184                dfixed_init(4),
3185                dfixed_init_half(4)
3186        };
3187        fixed20_12 memtrbs_r4xx[8] = {
3188                dfixed_init(4),
3189                dfixed_init(5),
3190                dfixed_init(6),
3191                dfixed_init(7),
3192                dfixed_init(8),
3193                dfixed_init(9),
3194                dfixed_init(10),
3195                dfixed_init(11)
3196        };
3197        fixed20_12 min_mem_eff;
3198        fixed20_12 mc_latency_sclk, mc_latency_mclk, k1;
3199        fixed20_12 cur_latency_mclk, cur_latency_sclk;
3200        fixed20_12 disp_latency, disp_latency_overhead, disp_drain_rate = {0},
3201                disp_drain_rate2, read_return_rate;
3202        fixed20_12 time_disp1_drop_priority;
3203        int c;
3204        int cur_size = 16;       /* in octawords */
3205        int critical_point = 0, critical_point2;
3206/*      uint32_t read_return_rate, time_disp1_drop_priority; */
3207        int stop_req, max_stop_req;
3208        struct drm_display_mode *mode1 = NULL;
3209        struct drm_display_mode *mode2 = NULL;
3210        uint32_t pixel_bytes1 = 0;
3211        uint32_t pixel_bytes2 = 0;
3212
3213        /* Guess line buffer size to be 8192 pixels */
3214        u32 lb_size = 8192;
3215
3216        if (!rdev->mode_info.mode_config_initialized)
3217                return;
3218
3219        radeon_update_display_priority(rdev);
3220
3221        if (rdev->mode_info.crtcs[0]->base.enabled) {
3222                const struct drm_framebuffer *fb =
3223                        rdev->mode_info.crtcs[0]->base.primary->fb;
3224
3225                mode1 = &rdev->mode_info.crtcs[0]->base.mode;
3226                pixel_bytes1 = fb->format->cpp[0];
3227        }
3228        if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
3229                if (rdev->mode_info.crtcs[1]->base.enabled) {
3230                        const struct drm_framebuffer *fb =
3231                                rdev->mode_info.crtcs[1]->base.primary->fb;
3232
3233                        mode2 = &rdev->mode_info.crtcs[1]->base.mode;
3234                        pixel_bytes2 = fb->format->cpp[0];
3235                }
3236        }
3237
3238        min_mem_eff.full = dfixed_const_8(0);
3239        /* get modes */
3240        if ((rdev->disp_priority == 2) && ASIC_IS_R300(rdev)) {
3241                uint32_t mc_init_misc_lat_timer = RREG32(R300_MC_INIT_MISC_LAT_TIMER);
3242                mc_init_misc_lat_timer &= ~(R300_MC_DISP1R_INIT_LAT_MASK << R300_MC_DISP1R_INIT_LAT_SHIFT);
3243                mc_init_misc_lat_timer &= ~(R300_MC_DISP0R_INIT_LAT_MASK << R300_MC_DISP0R_INIT_LAT_SHIFT);
3244                /* check crtc enables */
3245                if (mode2)
3246                        mc_init_misc_lat_timer |= (1 << R300_MC_DISP1R_INIT_LAT_SHIFT);
3247                if (mode1)
3248                        mc_init_misc_lat_timer |= (1 << R300_MC_DISP0R_INIT_LAT_SHIFT);
3249                WREG32(R300_MC_INIT_MISC_LAT_TIMER, mc_init_misc_lat_timer);
3250        }
3251
3252        /*
3253         * determine is there is enough bw for current mode
3254         */
3255        sclk_ff = rdev->pm.sclk;
3256        mclk_ff = rdev->pm.mclk;
3257
3258        temp = (rdev->mc.vram_width / 8) * (rdev->mc.vram_is_ddr ? 2 : 1);
3259        temp_ff.full = dfixed_const(temp);
3260        mem_bw.full = dfixed_mul(mclk_ff, temp_ff);
3261
3262        pix_clk.full = 0;
3263        pix_clk2.full = 0;
3264        peak_disp_bw.full = 0;
3265        if (mode1) {
3266                temp_ff.full = dfixed_const(1000);
3267                pix_clk.full = dfixed_const(mode1->clock); /* convert to fixed point */
3268                pix_clk.full = dfixed_div(pix_clk, temp_ff);
3269                temp_ff.full = dfixed_const(pixel_bytes1);
3270                peak_disp_bw.full += dfixed_mul(pix_clk, temp_ff);
3271        }
3272        if (mode2) {
3273                temp_ff.full = dfixed_const(1000);
3274                pix_clk2.full = dfixed_const(mode2->clock); /* convert to fixed point */
3275                pix_clk2.full = dfixed_div(pix_clk2, temp_ff);
3276                temp_ff.full = dfixed_const(pixel_bytes2);
3277                peak_disp_bw.full += dfixed_mul(pix_clk2, temp_ff);
3278        }
3279
3280        mem_bw.full = dfixed_mul(mem_bw, min_mem_eff);
3281        if (peak_disp_bw.full >= mem_bw.full) {
3282                DRM_ERROR("You may not have enough display bandwidth for current mode\n"
3283                          "If you have flickering problem, try to lower resolution, refresh rate, or color depth\n");
3284        }
3285
3286        /*  Get values from the EXT_MEM_CNTL register...converting its contents. */
3287        temp = RREG32(RADEON_MEM_TIMING_CNTL);
3288        if ((rdev->family == CHIP_RV100) || (rdev->flags & RADEON_IS_IGP)) { /* RV100, M6, IGPs */
3289                mem_trcd = ((temp >> 2) & 0x3) + 1;
3290                mem_trp  = ((temp & 0x3)) + 1;
3291                mem_tras = ((temp & 0x70) >> 4) + 1;
3292        } else if (rdev->family == CHIP_R300 ||
3293                   rdev->family == CHIP_R350) { /* r300, r350 */
3294                mem_trcd = (temp & 0x7) + 1;
3295                mem_trp = ((temp >> 8) & 0x7) + 1;
3296                mem_tras = ((temp >> 11) & 0xf) + 4;
3297        } else if (rdev->family == CHIP_RV350 ||
3298                   rdev->family == CHIP_RV380) {
3299                /* rv3x0 */
3300                mem_trcd = (temp & 0x7) + 3;
3301                mem_trp = ((temp >> 8) & 0x7) + 3;
3302                mem_tras = ((temp >> 11) & 0xf) + 6;
3303        } else if (rdev->family == CHIP_R420 ||
3304                   rdev->family == CHIP_R423 ||
3305                   rdev->family == CHIP_RV410) {
3306                /* r4xx */
3307                mem_trcd = (temp & 0xf) + 3;
3308                if (mem_trcd > 15)
3309                        mem_trcd = 15;
3310                mem_trp = ((temp >> 8) & 0xf) + 3;
3311                if (mem_trp > 15)
3312                        mem_trp = 15;
3313                mem_tras = ((temp >> 12) & 0x1f) + 6;
3314                if (mem_tras > 31)
3315                        mem_tras = 31;
3316        } else { /* RV200, R200 */
3317                mem_trcd = (temp & 0x7) + 1;
3318                mem_trp = ((temp >> 8) & 0x7) + 1;
3319                mem_tras = ((temp >> 12) & 0xf) + 4;
3320        }
3321        /* convert to FF */
3322        trcd_ff.full = dfixed_const(mem_trcd);
3323        trp_ff.full = dfixed_const(mem_trp);
3324        tras_ff.full = dfixed_const(mem_tras);
3325
3326        /* Get values from the MEM_SDRAM_MODE_REG register...converting its */
3327        temp = RREG32(RADEON_MEM_SDRAM_MODE_REG);
3328        data = (temp & (7 << 20)) >> 20;
3329        if ((rdev->family == CHIP_RV100) || rdev->flags & RADEON_IS_IGP) {
3330                if (rdev->family == CHIP_RS480) /* don't think rs400 */
3331                        tcas_ff = memtcas_rs480_ff[data];
3332                else
3333                        tcas_ff = memtcas_ff[data];
3334        } else
3335                tcas_ff = memtcas2_ff[data];
3336
3337        if (rdev->family == CHIP_RS400 ||
3338            rdev->family == CHIP_RS480) {
3339                /* extra cas latency stored in bits 23-25 0-4 clocks */
3340                data = (temp >> 23) & 0x7;
3341                if (data < 5)
3342                        tcas_ff.full += dfixed_const(data);
3343        }
3344
3345        if (ASIC_IS_R300(rdev) && !(rdev->flags & RADEON_IS_IGP)) {
3346                /* on the R300, Tcas is included in Trbs.
3347                 */
3348                temp = RREG32(RADEON_MEM_CNTL);
3349                data = (R300_MEM_NUM_CHANNELS_MASK & temp);
3350                if (data == 1) {
3351                        if (R300_MEM_USE_CD_CH_ONLY & temp) {
3352                                temp = RREG32(R300_MC_IND_INDEX);
3353                                temp &= ~R300_MC_IND_ADDR_MASK;
3354                                temp |= R300_MC_READ_CNTL_CD_mcind;
3355                                WREG32(R300_MC_IND_INDEX, temp);
3356                                temp = RREG32(R300_MC_IND_DATA);
3357                                data = (R300_MEM_RBS_POSITION_C_MASK & temp);
3358                        } else {
3359                                temp = RREG32(R300_MC_READ_CNTL_AB);
3360                                data = (R300_MEM_RBS_POSITION_A_MASK & temp);
3361                        }
3362                } else {
3363                        temp = RREG32(R300_MC_READ_CNTL_AB);
3364                        data = (R300_MEM_RBS_POSITION_A_MASK & temp);
3365                }
3366                if (rdev->family == CHIP_RV410 ||
3367                    rdev->family == CHIP_R420 ||
3368                    rdev->family == CHIP_R423)
3369                        trbs_ff = memtrbs_r4xx[data];
3370                else
3371                        trbs_ff = memtrbs[data];
3372                tcas_ff.full += trbs_ff.full;
3373        }
3374
3375        sclk_eff_ff.full = sclk_ff.full;
3376
3377        if (rdev->flags & RADEON_IS_AGP) {
3378                fixed20_12 agpmode_ff;
3379                agpmode_ff.full = dfixed_const(radeon_agpmode);
3380                temp_ff.full = dfixed_const_666(16);
3381                sclk_eff_ff.full -= dfixed_mul(agpmode_ff, temp_ff);
3382        }
3383        /* TODO PCIE lanes may affect this - agpmode == 16?? */
3384
3385        if (ASIC_IS_R300(rdev)) {
3386                sclk_delay_ff.full = dfixed_const(250);
3387        } else {
3388                if ((rdev->family == CHIP_RV100) ||
3389                    rdev->flags & RADEON_IS_IGP) {
3390                        if (rdev->mc.vram_is_ddr)
3391                                sclk_delay_ff.full = dfixed_const(41);
3392                        else
3393                                sclk_delay_ff.full = dfixed_const(33);
3394                } else {
3395                        if (rdev->mc.vram_width == 128)
3396                                sclk_delay_ff.full = dfixed_const(57);
3397                        else
3398                                sclk_delay_ff.full = dfixed_const(41);
3399                }
3400        }
3401
3402        mc_latency_sclk.full = dfixed_div(sclk_delay_ff, sclk_eff_ff);
3403
3404        if (rdev->mc.vram_is_ddr) {
3405                if (rdev->mc.vram_width == 32) {
3406                        k1.full = dfixed_const(40);
3407                        c  = 3;
3408                } else {
3409                        k1.full = dfixed_const(20);
3410                        c  = 1;
3411                }
3412        } else {
3413                k1.full = dfixed_const(40);
3414                c  = 3;
3415        }
3416
3417        temp_ff.full = dfixed_const(2);
3418        mc_latency_mclk.full = dfixed_mul(trcd_ff, temp_ff);
3419        temp_ff.full = dfixed_const(c);
3420        mc_latency_mclk.full += dfixed_mul(tcas_ff, temp_ff);
3421        temp_ff.full = dfixed_const(4);
3422        mc_latency_mclk.full += dfixed_mul(tras_ff, temp_ff);
3423        mc_latency_mclk.full += dfixed_mul(trp_ff, temp_ff);
3424        mc_latency_mclk.full += k1.full;
3425
3426        mc_latency_mclk.full = dfixed_div(mc_latency_mclk, mclk_ff);
3427        mc_latency_mclk.full += dfixed_div(temp_ff, sclk_eff_ff);
3428
3429        /*
3430          HW cursor time assuming worst case of full size colour cursor.
3431        */
3432        temp_ff.full = dfixed_const((2 * (cur_size - (rdev->mc.vram_is_ddr + 1))));
3433        temp_ff.full += trcd_ff.full;
3434        if (temp_ff.full < tras_ff.full)
3435                temp_ff.full = tras_ff.full;
3436        cur_latency_mclk.full = dfixed_div(temp_ff, mclk_ff);
3437
3438        temp_ff.full = dfixed_const(cur_size);
3439        cur_latency_sclk.full = dfixed_div(temp_ff, sclk_eff_ff);
3440        /*
3441          Find the total latency for the display data.
3442        */
3443        disp_latency_overhead.full = dfixed_const(8);
3444        disp_latency_overhead.full = dfixed_div(disp_latency_overhead, sclk_ff);
3445        mc_latency_mclk.full += disp_latency_overhead.full + cur_latency_mclk.full;
3446        mc_latency_sclk.full += disp_latency_overhead.full + cur_latency_sclk.full;
3447
3448        if (mc_latency_mclk.full > mc_latency_sclk.full)
3449                disp_latency.full = mc_latency_mclk.full;
3450        else
3451                disp_latency.full = mc_latency_sclk.full;
3452
3453        /* setup Max GRPH_STOP_REQ default value */
3454        if (ASIC_IS_RV100(rdev))
3455                max_stop_req = 0x5c;
3456        else
3457                max_stop_req = 0x7c;
3458
3459        if (mode1) {
3460                /*  CRTC1
3461                    Set GRPH_BUFFER_CNTL register using h/w defined optimal values.
3462                    GRPH_STOP_REQ <= MIN[ 0x7C, (CRTC_H_DISP + 1) * (bit depth) / 0x10 ]
3463                */
3464                stop_req = mode1->hdisplay * pixel_bytes1 / 16;
3465
3466                if (stop_req > max_stop_req)
3467                        stop_req = max_stop_req;
3468
3469                /*
3470                  Find the drain rate of the display buffer.
3471                */
3472                temp_ff.full = dfixed_const((16/pixel_bytes1));
3473                disp_drain_rate.full = dfixed_div(pix_clk, temp_ff);
3474
3475                /*
3476                  Find the critical point of the display buffer.
3477                */
3478                crit_point_ff.full = dfixed_mul(disp_drain_rate, disp_latency);
3479                crit_point_ff.full += dfixed_const_half(0);
3480
3481                critical_point = dfixed_trunc(crit_point_ff);
3482
3483                if (rdev->disp_priority == 2) {
3484                        critical_point = 0;
3485                }
3486
3487                /*
3488                  The critical point should never be above max_stop_req-4.  Setting
3489                  GRPH_CRITICAL_CNTL = 0 will thus force high priority all the time.
3490                */
3491                if (max_stop_req - critical_point < 4)
3492                        critical_point = 0;
3493
3494                if (critical_point == 0 && mode2 && rdev->family == CHIP_R300) {
3495                        /* some R300 cards have problem with this set to 0, when CRTC2 is enabled.*/
3496                        critical_point = 0x10;
3497                }
3498
3499                temp = RREG32(RADEON_GRPH_BUFFER_CNTL);
3500                temp &= ~(RADEON_GRPH_STOP_REQ_MASK);
3501                temp |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT);
3502                temp &= ~(RADEON_GRPH_START_REQ_MASK);
3503                if ((rdev->family == CHIP_R350) &&
3504                    (stop_req > 0x15)) {
3505                        stop_req -= 0x10;
3506                }
3507                temp |= (stop_req << RADEON_GRPH_START_REQ_SHIFT);
3508                temp |= RADEON_GRPH_BUFFER_SIZE;
3509                temp &= ~(RADEON_GRPH_CRITICAL_CNTL   |
3510                          RADEON_GRPH_CRITICAL_AT_SOF |
3511                          RADEON_GRPH_STOP_CNTL);
3512                /*
3513                  Write the result into the register.
3514                */
3515                WREG32(RADEON_GRPH_BUFFER_CNTL, ((temp & ~RADEON_GRPH_CRITICAL_POINT_MASK) |
3516                                                       (critical_point << RADEON_GRPH_CRITICAL_POINT_SHIFT)));
3517
3518#if 0
3519                if ((rdev->family == CHIP_RS400) ||
3520                    (rdev->family == CHIP_RS480)) {
3521                        /* attempt to program RS400 disp regs correctly ??? */
3522                        temp = RREG32(RS400_DISP1_REG_CNTL);
3523                        temp &= ~(RS400_DISP1_START_REQ_LEVEL_MASK |
3524                                  RS400_DISP1_STOP_REQ_LEVEL_MASK);
3525                        WREG32(RS400_DISP1_REQ_CNTL1, (temp |
3526                                                       (critical_point << RS400_DISP1_START_REQ_LEVEL_SHIFT) |
3527                                                       (critical_point << RS400_DISP1_STOP_REQ_LEVEL_SHIFT)));
3528                        temp = RREG32(RS400_DMIF_MEM_CNTL1);
3529                        temp &= ~(RS400_DISP1_CRITICAL_POINT_START_MASK |
3530                                  RS400_DISP1_CRITICAL_POINT_STOP_MASK);
3531                        WREG32(RS400_DMIF_MEM_CNTL1, (temp |
3532                                                      (critical_point << RS400_DISP1_CRITICAL_POINT_START_SHIFT) |
3533                                                      (critical_point << RS400_DISP1_CRITICAL_POINT_STOP_SHIFT)));
3534                }
3535#endif
3536
3537                DRM_DEBUG_KMS("GRPH_BUFFER_CNTL from to %x\n",
3538                          /*      (unsigned int)info->SavedReg->grph_buffer_cntl, */
3539                          (unsigned int)RREG32(RADEON_GRPH_BUFFER_CNTL));
3540        }
3541
3542        if (mode2) {
3543                u32 grph2_cntl;
3544                stop_req = mode2->hdisplay * pixel_bytes2 / 16;
3545
3546                if (stop_req > max_stop_req)
3547                        stop_req = max_stop_req;
3548
3549                /*
3550                  Find the drain rate of the display buffer.
3551                */
3552                temp_ff.full = dfixed_const((16/pixel_bytes2));
3553                disp_drain_rate2.full = dfixed_div(pix_clk2, temp_ff);
3554
3555                grph2_cntl = RREG32(RADEON_GRPH2_BUFFER_CNTL);
3556                grph2_cntl &= ~(RADEON_GRPH_STOP_REQ_MASK);
3557                grph2_cntl |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT);
3558                grph2_cntl &= ~(RADEON_GRPH_START_REQ_MASK);
3559                if ((rdev->family == CHIP_R350) &&
3560                    (stop_req > 0x15)) {
3561                        stop_req -= 0x10;
3562                }
3563                grph2_cntl |= (stop_req << RADEON_GRPH_START_REQ_SHIFT);
3564                grph2_cntl |= RADEON_GRPH_BUFFER_SIZE;
3565                grph2_cntl &= ~(RADEON_GRPH_CRITICAL_CNTL   |
3566                          RADEON_GRPH_CRITICAL_AT_SOF |
3567                          RADEON_GRPH_STOP_CNTL);
3568
3569                if ((rdev->family == CHIP_RS100) ||
3570                    (rdev->family == CHIP_RS200))
3571                        critical_point2 = 0;
3572                else {
3573                        temp = (rdev->mc.vram_width * rdev->mc.vram_is_ddr + 1)/128;
3574                        temp_ff.full = dfixed_const(temp);
3575                        temp_ff.full = dfixed_mul(mclk_ff, temp_ff);
3576                        if (sclk_ff.full < temp_ff.full)
3577                                temp_ff.full = sclk_ff.full;
3578
3579                        read_return_rate.full = temp_ff.full;
3580
3581                        if (mode1) {
3582                                temp_ff.full = read_return_rate.full - disp_drain_rate.full;
3583                                time_disp1_drop_priority.full = dfixed_div(crit_point_ff, temp_ff);
3584                        } else {
3585                                time_disp1_drop_priority.full = 0;
3586                        }
3587                        crit_point_ff.full = disp_latency.full + time_disp1_drop_priority.full + disp_latency.full;
3588                        crit_point_ff.full = dfixed_mul(crit_point_ff, disp_drain_rate2);
3589                        crit_point_ff.full += dfixed_const_half(0);
3590
3591                        critical_point2 = dfixed_trunc(crit_point_ff);
3592
3593                        if (rdev->disp_priority == 2) {
3594                                critical_point2 = 0;
3595                        }
3596
3597                        if (max_stop_req - critical_point2 < 4)
3598                                critical_point2 = 0;
3599
3600                }
3601
3602                if (critical_point2 == 0 && rdev->family == CHIP_R300) {
3603                        /* some R300 cards have problem with this set to 0 */
3604                        critical_point2 = 0x10;
3605                }
3606
3607                WREG32(RADEON_GRPH2_BUFFER_CNTL, ((grph2_cntl & ~RADEON_GRPH_CRITICAL_POINT_MASK) |
3608                                                  (critical_point2 << RADEON_GRPH_CRITICAL_POINT_SHIFT)));
3609
3610                if ((rdev->family == CHIP_RS400) ||
3611                    (rdev->family == CHIP_RS480)) {
3612#if 0
3613                        /* attempt to program RS400 disp2 regs correctly ??? */
3614                        temp = RREG32(RS400_DISP2_REQ_CNTL1);
3615                        temp &= ~(RS400_DISP2_START_REQ_LEVEL_MASK |
3616                                  RS400_DISP2_STOP_REQ_LEVEL_MASK);
3617                        WREG32(RS400_DISP2_REQ_CNTL1, (temp |
3618                                                       (critical_point2 << RS400_DISP1_START_REQ_LEVEL_SHIFT) |
3619                                                       (critical_point2 << RS400_DISP1_STOP_REQ_LEVEL_SHIFT)));
3620                        temp = RREG32(RS400_DISP2_REQ_CNTL2);
3621                        temp &= ~(RS400_DISP2_CRITICAL_POINT_START_MASK |
3622                                  RS400_DISP2_CRITICAL_POINT_STOP_MASK);
3623                        WREG32(RS400_DISP2_REQ_CNTL2, (temp |
3624                                                       (critical_point2 << RS400_DISP2_CRITICAL_POINT_START_SHIFT) |
3625                                                       (critical_point2 << RS400_DISP2_CRITICAL_POINT_STOP_SHIFT)));
3626#endif
3627                        WREG32(RS400_DISP2_REQ_CNTL1, 0x105DC1CC);
3628                        WREG32(RS400_DISP2_REQ_CNTL2, 0x2749D000);
3629                        WREG32(RS400_DMIF_MEM_CNTL1,  0x29CA71DC);
3630                        WREG32(RS400_DISP1_REQ_CNTL1, 0x28FBC3AC);
3631                }
3632
3633                DRM_DEBUG_KMS("GRPH2_BUFFER_CNTL from to %x\n",
3634                          (unsigned int)RREG32(RADEON_GRPH2_BUFFER_CNTL));
3635        }
3636
3637        /* Save number of lines the linebuffer leads before the scanout */
3638        if (mode1)
3639            rdev->mode_info.crtcs[0]->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode1->crtc_hdisplay);
3640
3641        if (mode2)
3642            rdev->mode_info.crtcs[1]->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode2->crtc_hdisplay);
3643}
3644
3645int r100_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3646{
3647        uint32_t scratch;
3648        uint32_t tmp = 0;
3649        unsigned i;
3650        int r;
3651
3652        r = radeon_scratch_get(rdev, &scratch);
3653        if (r) {
3654                DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3655                return r;
3656        }
3657        WREG32(scratch, 0xCAFEDEAD);
3658        r = radeon_ring_lock(rdev, ring, 2);
3659        if (r) {
3660                DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3661                radeon_scratch_free(rdev, scratch);
3662                return r;
3663        }
3664        radeon_ring_write(ring, PACKET0(scratch, 0));
3665        radeon_ring_write(ring, 0xDEADBEEF);
3666        radeon_ring_unlock_commit(rdev, ring, false);
3667        for (i = 0; i < rdev->usec_timeout; i++) {
3668                tmp = RREG32(scratch);
3669                if (tmp == 0xDEADBEEF) {
3670                        break;
3671                }
3672                DRM_UDELAY(1);
3673        }
3674        if (i < rdev->usec_timeout) {
3675                DRM_INFO("ring test succeeded in %d usecs\n", i);
3676        } else {
3677                DRM_ERROR("radeon: ring test failed (scratch(0x%04X)=0x%08X)\n",
3678                          scratch, tmp);
3679                r = -EINVAL;
3680        }
3681        radeon_scratch_free(rdev, scratch);
3682        return r;
3683}
3684
3685void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3686{
3687        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3688
3689        if (ring->rptr_save_reg) {
3690                u32 next_rptr = ring->wptr + 2 + 3;
3691                radeon_ring_write(ring, PACKET0(ring->rptr_save_reg, 0));
3692                radeon_ring_write(ring, next_rptr);
3693        }
3694
3695        radeon_ring_write(ring, PACKET0(RADEON_CP_IB_BASE, 1));
3696        radeon_ring_write(ring, ib->gpu_addr);
3697        radeon_ring_write(ring, ib->length_dw);
3698}
3699
3700int r100_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3701{
3702        struct radeon_ib ib;
3703        uint32_t scratch;
3704        uint32_t tmp = 0;
3705        unsigned i;
3706        int r;
3707
3708        r = radeon_scratch_get(rdev, &scratch);
3709        if (r) {
3710                DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3711                return r;
3712        }
3713        WREG32(scratch, 0xCAFEDEAD);
3714        r = radeon_ib_get(rdev, RADEON_RING_TYPE_GFX_INDEX, &ib, NULL, 256);
3715        if (r) {
3716                DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3717                goto free_scratch;
3718        }
3719        ib.ptr[0] = PACKET0(scratch, 0);
3720        ib.ptr[1] = 0xDEADBEEF;
3721        ib.ptr[2] = PACKET2(0);
3722        ib.ptr[3] = PACKET2(0);
3723        ib.ptr[4] = PACKET2(0);
3724        ib.ptr[5] = PACKET2(0);
3725        ib.ptr[6] = PACKET2(0);
3726        ib.ptr[7] = PACKET2(0);
3727        ib.length_dw = 8;
3728        r = radeon_ib_schedule(rdev, &ib, NULL, false);
3729        if (r) {
3730                DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3731                goto free_ib;
3732        }
3733        r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3734                RADEON_USEC_IB_TEST_TIMEOUT));
3735        if (r < 0) {
3736                DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3737                goto free_ib;
3738        } else if (r == 0) {
3739                DRM_ERROR("radeon: fence wait timed out.\n");
3740                r = -ETIMEDOUT;
3741                goto free_ib;
3742        }
3743        r = 0;
3744        for (i = 0; i < rdev->usec_timeout; i++) {
3745                tmp = RREG32(scratch);
3746                if (tmp == 0xDEADBEEF) {
3747                        break;
3748                }
3749                DRM_UDELAY(1);
3750        }
3751        if (i < rdev->usec_timeout) {
3752                DRM_INFO("ib test succeeded in %u usecs\n", i);
3753        } else {
3754                DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3755                          scratch, tmp);
3756                r = -EINVAL;
3757        }
3758free_ib:
3759        radeon_ib_free(rdev, &ib);
3760free_scratch:
3761        radeon_scratch_free(rdev, scratch);
3762        return r;
3763}
3764
3765void r100_mc_stop(struct radeon_device *rdev, struct r100_mc_save *save)
3766{
3767        /* Shutdown CP we shouldn't need to do that but better be safe than
3768         * sorry
3769         */
3770        rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3771        WREG32(R_000740_CP_CSQ_CNTL, 0);
3772
3773        /* Save few CRTC registers */
3774        save->GENMO_WT = RREG8(R_0003C2_GENMO_WT);
3775        save->CRTC_EXT_CNTL = RREG32(R_000054_CRTC_EXT_CNTL);
3776        save->CRTC_GEN_CNTL = RREG32(R_000050_CRTC_GEN_CNTL);
3777        save->CUR_OFFSET = RREG32(R_000260_CUR_OFFSET);
3778        if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
3779                save->CRTC2_GEN_CNTL = RREG32(R_0003F8_CRTC2_GEN_CNTL);
3780                save->CUR2_OFFSET = RREG32(R_000360_CUR2_OFFSET);
3781        }
3782
3783        /* Disable VGA aperture access */
3784        WREG8(R_0003C2_GENMO_WT, C_0003C2_VGA_RAM_EN & save->GENMO_WT);
3785        /* Disable cursor, overlay, crtc */
3786        WREG32(R_000260_CUR_OFFSET, save->CUR_OFFSET | S_000260_CUR_LOCK(1));
3787        WREG32(R_000054_CRTC_EXT_CNTL, save->CRTC_EXT_CNTL |
3788                                        S_000054_CRTC_DISPLAY_DIS(1));
3789        WREG32(R_000050_CRTC_GEN_CNTL,
3790                        (C_000050_CRTC_CUR_EN & save->CRTC_GEN_CNTL) |
3791                        S_000050_CRTC_DISP_REQ_EN_B(1));
3792        WREG32(R_000420_OV0_SCALE_CNTL,
3793                C_000420_OV0_OVERLAY_EN & RREG32(R_000420_OV0_SCALE_CNTL));
3794        WREG32(R_000260_CUR_OFFSET, C_000260_CUR_LOCK & save->CUR_OFFSET);
3795        if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
3796                WREG32(R_000360_CUR2_OFFSET, save->CUR2_OFFSET |
3797                                                S_000360_CUR2_LOCK(1));
3798                WREG32(R_0003F8_CRTC2_GEN_CNTL,
3799                        (C_0003F8_CRTC2_CUR_EN & save->CRTC2_GEN_CNTL) |
3800                        S_0003F8_CRTC2_DISPLAY_DIS(1) |
3801                        S_0003F8_CRTC2_DISP_REQ_EN_B(1));
3802                WREG32(R_000360_CUR2_OFFSET,
3803                        C_000360_CUR2_LOCK & save->CUR2_OFFSET);
3804        }
3805}
3806
3807void r100_mc_resume(struct radeon_device *rdev, struct r100_mc_save *save)
3808{
3809        /* Update base address for crtc */
3810        WREG32(R_00023C_DISPLAY_BASE_ADDR, rdev->mc.vram_start);
3811        if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
3812                WREG32(R_00033C_CRTC2_DISPLAY_BASE_ADDR, rdev->mc.vram_start);
3813        }
3814        /* Restore CRTC registers */
3815        WREG8(R_0003C2_GENMO_WT, save->GENMO_WT);
3816        WREG32(R_000054_CRTC_EXT_CNTL, save->CRTC_EXT_CNTL);
3817        WREG32(R_000050_CRTC_GEN_CNTL, save->CRTC_GEN_CNTL);
3818        if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
3819                WREG32(R_0003F8_CRTC2_GEN_CNTL, save->CRTC2_GEN_CNTL);
3820        }
3821}
3822
3823void r100_vga_render_disable(struct radeon_device *rdev)
3824{
3825        u32 tmp;
3826
3827        tmp = RREG8(R_0003C2_GENMO_WT);
3828        WREG8(R_0003C2_GENMO_WT, C_0003C2_VGA_RAM_EN & tmp);
3829}
3830
3831static void r100_debugfs(struct radeon_device *rdev)
3832{
3833        int r;
3834
3835        r = r100_debugfs_mc_info_init(rdev);
3836        if (r)
3837                dev_warn(rdev->dev, "Failed to create r100_mc debugfs file.\n");
3838}
3839
3840static void r100_mc_program(struct radeon_device *rdev)
3841{
3842        struct r100_mc_save save;
3843
3844        /* Stops all mc clients */
3845        r100_mc_stop(rdev, &save);
3846        if (rdev->flags & RADEON_IS_AGP) {
3847                WREG32(R_00014C_MC_AGP_LOCATION,
3848                        S_00014C_MC_AGP_START(rdev->mc.gtt_start >> 16) |
3849                        S_00014C_MC_AGP_TOP(rdev->mc.gtt_end >> 16));
3850                WREG32(R_000170_AGP_BASE, lower_32_bits(rdev->mc.agp_base));
3851                if (rdev->family > CHIP_RV200)
3852                        WREG32(R_00015C_AGP_BASE_2,
3853                                upper_32_bits(rdev->mc.agp_base) & 0xff);
3854        } else {
3855                WREG32(R_00014C_MC_AGP_LOCATION, 0x0FFFFFFF);
3856                WREG32(R_000170_AGP_BASE, 0);
3857                if (rdev->family > CHIP_RV200)
3858                        WREG32(R_00015C_AGP_BASE_2, 0);
3859        }
3860        /* Wait for mc idle */
3861        if (r100_mc_wait_for_idle(rdev))
3862                dev_warn(rdev->dev, "Wait for MC idle timeout.\n");
3863        /* Program MC, should be a 32bits limited address space */
3864        WREG32(R_000148_MC_FB_LOCATION,
3865                S_000148_MC_FB_START(rdev->mc.vram_start >> 16) |
3866                S_000148_MC_FB_TOP(rdev->mc.vram_end >> 16));
3867        r100_mc_resume(rdev, &save);
3868}
3869
3870static void r100_clock_startup(struct radeon_device *rdev)
3871{
3872        u32 tmp;
3873
3874        if (radeon_dynclks != -1 && radeon_dynclks)
3875                radeon_legacy_set_clock_gating(rdev, 1);
3876        /* We need to force on some of the block */
3877        tmp = RREG32_PLL(R_00000D_SCLK_CNTL);
3878        tmp |= S_00000D_FORCE_CP(1) | S_00000D_FORCE_VIP(1);
3879        if ((rdev->family == CHIP_RV250) || (rdev->family == CHIP_RV280))
3880                tmp |= S_00000D_FORCE_DISP1(1) | S_00000D_FORCE_DISP2(1);
3881        WREG32_PLL(R_00000D_SCLK_CNTL, tmp);
3882}
3883
3884static int r100_startup(struct radeon_device *rdev)
3885{
3886        int r;
3887
3888        /* set common regs */
3889        r100_set_common_regs(rdev);
3890        /* program mc */
3891        r100_mc_program(rdev);
3892        /* Resume clock */
3893        r100_clock_startup(rdev);
3894        /* Initialize GART (initialize after TTM so we can allocate
3895         * memory through TTM but finalize after TTM) */
3896        r100_enable_bm(rdev);
3897        if (rdev->flags & RADEON_IS_PCI) {
3898                r = r100_pci_gart_enable(rdev);
3899                if (r)
3900                        return r;
3901        }
3902
3903        /* allocate wb buffer */
3904        r = radeon_wb_init(rdev);
3905        if (r)
3906                return r;
3907
3908        r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
3909        if (r) {
3910                dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
3911                return r;
3912        }
3913
3914        /* Enable IRQ */
3915        if (!rdev->irq.installed) {
3916                r = radeon_irq_kms_init(rdev);
3917                if (r)
3918                        return r;
3919        }
3920
3921        r100_irq_set(rdev);
3922        rdev->config.r100.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
3923        /* 1M ring buffer */
3924        r = r100_cp_init(rdev, 1024 * 1024);
3925        if (r) {
3926                dev_err(rdev->dev, "failed initializing CP (%d).\n", r);
3927                return r;
3928        }
3929
3930        r = radeon_ib_pool_init(rdev);
3931        if (r) {
3932                dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
3933                return r;
3934        }
3935
3936        return 0;
3937}
3938
3939int r100_resume(struct radeon_device *rdev)
3940{
3941        int r;
3942
3943        /* Make sur GART are not working */
3944        if (rdev->flags & RADEON_IS_PCI)
3945                r100_pci_gart_disable(rdev);
3946        /* Resume clock before doing reset */
3947        r100_clock_startup(rdev);
3948        /* Reset gpu before posting otherwise ATOM will enter infinite loop */
3949        if (radeon_asic_reset(rdev)) {
3950                dev_warn(rdev->dev, "GPU reset failed ! (0xE40=0x%08X, 0x7C0=0x%08X)\n",
3951                        RREG32(R_000E40_RBBM_STATUS),
3952                        RREG32(R_0007C0_CP_STAT));
3953        }
3954        /* post */
3955        radeon_combios_asic_init(rdev->ddev);
3956        /* Resume clock after posting */
3957        r100_clock_startup(rdev);
3958        /* Initialize surface registers */
3959        radeon_surface_init(rdev);
3960
3961        rdev->accel_working = true;
3962        r = r100_startup(rdev);
3963        if (r) {
3964                rdev->accel_working = false;
3965        }
3966        return r;
3967}
3968
3969int r100_suspend(struct radeon_device *rdev)
3970{
3971        radeon_pm_suspend(rdev);
3972        r100_cp_disable(rdev);
3973        radeon_wb_disable(rdev);
3974        r100_irq_disable(rdev);
3975        if (rdev->flags & RADEON_IS_PCI)
3976                r100_pci_gart_disable(rdev);
3977        return 0;
3978}
3979
3980void r100_fini(struct radeon_device *rdev)
3981{
3982        radeon_pm_fini(rdev);
3983        r100_cp_fini(rdev);
3984        radeon_wb_fini(rdev);
3985        radeon_ib_pool_fini(rdev);
3986        radeon_gem_fini(rdev);
3987        if (rdev->flags & RADEON_IS_PCI)
3988                r100_pci_gart_fini(rdev);
3989        radeon_agp_fini(rdev);
3990        radeon_irq_kms_fini(rdev);
3991        radeon_fence_driver_fini(rdev);
3992        radeon_bo_fini(rdev);
3993        radeon_atombios_fini(rdev);
3994        kfree(rdev->bios);
3995        rdev->bios = NULL;
3996}
3997
3998/*
3999 * Due to how kexec works, it can leave the hw fully initialised when it
4000 * boots the new kernel. However doing our init sequence with the CP and
4001 * WB stuff setup causes GPU hangs on the RN50 at least. So at startup
4002 * do some quick sanity checks and restore sane values to avoid this
4003 * problem.
4004 */
4005void r100_restore_sanity(struct radeon_device *rdev)
4006{
4007        u32 tmp;
4008
4009        tmp = RREG32(RADEON_CP_CSQ_CNTL);
4010        if (tmp) {
4011                WREG32(RADEON_CP_CSQ_CNTL, 0);
4012        }
4013        tmp = RREG32(RADEON_CP_RB_CNTL);
4014        if (tmp) {
4015                WREG32(RADEON_CP_RB_CNTL, 0);
4016        }
4017        tmp = RREG32(RADEON_SCRATCH_UMSK);
4018        if (tmp) {
4019                WREG32(RADEON_SCRATCH_UMSK, 0);
4020        }
4021}
4022
4023int r100_init(struct radeon_device *rdev)
4024{
4025        int r;
4026
4027        /* Register debugfs file specific to this group of asics */
4028        r100_debugfs(rdev);
4029        /* Disable VGA */
4030        r100_vga_render_disable(rdev);
4031        /* Initialize scratch registers */
4032        radeon_scratch_init(rdev);
4033        /* Initialize surface registers */
4034        radeon_surface_init(rdev);
4035        /* sanity check some register to avoid hangs like after kexec */
4036        r100_restore_sanity(rdev);
4037        /* TODO: disable VGA need to use VGA request */
4038        /* BIOS*/
4039        if (!radeon_get_bios(rdev)) {
4040                if (ASIC_IS_AVIVO(rdev))
4041                        return -EINVAL;
4042        }
4043        if (rdev->is_atom_bios) {
4044                dev_err(rdev->dev, "Expecting combios for RS400/RS480 GPU\n");
4045                return -EINVAL;
4046        } else {
4047                r = radeon_combios_init(rdev);
4048                if (r)
4049                        return r;
4050        }
4051        /* Reset gpu before posting otherwise ATOM will enter infinite loop */
4052        if (radeon_asic_reset(rdev)) {
4053                dev_warn(rdev->dev,
4054                        "GPU reset failed ! (0xE40=0x%08X, 0x7C0=0x%08X)\n",
4055                        RREG32(R_000E40_RBBM_STATUS),
4056                        RREG32(R_0007C0_CP_STAT));
4057        }
4058        /* check if cards are posted or not */
4059        if (radeon_boot_test_post_card(rdev) == false)
4060                return -EINVAL;
4061        /* Set asic errata */
4062        r100_errata(rdev);
4063        /* Initialize clocks */
4064        radeon_get_clock_info(rdev->ddev);
4065        /* initialize AGP */
4066        if (rdev->flags & RADEON_IS_AGP) {
4067                r = radeon_agp_init(rdev);
4068                if (r) {
4069                        radeon_agp_disable(rdev);
4070                }
4071        }
4072        /* initialize VRAM */
4073        r100_mc_init(rdev);
4074        /* Fence driver */
4075        r = radeon_fence_driver_init(rdev);
4076        if (r)
4077                return r;
4078        /* Memory manager */
4079        r = radeon_bo_init(rdev);
4080        if (r)
4081                return r;
4082        if (rdev->flags & RADEON_IS_PCI) {
4083                r = r100_pci_gart_init(rdev);
4084                if (r)
4085                        return r;
4086        }
4087        r100_set_safe_registers(rdev);
4088
4089        /* Initialize power management */
4090        radeon_pm_init(rdev);
4091
4092        rdev->accel_working = true;
4093        r = r100_startup(rdev);
4094        if (r) {
4095                /* Somethings want wront with the accel init stop accel */
4096                dev_err(rdev->dev, "Disabling GPU acceleration\n");
4097                r100_cp_fini(rdev);
4098                radeon_wb_fini(rdev);
4099                radeon_ib_pool_fini(rdev);
4100                radeon_irq_kms_fini(rdev);
4101                if (rdev->flags & RADEON_IS_PCI)
4102                        r100_pci_gart_fini(rdev);
4103                rdev->accel_working = false;
4104        }
4105        return 0;
4106}
4107
4108uint32_t r100_mm_rreg_slow(struct radeon_device *rdev, uint32_t reg)
4109{
4110        unsigned long flags;
4111        uint32_t ret;
4112
4113        spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
4114        writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
4115        ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
4116        spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
4117        return ret;
4118}
4119
4120void r100_mm_wreg_slow(struct radeon_device *rdev, uint32_t reg, uint32_t v)
4121{
4122        unsigned long flags;
4123
4124        spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
4125        writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
4126        writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
4127        spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
4128}
4129
4130u32 r100_io_rreg(struct radeon_device *rdev, u32 reg)
4131{
4132        if (reg < rdev->rio_mem_size)
4133                return ioread32(rdev->rio_mem + reg);
4134        else {
4135                iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
4136                return ioread32(rdev->rio_mem + RADEON_MM_DATA);
4137        }
4138}
4139
4140void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v)
4141{
4142        if (reg < rdev->rio_mem_size)
4143                iowrite32(v, rdev->rio_mem + reg);
4144        else {
4145                iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
4146                iowrite32(v, rdev->rio_mem + RADEON_MM_DATA);
4147        }
4148}
4149