/* linux/drivers/gpu/drm/radeon/r100.c */
   1/*
   2 * Copyright 2008 Advanced Micro Devices, Inc.
   3 * Copyright 2008 Red Hat Inc.
   4 * Copyright 2009 Jerome Glisse.
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a
   7 * copy of this software and associated documentation files (the "Software"),
   8 * to deal in the Software without restriction, including without limitation
   9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10 * and/or sell copies of the Software, and to permit persons to whom the
  11 * Software is furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  22 * OTHER DEALINGS IN THE SOFTWARE.
  23 *
  24 * Authors: Dave Airlie
  25 *          Alex Deucher
  26 *          Jerome Glisse
  27 */
  28#include <linux/seq_file.h>
  29#include <linux/slab.h>
  30#include <drm/drmP.h>
  31#include <drm/radeon_drm.h>
  32#include "radeon_reg.h"
  33#include "radeon.h"
  34#include "radeon_asic.h"
  35#include "r100d.h"
  36#include "rs100d.h"
  37#include "rv200d.h"
  38#include "rv250d.h"
  39#include "atom.h"
  40
  41#include <linux/firmware.h>
  42#include <linux/module.h>
  43
  44#include "r100_reg_safe.h"
  45#include "rn50_reg_safe.h"
  46
  47/* Firmware Names */
  48#define FIRMWARE_R100           "radeon/R100_cp.bin"
  49#define FIRMWARE_R200           "radeon/R200_cp.bin"
  50#define FIRMWARE_R300           "radeon/R300_cp.bin"
  51#define FIRMWARE_R420           "radeon/R420_cp.bin"
  52#define FIRMWARE_RS690          "radeon/RS690_cp.bin"
  53#define FIRMWARE_RS600          "radeon/RS600_cp.bin"
  54#define FIRMWARE_R520           "radeon/R520_cp.bin"
  55
  56MODULE_FIRMWARE(FIRMWARE_R100);
  57MODULE_FIRMWARE(FIRMWARE_R200);
  58MODULE_FIRMWARE(FIRMWARE_R300);
  59MODULE_FIRMWARE(FIRMWARE_R420);
  60MODULE_FIRMWARE(FIRMWARE_RS690);
  61MODULE_FIRMWARE(FIRMWARE_RS600);
  62MODULE_FIRMWARE(FIRMWARE_R520);
  63
  64#include "r100_track.h"
  65
  66/* This files gather functions specifics to:
  67 * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280
  68 * and others in some cases.
  69 */
  70
  71static bool r100_is_in_vblank(struct radeon_device *rdev, int crtc)
  72{
  73        if (crtc == 0) {
  74                if (RREG32(RADEON_CRTC_STATUS) & RADEON_CRTC_VBLANK_CUR)
  75                        return true;
  76                else
  77                        return false;
  78        } else {
  79                if (RREG32(RADEON_CRTC2_STATUS) & RADEON_CRTC2_VBLANK_CUR)
  80                        return true;
  81                else
  82                        return false;
  83        }
  84}
  85
  86static bool r100_is_counter_moving(struct radeon_device *rdev, int crtc)
  87{
  88        u32 vline1, vline2;
  89
  90        if (crtc == 0) {
  91                vline1 = (RREG32(RADEON_CRTC_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
  92                vline2 = (RREG32(RADEON_CRTC_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
  93        } else {
  94                vline1 = (RREG32(RADEON_CRTC2_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
  95                vline2 = (RREG32(RADEON_CRTC2_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
  96        }
  97        if (vline1 != vline2)
  98                return true;
  99        else
 100                return false;
 101}
 102
 103/**
 104 * r100_wait_for_vblank - vblank wait asic callback.
 105 *
 106 * @rdev: radeon_device pointer
 107 * @crtc: crtc to wait for vblank on
 108 *
 109 * Wait for vblank on the requested crtc (r1xx-r4xx).
 110 */
 111void r100_wait_for_vblank(struct radeon_device *rdev, int crtc)
 112{
 113        unsigned i = 0;
 114
 115        if (crtc >= rdev->num_crtc)
 116                return;
 117
 118        if (crtc == 0) {
 119                if (!(RREG32(RADEON_CRTC_GEN_CNTL) & RADEON_CRTC_EN))
 120                        return;
 121        } else {
 122                if (!(RREG32(RADEON_CRTC2_GEN_CNTL) & RADEON_CRTC2_EN))
 123                        return;
 124        }
 125
 126        /* depending on when we hit vblank, we may be close to active; if so,
 127         * wait for another frame.
 128         */
 129        while (r100_is_in_vblank(rdev, crtc)) {
 130                if (i++ % 100 == 0) {
 131                        if (!r100_is_counter_moving(rdev, crtc))
 132                                break;
 133                }
 134        }
 135
 136        while (!r100_is_in_vblank(rdev, crtc)) {
 137                if (i++ % 100 == 0) {
 138                        if (!r100_is_counter_moving(rdev, crtc))
 139                                break;
 140                }
 141        }
 142}
 143
 144/**
 145 * r100_page_flip - pageflip callback.
 146 *
 147 * @rdev: radeon_device pointer
 148 * @crtc_id: crtc to cleanup pageflip on
 149 * @crtc_base: new address of the crtc (GPU MC address)
 150 *
 151 * Does the actual pageflip (r1xx-r4xx).
 152 * During vblank we take the crtc lock and wait for the update_pending
 153 * bit to go high, when it does, we release the lock, and allow the
 154 * double buffered update to take place.
 155 */
 156void r100_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base)
 157{
 158        struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
 159        u32 tmp = ((u32)crtc_base) | RADEON_CRTC_OFFSET__OFFSET_LOCK;
 160        int i;
 161
 162        /* Lock the graphics update lock */
 163        /* update the scanout addresses */
 164        WREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset, tmp);
 165
 166        /* Wait for update_pending to go high. */
 167        for (i = 0; i < rdev->usec_timeout; i++) {
 168                if (RREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset) & RADEON_CRTC_OFFSET__GUI_TRIG_OFFSET)
 169                        break;
 170                udelay(1);
 171        }
 172        DRM_DEBUG("Update pending now high. Unlocking vupdate_lock.\n");
 173
 174        /* Unlock the lock, so double-buffering can take place inside vblank */
 175        tmp &= ~RADEON_CRTC_OFFSET__OFFSET_LOCK;
 176        WREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset, tmp);
 177
 178}
 179
 180/**
 181 * r100_page_flip_pending - check if page flip is still pending
 182 *
 183 * @rdev: radeon_device pointer
 184 * @crtc_id: crtc to check
 185 *
 186 * Check if the last pagefilp is still pending (r1xx-r4xx).
 187 * Returns the current update pending status.
 188 */
 189bool r100_page_flip_pending(struct radeon_device *rdev, int crtc_id)
 190{
 191        struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
 192
 193        /* Return current update_pending status: */
 194        return !!(RREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset) &
 195                RADEON_CRTC_OFFSET__GUI_TRIG_OFFSET);
 196}
 197
/**
 * r100_pm_get_dynpm_state - look up dynpm power state callback.
 *
 * @rdev: radeon_device pointer
 *
 * Look up the optimal power state based on the
 * current state of the GPU (r1xx-r5xx).
 * Used for dynpm only. Writes the result into
 * rdev->pm.requested_power_state_index / requested_clock_mode_index
 * and updates the dynpm_can_upclock/downclock flags.
 */
void r100_pm_get_dynpm_state(struct radeon_device *rdev)
{
	int i;
	/* start optimistic; the cases below clear these when no further
	 * step in that direction is available */
	rdev->pm.dynpm_can_upclock = true;
	rdev->pm.dynpm_can_downclock = true;

	switch (rdev->pm.dynpm_planned_action) {
	case DYNPM_ACTION_MINIMUM:
		/* state 0 is the floor; can't go lower */
		rdev->pm.requested_power_state_index = 0;
		rdev->pm.dynpm_can_downclock = false;
		break;
	case DYNPM_ACTION_DOWNCLOCK:
		if (rdev->pm.current_power_state_index == 0) {
			/* already at the bottom; stay put */
			rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
			rdev->pm.dynpm_can_downclock = false;
		} else {
			if (rdev->pm.active_crtc_count > 1) {
				/* multi-head: scan upward for the first state
				 * usable with multiple displays; stop at the
				 * current index if nothing lower qualifies */
				for (i = 0; i < rdev->pm.num_power_states; i++) {
					if (rdev->pm.power_state[i].flags & RADEON_PM_STATE_SINGLE_DISPLAY_ONLY)
						continue;
					else if (i >= rdev->pm.current_power_state_index) {
						rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
						break;
					} else {
						rdev->pm.requested_power_state_index = i;
						break;
					}
				}
			} else
				rdev->pm.requested_power_state_index =
					rdev->pm.current_power_state_index - 1;
		}
		/* don't use the power state if crtcs are active and no display flag is set */
		if ((rdev->pm.active_crtc_count > 0) &&
		    (rdev->pm.power_state[rdev->pm.requested_power_state_index].clock_info[0].flags &
		     RADEON_PM_MODE_NO_DISPLAY)) {
			rdev->pm.requested_power_state_index++;
		}
		break;
	case DYNPM_ACTION_UPCLOCK:
		if (rdev->pm.current_power_state_index == (rdev->pm.num_power_states - 1)) {
			/* already at the top; stay put */
			rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
			rdev->pm.dynpm_can_upclock = false;
		} else {
			if (rdev->pm.active_crtc_count > 1) {
				/* multi-head: scan downward for the first state
				 * usable with multiple displays; stop at the
				 * current index if nothing higher qualifies */
				for (i = (rdev->pm.num_power_states - 1); i >= 0; i--) {
					if (rdev->pm.power_state[i].flags & RADEON_PM_STATE_SINGLE_DISPLAY_ONLY)
						continue;
					else if (i <= rdev->pm.current_power_state_index) {
						rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
						break;
					} else {
						rdev->pm.requested_power_state_index = i;
						break;
					}
				}
			} else
				rdev->pm.requested_power_state_index =
					rdev->pm.current_power_state_index + 1;
		}
		break;
	case DYNPM_ACTION_DEFAULT:
		rdev->pm.requested_power_state_index = rdev->pm.default_power_state_index;
		rdev->pm.dynpm_can_upclock = false;
		break;
	case DYNPM_ACTION_NONE:
	default:
		DRM_ERROR("Requested mode for not defined action\n");
		return;
	}
	/* only one clock mode per power state */
	rdev->pm.requested_clock_mode_index = 0;

	DRM_DEBUG_DRIVER("Requested: e: %d m: %d p: %d\n",
		  rdev->pm.power_state[rdev->pm.requested_power_state_index].
		  clock_info[rdev->pm.requested_clock_mode_index].sclk,
		  rdev->pm.power_state[rdev->pm.requested_power_state_index].
		  clock_info[rdev->pm.requested_clock_mode_index].mclk,
		  rdev->pm.power_state[rdev->pm.requested_power_state_index].
		  pcie_lanes);
}
 288
 289/**
 290 * r100_pm_init_profile - Initialize power profiles callback.
 291 *
 292 * @rdev: radeon_device pointer
 293 *
 294 * Initialize the power states used in profile mode
 295 * (r1xx-r3xx).
 296 * Used for profile mode only.
 297 */
 298void r100_pm_init_profile(struct radeon_device *rdev)
 299{
 300        /* default */
 301        rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_off_ps_idx = rdev->pm.default_power_state_index;
 302        rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
 303        rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_off_cm_idx = 0;
 304        rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_on_cm_idx = 0;
 305        /* low sh */
 306        rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_off_ps_idx = 0;
 307        rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_on_ps_idx = 0;
 308        rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_off_cm_idx = 0;
 309        rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_on_cm_idx = 0;
 310        /* mid sh */
 311        rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_off_ps_idx = 0;
 312        rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_on_ps_idx = 0;
 313        rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_off_cm_idx = 0;
 314        rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_on_cm_idx = 0;
 315        /* high sh */
 316        rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_off_ps_idx = 0;
 317        rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
 318        rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_off_cm_idx = 0;
 319        rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_on_cm_idx = 0;
 320        /* low mh */
 321        rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_off_ps_idx = 0;
 322        rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
 323        rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_off_cm_idx = 0;
 324        rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_on_cm_idx = 0;
 325        /* mid mh */
 326        rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_off_ps_idx = 0;
 327        rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
 328        rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_off_cm_idx = 0;
 329        rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_on_cm_idx = 0;
 330        /* high mh */
 331        rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_off_ps_idx = 0;
 332        rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
 333        rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_off_cm_idx = 0;
 334        rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_on_cm_idx = 0;
 335}
 336
/**
 * r100_pm_misc - set additional pm hw parameters callback.
 *
 * @rdev: radeon_device pointer
 *
 * Set non-clock parameters associated with a power state
 * (voltage, pcie lanes, etc.) (r1xx-r4xx).
 * Programs the voltage GPIO, the reduced-sclk and voltage-drop
 * PLL controls, HDP forcing, and finally the pcie lane count.
 */
void r100_pm_misc(struct radeon_device *rdev)
{
	int requested_index = rdev->pm.requested_power_state_index;
	struct radeon_power_state *ps = &rdev->pm.power_state[requested_index];
	struct radeon_voltage *voltage = &ps->clock_info[0].voltage;
	u32 tmp, sclk_cntl, sclk_cntl2, sclk_more_cntl;

	/* GPIO-controlled voltage: drive the pin according to whether the
	 * requested state supports the voltage drop, honoring active_high
	 * polarity in both directions */
	if ((voltage->type == VOLTAGE_GPIO) && (voltage->gpio.valid)) {
		if (ps->misc & ATOM_PM_MISCINFO_VOLTAGE_DROP_SUPPORT) {
			/* assert the GPIO */
			tmp = RREG32(voltage->gpio.reg);
			if (voltage->active_high)
				tmp |= voltage->gpio.mask;
			else
				tmp &= ~(voltage->gpio.mask);
			WREG32(voltage->gpio.reg, tmp);
			/* allow the voltage to settle before clock changes */
			if (voltage->delay)
				udelay(voltage->delay);
		} else {
			/* deassert the GPIO */
			tmp = RREG32(voltage->gpio.reg);
			if (voltage->active_high)
				tmp &= ~voltage->gpio.mask;
			else
				tmp |= voltage->gpio.mask;
			WREG32(voltage->gpio.reg, tmp);
			if (voltage->delay)
				udelay(voltage->delay);
		}
	}

	/* read-modify-write the sclk PLL controls; clear the select fields
	 * we may set below */
	sclk_cntl = RREG32_PLL(SCLK_CNTL);
	sclk_cntl2 = RREG32_PLL(SCLK_CNTL2);
	sclk_cntl2 &= ~REDUCED_SPEED_SCLK_SEL(3);
	sclk_more_cntl = RREG32_PLL(SCLK_MORE_CNTL);
	sclk_more_cntl &= ~VOLTAGE_DELAY_SEL(3);
	if (ps->misc & ATOM_PM_MISCINFO_ASIC_REDUCED_SPEED_SCLK_EN) {
		sclk_more_cntl |= REDUCED_SPEED_SCLK_EN;
		if (ps->misc & ATOM_PM_MISCINFO_DYN_CLK_3D_IDLE)
			sclk_cntl2 |= REDUCED_SPEED_SCLK_MODE;
		else
			sclk_cntl2 &= ~REDUCED_SPEED_SCLK_MODE;
		/* select the reduced-sclk divider encoding */
		if (ps->misc & ATOM_PM_MISCINFO_DYNAMIC_CLOCK_DIVIDER_BY_2)
			sclk_cntl2 |= REDUCED_SPEED_SCLK_SEL(0);
		else if (ps->misc & ATOM_PM_MISCINFO_DYNAMIC_CLOCK_DIVIDER_BY_4)
			sclk_cntl2 |= REDUCED_SPEED_SCLK_SEL(2);
	} else
		sclk_more_cntl &= ~REDUCED_SPEED_SCLK_EN;

	/* dynamic voltage drop: map the state's delay value onto the
	 * hardware's discrete delay selects */
	if (ps->misc & ATOM_PM_MISCINFO_ASIC_DYNAMIC_VOLTAGE_EN) {
		sclk_more_cntl |= IO_CG_VOLTAGE_DROP;
		if (voltage->delay) {
			sclk_more_cntl |= VOLTAGE_DROP_SYNC;
			switch (voltage->delay) {
			case 33:
				sclk_more_cntl |= VOLTAGE_DELAY_SEL(0);
				break;
			case 66:
				sclk_more_cntl |= VOLTAGE_DELAY_SEL(1);
				break;
			case 99:
				sclk_more_cntl |= VOLTAGE_DELAY_SEL(2);
				break;
			case 132:
				sclk_more_cntl |= VOLTAGE_DELAY_SEL(3);
				break;
			}
		} else
			sclk_more_cntl &= ~VOLTAGE_DROP_SYNC;
	} else
		sclk_more_cntl &= ~IO_CG_VOLTAGE_DROP;

	/* force the HDP clock unless the state allows dynamic HDP gating */
	if (ps->misc & ATOM_PM_MISCINFO_DYNAMIC_HDP_BLOCK_EN)
		sclk_cntl &= ~FORCE_HDP;
	else
		sclk_cntl |= FORCE_HDP;

	WREG32_PLL(SCLK_CNTL, sclk_cntl);
	WREG32_PLL(SCLK_CNTL2, sclk_cntl2);
	WREG32_PLL(SCLK_MORE_CNTL, sclk_more_cntl);

	/* set pcie lanes */
	if ((rdev->flags & RADEON_IS_PCIE) &&
	    !(rdev->flags & RADEON_IS_IGP) &&
	    rdev->asic->pm.set_pcie_lanes &&
	    (ps->pcie_lanes !=
	     rdev->pm.power_state[rdev->pm.current_power_state_index].pcie_lanes)) {
		radeon_set_pcie_lanes(rdev,
				      ps->pcie_lanes);
		DRM_DEBUG_DRIVER("Setting: p: %d\n", ps->pcie_lanes);
	}
}
 435
 436/**
 437 * r100_pm_prepare - pre-power state change callback.
 438 *
 439 * @rdev: radeon_device pointer
 440 *
 441 * Prepare for a power state change (r1xx-r4xx).
 442 */
 443void r100_pm_prepare(struct radeon_device *rdev)
 444{
 445        struct drm_device *ddev = rdev->ddev;
 446        struct drm_crtc *crtc;
 447        struct radeon_crtc *radeon_crtc;
 448        u32 tmp;
 449
 450        /* disable any active CRTCs */
 451        list_for_each_entry(crtc, &ddev->mode_config.crtc_list, head) {
 452                radeon_crtc = to_radeon_crtc(crtc);
 453                if (radeon_crtc->enabled) {
 454                        if (radeon_crtc->crtc_id) {
 455                                tmp = RREG32(RADEON_CRTC2_GEN_CNTL);
 456                                tmp |= RADEON_CRTC2_DISP_REQ_EN_B;
 457                                WREG32(RADEON_CRTC2_GEN_CNTL, tmp);
 458                        } else {
 459                                tmp = RREG32(RADEON_CRTC_GEN_CNTL);
 460                                tmp |= RADEON_CRTC_DISP_REQ_EN_B;
 461                                WREG32(RADEON_CRTC_GEN_CNTL, tmp);
 462                        }
 463                }
 464        }
 465}
 466
 467/**
 468 * r100_pm_finish - post-power state change callback.
 469 *
 470 * @rdev: radeon_device pointer
 471 *
 472 * Clean up after a power state change (r1xx-r4xx).
 473 */
 474void r100_pm_finish(struct radeon_device *rdev)
 475{
 476        struct drm_device *ddev = rdev->ddev;
 477        struct drm_crtc *crtc;
 478        struct radeon_crtc *radeon_crtc;
 479        u32 tmp;
 480
 481        /* enable any active CRTCs */
 482        list_for_each_entry(crtc, &ddev->mode_config.crtc_list, head) {
 483                radeon_crtc = to_radeon_crtc(crtc);
 484                if (radeon_crtc->enabled) {
 485                        if (radeon_crtc->crtc_id) {
 486                                tmp = RREG32(RADEON_CRTC2_GEN_CNTL);
 487                                tmp &= ~RADEON_CRTC2_DISP_REQ_EN_B;
 488                                WREG32(RADEON_CRTC2_GEN_CNTL, tmp);
 489                        } else {
 490                                tmp = RREG32(RADEON_CRTC_GEN_CNTL);
 491                                tmp &= ~RADEON_CRTC_DISP_REQ_EN_B;
 492                                WREG32(RADEON_CRTC_GEN_CNTL, tmp);
 493                        }
 494                }
 495        }
 496}
 497
 498/**
 499 * r100_gui_idle - gui idle callback.
 500 *
 501 * @rdev: radeon_device pointer
 502 *
 503 * Check of the GUI (2D/3D engines) are idle (r1xx-r5xx).
 504 * Returns true if idle, false if not.
 505 */
 506bool r100_gui_idle(struct radeon_device *rdev)
 507{
 508        if (RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_ACTIVE)
 509                return false;
 510        else
 511                return true;
 512}
 513
 514/* hpd for digital panel detect/disconnect */
 515/**
 516 * r100_hpd_sense - hpd sense callback.
 517 *
 518 * @rdev: radeon_device pointer
 519 * @hpd: hpd (hotplug detect) pin
 520 *
 521 * Checks if a digital monitor is connected (r1xx-r4xx).
 522 * Returns true if connected, false if not connected.
 523 */
 524bool r100_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd)
 525{
 526        bool connected = false;
 527
 528        switch (hpd) {
 529        case RADEON_HPD_1:
 530                if (RREG32(RADEON_FP_GEN_CNTL) & RADEON_FP_DETECT_SENSE)
 531                        connected = true;
 532                break;
 533        case RADEON_HPD_2:
 534                if (RREG32(RADEON_FP2_GEN_CNTL) & RADEON_FP2_DETECT_SENSE)
 535                        connected = true;
 536                break;
 537        default:
 538                break;
 539        }
 540        return connected;
 541}
 542
 543/**
 544 * r100_hpd_set_polarity - hpd set polarity callback.
 545 *
 546 * @rdev: radeon_device pointer
 547 * @hpd: hpd (hotplug detect) pin
 548 *
 549 * Set the polarity of the hpd pin (r1xx-r4xx).
 550 */
 551void r100_hpd_set_polarity(struct radeon_device *rdev,
 552                           enum radeon_hpd_id hpd)
 553{
 554        u32 tmp;
 555        bool connected = r100_hpd_sense(rdev, hpd);
 556
 557        switch (hpd) {
 558        case RADEON_HPD_1:
 559                tmp = RREG32(RADEON_FP_GEN_CNTL);
 560                if (connected)
 561                        tmp &= ~RADEON_FP_DETECT_INT_POL;
 562                else
 563                        tmp |= RADEON_FP_DETECT_INT_POL;
 564                WREG32(RADEON_FP_GEN_CNTL, tmp);
 565                break;
 566        case RADEON_HPD_2:
 567                tmp = RREG32(RADEON_FP2_GEN_CNTL);
 568                if (connected)
 569                        tmp &= ~RADEON_FP2_DETECT_INT_POL;
 570                else
 571                        tmp |= RADEON_FP2_DETECT_INT_POL;
 572                WREG32(RADEON_FP2_GEN_CNTL, tmp);
 573                break;
 574        default:
 575                break;
 576        }
 577}
 578
 579/**
 580 * r100_hpd_init - hpd setup callback.
 581 *
 582 * @rdev: radeon_device pointer
 583 *
 584 * Setup the hpd pins used by the card (r1xx-r4xx).
 585 * Set the polarity, and enable the hpd interrupts.
 586 */
 587void r100_hpd_init(struct radeon_device *rdev)
 588{
 589        struct drm_device *dev = rdev->ddev;
 590        struct drm_connector *connector;
 591        unsigned enable = 0;
 592
 593        list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
 594                struct radeon_connector *radeon_connector = to_radeon_connector(connector);
 595                enable |= 1 << radeon_connector->hpd.hpd;
 596                radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd);
 597        }
 598        radeon_irq_kms_enable_hpd(rdev, enable);
 599}
 600
 601/**
 602 * r100_hpd_fini - hpd tear down callback.
 603 *
 604 * @rdev: radeon_device pointer
 605 *
 606 * Tear down the hpd pins used by the card (r1xx-r4xx).
 607 * Disable the hpd interrupts.
 608 */
 609void r100_hpd_fini(struct radeon_device *rdev)
 610{
 611        struct drm_device *dev = rdev->ddev;
 612        struct drm_connector *connector;
 613        unsigned disable = 0;
 614
 615        list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
 616                struct radeon_connector *radeon_connector = to_radeon_connector(connector);
 617                disable |= 1 << radeon_connector->hpd.hpd;
 618        }
 619        radeon_irq_kms_disable_hpd(rdev, disable);
 620}
 621
 622/*
 623 * PCI GART
 624 */
/**
 * r100_pci_gart_tlb_flush - flush the PCI GART TLB.
 *
 * @rdev: radeon_device pointer
 *
 * Intentionally a no-op on r100-class hardware; kept so the common
 * gart code has a tlb_flush callback to invoke.
 */
void r100_pci_gart_tlb_flush(struct radeon_device *rdev)
{
	/* TODO: can we do somethings here ? */
	/* It seems hw only cache one entry so we should discard this
	 * entry otherwise if first GPU GART read hit this entry it
	 * could end up in wrong address. */
}
 632
 633int r100_pci_gart_init(struct radeon_device *rdev)
 634{
 635        int r;
 636
 637        if (rdev->gart.ptr) {
 638                WARN(1, "R100 PCI GART already initialized\n");
 639                return 0;
 640        }
 641        /* Initialize common gart structure */
 642        r = radeon_gart_init(rdev);
 643        if (r)
 644                return r;
 645        rdev->gart.table_size = rdev->gart.num_gpu_pages * 4;
 646        rdev->asic->gart.tlb_flush = &r100_pci_gart_tlb_flush;
 647        rdev->asic->gart.set_page = &r100_pci_gart_set_page;
 648        return radeon_gart_table_ram_alloc(rdev);
 649}
 650
 651int r100_pci_gart_enable(struct radeon_device *rdev)
 652{
 653        uint32_t tmp;
 654
 655        /* discard memory request outside of configured range */
 656        tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
 657        WREG32(RADEON_AIC_CNTL, tmp);
 658        /* set address range for PCI address translate */
 659        WREG32(RADEON_AIC_LO_ADDR, rdev->mc.gtt_start);
 660        WREG32(RADEON_AIC_HI_ADDR, rdev->mc.gtt_end);
 661        /* set PCI GART page-table base address */
 662        WREG32(RADEON_AIC_PT_BASE, rdev->gart.table_addr);
 663        tmp = RREG32(RADEON_AIC_CNTL) | RADEON_PCIGART_TRANSLATE_EN;
 664        WREG32(RADEON_AIC_CNTL, tmp);
 665        r100_pci_gart_tlb_flush(rdev);
 666        DRM_INFO("PCI GART of %uM enabled (table at 0x%016llX).\n",
 667                 (unsigned)(rdev->mc.gtt_size >> 20),
 668                 (unsigned long long)rdev->gart.table_addr);
 669        rdev->gart.ready = true;
 670        return 0;
 671}
 672
 673void r100_pci_gart_disable(struct radeon_device *rdev)
 674{
 675        uint32_t tmp;
 676
 677        /* discard memory request outside of configured range */
 678        tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
 679        WREG32(RADEON_AIC_CNTL, tmp & ~RADEON_PCIGART_TRANSLATE_EN);
 680        WREG32(RADEON_AIC_LO_ADDR, 0);
 681        WREG32(RADEON_AIC_HI_ADDR, 0);
 682}
 683
 684void r100_pci_gart_set_page(struct radeon_device *rdev, unsigned i,
 685                            uint64_t addr, uint32_t flags)
 686{
 687        u32 *gtt = rdev->gart.ptr;
 688        gtt[i] = cpu_to_le32(lower_32_bits(addr));
 689}
 690
/* Tear down the PCI GART: release the common gart bookkeeping, disable
 * translation in hardware, then free the page table RAM.
 */
void r100_pci_gart_fini(struct radeon_device *rdev)
{
	radeon_gart_fini(rdev);
	r100_pci_gart_disable(rdev);
	radeon_gart_table_ram_free(rdev);
}
 697
 698int r100_irq_set(struct radeon_device *rdev)
 699{
 700        uint32_t tmp = 0;
 701
 702        if (!rdev->irq.installed) {
 703                WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
 704                WREG32(R_000040_GEN_INT_CNTL, 0);
 705                return -EINVAL;
 706        }
 707        if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
 708                tmp |= RADEON_SW_INT_ENABLE;
 709        }
 710        if (rdev->irq.crtc_vblank_int[0] ||
 711            atomic_read(&rdev->irq.pflip[0])) {
 712                tmp |= RADEON_CRTC_VBLANK_MASK;
 713        }
 714        if (rdev->irq.crtc_vblank_int[1] ||
 715            atomic_read(&rdev->irq.pflip[1])) {
 716                tmp |= RADEON_CRTC2_VBLANK_MASK;
 717        }
 718        if (rdev->irq.hpd[0]) {
 719                tmp |= RADEON_FP_DETECT_MASK;
 720        }
 721        if (rdev->irq.hpd[1]) {
 722                tmp |= RADEON_FP2_DETECT_MASK;
 723        }
 724        WREG32(RADEON_GEN_INT_CNTL, tmp);
 725        return 0;
 726}
 727
 728void r100_irq_disable(struct radeon_device *rdev)
 729{
 730        u32 tmp;
 731
 732        WREG32(R_000040_GEN_INT_CNTL, 0);
 733        /* Wait and acknowledge irq */
 734        mdelay(1);
 735        tmp = RREG32(R_000044_GEN_INT_STATUS);
 736        WREG32(R_000044_GEN_INT_STATUS, tmp);
 737}
 738
 739static uint32_t r100_irq_ack(struct radeon_device *rdev)
 740{
 741        uint32_t irqs = RREG32(RADEON_GEN_INT_STATUS);
 742        uint32_t irq_mask = RADEON_SW_INT_TEST |
 743                RADEON_CRTC_VBLANK_STAT | RADEON_CRTC2_VBLANK_STAT |
 744                RADEON_FP_DETECT_STAT | RADEON_FP2_DETECT_STAT;
 745
 746        if (irqs) {
 747                WREG32(RADEON_GEN_INT_STATUS, irqs);
 748        }
 749        return irqs & irq_mask;
 750}
 751
/**
 * r100_irq_process - interrupt handler for r1xx-class chips.
 *
 * @rdev: radeon_device pointer
 *
 * Acknowledge and dispatch pending interrupts (fence/sw, vblank on
 * both crtcs, hotplug) in a loop until the status reads clear, then
 * queue deferred hotplug work and re-arm MSI delivery if enabled.
 * Returns IRQ_HANDLED if any interrupt was serviced, IRQ_NONE otherwise.
 */
int r100_irq_process(struct radeon_device *rdev)
{
	uint32_t status, msi_rearm;
	bool queue_hotplug = false;

	status = r100_irq_ack(rdev);
	if (!status) {
		return IRQ_NONE;
	}
	/* device is going away; don't dispatch anything */
	if (rdev->shutdown) {
		return IRQ_NONE;
	}
	/* loop: new interrupts may arrive while servicing the current batch */
	while (status) {
		/* SW interrupt */
		if (status & RADEON_SW_INT_TEST) {
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
		}
		/* Vertical blank interrupts */
		if (status & RADEON_CRTC_VBLANK_STAT) {
			if (rdev->irq.crtc_vblank_int[0]) {
				drm_handle_vblank(rdev->ddev, 0);
				rdev->pm.vblank_sync = true;
				wake_up(&rdev->irq.vblank_queue);
			}
			if (atomic_read(&rdev->irq.pflip[0]))
				radeon_crtc_handle_vblank(rdev, 0);
		}
		if (status & RADEON_CRTC2_VBLANK_STAT) {
			if (rdev->irq.crtc_vblank_int[1]) {
				drm_handle_vblank(rdev->ddev, 1);
				rdev->pm.vblank_sync = true;
				wake_up(&rdev->irq.vblank_queue);
			}
			if (atomic_read(&rdev->irq.pflip[1]))
				radeon_crtc_handle_vblank(rdev, 1);
		}
		/* hotplug detect: defer the heavy work to process context */
		if (status & RADEON_FP_DETECT_STAT) {
			queue_hotplug = true;
			DRM_DEBUG("HPD1\n");
		}
		if (status & RADEON_FP2_DETECT_STAT) {
			queue_hotplug = true;
			DRM_DEBUG("HPD2\n");
		}
		status = r100_irq_ack(rdev);
	}
	if (queue_hotplug)
		schedule_work(&rdev->hotplug_work);
	/* re-arm MSI; the rearm procedure is chip-family specific */
	if (rdev->msi_enabled) {
		switch (rdev->family) {
		case CHIP_RS400:
		case CHIP_RS480:
			msi_rearm = RREG32(RADEON_AIC_CNTL) & ~RS400_MSI_REARM;
			WREG32(RADEON_AIC_CNTL, msi_rearm);
			WREG32(RADEON_AIC_CNTL, msi_rearm | RS400_MSI_REARM);
			break;
		default:
			WREG32(RADEON_MSI_REARM_EN, RV370_MSI_REARM_EN);
			break;
		}
	}
	return IRQ_HANDLED;
}
 815
 816u32 r100_get_vblank_counter(struct radeon_device *rdev, int crtc)
 817{
 818        if (crtc == 0)
 819                return RREG32(RADEON_CRTC_CRNT_FRAME);
 820        else
 821                return RREG32(RADEON_CRTC2_CRNT_FRAME);
 822}
 823
 824/**
 825 * r100_ring_hdp_flush - flush Host Data Path via the ring buffer
 826 * rdev: radeon device structure
 827 * ring: ring buffer struct for emitting packets
 828 */
 829static void r100_ring_hdp_flush(struct radeon_device *rdev, struct radeon_ring *ring)
 830{
 831        radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
 832        radeon_ring_write(ring, rdev->config.r100.hdp_cntl |
 833                                RADEON_HDP_READ_BUFFER_INVALIDATE);
 834        radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
 835        radeon_ring_write(ring, rdev->config.r100.hdp_cntl);
 836}
 837
/* Whoever calls radeon_fence_emit must have called ring_lock and asked
 * for enough space (today the callers are IB schedule and buffer move).
 *
 * Emits cache flushes, an idle wait and an HDP flush, then writes the
 * fence sequence number to the fence scratch register and fires the SW
 * interrupt so the fence can be processed. */
void r100_fence_ring_emit(struct radeon_device *rdev,
                          struct radeon_fence *fence)
{
        struct radeon_ring *ring = &rdev->ring[fence->ring];

        /* We have to make sure that caches are flushed before
         * CPU might read something from VRAM. */
        radeon_ring_write(ring, PACKET0(RADEON_RB3D_DSTCACHE_CTLSTAT, 0));
        radeon_ring_write(ring, RADEON_RB3D_DC_FLUSH_ALL);
        radeon_ring_write(ring, PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 0));
        radeon_ring_write(ring, RADEON_RB3D_ZC_FLUSH_ALL);
        /* Wait until IDLE & CLEAN */
        radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0));
        radeon_ring_write(ring, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN);
        r100_ring_hdp_flush(rdev, ring);
        /* Emit fence sequence & fire IRQ */
        radeon_ring_write(ring, PACKET0(rdev->fence_drv[fence->ring].scratch_reg, 0));
        radeon_ring_write(ring, fence->seq);
        radeon_ring_write(ring, PACKET0(RADEON_GEN_INT_STATUS, 0));
        radeon_ring_write(ring, RADEON_SW_INT_FIRE);
}
 861
/* Semaphore stub for the asic function table: these chips have a single
 * ring and no semaphore hardware, so reaching this is a driver bug. */
bool r100_semaphore_ring_emit(struct radeon_device *rdev,
                              struct radeon_ring *ring,
                              struct radeon_semaphore *semaphore,
                              bool emit_wait)
{
        /* Unused on older asics, since we don't have semaphores or multiple rings */
        BUG();
        return false;
}
 871
/* Copy num_gpu_pages GPU pages from src_offset to dst_offset using 2D
 * blitter packets (PACKET3_BITBLT_MULTI), up to 8191 pages per packet,
 * then flush the 2D cache, wait for idle and optionally emit a fence.
 * Returns 0 on success or a negative error code. */
int r100_copy_blit(struct radeon_device *rdev,
                   uint64_t src_offset,
                   uint64_t dst_offset,
                   unsigned num_gpu_pages,
                   struct radeon_fence **fence)
{
        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        uint32_t cur_pages;
        uint32_t stride_bytes = RADEON_GPU_PAGE_SIZE;
        uint32_t pitch;
        uint32_t stride_pixels;
        unsigned ndw;
        int num_loops;
        int r = 0;

        /* radeon limited to 16k stride */
        stride_bytes &= 0x3fff;
        /* radeon pitch is /64 */
        pitch = stride_bytes / 64;
        stride_pixels = stride_bytes / 4;
        num_loops = DIV_ROUND_UP(num_gpu_pages, 8191);

        /* Ask for enough room for blit + flush + fence */
        ndw = 64 + (10 * num_loops);
        r = radeon_ring_lock(rdev, ring, ndw);
        if (r) {
                DRM_ERROR("radeon: moving bo (%d) asking for %u dw.\n", r, ndw);
                return -EINVAL;
        }
        while (num_gpu_pages > 0) {
                cur_pages = num_gpu_pages;
                if (cur_pages > 8191) {
                        cur_pages = 8191;
                }
                num_gpu_pages -= cur_pages;

                /* pages are in Y direction - height
                   page width in X direction - width */
                radeon_ring_write(ring, PACKET3(PACKET3_BITBLT_MULTI, 8));
                radeon_ring_write(ring,
                                  RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
                                  RADEON_GMC_DST_PITCH_OFFSET_CNTL |
                                  RADEON_GMC_SRC_CLIPPING |
                                  RADEON_GMC_DST_CLIPPING |
                                  RADEON_GMC_BRUSH_NONE |
                                  (RADEON_COLOR_FORMAT_ARGB8888 << 8) |
                                  RADEON_GMC_SRC_DATATYPE_COLOR |
                                  RADEON_ROP3_S |
                                  RADEON_DP_SRC_SOURCE_MEMORY |
                                  RADEON_GMC_CLR_CMP_CNTL_DIS |
                                  RADEON_GMC_WR_MSK_DIS);
                /* pitch in 64-byte units (bits 22+), offset in KB units */
                radeon_ring_write(ring, (pitch << 22) | (src_offset >> 10));
                radeon_ring_write(ring, (pitch << 22) | (dst_offset >> 10));
                radeon_ring_write(ring, (0x1fff) | (0x1fff << 16));
                radeon_ring_write(ring, 0);
                radeon_ring_write(ring, (0x1fff) | (0x1fff << 16));
                /* NOTE(review): the next two words are written as the
                 * remaining page count after this chunk - presumably the
                 * blit's Y start coordinates; confirm against the
                 * BITBLT_MULTI packet layout */
                radeon_ring_write(ring, num_gpu_pages);
                radeon_ring_write(ring, num_gpu_pages);
                radeon_ring_write(ring, cur_pages | (stride_pixels << 16));
        }
        /* flush 2D destination cache and wait for engines before signalling */
        radeon_ring_write(ring, PACKET0(RADEON_DSTCACHE_CTLSTAT, 0));
        radeon_ring_write(ring, RADEON_RB2D_DC_FLUSH_ALL);
        radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0));
        radeon_ring_write(ring,
                          RADEON_WAIT_2D_IDLECLEAN |
                          RADEON_WAIT_HOST_IDLECLEAN |
                          RADEON_WAIT_DMA_GUI_IDLE);
        if (fence) {
                r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX);
        }
        radeon_ring_unlock_commit(rdev, ring, false);
        return r;
}
 945
 946static int r100_cp_wait_for_idle(struct radeon_device *rdev)
 947{
 948        unsigned i;
 949        u32 tmp;
 950
 951        for (i = 0; i < rdev->usec_timeout; i++) {
 952                tmp = RREG32(R_000E40_RBBM_STATUS);
 953                if (!G_000E40_CP_CMDSTRM_BUSY(tmp)) {
 954                        return 0;
 955                }
 956                udelay(1);
 957        }
 958        return -1;
 959}
 960
 961void r100_ring_start(struct radeon_device *rdev, struct radeon_ring *ring)
 962{
 963        int r;
 964
 965        r = radeon_ring_lock(rdev, ring, 2);
 966        if (r) {
 967                return;
 968        }
 969        radeon_ring_write(ring, PACKET0(RADEON_ISYNC_CNTL, 0));
 970        radeon_ring_write(ring,
 971                          RADEON_ISYNC_ANY2D_IDLE3D |
 972                          RADEON_ISYNC_ANY3D_IDLE2D |
 973                          RADEON_ISYNC_WAIT_IDLEGUI |
 974                          RADEON_ISYNC_CPSCRATCH_IDLEGUI);
 975        radeon_ring_unlock_commit(rdev, ring, false);
 976}
 977
 978
/* Load the microcode for the CP.
 * Selects the firmware image matching rdev->family, requests it from
 * userspace and validates that its size is a multiple of 8 bytes (the
 * microcode is consumed as high/low dword pairs).  On a bogus size the
 * firmware is released and rdev->me_fw reset to NULL.
 * Returns 0 on success or a negative error code. */
static int r100_cp_init_microcode(struct radeon_device *rdev)
{
        const char *fw_name = NULL;
        int err;

        DRM_DEBUG_KMS("\n");

        if ((rdev->family == CHIP_R100) || (rdev->family == CHIP_RV100) ||
            (rdev->family == CHIP_RV200) || (rdev->family == CHIP_RS100) ||
            (rdev->family == CHIP_RS200)) {
                DRM_INFO("Loading R100 Microcode\n");
                fw_name = FIRMWARE_R100;
        } else if ((rdev->family == CHIP_R200) ||
                   (rdev->family == CHIP_RV250) ||
                   (rdev->family == CHIP_RV280) ||
                   (rdev->family == CHIP_RS300)) {
                DRM_INFO("Loading R200 Microcode\n");
                fw_name = FIRMWARE_R200;
        } else if ((rdev->family == CHIP_R300) ||
                   (rdev->family == CHIP_R350) ||
                   (rdev->family == CHIP_RV350) ||
                   (rdev->family == CHIP_RV380) ||
                   (rdev->family == CHIP_RS400) ||
                   (rdev->family == CHIP_RS480)) {
                DRM_INFO("Loading R300 Microcode\n");
                fw_name = FIRMWARE_R300;
        } else if ((rdev->family == CHIP_R420) ||
                   (rdev->family == CHIP_R423) ||
                   (rdev->family == CHIP_RV410)) {
                DRM_INFO("Loading R400 Microcode\n");
                fw_name = FIRMWARE_R420;
        } else if ((rdev->family == CHIP_RS690) ||
                   (rdev->family == CHIP_RS740)) {
                DRM_INFO("Loading RS690/RS740 Microcode\n");
                fw_name = FIRMWARE_RS690;
        } else if (rdev->family == CHIP_RS600) {
                DRM_INFO("Loading RS600 Microcode\n");
                fw_name = FIRMWARE_RS600;
        } else if ((rdev->family == CHIP_RV515) ||
                   (rdev->family == CHIP_R520) ||
                   (rdev->family == CHIP_RV530) ||
                   (rdev->family == CHIP_R580) ||
                   (rdev->family == CHIP_RV560) ||
                   (rdev->family == CHIP_RV570)) {
                DRM_INFO("Loading R500 Microcode\n");
                fw_name = FIRMWARE_R520;
        }

        /* NOTE(review): fw_name stays NULL if no branch matched -
         * presumably every family reaching this path is covered above;
         * confirm request_firmware(NULL) cannot be reached */
        err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
        if (err) {
                printk(KERN_ERR "radeon_cp: Failed to load firmware \"%s\"\n",
                       fw_name);
        } else if (rdev->me_fw->size % 8) {
                printk(KERN_ERR
                       "radeon_cp: Bogus length %zu in firmware \"%s\"\n",
                       rdev->me_fw->size, fw_name);
                err = -EINVAL;
                release_firmware(rdev->me_fw);
                rdev->me_fw = NULL;
        }
        return err;
}
1042
1043u32 r100_gfx_get_rptr(struct radeon_device *rdev,
1044                      struct radeon_ring *ring)
1045{
1046        u32 rptr;
1047
1048        if (rdev->wb.enabled)
1049                rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
1050        else
1051                rptr = RREG32(RADEON_CP_RB_RPTR);
1052
1053        return rptr;
1054}
1055
1056u32 r100_gfx_get_wptr(struct radeon_device *rdev,
1057                      struct radeon_ring *ring)
1058{
1059        u32 wptr;
1060
1061        wptr = RREG32(RADEON_CP_RB_WPTR);
1062
1063        return wptr;
1064}
1065
/* Publish the ring's write pointer to the CP.  The read-back flushes
 * the posted MMIO write so the CP sees the new WPTR promptly. */
void r100_gfx_set_wptr(struct radeon_device *rdev,
                       struct radeon_ring *ring)
{
        WREG32(RADEON_CP_RB_WPTR, ring->wptr);
        (void)RREG32(RADEON_CP_RB_WPTR);
}
1072
/* Upload the cached CP microcode (rdev->me_fw) into CP ME RAM.  The
 * firmware blob stores big-endian dwords, written as DATAH/DATAL pairs.
 * No-op if no firmware is cached. */
static void r100_cp_load_microcode(struct radeon_device *rdev)
{
        const __be32 *fw_data;
        int i, size;

        if (r100_gui_wait_for_idle(rdev)) {
                printk(KERN_WARNING "Failed to wait GUI idle while "
                       "programming pipes. Bad things might happen.\n");
        }

        if (rdev->me_fw) {
                size = rdev->me_fw->size / 4;
                fw_data = (const __be32 *)&rdev->me_fw->data[0];
                /* address set once up front; data then streamed in pairs
                 * (presumably the RAM address auto-increments - confirm) */
                WREG32(RADEON_CP_ME_RAM_ADDR, 0);
                for (i = 0; i < size; i += 2) {
                        WREG32(RADEON_CP_ME_RAM_DATAH,
                               be32_to_cpup(&fw_data[i]));
                        WREG32(RADEON_CP_ME_RAM_DATAL,
                               be32_to_cpup(&fw_data[i + 1]));
                }
        }
}
1095
/* Initialize and start the CP (command processor).
 * Loads microcode if not yet cached, sizes and programs the ring buffer
 * and fetch parameters, points the read-pointer writeback at system
 * RAM, enables bus mastering and finishes with a ring test.
 * Returns 0 on success or a negative error code. */
int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
{
        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        unsigned rb_bufsz;
        unsigned rb_blksz;
        unsigned max_fetch;
        unsigned pre_write_timer;
        unsigned pre_write_limit;
        unsigned indirect2_start;
        unsigned indirect1_start;
        uint32_t tmp;
        int r;

        /* debugfs failure is non-fatal; only diagnostics are lost */
        if (r100_debugfs_cp_init(rdev)) {
                DRM_ERROR("Failed to register debugfs file for CP !\n");
        }
        if (!rdev->me_fw) {
                r = r100_cp_init_microcode(rdev);
                if (r) {
                        DRM_ERROR("Failed to load firmware!\n");
                        return r;
                }
        }

        /* Align ring size */
        rb_bufsz = order_base_2(ring_size / 8);
        ring_size = (1 << (rb_bufsz + 1)) * 4;
        r100_cp_load_microcode(rdev);
        r = radeon_ring_init(rdev, ring, ring_size, RADEON_WB_CP_RPTR_OFFSET,
                             RADEON_CP_PACKET2);
        if (r) {
                return r;
        }
        /* Each time the cp read 1024 bytes (16 dword/quadword) update
         * the rptr copy in system ram */
        rb_blksz = 9;
        /* cp will read 128bytes at a time (4 dwords) */
        max_fetch = 1;
        ring->align_mask = 16 - 1;
        /* Write to CP_RB_WPTR will be delayed for pre_write_timer clocks */
        pre_write_timer = 64;
        /* Force CP_RB_WPTR write if written more than one time before the
         * delay expire
         */
        pre_write_limit = 0;
        /* Setup the cp cache like this (cache size is 96 dwords) :
         *      RING            0  to 15
         *      INDIRECT1       16 to 79
         *      INDIRECT2       80 to 95
         * So ring cache size is 16dwords (> (2 * max_fetch = 2 * 4dwords))
         *    indirect1 cache size is 64dwords (> (2 * max_fetch = 2 * 4dwords))
         *    indirect2 cache size is 16dwords (> (2 * max_fetch = 2 * 4dwords))
         * Idea being that most of the gpu cmd will be through indirect1 buffer
         * so it gets the bigger cache.
         */
        indirect2_start = 80;
        indirect1_start = 16;
        /* cp setup */
        WREG32(0x718, pre_write_timer | (pre_write_limit << 28));
        tmp = (REG_SET(RADEON_RB_BUFSZ, rb_bufsz) |
               REG_SET(RADEON_RB_BLKSZ, rb_blksz) |
               REG_SET(RADEON_MAX_FETCH, max_fetch));
#ifdef __BIG_ENDIAN
        tmp |= RADEON_BUF_SWAP_32BIT;
#endif
        WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_NO_UPDATE);

        /* Set ring address */
        DRM_INFO("radeon: ring at 0x%016lX\n", (unsigned long)ring->gpu_addr);
        WREG32(RADEON_CP_RB_BASE, ring->gpu_addr);
        /* Force read & write ptr to 0 */
        WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA | RADEON_RB_NO_UPDATE);
        WREG32(RADEON_CP_RB_RPTR_WR, 0);
        ring->wptr = 0;
        WREG32(RADEON_CP_RB_WPTR, ring->wptr);

        /* set the wb address whether it's enabled or not */
        WREG32(R_00070C_CP_RB_RPTR_ADDR,
                S_00070C_RB_RPTR_ADDR((rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) >> 2));
        WREG32(R_000774_SCRATCH_ADDR, rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET);

        if (rdev->wb.enabled)
                WREG32(R_000770_SCRATCH_UMSK, 0xff);
        else {
                /* no writeback: keep the CP from updating the rptr copy */
                tmp |= RADEON_RB_NO_UPDATE;
                WREG32(R_000770_SCRATCH_UMSK, 0);
        }

        WREG32(RADEON_CP_RB_CNTL, tmp);
        udelay(10);
        /* Set cp mode to bus mastering & enable cp*/
        WREG32(RADEON_CP_CSQ_MODE,
               REG_SET(RADEON_INDIRECT2_START, indirect2_start) |
               REG_SET(RADEON_INDIRECT1_START, indirect1_start));
        WREG32(RADEON_CP_RB_WPTR_DELAY, 0);
        WREG32(RADEON_CP_CSQ_MODE, 0x00004D4D);
        WREG32(RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIBM_INDBM);

        /* at this point everything should be setup correctly to enable master */
        pci_set_master(rdev->pdev);

        radeon_ring_start(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
        r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, ring);
        if (r) {
                DRM_ERROR("radeon: cp isn't working (%d).\n", r);
                return r;
        }
        ring->ready = true;
        radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

        /* grab a scratch register for saving the rptr, if supported;
         * failure is non-fatal */
        if (!ring->rptr_save_reg /* not resuming from suspend */
            && radeon_ring_supports_scratch_reg(rdev, ring)) {
                r = radeon_scratch_get(rdev, &ring->rptr_save_reg);
                if (r) {
                        DRM_ERROR("failed to get scratch reg for rptr save (%d).\n", r);
                        ring->rptr_save_reg = 0;
                }
        }
        return 0;
}
1216
/* Tear down the CP: wait for it to drain (timeout is logged but not
 * fatal), disable it, and release the rptr-save scratch register and
 * the ring buffer. */
void r100_cp_fini(struct radeon_device *rdev)
{
        if (r100_cp_wait_for_idle(rdev)) {
                DRM_ERROR("Wait for CP idle timeout, shutting down CP.\n");
        }
        /* Disable ring */
        r100_cp_disable(rdev);
        radeon_scratch_free(rdev, rdev->ring[RADEON_RING_TYPE_GFX_INDEX].rptr_save_reg);
        radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
        DRM_INFO("radeon: cp finalized\n");
}
1228
/* Stop the CP: mark the ring not ready, zero the command stream queue
 * registers and the scratch update mask, then wait for the GUI engine
 * to go idle. */
void r100_cp_disable(struct radeon_device *rdev)
{
        /* Disable ring */
        radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
        rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
        WREG32(RADEON_CP_CSQ_MODE, 0);
        WREG32(RADEON_CP_CSQ_CNTL, 0);
        WREG32(R_000770_SCRATCH_UMSK, 0);
        if (r100_gui_wait_for_idle(rdev)) {
                printk(KERN_WARNING "Failed to wait GUI idle while "
                       "programming pipes. Bad things might happen.\n");
        }
}
1242
1243/*
1244 * CS functions
1245 */
/* Relocate a pitch/offset register write in a command stream: add the
 * buffer object's GPU offset to the packet's offset field and, unless
 * userspace asked to keep its tiling flags, fold in tiling bits derived
 * from the buffer object.  Returns 0 on success or a negative error. */
int r100_reloc_pitch_offset(struct radeon_cs_parser *p,
                            struct radeon_cs_packet *pkt,
                            unsigned idx,
                            unsigned reg)
{
        int r;
        u32 tile_flags = 0;
        u32 tmp;
        struct radeon_cs_reloc *reloc;
        u32 value;

        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
        if (r) {
                DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
                          idx, reg);
                radeon_cs_dump_packet(p, pkt);
                return r;
        }

        value = radeon_get_ib_value(p, idx);
        /* low 22 bits carry the offset in 1KB units */
        tmp = value & 0x003fffff;
        tmp += (((u32)reloc->gpu_offset) >> 10);

        if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
                if (reloc->tiling_flags & RADEON_TILING_MACRO)
                        tile_flags |= RADEON_DST_TILE_MACRO;
                if (reloc->tiling_flags & RADEON_TILING_MICRO) {
                        /* microtiled surfaces are rejected as blit sources */
                        if (reg == RADEON_SRC_PITCH_OFFSET) {
                                DRM_ERROR("Cannot src blit from microtiled surface\n");
                                radeon_cs_dump_packet(p, pkt);
                                return -EINVAL;
                        }
                        tile_flags |= RADEON_DST_TILE_MICRO;
                }

                tmp |= tile_flags;
                /* keep the pitch field; replace offset and tile bits */
                p->ib.ptr[idx] = (value & 0x3fc00000) | tmp;
        } else
                p->ib.ptr[idx] = (value & 0xffc00000) | tmp;
        return 0;
}
1287
/* Parse a PACKET3 vertex-buffer-pointer load: record each array's
 * element size in the CS track state and relocate each buffer address
 * in the IB.  Buffers come in pairs of address dwords sharing one size
 * dword, with a single trailing entry when the count is odd.
 * Returns 0 on success or a negative error code. */
int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
                             struct radeon_cs_packet *pkt,
                             int idx)
{
        unsigned c, i;
        struct radeon_cs_reloc *reloc;
        struct r100_cs_track *track;
        int r = 0;
        volatile uint32_t *ib;
        u32 idx_value;

        ib = p->ib.ptr;
        track = (struct r100_cs_track *)p->track;
        /* low 5 bits of the first dword give the vertex buffer count */
        c = radeon_get_ib_value(p, idx++) & 0x1F;
        if (c > 16) {
            DRM_ERROR("Only 16 vertex buffers are allowed %d\n",
                      pkt->opcode);
            radeon_cs_dump_packet(p, pkt);
            return -EINVAL;
        }
        track->num_arrays = c;
        /* process full pairs: one size dword + two address dwords */
        for (i = 0; i < (c - 1); i+=2, idx+=3) {
                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
                if (r) {
                        DRM_ERROR("No reloc for packet3 %d\n",
                                  pkt->opcode);
                        radeon_cs_dump_packet(p, pkt);
                        return r;
                }
                idx_value = radeon_get_ib_value(p, idx);
                ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->gpu_offset);

                /* first array's element size lives in bits 8..14 */
                track->arrays[i + 0].esize = idx_value >> 8;
                track->arrays[i + 0].robj = reloc->robj;
                track->arrays[i + 0].esize &= 0x7F;
                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
                if (r) {
                        DRM_ERROR("No reloc for packet3 %d\n",
                                  pkt->opcode);
                        radeon_cs_dump_packet(p, pkt);
                        return r;
                }
                ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->gpu_offset);
                track->arrays[i + 1].robj = reloc->robj;
                /* second array's element size lives in bits 24..30 */
                track->arrays[i + 1].esize = idx_value >> 24;
                track->arrays[i + 1].esize &= 0x7F;
        }
        /* odd count: one leftover single-buffer entry */
        if (c & 1) {
                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
                if (r) {
                        DRM_ERROR("No reloc for packet3 %d\n",
                                          pkt->opcode);
                        radeon_cs_dump_packet(p, pkt);
                        return r;
                }
                idx_value = radeon_get_ib_value(p, idx);
                ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->gpu_offset);
                track->arrays[i + 0].robj = reloc->robj;
                track->arrays[i + 0].esize = idx_value >> 8;
                track->arrays[i + 0].esize &= 0x7F;
        }
        return r;
}
1351
1352int r100_cs_parse_packet0(struct radeon_cs_parser *p,
1353                          struct radeon_cs_packet *pkt,
1354                          const unsigned *auth, unsigned n,
1355                          radeon_packet0_check_t check)
1356{
1357        unsigned reg;
1358        unsigned i, j, m;
1359        unsigned idx;
1360        int r;
1361
1362        idx = pkt->idx + 1;
1363        reg = pkt->reg;
1364        /* Check that register fall into register range
1365         * determined by the number of entry (n) in the
1366         * safe register bitmap.
1367         */
1368        if (pkt->one_reg_wr) {
1369                if ((reg >> 7) > n) {
1370                        return -EINVAL;
1371                }
1372        } else {
1373                if (((reg + (pkt->count << 2)) >> 7) > n) {
1374                        return -EINVAL;
1375                }
1376        }
1377        for (i = 0; i <= pkt->count; i++, idx++) {
1378                j = (reg >> 7);
1379                m = 1 << ((reg >> 2) & 31);
1380                if (auth[j] & m) {
1381                        r = check(p, pkt, idx, reg);
1382                        if (r) {
1383                                return r;
1384                        }
1385                }
1386                if (pkt->one_reg_wr) {
1387                        if (!(auth[j] & m)) {
1388                                break;
1389                        }
1390                } else {
1391                        reg += 4;
1392                }
1393        }
1394        return 0;
1395}
1396
1397/**
1398 * r100_cs_packet_next_vline() - parse userspace VLINE packet
1399 * @parser:             parser structure holding parsing context.
1400 *
1401 * Userspace sends a special sequence for VLINE waits.
1402 * PACKET0 - VLINE_START_END + value
1403 * PACKET0 - WAIT_UNTIL +_value
1404 * RELOC (P3) - crtc_id in reloc.
1405 *
1406 * This function parses this and relocates the VLINE START END
1407 * and WAIT UNTIL packets to the correct crtc.
1408 * It also detects a switched off crtc and nulls out the
1409 * wait in that case.
1410 */
1411int r100_cs_packet_parse_vline(struct radeon_cs_parser *p)
1412{
1413        struct drm_crtc *crtc;
1414        struct radeon_crtc *radeon_crtc;
1415        struct radeon_cs_packet p3reloc, waitreloc;
1416        int crtc_id;
1417        int r;
1418        uint32_t header, h_idx, reg;
1419        volatile uint32_t *ib;
1420
1421        ib = p->ib.ptr;
1422
1423        /* parse the wait until */
1424        r = radeon_cs_packet_parse(p, &waitreloc, p->idx);
1425        if (r)
1426                return r;
1427
1428        /* check its a wait until and only 1 count */
1429        if (waitreloc.reg != RADEON_WAIT_UNTIL ||
1430            waitreloc.count != 0) {
1431                DRM_ERROR("vline wait had illegal wait until segment\n");
1432                return -EINVAL;
1433        }
1434
1435        if (radeon_get_ib_value(p, waitreloc.idx + 1) != RADEON_WAIT_CRTC_VLINE) {
1436                DRM_ERROR("vline wait had illegal wait until\n");
1437                return -EINVAL;
1438        }
1439
1440        /* jump over the NOP */
1441        r = radeon_cs_packet_parse(p, &p3reloc, p->idx + waitreloc.count + 2);
1442        if (r)
1443                return r;
1444
1445        h_idx = p->idx - 2;
1446        p->idx += waitreloc.count + 2;
1447        p->idx += p3reloc.count + 2;
1448
1449        header = radeon_get_ib_value(p, h_idx);
1450        crtc_id = radeon_get_ib_value(p, h_idx + 5);
1451        reg = R100_CP_PACKET0_GET_REG(header);
1452        crtc = drm_crtc_find(p->rdev->ddev, crtc_id);
1453        if (!crtc) {
1454                DRM_ERROR("cannot find crtc %d\n", crtc_id);
1455                return -ENOENT;
1456        }
1457        radeon_crtc = to_radeon_crtc(crtc);
1458        crtc_id = radeon_crtc->crtc_id;
1459
1460        if (!crtc->enabled) {
1461                /* if the CRTC isn't enabled - we need to nop out the wait until */
1462                ib[h_idx + 2] = PACKET2(0);
1463                ib[h_idx + 3] = PACKET2(0);
1464        } else if (crtc_id == 1) {
1465                switch (reg) {
1466                case AVIVO_D1MODE_VLINE_START_END:
1467                        header &= ~R300_CP_PACKET0_REG_MASK;
1468                        header |= AVIVO_D2MODE_VLINE_START_END >> 2;
1469                        break;
1470                case RADEON_CRTC_GUI_TRIG_VLINE:
1471                        header &= ~R300_CP_PACKET0_REG_MASK;
1472                        header |= RADEON_CRTC2_GUI_TRIG_VLINE >> 2;
1473                        break;
1474                default:
1475                        DRM_ERROR("unknown crtc reloc\n");
1476                        return -EINVAL;
1477                }
1478                ib[h_idx] = header;
1479                ib[h_idx + 3] |= RADEON_ENG_DISPLAY_SELECT_CRTC1;
1480        }
1481
1482        return 0;
1483}
1484
1485static int r100_get_vtx_size(uint32_t vtx_fmt)
1486{
1487        int vtx_size;
1488        vtx_size = 2;
1489        /* ordered according to bits in spec */
1490        if (vtx_fmt & RADEON_SE_VTX_FMT_W0)
1491                vtx_size++;
1492        if (vtx_fmt & RADEON_SE_VTX_FMT_FPCOLOR)
1493                vtx_size += 3;
1494        if (vtx_fmt & RADEON_SE_VTX_FMT_FPALPHA)
1495                vtx_size++;
1496        if (vtx_fmt & RADEON_SE_VTX_FMT_PKCOLOR)
1497                vtx_size++;
1498        if (vtx_fmt & RADEON_SE_VTX_FMT_FPSPEC)
1499                vtx_size += 3;
1500        if (vtx_fmt & RADEON_SE_VTX_FMT_FPFOG)
1501                vtx_size++;
1502        if (vtx_fmt & RADEON_SE_VTX_FMT_PKSPEC)
1503                vtx_size++;
1504        if (vtx_fmt & RADEON_SE_VTX_FMT_ST0)
1505                vtx_size += 2;
1506        if (vtx_fmt & RADEON_SE_VTX_FMT_ST1)
1507                vtx_size += 2;
1508        if (vtx_fmt & RADEON_SE_VTX_FMT_Q1)
1509                vtx_size++;
1510        if (vtx_fmt & RADEON_SE_VTX_FMT_ST2)
1511                vtx_size += 2;
1512        if (vtx_fmt & RADEON_SE_VTX_FMT_Q2)
1513                vtx_size++;
1514        if (vtx_fmt & RADEON_SE_VTX_FMT_ST3)
1515                vtx_size += 2;
1516        if (vtx_fmt & RADEON_SE_VTX_FMT_Q3)
1517                vtx_size++;
1518        if (vtx_fmt & RADEON_SE_VTX_FMT_Q0)
1519                vtx_size++;
1520        /* blend weight */
1521        if (vtx_fmt & (0x7 << 15))
1522                vtx_size += (vtx_fmt >> 15) & 0x7;
1523        if (vtx_fmt & RADEON_SE_VTX_FMT_N0)
1524                vtx_size += 3;
1525        if (vtx_fmt & RADEON_SE_VTX_FMT_XY1)
1526                vtx_size += 2;
1527        if (vtx_fmt & RADEON_SE_VTX_FMT_Z1)
1528                vtx_size++;
1529        if (vtx_fmt & RADEON_SE_VTX_FMT_W1)
1530                vtx_size++;
1531        if (vtx_fmt & RADEON_SE_VTX_FMT_N1)
1532                vtx_size++;
1533        if (vtx_fmt & RADEON_SE_VTX_FMT_Z)
1534                vtx_size++;
1535        return vtx_size;
1536}
1537
/*
 * r100_packet0_check - validate one register write from a type-0 CS packet
 *
 * @p:   command-stream parser state
 * @pkt: packet this dword belongs to
 * @idx: index of the dword inside the IB
 * @reg: register offset being written
 *
 * Registers that carry GPU addresses are patched in place in the IB with
 * the relocated buffer offset; registers that describe render state are
 * mirrored into the r100_cs_track state so later draw commands can be
 * bounds-checked against the bound buffers.  Writes to any register not
 * handled here are rejected with -EINVAL.
 */
static int r100_packet0_check(struct radeon_cs_parser *p,
			      struct radeon_cs_packet *pkt,
			      unsigned idx, unsigned reg)
{
	struct radeon_cs_reloc *reloc;
	struct r100_cs_track *track;
	volatile uint32_t *ib;
	uint32_t tmp;
	int r;
	int i, face;
	u32 tile_flags = 0;
	u32 idx_value;

	ib = p->ib.ptr;
	track = (struct r100_cs_track *)p->track;

	idx_value = radeon_get_ib_value(p, idx);

	switch (reg) {
	case RADEON_CRTC_GUI_TRIG_VLINE:
		r = r100_cs_packet_parse_vline(p);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		break;
		/* FIXME: only allow PACKET3 blit? easier to check for out of
		 * range access */
	case RADEON_DST_PITCH_OFFSET:
	case RADEON_SRC_PITCH_OFFSET:
		r = r100_reloc_pitch_offset(p, pkt, idx, reg);
		if (r)
			return r;
		break;
	case RADEON_RB3D_DEPTHOFFSET:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		/* remember the z buffer BO and patch its GPU address */
		track->zb.robj = reloc->robj;
		track->zb.offset = idx_value;
		track->zb_dirty = true;
		ib[idx] = idx_value + ((u32)reloc->gpu_offset);
		break;
	case RADEON_RB3D_COLOROFFSET:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		/* remember the color buffer BO and patch its GPU address */
		track->cb[0].robj = reloc->robj;
		track->cb[0].offset = idx_value;
		track->cb_dirty = true;
		ib[idx] = idx_value + ((u32)reloc->gpu_offset);
		break;
	case RADEON_PP_TXOFFSET_0:
	case RADEON_PP_TXOFFSET_1:
	case RADEON_PP_TXOFFSET_2:
		/* texture units are 24 bytes apart in register space */
		i = (reg - RADEON_PP_TXOFFSET_0) / 24;
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		/* fold the BO's tiling into the register unless userspace
		 * asked to manage tiling bits itself */
		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
			if (reloc->tiling_flags & RADEON_TILING_MACRO)
				tile_flags |= RADEON_TXO_MACRO_TILE;
			if (reloc->tiling_flags & RADEON_TILING_MICRO)
				tile_flags |= RADEON_TXO_MICRO_TILE_X2;

			tmp = idx_value & ~(0x7 << 2);
			tmp |= tile_flags;
			ib[idx] = tmp + ((u32)reloc->gpu_offset);
		} else
			ib[idx] = idx_value + ((u32)reloc->gpu_offset);
		track->textures[i].robj = reloc->robj;
		track->tex_dirty = true;
		break;
	case RADEON_PP_CUBIC_OFFSET_T0_0:
	case RADEON_PP_CUBIC_OFFSET_T0_1:
	case RADEON_PP_CUBIC_OFFSET_T0_2:
	case RADEON_PP_CUBIC_OFFSET_T0_3:
	case RADEON_PP_CUBIC_OFFSET_T0_4:
		/* i = cube face index for texture unit 0 */
		i = (reg - RADEON_PP_CUBIC_OFFSET_T0_0) / 4;
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		track->textures[0].cube_info[i].offset = idx_value;
		ib[idx] = idx_value + ((u32)reloc->gpu_offset);
		track->textures[0].cube_info[i].robj = reloc->robj;
		track->tex_dirty = true;
		break;
	case RADEON_PP_CUBIC_OFFSET_T1_0:
	case RADEON_PP_CUBIC_OFFSET_T1_1:
	case RADEON_PP_CUBIC_OFFSET_T1_2:
	case RADEON_PP_CUBIC_OFFSET_T1_3:
	case RADEON_PP_CUBIC_OFFSET_T1_4:
		/* i = cube face index for texture unit 1 */
		i = (reg - RADEON_PP_CUBIC_OFFSET_T1_0) / 4;
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		track->textures[1].cube_info[i].offset = idx_value;
		ib[idx] = idx_value + ((u32)reloc->gpu_offset);
		track->textures[1].cube_info[i].robj = reloc->robj;
		track->tex_dirty = true;
		break;
	case RADEON_PP_CUBIC_OFFSET_T2_0:
	case RADEON_PP_CUBIC_OFFSET_T2_1:
	case RADEON_PP_CUBIC_OFFSET_T2_2:
	case RADEON_PP_CUBIC_OFFSET_T2_3:
	case RADEON_PP_CUBIC_OFFSET_T2_4:
		/* i = cube face index for texture unit 2 */
		i = (reg - RADEON_PP_CUBIC_OFFSET_T2_0) / 4;
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		track->textures[2].cube_info[i].offset = idx_value;
		ib[idx] = idx_value + ((u32)reloc->gpu_offset);
		track->textures[2].cube_info[i].robj = reloc->robj;
		track->tex_dirty = true;
		break;
	case RADEON_RE_WIDTH_HEIGHT:
		/* render-target height; used later to size-check the
		 * color and z buffers in r100_cs_track_check() */
		track->maxy = ((idx_value >> 16) & 0x7FF);
		track->cb_dirty = true;
		track->zb_dirty = true;
		break;
	case RADEON_RB3D_COLORPITCH:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		/* pitch register carries tiling enables, not an address */
		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
			if (reloc->tiling_flags & RADEON_TILING_MACRO)
				tile_flags |= RADEON_COLOR_TILE_ENABLE;
			if (reloc->tiling_flags & RADEON_TILING_MICRO)
				tile_flags |= RADEON_COLOR_MICROTILE_ENABLE;

			tmp = idx_value & ~(0x7 << 16);
			tmp |= tile_flags;
			ib[idx] = tmp;
		} else
			ib[idx] = idx_value;

		track->cb[0].pitch = idx_value & RADEON_COLORPITCH_MASK;
		track->cb_dirty = true;
		break;
	case RADEON_RB3D_DEPTHPITCH:
		track->zb.pitch = idx_value & RADEON_DEPTHPITCH_MASK;
		track->zb_dirty = true;
		break;
	case RADEON_RB3D_CNTL:
		/* color buffer format determines bytes per pixel (cpp)
		 * used for the buffer size checks */
		switch ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f) {
		case 7:
		case 8:
		case 9:
		case 11:
		case 12:
			track->cb[0].cpp = 1;
			break;
		case 3:
		case 4:
		case 15:
			track->cb[0].cpp = 2;
			break;
		case 6:
			track->cb[0].cpp = 4;
			break;
		default:
			DRM_ERROR("Invalid color buffer format (%d) !\n",
				  ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f));
			return -EINVAL;
		}
		track->z_enabled = !!(idx_value & RADEON_Z_ENABLE);
		track->cb_dirty = true;
		track->zb_dirty = true;
		break;
	case RADEON_RB3D_ZSTENCILCNTL:
		/* depth format determines bytes per pixel of the z buffer */
		switch (idx_value & 0xf) {
		case 0:
			track->zb.cpp = 2;
			break;
		case 2:
		case 3:
		case 4:
		case 5:
		case 9:
		case 11:
			track->zb.cpp = 4;
			break;
		default:
			break;
		}
		track->zb_dirty = true;
		break;
	case RADEON_RB3D_ZPASS_ADDR:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		ib[idx] = idx_value + ((u32)reloc->gpu_offset);
		break;
	case RADEON_PP_CNTL:
		{
			/* bits 4.. enable the individual texture units */
			uint32_t temp = idx_value >> 4;
			for (i = 0; i < track->num_texture; i++)
				track->textures[i].enabled = !!(temp & (1 << i));
			track->tex_dirty = true;
		}
		break;
	case RADEON_SE_VF_CNTL:
		track->vap_vf_cntl = idx_value;
		break;
	case RADEON_SE_VTX_FMT:
		track->vtx_size = r100_get_vtx_size(idx_value);
		break;
	case RADEON_PP_TEX_SIZE_0:
	case RADEON_PP_TEX_SIZE_1:
	case RADEON_PP_TEX_SIZE_2:
		/* SIZE/PITCH register pairs are 8 bytes apart per unit */
		i = (reg - RADEON_PP_TEX_SIZE_0) / 8;
		track->textures[i].width = (idx_value & RADEON_TEX_USIZE_MASK) + 1;
		track->textures[i].height = ((idx_value & RADEON_TEX_VSIZE_MASK) >> RADEON_TEX_VSIZE_SHIFT) + 1;
		track->tex_dirty = true;
		break;
	case RADEON_PP_TEX_PITCH_0:
	case RADEON_PP_TEX_PITCH_1:
	case RADEON_PP_TEX_PITCH_2:
		i = (reg - RADEON_PP_TEX_PITCH_0) / 8;
		track->textures[i].pitch = idx_value + 32;
		track->tex_dirty = true;
		break;
	case RADEON_PP_TXFILTER_0:
	case RADEON_PP_TXFILTER_1:
	case RADEON_PP_TXFILTER_2:
		i = (reg - RADEON_PP_TXFILTER_0) / 24;
		track->textures[i].num_levels = ((idx_value & RADEON_MAX_MIP_LEVEL_MASK)
						 >> RADEON_MAX_MIP_LEVEL_SHIFT);
		/* bits 23-25 / 27-29: wrap modes; values 2 and 6 disable
		 * the power-of-two round-up on the respective axis */
		tmp = (idx_value >> 23) & 0x7;
		if (tmp == 2 || tmp == 6)
			track->textures[i].roundup_w = false;
		tmp = (idx_value >> 27) & 0x7;
		if (tmp == 2 || tmp == 6)
			track->textures[i].roundup_h = false;
		track->tex_dirty = true;
		break;
	case RADEON_PP_TXFORMAT_0:
	case RADEON_PP_TXFORMAT_1:
	case RADEON_PP_TXFORMAT_2:
		i = (reg - RADEON_PP_TXFORMAT_0) / 24;
		if (idx_value & RADEON_TXFORMAT_NON_POWER2) {
			/* NPOT textures take their width from the pitch reg */
			track->textures[i].use_pitch = 1;
		} else {
			track->textures[i].use_pitch = 0;
			track->textures[i].width = 1 << ((idx_value >> RADEON_TXFORMAT_WIDTH_SHIFT) & RADEON_TXFORMAT_WIDTH_MASK);
			track->textures[i].height = 1 << ((idx_value >> RADEON_TXFORMAT_HEIGHT_SHIFT) & RADEON_TXFORMAT_HEIGHT_MASK);
		}
		if (idx_value & RADEON_TXFORMAT_CUBIC_MAP_ENABLE)
			track->textures[i].tex_coord_type = 2;
		/* texel format determines bytes per pixel and whether the
		 * texture is DXT-compressed (block-based size checks) */
		switch ((idx_value & RADEON_TXFORMAT_FORMAT_MASK)) {
		case RADEON_TXFORMAT_I8:
		case RADEON_TXFORMAT_RGB332:
		case RADEON_TXFORMAT_Y8:
			track->textures[i].cpp = 1;
			track->textures[i].compress_format = R100_TRACK_COMP_NONE;
			break;
		case RADEON_TXFORMAT_AI88:
		case RADEON_TXFORMAT_ARGB1555:
		case RADEON_TXFORMAT_RGB565:
		case RADEON_TXFORMAT_ARGB4444:
		case RADEON_TXFORMAT_VYUY422:
		case RADEON_TXFORMAT_YVYU422:
		case RADEON_TXFORMAT_SHADOW16:
		case RADEON_TXFORMAT_LDUDV655:
		case RADEON_TXFORMAT_DUDV88:
			track->textures[i].cpp = 2;
			track->textures[i].compress_format = R100_TRACK_COMP_NONE;
			break;
		case RADEON_TXFORMAT_ARGB8888:
		case RADEON_TXFORMAT_RGBA8888:
		case RADEON_TXFORMAT_SHADOW32:
		case RADEON_TXFORMAT_LDUDUV8888:
			track->textures[i].cpp = 4;
			track->textures[i].compress_format = R100_TRACK_COMP_NONE;
			break;
		case RADEON_TXFORMAT_DXT1:
			track->textures[i].cpp = 1;
			track->textures[i].compress_format = R100_TRACK_COMP_DXT1;
			break;
		case RADEON_TXFORMAT_DXT23:
		case RADEON_TXFORMAT_DXT45:
			track->textures[i].cpp = 1;
			track->textures[i].compress_format = R100_TRACK_COMP_DXT35;
			break;
		}
		/* bits 16-19 / 20-23: log2 width/height of the 5th cube face */
		track->textures[i].cube_info[4].width = 1 << ((idx_value >> 16) & 0xf);
		track->textures[i].cube_info[4].height = 1 << ((idx_value >> 20) & 0xf);
		track->tex_dirty = true;
		break;
	case RADEON_PP_CUBIC_FACES_0:
	case RADEON_PP_CUBIC_FACES_1:
	case RADEON_PP_CUBIC_FACES_2:
		tmp = idx_value;
		i = (reg - RADEON_PP_CUBIC_FACES_0) / 4;
		/* 8 bits per face: 4-bit log2 width, 4-bit log2 height */
		for (face = 0; face < 4; face++) {
			track->textures[i].cube_info[face].width = 1 << ((tmp >> (face * 8)) & 0xf);
			track->textures[i].cube_info[face].height = 1 << ((tmp >> ((face * 8) + 4)) & 0xf);
		}
		track->tex_dirty = true;
		break;
	default:
		printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
		       reg, idx);
		return -EINVAL;
	}
	return 0;
}
1879
1880int r100_cs_track_check_pkt3_indx_buffer(struct radeon_cs_parser *p,
1881                                         struct radeon_cs_packet *pkt,
1882                                         struct radeon_bo *robj)
1883{
1884        unsigned idx;
1885        u32 value;
1886        idx = pkt->idx + 1;
1887        value = radeon_get_ib_value(p, idx + 2);
1888        if ((value + 1) > radeon_bo_size(robj)) {
1889                DRM_ERROR("[drm] Buffer too small for PACKET3 INDX_BUFFER "
1890                          "(need %u have %lu) !\n",
1891                          value + 1,
1892                          radeon_bo_size(robj));
1893                return -EINVAL;
1894        }
1895        return 0;
1896}
1897
/*
 * r100_packet3_check - validate one type-3 (opcode) CS packet
 *
 * Relocates buffer addresses referenced by the packet and, for draw
 * opcodes, snapshots the vertex-fetch state (vap_vf_cntl, vertex size,
 * immediate dword count) into the tracker, then runs
 * r100_cs_track_check() so the draw cannot access memory outside the
 * bound buffers.  HIZ/ZMASK clears are only allowed for the file that
 * currently owns hyperz.  Unknown opcodes are rejected with -EINVAL.
 */
static int r100_packet3_check(struct radeon_cs_parser *p,
			      struct radeon_cs_packet *pkt)
{
	struct radeon_cs_reloc *reloc;
	struct r100_cs_track *track;
	unsigned idx;
	volatile uint32_t *ib;
	int r;

	ib = p->ib.ptr;
	idx = pkt->idx + 1;
	track = (struct r100_cs_track *)p->track;
	switch (pkt->opcode) {
	case PACKET3_3D_LOAD_VBPNTR:
		r = r100_packet3_load_vbpntr(p, pkt, idx);
		if (r)
			return r;
		break;
	case PACKET3_INDX_BUFFER:
		/* patch the index buffer GPU address and verify the last
		 * index byte fits inside the relocated BO */
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		ib[idx+1] = radeon_get_ib_value(p, idx+1) + ((u32)reloc->gpu_offset);
		r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj);
		if (r) {
			return r;
		}
		break;
	case 0x23:
		/* 3D_RNDR_GEN_INDX_PRIM on r100/r200 */
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		ib[idx] = radeon_get_ib_value(p, idx) + ((u32)reloc->gpu_offset);
		track->num_arrays = 1;
		track->vtx_size = r100_get_vtx_size(radeon_get_ib_value(p, idx + 2));

		track->arrays[0].robj = reloc->robj;
		track->arrays[0].esize = track->vtx_size;

		track->max_indx = radeon_get_ib_value(p, idx+1);

		track->vap_vf_cntl = radeon_get_ib_value(p, idx+3);
		track->immd_dwords = pkt->count - 1;
		r = r100_cs_track_check(p->rdev, track);
		if (r)
			return r;
		break;
	case PACKET3_3D_DRAW_IMMD:
		/* PRIM_WALK field (bits 4-5) must select immediate data */
		if (((radeon_get_ib_value(p, idx + 1) >> 4) & 0x3) != 3) {
			DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
			return -EINVAL;
		}
		track->vtx_size = r100_get_vtx_size(radeon_get_ib_value(p, idx + 0));
		track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
		track->immd_dwords = pkt->count - 1;
		r = r100_cs_track_check(p->rdev, track);
		if (r)
			return r;
		break;
		/* triggers drawing using in-packet vertex data */
	case PACKET3_3D_DRAW_IMMD_2:
		if (((radeon_get_ib_value(p, idx) >> 4) & 0x3) != 3) {
			DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
			return -EINVAL;
		}
		track->vap_vf_cntl = radeon_get_ib_value(p, idx);
		track->immd_dwords = pkt->count;
		r = r100_cs_track_check(p->rdev, track);
		if (r)
			return r;
		break;
		/* triggers drawing using in-packet vertex data */
	case PACKET3_3D_DRAW_VBUF_2:
		track->vap_vf_cntl = radeon_get_ib_value(p, idx);
		r = r100_cs_track_check(p->rdev, track);
		if (r)
			return r;
		break;
		/* triggers drawing of vertex buffers setup elsewhere */
	case PACKET3_3D_DRAW_INDX_2:
		track->vap_vf_cntl = radeon_get_ib_value(p, idx);
		r = r100_cs_track_check(p->rdev, track);
		if (r)
			return r;
		break;
		/* triggers drawing using indices to vertex buffer */
	case PACKET3_3D_DRAW_VBUF:
		track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
		r = r100_cs_track_check(p->rdev, track);
		if (r)
			return r;
		break;
		/* triggers drawing of vertex buffers setup elsewhere */
	case PACKET3_3D_DRAW_INDX:
		track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
		r = r100_cs_track_check(p->rdev, track);
		if (r)
			return r;
		break;
		/* triggers drawing using indices to vertex buffer */
	case PACKET3_3D_CLEAR_HIZ:
	case PACKET3_3D_CLEAR_ZMASK:
		/* hyperz is exclusively owned by one file handle */
		if (p->rdev->hyperz_filp != p->filp)
			return -EINVAL;
		break;
	case PACKET3_NOP:
		break;
	default:
		DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
2018
/*
 * r100_cs_parse - verify a full command stream for r100-class asics
 *
 * Walks every packet in the IB chunk: type-0 register writes go through
 * the per-family packet0 checker (r200 or r100) gated by the
 * register-safe bitmap, type-3 packets go through r100_packet3_check(),
 * and type-2 packets are padding.  Any failure aborts the parse.
 *
 * Ownership of the tracking state transfers to p->track right after
 * allocation, so the early returns below do not leak it.
 * NOTE(review): this relies on the generic parser teardown freeing
 * p->track — presumably radeon_cs_parser_fini(); verify.
 */
int r100_cs_parse(struct radeon_cs_parser *p)
{
	struct radeon_cs_packet pkt;
	struct r100_cs_track *track;
	int r;

	track = kzalloc(sizeof(*track), GFP_KERNEL);
	if (!track)
		return -ENOMEM;
	r100_cs_track_clear(p->rdev, track);
	p->track = track;
	do {
		r = radeon_cs_packet_parse(p, &pkt, p->idx);
		if (r) {
			return r;
		}
		/* advance past the packet header + payload */
		p->idx += pkt.count + 2;
		switch (pkt.type) {
		case RADEON_PACKET_TYPE0:
			if (p->rdev->family >= CHIP_R200)
				r = r100_cs_parse_packet0(p, &pkt,
					p->rdev->config.r100.reg_safe_bm,
					p->rdev->config.r100.reg_safe_bm_size,
					&r200_packet0_check);
			else
				r = r100_cs_parse_packet0(p, &pkt,
					p->rdev->config.r100.reg_safe_bm,
					p->rdev->config.r100.reg_safe_bm_size,
					&r100_packet0_check);
			break;
		case RADEON_PACKET_TYPE2:
			break;
		case RADEON_PACKET_TYPE3:
			r = r100_packet3_check(p, &pkt);
			break;
		default:
			DRM_ERROR("Unknown packet type %d !\n",
				  pkt.type);
			return -EINVAL;
		}
		if (r)
			return r;
	} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
	return 0;
}
2064
/*
 * Dump the tracked state of one texture unit to the kernel log.
 * Called from the CS size checks when a texture fails validation so the
 * offending setup can be diagnosed from dmesg.
 */
static void r100_cs_track_texture_print(struct r100_cs_track_texture *t)
{
	DRM_ERROR("pitch                      %d\n", t->pitch);
	DRM_ERROR("use_pitch                  %d\n", t->use_pitch);
	DRM_ERROR("width                      %d\n", t->width);
	DRM_ERROR("width_11                   %d\n", t->width_11);
	DRM_ERROR("height                     %d\n", t->height);
	DRM_ERROR("height_11                  %d\n", t->height_11);
	DRM_ERROR("num levels                 %d\n", t->num_levels);
	DRM_ERROR("depth                      %d\n", t->txdepth);
	DRM_ERROR("bpp                        %d\n", t->cpp);
	DRM_ERROR("coordinate type            %d\n", t->tex_coord_type);
	DRM_ERROR("width round to power of 2  %d\n", t->roundup_w);
	DRM_ERROR("height round to power of 2 %d\n", t->roundup_h);
	DRM_ERROR("compress format            %d\n", t->compress_format);
}
2081
2082static int r100_track_compress_size(int compress_format, int w, int h)
2083{
2084        int block_width, block_height, block_bytes;
2085        int wblocks, hblocks;
2086        int min_wblocks;
2087        int sz;
2088
2089        block_width = 4;
2090        block_height = 4;
2091
2092        switch (compress_format) {
2093        case R100_TRACK_COMP_DXT1:
2094                block_bytes = 8;
2095                min_wblocks = 4;
2096                break;
2097        default:
2098        case R100_TRACK_COMP_DXT35:
2099                block_bytes = 16;
2100                min_wblocks = 2;
2101                break;
2102        }
2103
2104        hblocks = (h + block_height - 1) / block_height;
2105        wblocks = (w + block_width - 1) / block_width;
2106        if (wblocks < min_wblocks)
2107                wblocks = min_wblocks;
2108        sz = wblocks * hblocks * block_bytes;
2109        return sz;
2110}
2111
2112static int r100_cs_track_cube(struct radeon_device *rdev,
2113                              struct r100_cs_track *track, unsigned idx)
2114{
2115        unsigned face, w, h;
2116        struct radeon_bo *cube_robj;
2117        unsigned long size;
2118        unsigned compress_format = track->textures[idx].compress_format;
2119
2120        for (face = 0; face < 5; face++) {
2121                cube_robj = track->textures[idx].cube_info[face].robj;
2122                w = track->textures[idx].cube_info[face].width;
2123                h = track->textures[idx].cube_info[face].height;
2124
2125                if (compress_format) {
2126                        size = r100_track_compress_size(compress_format, w, h);
2127                } else
2128                        size = w * h;
2129                size *= track->textures[idx].cpp;
2130
2131                size += track->textures[idx].cube_info[face].offset;
2132
2133                if (size > radeon_bo_size(cube_robj)) {
2134                        DRM_ERROR("Cube texture offset greater than object size %lu %lu\n",
2135                                  size, radeon_bo_size(cube_robj));
2136                        r100_cs_track_texture_print(&track->textures[idx]);
2137                        return -1;
2138                }
2139        }
2140        return 0;
2141}
2142
/*
 * r100_cs_track_texture_check - bounds-check every enabled texture unit
 *
 * For each enabled unit the total size of all mip levels is accumulated
 * (dimensions halved per level, optionally rounded up to a power of
 * two; compressed formats sized block-wise) and checked against the
 * bound BO.  Cube maps (tex_coord_type == 2) are either validated
 * per-face via r100_cs_track_cube() or approximated as 6x the 2D size.
 *
 * Returns 0 on success, negative error code on any violation.
 */
static int r100_cs_track_texture_check(struct radeon_device *rdev,
				       struct r100_cs_track *track)
{
	struct radeon_bo *robj;
	unsigned long size;
	unsigned u, i, w, h, d;
	int ret;

	for (u = 0; u < track->num_texture; u++) {
		if (!track->textures[u].enabled)
			continue;
		/* skip units whose lookups are disabled — assumed not to
		 * touch memory (NOTE(review): verify lookup_disable
		 * semantics against the register docs) */
		if (track->textures[u].lookup_disable)
			continue;
		robj = track->textures[u].robj;
		if (robj == NULL) {
			DRM_ERROR("No texture bound to unit %u\n", u);
			return -EINVAL;
		}
		size = 0;
		for (i = 0; i <= track->textures[u].num_levels; i++) {
			if (track->textures[u].use_pitch) {
				/* width derived from the pitch register;
				 * pre-r300 pitch is in bytes, later in texels */
				if (rdev->family < CHIP_R300)
					w = (track->textures[u].pitch / track->textures[u].cpp) / (1 << i);
				else
					w = track->textures[u].pitch / (1 << i);
			} else {
				w = track->textures[u].width;
				/* rv515+ carries an 11th dimension bit */
				if (rdev->family >= CHIP_RV515)
					w |= track->textures[u].width_11;
				w = w / (1 << i);
				if (track->textures[u].roundup_w)
					w = roundup_pow_of_two(w);
			}
			h = track->textures[u].height;
			if (rdev->family >= CHIP_RV515)
				h |= track->textures[u].height_11;
			h = h / (1 << i);
			if (track->textures[u].roundup_h)
				h = roundup_pow_of_two(h);
			/* tex_coord_type 1: 3D texture — depth shrinks per
			 * level too, clamped to at least one slice */
			if (track->textures[u].tex_coord_type == 1) {
				d = (1 << track->textures[u].txdepth) / (1 << i);
				if (!d)
					d = 1;
			} else {
				d = 1;
			}
			if (track->textures[u].compress_format) {

				size += r100_track_compress_size(track->textures[u].compress_format, w, h) * d;
				/* compressed textures are block based */
			} else
				size += w * h * d;
		}
		size *= track->textures[u].cpp;

		switch (track->textures[u].tex_coord_type) {
		case 0:
		case 1:
			break;
		case 2:
			if (track->separate_cube) {
				ret = r100_cs_track_cube(rdev, track, u);
				if (ret)
					return ret;
			} else
				size *= 6;
			break;
		default:
			DRM_ERROR("Invalid texture coordinate type %u for unit "
				  "%u\n", track->textures[u].tex_coord_type, u);
			return -EINVAL;
		}
		if (size > radeon_bo_size(robj)) {
			DRM_ERROR("Texture of unit %u needs %lu bytes but is "
				  "%lu\n", u, size, radeon_bo_size(robj));
			r100_cs_track_texture_print(&track->textures[u]);
			return -EINVAL;
		}
	}
	return 0;
}
2224
2225int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
2226{
2227        unsigned i;
2228        unsigned long size;
2229        unsigned prim_walk;
2230        unsigned nverts;
2231        unsigned num_cb = track->cb_dirty ? track->num_cb : 0;
2232
2233        if (num_cb && !track->zb_cb_clear && !track->color_channel_mask &&
2234            !track->blend_read_enable)
2235                num_cb = 0;
2236
2237        for (i = 0; i < num_cb; i++) {
2238                if (track->cb[i].robj == NULL) {
2239                        DRM_ERROR("[drm] No buffer for color buffer %d !\n", i);
2240                        return -EINVAL;
2241                }
2242                size = track->cb[i].pitch * track->cb[i].cpp * track->maxy;
2243                size += track->cb[i].offset;
2244                if (size > radeon_bo_size(track->cb[i].robj)) {
2245                        DRM_ERROR("[drm] Buffer too small for color buffer %d "
2246                                  "(need %lu have %lu) !\n", i, size,
2247                                  radeon_bo_size(track->cb[i].robj));
2248                        DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n",
2249                                  i, track->cb[i].pitch, track->cb[i].cpp,
2250                                  track->cb[i].offset, track->maxy);
2251                        return -EINVAL;
2252                }
2253        }
2254        track->cb_dirty = false;
2255
2256        if (track->zb_dirty && track->z_enabled) {
2257                if (track->zb.robj == NULL) {
2258                        DRM_ERROR("[drm] No buffer for z buffer !\n");
2259                        return -EINVAL;
2260                }
2261                size = track->zb.pitch * track->zb.cpp * track->maxy;
2262                size += track->zb.offset;
2263                if (size > radeon_bo_size(track->zb.robj)) {
2264                        DRM_ERROR("[drm] Buffer too small for z buffer "
2265                                  "(need %lu have %lu) !\n", size,
2266                                  radeon_bo_size(track->zb.robj));
2267                        DRM_ERROR("[drm] zbuffer (%u %u %u %u)\n",
2268                                  track->zb.pitch, track->zb.cpp,
2269                                  track->zb.offset, track->maxy);
2270                        return -EINVAL;
2271                }
2272        }
2273        track->zb_dirty = false;
2274
2275        if (track->aa_dirty && track->aaresolve) {
2276                if (track->aa.robj == NULL) {
2277                        DRM_ERROR("[drm] No buffer for AA resolve buffer %d !\n", i);
2278                        return -EINVAL;
2279                }
2280                /* I believe the format comes from colorbuffer0. */
2281                size = track->aa.pitch * track->cb[0].cpp * track->maxy;
2282                size += track->aa.offset;
2283                if (size > radeon_bo_size(track->aa.robj)) {
2284                        DRM_ERROR("[drm] Buffer too small for AA resolve buffer %d "
2285                                  "(need %lu have %lu) !\n", i, size,
2286                                  radeon_bo_size(track->aa.robj));
2287                        DRM_ERROR("[drm] AA resolve buffer %d (%u %u %u %u)\n",
2288                                  i, track->aa.pitch, track->cb[0].cpp,
2289                                  track->aa.offset, track->maxy);
2290                        return -EINVAL;
2291                }
2292        }
2293        track->aa_dirty = false;
2294
2295        prim_walk = (track->vap_vf_cntl >> 4) & 0x3;
2296        if (track->vap_vf_cntl & (1 << 14)) {
2297                nverts = track->vap_alt_nverts;
2298        } else {
2299                nverts = (track->vap_vf_cntl >> 16) & 0xFFFF;
2300        }
2301        switch (prim_walk) {
2302        case 1:
2303                for (i = 0; i < track->num_arrays; i++) {
2304                        size = track->arrays[i].esize * track->max_indx * 4;
2305                        if (track->arrays[i].robj == NULL) {
2306                                DRM_ERROR("(PW %u) Vertex array %u no buffer "
2307                                          "bound\n", prim_walk, i);
2308                                return -EINVAL;
2309                        }
2310                        if (size > radeon_bo_size(track->arrays[i].robj)) {
2311                                dev_err(rdev->dev, "(PW %u) Vertex array %u "
2312                                        "need %lu dwords have %lu dwords\n",
2313                                        prim_walk, i, size >> 2,
2314                                        radeon_bo_size(track->arrays[i].robj)
2315                                        >> 2);
2316                                DRM_ERROR("Max indices %u\n", track->max_indx);
2317                                return -EINVAL;
2318                        }
2319                }
2320                break;
2321        case 2:
2322                for (i = 0; i < track->num_arrays; i++) {
2323                        size = track->arrays[i].esize * (nverts - 1) * 4;
2324                        if (track->arrays[i].robj == NULL) {
2325                                DRM_ERROR("(PW %u) Vertex array %u no buffer "
2326                                          "bound\n", prim_walk, i);
2327                                return -EINVAL;
2328                        }
2329                        if (size > radeon_bo_size(track->arrays[i].robj)) {
2330                                dev_err(rdev->dev, "(PW %u) Vertex array %u "
2331                                        "need %lu dwords have %lu dwords\n",
2332                                        prim_walk, i, size >> 2,
2333                                        radeon_bo_size(track->arrays[i].robj)
2334                                        >> 2);
2335                                return -EINVAL;
2336                        }
2337                }
2338                break;
2339        case 3:
2340                size = track->vtx_size * nverts;
2341                if (size != track->immd_dwords) {
2342                        DRM_ERROR("IMMD draw %u dwors but needs %lu dwords\n",
2343                                  track->immd_dwords, size);
2344                        DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n",
2345                                  nverts, track->vtx_size);
2346                        return -EINVAL;
2347                }
2348                break;
2349        default:
2350                DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n",
2351                          prim_walk);
2352                return -EINVAL;
2353        }
2354
2355        if (track->tex_dirty) {
2356                track->tex_dirty = false;
2357                return r100_cs_track_texture_check(rdev, track);
2358        }
2359        return 0;
2360}
2361
/* Reset CS tracking state to conservative, worst-case defaults before a new
 * command stream is parsed.  Limits (number of color buffers, textures,
 * max y) depend on the ASIC generation; buffer pointers are cleared and
 * sizes set to maximum so an unparsed state never under-checks.
 */
void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track)
{
        unsigned i, face;

        /* mark everything dirty so the first check validates all state */
        track->cb_dirty = true;
        track->zb_dirty = true;
        track->tex_dirty = true;
        track->aa_dirty = true;

        if (rdev->family < CHIP_R300) {
                /* r1xx/r2xx: one color buffer, 3 or 6 texture units */
                track->num_cb = 1;
                if (rdev->family <= CHIP_RS200)
                        track->num_texture = 3;
                else
                        track->num_texture = 6;
                track->maxy = 2048;
                track->separate_cube = 1;
        } else {
                /* r3xx+: 4 color buffers, 16 texture units, no separate
                 * per-face cube registers; AA resolve state only exists here */
                track->num_cb = 4;
                track->num_texture = 16;
                track->maxy = 4096;
                track->separate_cube = 0;
                track->aaresolve = false;
                track->aa.robj = NULL;
        }

        /* worst-case color buffer geometry, no BO bound */
        for (i = 0; i < track->num_cb; i++) {
                track->cb[i].robj = NULL;
                track->cb[i].pitch = 8192;
                track->cb[i].cpp = 16;
                track->cb[i].offset = 0;
        }
        track->z_enabled = true;
        track->zb.robj = NULL;
        track->zb.pitch = 8192;
        track->zb.cpp = 4;
        track->zb.offset = 0;
        track->vtx_size = 0x7F;
        track->immd_dwords = 0xFFFFFFFFUL;
        track->num_arrays = 11;
        track->max_indx = 0x00FFFFFFUL;
        for (i = 0; i < track->num_arrays; i++) {
                track->arrays[i].robj = NULL;
                track->arrays[i].esize = 0x7F;
        }
        for (i = 0; i < track->num_texture; i++) {
                track->textures[i].compress_format = R100_TRACK_COMP_NONE;
                /* NOTE(review): 16536 looks like a typo for 16384 (2^14),
                 * but it has always been this value here — confirm before
                 * changing; as a worst-case bound a larger value is safe. */
                track->textures[i].pitch = 16536;
                track->textures[i].width = 16536;
                track->textures[i].height = 16536;
                track->textures[i].width_11 = 1 << 11;
                track->textures[i].height_11 = 1 << 11;
                track->textures[i].num_levels = 12;
                if (rdev->family <= CHIP_RS200) {
                        track->textures[i].tex_coord_type = 0;
                        track->textures[i].txdepth = 0;
                } else {
                        track->textures[i].txdepth = 16;
                        track->textures[i].tex_coord_type = 1;
                }
                track->textures[i].cpp = 64;
                track->textures[i].robj = NULL;
                /* CS IB emission code makes sure texture unit are disabled */
                track->textures[i].enabled = false;
                track->textures[i].lookup_disable = false;
                track->textures[i].roundup_w = true;
                track->textures[i].roundup_h = true;
                if (track->separate_cube)
                        /* older chips track 5 extra cube faces separately */
                        for (face = 0; face < 5; face++) {
                                track->textures[i].cube_info[face].robj = NULL;
                                track->textures[i].cube_info[face].width = 16536;
                                track->textures[i].cube_info[face].height = 16536;
                                track->textures[i].cube_info[face].offset = 0;
                        }
        }
}
2438
2439/*
2440 * Global GPU functions
2441 */
2442static void r100_errata(struct radeon_device *rdev)
2443{
2444        rdev->pll_errata = 0;
2445
2446        if (rdev->family == CHIP_RV200 || rdev->family == CHIP_RS200) {
2447                rdev->pll_errata |= CHIP_ERRATA_PLL_DUMMYREADS;
2448        }
2449
2450        if (rdev->family == CHIP_RV100 ||
2451            rdev->family == CHIP_RS100 ||
2452            rdev->family == CHIP_RS200) {
2453                rdev->pll_errata |= CHIP_ERRATA_PLL_DELAY;
2454        }
2455}
2456
2457static int r100_rbbm_fifo_wait_for_entry(struct radeon_device *rdev, unsigned n)
2458{
2459        unsigned i;
2460        uint32_t tmp;
2461
2462        for (i = 0; i < rdev->usec_timeout; i++) {
2463                tmp = RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK;
2464                if (tmp >= n) {
2465                        return 0;
2466                }
2467                DRM_UDELAY(1);
2468        }
2469        return -1;
2470}
2471
2472int r100_gui_wait_for_idle(struct radeon_device *rdev)
2473{
2474        unsigned i;
2475        uint32_t tmp;
2476
2477        if (r100_rbbm_fifo_wait_for_entry(rdev, 64)) {
2478                printk(KERN_WARNING "radeon: wait for empty RBBM fifo failed !"
2479                       " Bad things might happen.\n");
2480        }
2481        for (i = 0; i < rdev->usec_timeout; i++) {
2482                tmp = RREG32(RADEON_RBBM_STATUS);
2483                if (!(tmp & RADEON_RBBM_ACTIVE)) {
2484                        return 0;
2485                }
2486                DRM_UDELAY(1);
2487        }
2488        return -1;
2489}
2490
2491int r100_mc_wait_for_idle(struct radeon_device *rdev)
2492{
2493        unsigned i;
2494        uint32_t tmp;
2495
2496        for (i = 0; i < rdev->usec_timeout; i++) {
2497                /* read MC_STATUS */
2498                tmp = RREG32(RADEON_MC_STATUS);
2499                if (tmp & RADEON_MC_IDLE) {
2500                        return 0;
2501                }
2502                DRM_UDELAY(1);
2503        }
2504        return -1;
2505}
2506
2507bool r100_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
2508{
2509        u32 rbbm_status;
2510
2511        rbbm_status = RREG32(R_000E40_RBBM_STATUS);
2512        if (!G_000E40_GUI_ACTIVE(rbbm_status)) {
2513                radeon_ring_lockup_update(rdev, ring);
2514                return false;
2515        }
2516        return radeon_ring_test_lockup(rdev, ring);
2517}
2518
2519/* required on r1xx, r2xx, r300, r(v)350, r420/r481, rs400/rs480 */
2520void r100_enable_bm(struct radeon_device *rdev)
2521{
2522        uint32_t tmp;
2523        /* Enable bus mastering */
2524        tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS;
2525        WREG32(RADEON_BUS_CNTL, tmp);
2526}
2527
/* Disable bus mastering ahead of a soft reset.  The BUS_CNTL low bits
 * are stepped down (0x44 -> 0x42 -> 0x40) with 1ms settles between each
 * write; the exact sequence and delays matter on this hardware.
 */
void r100_bm_disable(struct radeon_device *rdev)
{
        u32 tmp;

        /* disable bus mastering */
        tmp = RREG32(R_000030_BUS_CNTL);
        WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000044);
        mdelay(1);
        WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000042);
        mdelay(1);
        WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000040);
        /* NOTE(review): result discarded — presumably a posting read to
         * flush the preceding writes; confirm before removing. */
        tmp = RREG32(RADEON_BUS_CNTL);
        mdelay(1);
        /* also clear the bus-master bit in PCI config space */
        pci_clear_master(rdev->pdev);
        mdelay(1);
}
2544
/* Soft-reset the GPU after a lockup: stop the CP, disable bus mastering,
 * reset the 3D blocks, then reset the CP, and finally restore PCI state
 * and memory-controller setup.  The statement order is the documented
 * reset sequence for these ASICs — do not reorder.
 *
 * Returns 0 if the GPU was already idle or the reset succeeded,
 * -1 if the engine is still busy after the reset.
 */
int r100_asic_reset(struct radeon_device *rdev)
{
        struct r100_mc_save save;
        u32 status, tmp;
        int ret = 0;

        /* nothing to do if the GUI engine is already idle */
        status = RREG32(R_000E40_RBBM_STATUS);
        if (!G_000E40_GUI_ACTIVE(status)) {
                return 0;
        }
        /* save MC state so it can be restored after the reset */
        r100_mc_stop(rdev, &save);
        status = RREG32(R_000E40_RBBM_STATUS);
        dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
        /* stop CP */
        WREG32(RADEON_CP_CSQ_CNTL, 0);
        tmp = RREG32(RADEON_CP_RB_CNTL);
        /* force the ring read/write pointers back to zero */
        WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA);
        WREG32(RADEON_CP_RB_RPTR_WR, 0);
        WREG32(RADEON_CP_RB_WPTR, 0);
        WREG32(RADEON_CP_RB_CNTL, tmp);
        /* save PCI state */
        pci_save_state(rdev->pdev);
        /* disable bus mastering */
        r100_bm_disable(rdev);
        /* reset the render pipeline blocks first (SE/RE/PP/RB) */
        WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_SE(1) |
                                        S_0000F0_SOFT_RESET_RE(1) |
                                        S_0000F0_SOFT_RESET_PP(1) |
                                        S_0000F0_SOFT_RESET_RB(1));
        RREG32(R_0000F0_RBBM_SOFT_RESET);       /* posting read */
        mdelay(500);
        WREG32(R_0000F0_RBBM_SOFT_RESET, 0);
        mdelay(1);
        status = RREG32(R_000E40_RBBM_STATUS);
        dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
        /* reset CP */
        WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_CP(1));
        RREG32(R_0000F0_RBBM_SOFT_RESET);       /* posting read */
        mdelay(500);
        WREG32(R_0000F0_RBBM_SOFT_RESET, 0);
        mdelay(1);
        status = RREG32(R_000E40_RBBM_STATUS);
        dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
        /* restore PCI & busmastering */
        pci_restore_state(rdev->pdev);
        r100_enable_bm(rdev);
        /* Check if GPU is idle */
        if (G_000E40_SE_BUSY(status) || G_000E40_RE_BUSY(status) ||
                G_000E40_TAM_BUSY(status) || G_000E40_PB_BUSY(status)) {
                dev_err(rdev->dev, "failed to reset GPU\n");
                ret = -1;
        } else
                dev_info(rdev->dev, "GPU reset succeed\n");
        r100_mc_resume(rdev, &save);
        return ret;
}
2600
/* Program registers common to all r1xx/r2xx ASICs: quiesce capture and
 * overlay blocks, apply per-board DAC2 quirks for servers whose BIOS
 * connector table omits a wired-up VGA port, and switch power management
 * to ACPI mode.
 */
void r100_set_common_regs(struct radeon_device *rdev)
{
        struct drm_device *dev = rdev->ddev;
        bool force_dac2 = false;
        u32 tmp;

        /* set these so they don't interfere with anything */
        WREG32(RADEON_OV0_SCALE_CNTL, 0);
        WREG32(RADEON_SUBPIC_CNTL, 0);
        WREG32(RADEON_VIPH_CONTROL, 0);
        WREG32(RADEON_I2C_CNTL_1, 0);
        WREG32(RADEON_DVI_I2C_CNTL_1, 0);
        WREG32(RADEON_CAP0_TRIG_CNTL, 0);
        WREG32(RADEON_CAP1_TRIG_CNTL, 0);

        /* always set up dac2 on rn50 and some rv100 as lots
         * of servers seem to wire it up to a VGA port but
         * don't report it in the bios connector
         * table.
         */
        switch (dev->pdev->device) {
                /* RN50 */
        case 0x515e:
        case 0x5969:
                force_dac2 = true;
                break;
                /* RV100*/
        case 0x5159:
        case 0x515a:
                /* DELL triple head servers */
                if ((dev->pdev->subsystem_vendor == 0x1028 /* DELL */) &&
                    ((dev->pdev->subsystem_device == 0x016c) ||
                     (dev->pdev->subsystem_device == 0x016d) ||
                     (dev->pdev->subsystem_device == 0x016e) ||
                     (dev->pdev->subsystem_device == 0x016f) ||
                     (dev->pdev->subsystem_device == 0x0170) ||
                     (dev->pdev->subsystem_device == 0x017d) ||
                     (dev->pdev->subsystem_device == 0x017e) ||
                     (dev->pdev->subsystem_device == 0x0183) ||
                     (dev->pdev->subsystem_device == 0x018a) ||
                     (dev->pdev->subsystem_device == 0x019a)))
                        force_dac2 = true;
                break;
        }

        if (force_dac2) {
                u32 disp_hw_debug = RREG32(RADEON_DISP_HW_DEBUG);
                u32 tv_dac_cntl = RREG32(RADEON_TV_DAC_CNTL);
                u32 dac2_cntl = RREG32(RADEON_DAC_CNTL2);

                /* For CRT on DAC2, don't turn it on if BIOS didn't
                   enable it, even it's detected.
                */

                /* force it to crtc0 */
                dac2_cntl &= ~RADEON_DAC2_DAC_CLK_SEL;
                dac2_cntl |= RADEON_DAC2_DAC2_CLK_SEL;
                disp_hw_debug |= RADEON_CRT2_DISP1_SEL;

                /* set up the TV DAC */
                tv_dac_cntl &= ~(RADEON_TV_DAC_PEDESTAL |
                                 RADEON_TV_DAC_STD_MASK |
                                 RADEON_TV_DAC_RDACPD |
                                 RADEON_TV_DAC_GDACPD |
                                 RADEON_TV_DAC_BDACPD |
                                 RADEON_TV_DAC_BGADJ_MASK |
                                 RADEON_TV_DAC_DACADJ_MASK);
                /* PS2 standard, 0x58 in the BGADJ/DACADJ field */
                tv_dac_cntl |= (RADEON_TV_DAC_NBLANK |
                                RADEON_TV_DAC_NHOLD |
                                RADEON_TV_DAC_STD_PS2 |
                                (0x58 << 16));

                WREG32(RADEON_TV_DAC_CNTL, tv_dac_cntl);
                WREG32(RADEON_DISP_HW_DEBUG, disp_hw_debug);
                WREG32(RADEON_DAC_CNTL2, dac2_cntl);
        }

        /* switch PM block to ACPI mode */
        tmp = RREG32_PLL(RADEON_PLL_PWRMGT_CNTL);
        tmp &= ~RADEON_PM_MODE_SEL;
        WREG32_PLL(RADEON_PLL_PWRMGT_CNTL, tmp);

}
2684
2685/*
2686 * VRAM info
2687 */
2688static void r100_vram_get_type(struct radeon_device *rdev)
2689{
2690        uint32_t tmp;
2691
2692        rdev->mc.vram_is_ddr = false;
2693        if (rdev->flags & RADEON_IS_IGP)
2694                rdev->mc.vram_is_ddr = true;
2695        else if (RREG32(RADEON_MEM_SDRAM_MODE_REG) & RADEON_MEM_CFG_TYPE_DDR)
2696                rdev->mc.vram_is_ddr = true;
2697        if ((rdev->family == CHIP_RV100) ||
2698            (rdev->family == CHIP_RS100) ||
2699            (rdev->family == CHIP_RS200)) {
2700                tmp = RREG32(RADEON_MEM_CNTL);
2701                if (tmp & RV100_HALF_MODE) {
2702                        rdev->mc.vram_width = 32;
2703                } else {
2704                        rdev->mc.vram_width = 64;
2705                }
2706                if (rdev->flags & RADEON_SINGLE_CRTC) {
2707                        rdev->mc.vram_width /= 4;
2708                        rdev->mc.vram_is_ddr = true;
2709                }
2710        } else if (rdev->family <= CHIP_RV280) {
2711                tmp = RREG32(RADEON_MEM_CNTL);
2712                if (tmp & RADEON_MEM_NUM_CHANNELS_MASK) {
2713                        rdev->mc.vram_width = 128;
2714                } else {
2715                        rdev->mc.vram_width = 64;
2716                }
2717        } else {
2718                /* newer IGPs */
2719                rdev->mc.vram_width = 128;
2720        }
2721}
2722
/* Work out how much VRAM the CPU can actually access through the PCI
 * aperture.  Newer parts (RV280+) get HDP_APER_CNTL set and can use two
 * apertures; older parts are limited based on multifunction status and
 * whatever the BIOS configured.  Returns the accessible size in bytes.
 */
static u32 r100_get_accessible_vram(struct radeon_device *rdev)
{
        u32 aper_size;
        u8 byte;

        aper_size = RREG32(RADEON_CONFIG_APER_SIZE);

        /* Set HDP_APER_CNTL only on cards that are known not to be broken,
         * that is has the 2nd generation multifunction PCI interface
         */
        if (rdev->family == CHIP_RV280 ||
            rdev->family >= CHIP_RV350) {
                WREG32_P(RADEON_HOST_PATH_CNTL, RADEON_HDP_APER_CNTL,
                       ~RADEON_HDP_APER_CNTL);
                DRM_INFO("Generation 2 PCI interface, using max accessible memory\n");
                return aper_size * 2;
        }

        /* Older cards have all sorts of funny issues to deal with. First
         * check if it's a multifunction card by reading the PCI config
         * header type... Limit those to one aperture size
         */
        pci_read_config_byte(rdev->pdev, 0xe, &byte);
        if (byte & 0x80) {
                DRM_INFO("Generation 1 PCI interface in multifunction mode\n");
                DRM_INFO("Limiting VRAM to one aperture\n");
                return aper_size;
        }

        /* Single function older card. We read HDP_APER_CNTL to see how the BIOS
         * have set it up. We don't write this as it's broken on some ASICs but
         * we expect the BIOS to have done the right thing (might be too optimistic...)
         */
        if (RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL)
                return aper_size * 2;
        return aper_size;
}
2760
/* Fill in rdev->mc VRAM sizes: aperture base/size from PCI BAR 0,
 * CPU-visible size from r100_get_accessible_vram(), and real/MC sizes
 * from NB_TOM (IGPs, which steal system RAM) or CONFIG_MEMSIZE
 * (discrete), with workarounds for boards that misreport.
 */
void r100_vram_init_sizes(struct radeon_device *rdev)
{
        u64 config_aper_size;

        /* work out accessible VRAM */
        rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
        rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
        rdev->mc.visible_vram_size = r100_get_accessible_vram(rdev);
        /* FIXME we don't use the second aperture yet when we could use it */
        if (rdev->mc.visible_vram_size > rdev->mc.aper_size)
                rdev->mc.visible_vram_size = rdev->mc.aper_size;
        config_aper_size = RREG32(RADEON_CONFIG_APER_SIZE);
        if (rdev->flags & RADEON_IS_IGP) {
                uint32_t tom;
                /* read NB_TOM to get the amount of ram stolen for the GPU */
                tom = RREG32(RADEON_NB_TOM);
                /* NB_TOM holds start (low 16 bits) and end (high 16 bits)
                 * of the stolen range in 64KB units */
                rdev->mc.real_vram_size = (((tom >> 16) - (tom & 0xffff) + 1) << 16);
                WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
                rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
        } else {
                rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
                /* Some production boards of m6 will report 0
                 * if it's 8 MB
                 */
                if (rdev->mc.real_vram_size == 0) {
                        rdev->mc.real_vram_size = 8192 * 1024;
                        WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
                }
                /* Fix for RN50, M6, M7 with 8/16/32(??) MBs of VRAM - 
                 * Novell bug 204882 + along with lots of ubuntu ones
                 */
                if (rdev->mc.aper_size > config_aper_size)
                        config_aper_size = rdev->mc.aper_size;

                if (config_aper_size > rdev->mc.real_vram_size)
                        rdev->mc.mc_vram_size = config_aper_size;
                else
                        rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
        }
}
2801
2802void r100_vga_set_state(struct radeon_device *rdev, bool state)
2803{
2804        uint32_t temp;
2805
2806        temp = RREG32(RADEON_CONFIG_CNTL);
2807        if (state == false) {
2808                temp &= ~RADEON_CFG_VGA_RAM_EN;
2809                temp |= RADEON_CFG_VGA_IO_DIS;
2810        } else {
2811                temp &= ~RADEON_CFG_VGA_IO_DIS;
2812        }
2813        WREG32(RADEON_CONFIG_CNTL, temp);
2814}
2815
2816static void r100_mc_init(struct radeon_device *rdev)
2817{
2818        u64 base;
2819
2820        r100_vram_get_type(rdev);
2821        r100_vram_init_sizes(rdev);
2822        base = rdev->mc.aper_base;
2823        if (rdev->flags & RADEON_IS_IGP)
2824                base = (RREG32(RADEON_NB_TOM) & 0xffff) << 16;
2825        radeon_vram_location(rdev, &rdev->mc, base);
2826        rdev->mc.gtt_base_align = 0;
2827        if (!(rdev->flags & RADEON_IS_AGP))
2828                radeon_gtt_location(rdev, &rdev->mc);
2829        radeon_update_bandwidth_info(rdev);
2830}
2831
2832
2833/*
2834 * Indirect registers accessor
2835 */
2836void r100_pll_errata_after_index(struct radeon_device *rdev)
2837{
2838        if (rdev->pll_errata & CHIP_ERRATA_PLL_DUMMYREADS) {
2839                (void)RREG32(RADEON_CLOCK_CNTL_DATA);
2840                (void)RREG32(RADEON_CRTC_GEN_CNTL);
2841        }
2842}
2843
/* PLL errata to apply after a CLOCK_CNTL_DATA access; which workarounds
 * run is controlled by the flags set in r100_errata().
 */
static void r100_pll_errata_after_data(struct radeon_device *rdev)
{
        /* This workarounds is necessary on RV100, RS100 and RS200 chips
         * or the chip could hang on a subsequent access
         */
        if (rdev->pll_errata & CHIP_ERRATA_PLL_DELAY) {
                mdelay(5);
        }

        /* This function is required to workaround a hardware bug in some (all?)
         * revisions of the R300.  This workaround should be called after every
         * CLOCK_CNTL_INDEX register access.  If not, register reads afterward
         * may not be correct.
         */
        if (rdev->pll_errata & CHIP_ERRATA_R300_CG) {
                uint32_t save, tmp;

                /* dummy index/data cycle with PLL_WR_EN cleared, then
                 * restore the original index value */
                save = RREG32(RADEON_CLOCK_CNTL_INDEX);
                tmp = save & ~(0x3f | RADEON_PLL_WR_EN);
                WREG32(RADEON_CLOCK_CNTL_INDEX, tmp);
                tmp = RREG32(RADEON_CLOCK_CNTL_DATA);
                WREG32(RADEON_CLOCK_CNTL_INDEX, save);
        }
}
2868
/* Read a PLL register via the indexed CLOCK_CNTL_INDEX/DATA pair.
 * The index lock serializes the two-step access; the errata hooks must
 * run exactly where they are (after index write, after data read).
 */
uint32_t r100_pll_rreg(struct radeon_device *rdev, uint32_t reg)
{
        unsigned long flags;
        uint32_t data;

        spin_lock_irqsave(&rdev->pll_idx_lock, flags);
        WREG8(RADEON_CLOCK_CNTL_INDEX, reg & 0x3f);
        r100_pll_errata_after_index(rdev);
        data = RREG32(RADEON_CLOCK_CNTL_DATA);
        r100_pll_errata_after_data(rdev);
        spin_unlock_irqrestore(&rdev->pll_idx_lock, flags);
        return data;
}
2882
/* Write a PLL register via the indexed CLOCK_CNTL_INDEX/DATA pair.
 * PLL_WR_EN must accompany the index for a write; locking and errata
 * hook placement mirror r100_pll_rreg().
 */
void r100_pll_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
{
        unsigned long flags;

        spin_lock_irqsave(&rdev->pll_idx_lock, flags);
        WREG8(RADEON_CLOCK_CNTL_INDEX, ((reg & 0x3f) | RADEON_PLL_WR_EN));
        r100_pll_errata_after_index(rdev);
        WREG32(RADEON_CLOCK_CNTL_DATA, v);
        r100_pll_errata_after_data(rdev);
        spin_unlock_irqrestore(&rdev->pll_idx_lock, flags);
}
2894
2895static void r100_set_safe_registers(struct radeon_device *rdev)
2896{
2897        if (ASIC_IS_RN50(rdev)) {
2898                rdev->config.r100.reg_safe_bm = rn50_reg_safe_bm;
2899                rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(rn50_reg_safe_bm);
2900        } else if (rdev->family < CHIP_R200) {
2901                rdev->config.r100.reg_safe_bm = r100_reg_safe_bm;
2902                rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(r100_reg_safe_bm);
2903        } else {
2904                r200_set_safe_registers(rdev);
2905        }
2906}
2907
2908/*
2909 * Debugfs info
2910 */
2911#if defined(CONFIG_DEBUG_FS)
/* debugfs: dump RBBM status and the 64-entry RBBM command FIFO.
 * Each FIFO slot is read twice through RBBM_CMDFIFO_ADDR: once with
 * bit 8 set (register address side) and once without (data side).
 */
static int r100_debugfs_rbbm_info(struct seq_file *m, void *data)
{
        struct drm_info_node *node = (struct drm_info_node *) m->private;
        struct drm_device *dev = node->minor->dev;
        struct radeon_device *rdev = dev->dev_private;
        uint32_t reg, value;
        unsigned i;

        seq_printf(m, "RBBM_STATUS 0x%08x\n", RREG32(RADEON_RBBM_STATUS));
        seq_printf(m, "RBBM_CMDFIFO_STAT 0x%08x\n", RREG32(0xE7C));
        seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
        for (i = 0; i < 64; i++) {
                /* 0x100 selects the "address" view of the FIFO slot */
                WREG32(RADEON_RBBM_CMDFIFO_ADDR, i | 0x100);
                reg = (RREG32(RADEON_RBBM_CMDFIFO_DATA) - 1) >> 2;
                WREG32(RADEON_RBBM_CMDFIFO_ADDR, i);
                value = RREG32(RADEON_RBBM_CMDFIFO_DATA);
                seq_printf(m, "[0x%03X] 0x%04X=0x%08X\n", i, reg, value);
        }
        return 0;
}
2932
/* debugfs: dump CP ring pointers and (when the ring is up) the ring
 * contents between the read and write pointers.
 */
static int r100_debugfs_cp_ring_info(struct seq_file *m, void *data)
{
        struct drm_info_node *node = (struct drm_info_node *) m->private;
        struct drm_device *dev = node->minor->dev;
        struct radeon_device *rdev = dev->dev_private;
        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        uint32_t rdp, wdp;
        unsigned count, i, j;

        radeon_ring_free_size(rdev, ring);
        rdp = RREG32(RADEON_CP_RB_RPTR);
        wdp = RREG32(RADEON_CP_RB_WPTR);
        /* NOTE(review): (rdp - wdp) mod size is the free space, yet it is
         * printed as "dwords in ring" — the usual occupancy formula would
         * be (wdp - rdp) & ptr_mask; confirm intent before changing. */
        count = (rdp + ring->ring_size - wdp) & ring->ptr_mask;
        seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
        seq_printf(m, "CP_RB_WPTR 0x%08x\n", wdp);
        seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp);
        seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw);
        seq_printf(m, "%u dwords in ring\n", count);
        if (ring->ready) {
                for (j = 0; j <= count; j++) {
                        i = (rdp + j) & ring->ptr_mask;
                        seq_printf(m, "r[%04d]=0x%08x\n", i, ring->ring[i]);
                }
        }
        return 0;
}
2959
2960
2961static int r100_debugfs_cp_csq_fifo(struct seq_file *m, void *data)
2962{
2963        struct drm_info_node *node = (struct drm_info_node *) m->private;
2964        struct drm_device *dev = node->minor->dev;
2965        struct radeon_device *rdev = dev->dev_private;
2966        uint32_t csq_stat, csq2_stat, tmp;
2967        unsigned r_rptr, r_wptr, ib1_rptr, ib1_wptr, ib2_rptr, ib2_wptr;
2968        unsigned i;
2969
2970        seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
2971        seq_printf(m, "CP_CSQ_MODE 0x%08x\n", RREG32(RADEON_CP_CSQ_MODE));
2972        csq_stat = RREG32(RADEON_CP_CSQ_STAT);
2973        csq2_stat = RREG32(RADEON_CP_CSQ2_STAT);
2974        r_rptr = (csq_stat >> 0) & 0x3ff;
2975        r_wptr = (csq_stat >> 10) & 0x3ff;
2976        ib1_rptr = (csq_stat >> 20) & 0x3ff;
2977        ib1_wptr = (csq2_stat >> 0) & 0x3ff;
2978        ib2_rptr = (csq2_stat >> 10) & 0x3ff;
2979        ib2_wptr = (csq2_stat >> 20) & 0x3ff;
2980        seq_printf(m, "CP_CSQ_STAT 0x%08x\n", csq_stat);
2981        seq_printf(m, "CP_CSQ2_STAT 0x%08x\n", csq2_stat);
2982        seq_printf(m, "Ring rptr %u\n", r_rptr);
2983        seq_printf(m, "Ring wptr %u\n", r_wptr);
2984        seq_printf(m, "Indirect1 rptr %u\n", ib1_rptr);
2985        seq_printf(m, "Indirect1 wptr %u\n", ib1_wptr);
2986        seq_printf(m, "Indirect2 rptr %u\n", ib2_rptr);
2987        seq_printf(m, "Indirect2 wptr %u\n", ib2_wptr);
2988        /* FIXME: 0, 128, 640 depends on fifo setup see cp_init_kms
2989         * 128 = indirect1_start * 8 & 640 = indirect2_start * 8 */
2990        seq_printf(m, "Ring fifo:\n");
2991        for (i = 0; i < 256; i++) {
2992                WREG32(RADEON_CP_CSQ_ADDR, i << 2);
2993                tmp = RREG32(RADEON_CP_CSQ_DATA);
2994                seq_printf(m, "rfifo[%04d]=0x%08X\n", i, tmp);
2995        }
2996        seq_printf(m, "Indirect1 fifo:\n");
2997        for (i = 256; i <= 512; i++) {
2998                WREG32(RADEON_CP_CSQ_ADDR, i << 2);
2999                tmp = RREG32(RADEON_CP_CSQ_DATA);
3000                seq_printf(m, "ib1fifo[%04d]=0x%08X\n", i, tmp);
3001        }
3002        seq_printf(m, "Indirect2 fifo:\n");
3003        for (i = 640; i < ib1_wptr; i++) {
3004                WREG32(RADEON_CP_CSQ_ADDR, i << 2);
3005                tmp = RREG32(RADEON_CP_CSQ_DATA);
3006                seq_printf(m, "ib2fifo[%04d]=0x%08X\n", i, tmp);
3007        }
3008        return 0;
3009}
3010
3011static int r100_debugfs_mc_info(struct seq_file *m, void *data)
3012{
3013        struct drm_info_node *node = (struct drm_info_node *) m->private;
3014        struct drm_device *dev = node->minor->dev;
3015        struct radeon_device *rdev = dev->dev_private;
3016        uint32_t tmp;
3017
3018        tmp = RREG32(RADEON_CONFIG_MEMSIZE);
3019        seq_printf(m, "CONFIG_MEMSIZE 0x%08x\n", tmp);
3020        tmp = RREG32(RADEON_MC_FB_LOCATION);
3021        seq_printf(m, "MC_FB_LOCATION 0x%08x\n", tmp);
3022        tmp = RREG32(RADEON_BUS_CNTL);
3023        seq_printf(m, "BUS_CNTL 0x%08x\n", tmp);
3024        tmp = RREG32(RADEON_MC_AGP_LOCATION);
3025        seq_printf(m, "MC_AGP_LOCATION 0x%08x\n", tmp);
3026        tmp = RREG32(RADEON_AGP_BASE);
3027        seq_printf(m, "AGP_BASE 0x%08x\n", tmp);
3028        tmp = RREG32(RADEON_HOST_PATH_CNTL);
3029        seq_printf(m, "HOST_PATH_CNTL 0x%08x\n", tmp);
3030        tmp = RREG32(0x01D0);
3031        seq_printf(m, "AIC_CTRL 0x%08x\n", tmp);
3032        tmp = RREG32(RADEON_AIC_LO_ADDR);
3033        seq_printf(m, "AIC_LO_ADDR 0x%08x\n", tmp);
3034        tmp = RREG32(RADEON_AIC_HI_ADDR);
3035        seq_printf(m, "AIC_HI_ADDR 0x%08x\n", tmp);
3036        tmp = RREG32(0x01E4);
3037        seq_printf(m, "AIC_TLB_ADDR 0x%08x\n", tmp);
3038        return 0;
3039}
3040
/* debugfs entry table: RBBM status dump, registered by r100_debugfs_rbbm_init() */
static struct drm_info_list r100_debugfs_rbbm_list[] = {
	{"r100_rbbm_info", r100_debugfs_rbbm_info, 0, NULL},
};
3044
/* debugfs entry table: CP ring and CSQ fifo dumps, registered by r100_debugfs_cp_init() */
static struct drm_info_list r100_debugfs_cp_list[] = {
	{"r100_cp_ring_info", r100_debugfs_cp_ring_info, 0, NULL},
	{"r100_cp_csq_fifo", r100_debugfs_cp_csq_fifo, 0, NULL},
};
3049
/* debugfs entry table: MC register dump, registered by r100_debugfs_mc_info_init() */
static struct drm_info_list r100_debugfs_mc_info_list[] = {
	{"r100_mc_info", r100_debugfs_mc_info, 0, NULL},
};
3053#endif
3054
/*
 * Register the RBBM debugfs file for this device.
 * Returns 0 on success or a negative error code from
 * radeon_debugfs_add_files(); a no-op returning 0 when debugfs is
 * compiled out.
 */
int r100_debugfs_rbbm_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	/* derive the count from the table so the two cannot drift apart */
	return radeon_debugfs_add_files(rdev, r100_debugfs_rbbm_list,
					sizeof(r100_debugfs_rbbm_list) /
					sizeof(r100_debugfs_rbbm_list[0]));
#else
	return 0;
#endif
}
3063
/*
 * Register the CP debugfs files (ring and CSQ fifo dumps) for this
 * device.  Returns 0 on success or a negative error code from
 * radeon_debugfs_add_files(); a no-op returning 0 when debugfs is
 * compiled out.
 */
int r100_debugfs_cp_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	/* derive the count from the table so the two cannot drift apart */
	return radeon_debugfs_add_files(rdev, r100_debugfs_cp_list,
					sizeof(r100_debugfs_cp_list) /
					sizeof(r100_debugfs_cp_list[0]));
#else
	return 0;
#endif
}
3072
/*
 * Register the MC debugfs file for this device.
 * Returns 0 on success or a negative error code from
 * radeon_debugfs_add_files(); a no-op returning 0 when debugfs is
 * compiled out.
 */
int r100_debugfs_mc_info_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	/* derive the count from the table so the two cannot drift apart */
	return radeon_debugfs_add_files(rdev, r100_debugfs_mc_info_list,
					sizeof(r100_debugfs_mc_info_list) /
					sizeof(r100_debugfs_mc_info_list[0]));
#else
	return 0;
#endif
}
3081
3082int r100_set_surface_reg(struct radeon_device *rdev, int reg,
3083                         uint32_t tiling_flags, uint32_t pitch,
3084                         uint32_t offset, uint32_t obj_size)
3085{
3086        int surf_index = reg * 16;
3087        int flags = 0;
3088
3089        if (rdev->family <= CHIP_RS200) {
3090                if ((tiling_flags & (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
3091                                 == (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
3092                        flags |= RADEON_SURF_TILE_COLOR_BOTH;
3093                if (tiling_flags & RADEON_TILING_MACRO)
3094                        flags |= RADEON_SURF_TILE_COLOR_MACRO;
3095                /* setting pitch to 0 disables tiling */
3096                if ((tiling_flags & (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
3097                                == 0)
3098                        pitch = 0;
3099        } else if (rdev->family <= CHIP_RV280) {
3100                if (tiling_flags & (RADEON_TILING_MACRO))
3101                        flags |= R200_SURF_TILE_COLOR_MACRO;
3102                if (tiling_flags & RADEON_TILING_MICRO)
3103                        flags |= R200_SURF_TILE_COLOR_MICRO;
3104        } else {
3105                if (tiling_flags & RADEON_TILING_MACRO)
3106                        flags |= R300_SURF_TILE_MACRO;
3107                if (tiling_flags & RADEON_TILING_MICRO)
3108                        flags |= R300_SURF_TILE_MICRO;
3109        }
3110
3111        if (tiling_flags & RADEON_TILING_SWAP_16BIT)
3112                flags |= RADEON_SURF_AP0_SWP_16BPP | RADEON_SURF_AP1_SWP_16BPP;
3113        if (tiling_flags & RADEON_TILING_SWAP_32BIT)
3114                flags |= RADEON_SURF_AP0_SWP_32BPP | RADEON_SURF_AP1_SWP_32BPP;
3115
3116        /* r100/r200 divide by 16 */
3117        if (rdev->family < CHIP_R300)
3118                flags |= pitch / 16;
3119        else
3120                flags |= pitch / 8;
3121
3122
3123        DRM_DEBUG_KMS("writing surface %d %d %x %x\n", reg, flags, offset, offset+obj_size-1);
3124        WREG32(RADEON_SURFACE0_INFO + surf_index, flags);
3125        WREG32(RADEON_SURFACE0_LOWER_BOUND + surf_index, offset);
3126        WREG32(RADEON_SURFACE0_UPPER_BOUND + surf_index, offset + obj_size - 1);
3127        return 0;
3128}
3129
3130void r100_clear_surface_reg(struct radeon_device *rdev, int reg)
3131{
3132        int surf_index = reg * 16;
3133        WREG32(RADEON_SURFACE0_INFO + surf_index, 0);
3134}
3135
/*
 * r100_bandwidth_update - program display watermarks for the current modes
 * @rdev: radeon device
 *
 * Computes, in 20.12 fixed point, the memory-latency and display-drain-rate
 * budget for the enabled CRTC(s), then writes the resulting STOP_REQ,
 * START_REQ and critical-point values into GRPH_BUFFER_CNTL (and
 * GRPH2_BUFFER_CNTL for the second CRTC).  Mode and pixel-depth state is
 * read from rdev->mode_info; DRAM timings are read from the MC registers,
 * with per-family field layouts.
 */
void r100_bandwidth_update(struct radeon_device *rdev)
{
	fixed20_12 trcd_ff, trp_ff, tras_ff, trbs_ff, tcas_ff;
	fixed20_12 sclk_ff, mclk_ff, sclk_eff_ff, sclk_delay_ff;
	fixed20_12 peak_disp_bw, mem_bw, pix_clk, pix_clk2, temp_ff, crit_point_ff;
	uint32_t temp, data, mem_trcd, mem_trp, mem_tras;
	/* CAS latency (in mclks) lookup tables, indexed by the 3-bit field
	 * read from MEM_SDRAM_MODE_REG below.
	 * NOTE(review): memtcas_ff has only 7 initializers, so entry [7] is
	 * zero-initialized — presumably an unused/reserved encoding; confirm. */
	fixed20_12 memtcas_ff[8] = {
		dfixed_init(1),
		dfixed_init(2),
		dfixed_init(3),
		dfixed_init(0),
		dfixed_init_half(1),
		dfixed_init_half(2),
		dfixed_init(0),
	};
	fixed20_12 memtcas_rs480_ff[8] = {
		dfixed_init(0),
		dfixed_init(1),
		dfixed_init(2),
		dfixed_init(3),
		dfixed_init(0),
		dfixed_init_half(1),
		dfixed_init_half(2),
		dfixed_init_half(3),
	};
	fixed20_12 memtcas2_ff[8] = {
		dfixed_init(0),
		dfixed_init(1),
		dfixed_init(2),
		dfixed_init(3),
		dfixed_init(4),
		dfixed_init(5),
		dfixed_init(6),
		dfixed_init(7),
	};
	/* read-burst timing tables (r300-class and r4xx-class) */
	fixed20_12 memtrbs[8] = {
		dfixed_init(1),
		dfixed_init_half(1),
		dfixed_init(2),
		dfixed_init_half(2),
		dfixed_init(3),
		dfixed_init_half(3),
		dfixed_init(4),
		dfixed_init_half(4)
	};
	fixed20_12 memtrbs_r4xx[8] = {
		dfixed_init(4),
		dfixed_init(5),
		dfixed_init(6),
		dfixed_init(7),
		dfixed_init(8),
		dfixed_init(9),
		dfixed_init(10),
		dfixed_init(11)
	};
	fixed20_12 min_mem_eff;
	fixed20_12 mc_latency_sclk, mc_latency_mclk, k1;
	fixed20_12 cur_latency_mclk, cur_latency_sclk;
	fixed20_12 disp_latency, disp_latency_overhead, disp_drain_rate,
		disp_drain_rate2, read_return_rate;
	fixed20_12 time_disp1_drop_priority;
	int c;
	int cur_size = 16;       /* in octawords */
	int critical_point = 0, critical_point2;
	int stop_req, max_stop_req;
	struct drm_display_mode *mode1 = NULL;
	struct drm_display_mode *mode2 = NULL;
	uint32_t pixel_bytes1 = 0;
	uint32_t pixel_bytes2 = 0;

	radeon_update_display_priority(rdev);

	/* collect the active mode and bytes-per-pixel for each enabled CRTC.
	 * NOTE(review): assumes primary->fb is non-NULL whenever the CRTC is
	 * enabled — confirm against the modeset paths that call this. */
	if (rdev->mode_info.crtcs[0]->base.enabled) {
		mode1 = &rdev->mode_info.crtcs[0]->base.mode;
		pixel_bytes1 = rdev->mode_info.crtcs[0]->base.primary->fb->bits_per_pixel / 8;
	}
	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
		if (rdev->mode_info.crtcs[1]->base.enabled) {
			mode2 = &rdev->mode_info.crtcs[1]->base.mode;
			pixel_bytes2 = rdev->mode_info.crtcs[1]->base.primary->fb->bits_per_pixel / 8;
		}
	}

	min_mem_eff.full = dfixed_const_8(0);
	/* when display priority is forced high on r300+, boost the per-display
	 * request init latency for whichever CRTCs are active */
	if ((rdev->disp_priority == 2) && ASIC_IS_R300(rdev)) {
		uint32_t mc_init_misc_lat_timer = RREG32(R300_MC_INIT_MISC_LAT_TIMER);
		mc_init_misc_lat_timer &= ~(R300_MC_DISP1R_INIT_LAT_MASK << R300_MC_DISP1R_INIT_LAT_SHIFT);
		mc_init_misc_lat_timer &= ~(R300_MC_DISP0R_INIT_LAT_MASK << R300_MC_DISP0R_INIT_LAT_SHIFT);
		/* check crtc enables */
		if (mode2)
			mc_init_misc_lat_timer |= (1 << R300_MC_DISP1R_INIT_LAT_SHIFT);
		if (mode1)
			mc_init_misc_lat_timer |= (1 << R300_MC_DISP0R_INIT_LAT_SHIFT);
		WREG32(R300_MC_INIT_MISC_LAT_TIMER, mc_init_misc_lat_timer);
	}

	/*
	 * determine is there is enough bw for current mode
	 */
	sclk_ff = rdev->pm.sclk;
	mclk_ff = rdev->pm.mclk;

	/* memory bandwidth = bus bytes per mclk (doubled for DDR) * mclk */
	temp = (rdev->mc.vram_width / 8) * (rdev->mc.vram_is_ddr ? 2 : 1);
	temp_ff.full = dfixed_const(temp);
	mem_bw.full = dfixed_mul(mclk_ff, temp_ff);

	/* peak display bandwidth = sum over CRTCs of pixclk * bytes/pixel */
	pix_clk.full = 0;
	pix_clk2.full = 0;
	peak_disp_bw.full = 0;
	if (mode1) {
		temp_ff.full = dfixed_const(1000);
		pix_clk.full = dfixed_const(mode1->clock); /* convert to fixed point */
		pix_clk.full = dfixed_div(pix_clk, temp_ff);
		temp_ff.full = dfixed_const(pixel_bytes1);
		peak_disp_bw.full += dfixed_mul(pix_clk, temp_ff);
	}
	if (mode2) {
		temp_ff.full = dfixed_const(1000);
		pix_clk2.full = dfixed_const(mode2->clock); /* convert to fixed point */
		pix_clk2.full = dfixed_div(pix_clk2, temp_ff);
		temp_ff.full = dfixed_const(pixel_bytes2);
		peak_disp_bw.full += dfixed_mul(pix_clk2, temp_ff);
	}

	/* derate theoretical bandwidth by the minimum memory efficiency */
	mem_bw.full = dfixed_mul(mem_bw, min_mem_eff);
	if (peak_disp_bw.full >= mem_bw.full) {
		DRM_ERROR("You may not have enough display bandwidth for current mode\n"
			  "If you have flickering problem, try to lower resolution, refresh rate, or color depth\n");
	}

	/* Get DRAM timing values (trcd/trp/tras, in mclks) from
	 * MEM_TIMING_CNTL; field layout and bias differ per family. */
	temp = RREG32(RADEON_MEM_TIMING_CNTL);
	if ((rdev->family == CHIP_RV100) || (rdev->flags & RADEON_IS_IGP)) { /* RV100, M6, IGPs */
		mem_trcd = ((temp >> 2) & 0x3) + 1;
		mem_trp  = ((temp & 0x3)) + 1;
		mem_tras = ((temp & 0x70) >> 4) + 1;
	} else if (rdev->family == CHIP_R300 ||
		   rdev->family == CHIP_R350) { /* r300, r350 */
		mem_trcd = (temp & 0x7) + 1;
		mem_trp = ((temp >> 8) & 0x7) + 1;
		mem_tras = ((temp >> 11) & 0xf) + 4;
	} else if (rdev->family == CHIP_RV350 ||
		   rdev->family <= CHIP_RV380) {
		/* rv3x0 */
		mem_trcd = (temp & 0x7) + 3;
		mem_trp = ((temp >> 8) & 0x7) + 3;
		mem_tras = ((temp >> 11) & 0xf) + 6;
	} else if (rdev->family == CHIP_R420 ||
		   rdev->family == CHIP_R423 ||
		   rdev->family == CHIP_RV410) {
		/* r4xx */
		mem_trcd = (temp & 0xf) + 3;
		if (mem_trcd > 15)
			mem_trcd = 15;
		mem_trp = ((temp >> 8) & 0xf) + 3;
		if (mem_trp > 15)
			mem_trp = 15;
		mem_tras = ((temp >> 12) & 0x1f) + 6;
		if (mem_tras > 31)
			mem_tras = 31;
	} else { /* RV200, R200 */
		mem_trcd = (temp & 0x7) + 1;
		mem_trp = ((temp >> 8) & 0x7) + 1;
		mem_tras = ((temp >> 12) & 0xf) + 4;
	}
	/* convert to FF */
	trcd_ff.full = dfixed_const(mem_trcd);
	trp_ff.full = dfixed_const(mem_trp);
	tras_ff.full = dfixed_const(mem_tras);

	/* Get values from the MEM_SDRAM_MODE_REG register...converting its */
	temp = RREG32(RADEON_MEM_SDRAM_MODE_REG);
	data = (temp & (7 << 20)) >> 20;
	if ((rdev->family == CHIP_RV100) || rdev->flags & RADEON_IS_IGP) {
		if (rdev->family == CHIP_RS480) /* don't think rs400 */
			tcas_ff = memtcas_rs480_ff[data];
		else
			tcas_ff = memtcas_ff[data];
	} else
		tcas_ff = memtcas2_ff[data];

	if (rdev->family == CHIP_RS400 ||
	    rdev->family == CHIP_RS480) {
		/* extra cas latency stored in bits 23-25 0-4 clocks */
		data = (temp >> 23) & 0x7;
		if (data < 5)
			tcas_ff.full += dfixed_const(data);
	}

	if (ASIC_IS_R300(rdev) && !(rdev->flags & RADEON_IS_IGP)) {
		/* on the R300, Tcas is included in Trbs.
		 */
		temp = RREG32(RADEON_MEM_CNTL);
		data = (R300_MEM_NUM_CHANNELS_MASK & temp);
		if (data == 1) {
			/* single-channel: read RBS position from whichever
			 * channel pair (A/B or C/D) is in use */
			if (R300_MEM_USE_CD_CH_ONLY & temp) {
				temp = RREG32(R300_MC_IND_INDEX);
				temp &= ~R300_MC_IND_ADDR_MASK;
				temp |= R300_MC_READ_CNTL_CD_mcind;
				WREG32(R300_MC_IND_INDEX, temp);
				temp = RREG32(R300_MC_IND_DATA);
				data = (R300_MEM_RBS_POSITION_C_MASK & temp);
			} else {
				temp = RREG32(R300_MC_READ_CNTL_AB);
				data = (R300_MEM_RBS_POSITION_A_MASK & temp);
			}
		} else {
			temp = RREG32(R300_MC_READ_CNTL_AB);
			data = (R300_MEM_RBS_POSITION_A_MASK & temp);
		}
		if (rdev->family == CHIP_RV410 ||
		    rdev->family == CHIP_R420 ||
		    rdev->family == CHIP_R423)
			trbs_ff = memtrbs_r4xx[data];
		else
			trbs_ff = memtrbs[data];
		tcas_ff.full += trbs_ff.full;
	}

	sclk_eff_ff.full = sclk_ff.full;

	/* derate the effective sclk for AGP overhead */
	if (rdev->flags & RADEON_IS_AGP) {
		fixed20_12 agpmode_ff;
		agpmode_ff.full = dfixed_const(radeon_agpmode);
		temp_ff.full = dfixed_const_666(16);
		sclk_eff_ff.full -= dfixed_mul(agpmode_ff, temp_ff);
	}
	/* TODO PCIE lanes may affect this - agpmode == 16?? */

	/* per-family sclk delay constant (in 1/100ths of an sclk) */
	if (ASIC_IS_R300(rdev)) {
		sclk_delay_ff.full = dfixed_const(250);
	} else {
		if ((rdev->family == CHIP_RV100) ||
		    rdev->flags & RADEON_IS_IGP) {
			if (rdev->mc.vram_is_ddr)
				sclk_delay_ff.full = dfixed_const(41);
			else
				sclk_delay_ff.full = dfixed_const(33);
		} else {
			if (rdev->mc.vram_width == 128)
				sclk_delay_ff.full = dfixed_const(57);
			else
				sclk_delay_ff.full = dfixed_const(41);
		}
	}

	mc_latency_sclk.full = dfixed_div(sclk_delay_ff, sclk_eff_ff);

	/* k1/c constants depend on memory type and bus width */
	if (rdev->mc.vram_is_ddr) {
		if (rdev->mc.vram_width == 32) {
			k1.full = dfixed_const(40);
			c  = 3;
		} else {
			k1.full = dfixed_const(20);
			c  = 1;
		}
	} else {
		k1.full = dfixed_const(40);
		c  = 3;
	}

	/* worst-case MC latency in mclks:
	 * 2*trcd + c*tcas + 4*(tras + trp) + k1, then converted to time */
	temp_ff.full = dfixed_const(2);
	mc_latency_mclk.full = dfixed_mul(trcd_ff, temp_ff);
	temp_ff.full = dfixed_const(c);
	mc_latency_mclk.full += dfixed_mul(tcas_ff, temp_ff);
	temp_ff.full = dfixed_const(4);
	mc_latency_mclk.full += dfixed_mul(tras_ff, temp_ff);
	mc_latency_mclk.full += dfixed_mul(trp_ff, temp_ff);
	mc_latency_mclk.full += k1.full;

	mc_latency_mclk.full = dfixed_div(mc_latency_mclk, mclk_ff);
	mc_latency_mclk.full += dfixed_div(temp_ff, sclk_eff_ff);

	/*
	  HW cursor time assuming worst case of full size colour cursor.
	*/
	temp_ff.full = dfixed_const((2 * (cur_size - (rdev->mc.vram_is_ddr + 1))));
	temp_ff.full += trcd_ff.full;
	if (temp_ff.full < tras_ff.full)
		temp_ff.full = tras_ff.full;
	cur_latency_mclk.full = dfixed_div(temp_ff, mclk_ff);

	temp_ff.full = dfixed_const(cur_size);
	cur_latency_sclk.full = dfixed_div(temp_ff, sclk_eff_ff);
	/*
	  Find the total latency for the display data.
	*/
	disp_latency_overhead.full = dfixed_const(8);
	disp_latency_overhead.full = dfixed_div(disp_latency_overhead, sclk_ff);
	mc_latency_mclk.full += disp_latency_overhead.full + cur_latency_mclk.full;
	mc_latency_sclk.full += disp_latency_overhead.full + cur_latency_sclk.full;

	/* overall latency is the worse of the mclk- and sclk-side paths */
	if (mc_latency_mclk.full > mc_latency_sclk.full)
		disp_latency.full = mc_latency_mclk.full;
	else
		disp_latency.full = mc_latency_sclk.full;

	/* setup Max GRPH_STOP_REQ default value */
	if (ASIC_IS_RV100(rdev))
		max_stop_req = 0x5c;
	else
		max_stop_req = 0x7c;

	if (mode1) {
		/*  CRTC1
		    Set GRPH_BUFFER_CNTL register using h/w defined optimal values.
		    GRPH_STOP_REQ <= MIN[ 0x7C, (CRTC_H_DISP + 1) * (bit depth) / 0x10 ]
		*/
		stop_req = mode1->hdisplay * pixel_bytes1 / 16;

		if (stop_req > max_stop_req)
			stop_req = max_stop_req;

		/*
		  Find the drain rate of the display buffer.
		*/
		temp_ff.full = dfixed_const((16/pixel_bytes1));
		disp_drain_rate.full = dfixed_div(pix_clk, temp_ff);

		/*
		  Find the critical point of the display buffer.
		*/
		crit_point_ff.full = dfixed_mul(disp_drain_rate, disp_latency);
		crit_point_ff.full += dfixed_const_half(0);	/* round to nearest */

		critical_point = dfixed_trunc(crit_point_ff);

		if (rdev->disp_priority == 2) {
			critical_point = 0;
		}

		/*
		  The critical point should never be above max_stop_req-4.  Setting
		  GRPH_CRITICAL_CNTL = 0 will thus force high priority all the time.
		*/
		if (max_stop_req - critical_point < 4)
			critical_point = 0;

		if (critical_point == 0 && mode2 && rdev->family == CHIP_R300) {
			/* some R300 cards have problem with this set to 0, when CRTC2 is enabled.*/
			critical_point = 0x10;
		}

		temp = RREG32(RADEON_GRPH_BUFFER_CNTL);
		temp &= ~(RADEON_GRPH_STOP_REQ_MASK);
		temp |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT);
		temp &= ~(RADEON_GRPH_START_REQ_MASK);
		if ((rdev->family == CHIP_R350) &&
		    (stop_req > 0x15)) {
			stop_req -= 0x10;
		}
		temp |= (stop_req << RADEON_GRPH_START_REQ_SHIFT);
		temp |= RADEON_GRPH_BUFFER_SIZE;
		temp &= ~(RADEON_GRPH_CRITICAL_CNTL   |
			  RADEON_GRPH_CRITICAL_AT_SOF |
			  RADEON_GRPH_STOP_CNTL);
		/*
		  Write the result into the register.
		*/
		WREG32(RADEON_GRPH_BUFFER_CNTL, ((temp & ~RADEON_GRPH_CRITICAL_POINT_MASK) |
						       (critical_point << RADEON_GRPH_CRITICAL_POINT_SHIFT)));

#if 0
		if ((rdev->family == CHIP_RS400) ||
		    (rdev->family == CHIP_RS480)) {
			/* attempt to program RS400 disp regs correctly ??? */
			temp = RREG32(RS400_DISP1_REG_CNTL);
			temp &= ~(RS400_DISP1_START_REQ_LEVEL_MASK |
				  RS400_DISP1_STOP_REQ_LEVEL_MASK);
			WREG32(RS400_DISP1_REQ_CNTL1, (temp |
						       (critical_point << RS400_DISP1_START_REQ_LEVEL_SHIFT) |
						       (critical_point << RS400_DISP1_STOP_REQ_LEVEL_SHIFT)));
			temp = RREG32(RS400_DMIF_MEM_CNTL1);
			temp &= ~(RS400_DISP1_CRITICAL_POINT_START_MASK |
				  RS400_DISP1_CRITICAL_POINT_STOP_MASK);
			WREG32(RS400_DMIF_MEM_CNTL1, (temp |
						      (critical_point << RS400_DISP1_CRITICAL_POINT_START_SHIFT) |
						      (critical_point << RS400_DISP1_CRITICAL_POINT_STOP_SHIFT)));
		}
#endif

		DRM_DEBUG_KMS("GRPH_BUFFER_CNTL from to %x\n",
			  /*      (unsigned int)info->SavedReg->grph_buffer_cntl, */
			  (unsigned int)RREG32(RADEON_GRPH_BUFFER_CNTL));
	}

	if (mode2) {
		u32 grph2_cntl;
		stop_req = mode2->hdisplay * pixel_bytes2 / 16;

		if (stop_req > max_stop_req)
			stop_req = max_stop_req;

		/*
		  Find the drain rate of the display buffer.
		*/
		temp_ff.full = dfixed_const((16/pixel_bytes2));
		disp_drain_rate2.full = dfixed_div(pix_clk2, temp_ff);

		grph2_cntl = RREG32(RADEON_GRPH2_BUFFER_CNTL);
		grph2_cntl &= ~(RADEON_GRPH_STOP_REQ_MASK);
		grph2_cntl |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT);
		grph2_cntl &= ~(RADEON_GRPH_START_REQ_MASK);
		if ((rdev->family == CHIP_R350) &&
		    (stop_req > 0x15)) {
			stop_req -= 0x10;
		}
		grph2_cntl |= (stop_req << RADEON_GRPH_START_REQ_SHIFT);
		grph2_cntl |= RADEON_GRPH_BUFFER_SIZE;
		grph2_cntl &= ~(RADEON_GRPH_CRITICAL_CNTL   |
			  RADEON_GRPH_CRITICAL_AT_SOF |
			  RADEON_GRPH_STOP_CNTL);

		if ((rdev->family == CHIP_RS100) ||
		    (rdev->family == CHIP_RS200))
			critical_point2 = 0;
		else {
			/* read return rate is the min of memory and engine rate.
			 * NOTE(review): precedence makes this
			 * (vram_width * vram_is_ddr + 1) / 128;
			 * (vram_width * (vram_is_ddr + 1)) / 128 may have been
			 * intended — confirm before changing. */
			temp = (rdev->mc.vram_width * rdev->mc.vram_is_ddr + 1)/128;
			temp_ff.full = dfixed_const(temp);
			temp_ff.full = dfixed_mul(mclk_ff, temp_ff);
			if (sclk_ff.full < temp_ff.full)
				temp_ff.full = sclk_ff.full;

			read_return_rate.full = temp_ff.full;

			if (mode1) {
				/* time before CRTC1's buffer drains and it takes
				 * priority over CRTC2 */
				temp_ff.full = read_return_rate.full - disp_drain_rate.full;
				time_disp1_drop_priority.full = dfixed_div(crit_point_ff, temp_ff);
			} else {
				time_disp1_drop_priority.full = 0;
			}
			crit_point_ff.full = disp_latency.full + time_disp1_drop_priority.full + disp_latency.full;
			crit_point_ff.full = dfixed_mul(crit_point_ff, disp_drain_rate2);
			crit_point_ff.full += dfixed_const_half(0);	/* round to nearest */

			critical_point2 = dfixed_trunc(crit_point_ff);

			if (rdev->disp_priority == 2) {
				critical_point2 = 0;
			}

			if (max_stop_req - critical_point2 < 4)
				critical_point2 = 0;

		}

		if (critical_point2 == 0 && rdev->family == CHIP_R300) {
			/* some R300 cards have problem with this set to 0 */
			critical_point2 = 0x10;
		}

		WREG32(RADEON_GRPH2_BUFFER_CNTL, ((grph2_cntl & ~RADEON_GRPH_CRITICAL_POINT_MASK) |
						  (critical_point2 << RADEON_GRPH_CRITICAL_POINT_SHIFT)));

		if ((rdev->family == CHIP_RS400) ||
		    (rdev->family == CHIP_RS480)) {
#if 0
			/* attempt to program RS400 disp2 regs correctly ??? */
			temp = RREG32(RS400_DISP2_REQ_CNTL1);
			temp &= ~(RS400_DISP2_START_REQ_LEVEL_MASK |
				  RS400_DISP2_STOP_REQ_LEVEL_MASK);
			WREG32(RS400_DISP2_REQ_CNTL1, (temp |
						       (critical_point2 << RS400_DISP1_START_REQ_LEVEL_SHIFT) |
						       (critical_point2 << RS400_DISP1_STOP_REQ_LEVEL_SHIFT)));
			temp = RREG32(RS400_DISP2_REQ_CNTL2);
			temp &= ~(RS400_DISP2_CRITICAL_POINT_START_MASK |
				  RS400_DISP2_CRITICAL_POINT_STOP_MASK);
			WREG32(RS400_DISP2_REQ_CNTL2, (temp |
						       (critical_point2 << RS400_DISP2_CRITICAL_POINT_START_SHIFT) |
						       (critical_point2 << RS400_DISP2_CRITICAL_POINT_STOP_SHIFT)));
#endif
			/* magic values observed to work for RS400/RS480;
			 * see the disabled computed version above */
			WREG32(RS400_DISP2_REQ_CNTL1, 0x105DC1CC);
			WREG32(RS400_DISP2_REQ_CNTL2, 0x2749D000);
			WREG32(RS400_DMIF_MEM_CNTL1,  0x29CA71DC);
			WREG32(RS400_DISP1_REQ_CNTL1, 0x28FBC3AC);
		}

		DRM_DEBUG_KMS("GRPH2_BUFFER_CNTL from to %x\n",
			  (unsigned int)RREG32(RADEON_GRPH2_BUFFER_CNTL));
	}
}
3619
3620int r100_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3621{
3622        uint32_t scratch;
3623        uint32_t tmp = 0;
3624        unsigned i;
3625        int r;
3626
3627        r = radeon_scratch_get(rdev, &scratch);
3628        if (r) {
3629                DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3630                return r;
3631        }
3632        WREG32(scratch, 0xCAFEDEAD);
3633        r = radeon_ring_lock(rdev, ring, 2);
3634        if (r) {
3635                DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3636                radeon_scratch_free(rdev, scratch);
3637                return r;
3638        }
3639        radeon_ring_write(ring, PACKET0(scratch, 0));
3640        radeon_ring_write(ring, 0xDEADBEEF);
3641        radeon_ring_unlock_commit(rdev, ring, false);
3642        for (i = 0; i < rdev->usec_timeout; i++) {
3643                tmp = RREG32(scratch);
3644                if (tmp == 0xDEADBEEF) {
3645                        break;
3646                }
3647                DRM_UDELAY(1);
3648        }
3649        if (i < rdev->usec_timeout) {
3650                DRM_INFO("ring test succeeded in %d usecs\n", i);
3651        } else {
3652                DRM_ERROR("radeon: ring test failed (scratch(0x%04X)=0x%08X)\n",
3653                          scratch, tmp);
3654                r = -EINVAL;
3655        }
3656        radeon_scratch_free(rdev, scratch);
3657        return r;
3658}
3659
/*
 * r100_ring_ib_execute - emit an indirect buffer on the GFX ring
 * @rdev: radeon device
 * @ib: indirect buffer to dispatch (gpu_addr and length_dw must be set)
 *
 * If the ring has an rptr save register, first records the predicted
 * read pointer there, then writes the IB base address and dword length
 * into the two CP registers starting at RADEON_CP_IB_BASE.
 */
void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];

	if (ring->rptr_save_reg) {
		/* 2 dwords for this save packet + 3 for the IB packet below */
		u32 next_rptr = ring->wptr + 2 + 3;
		radeon_ring_write(ring, PACKET0(ring->rptr_save_reg, 0));
		radeon_ring_write(ring, next_rptr);
	}

	/* PACKET0 writing 2 consecutive registers: IB base, then length */
	radeon_ring_write(ring, PACKET0(RADEON_CP_IB_BASE, 1));
	radeon_ring_write(ring, ib->gpu_addr);
	radeon_ring_write(ring, ib->length_dw);
}
3674
3675int r100_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3676{
3677        struct radeon_ib ib;
3678        uint32_t scratch;
3679        uint32_t tmp = 0;
3680        unsigned i;
3681        int r;
3682
3683        r = radeon_scratch_get(rdev, &scratch);
3684        if (r) {
3685                DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3686                return r;
3687        }
3688        WREG32(scratch, 0xCAFEDEAD);
3689        r = radeon_ib_get(rdev, RADEON_RING_TYPE_GFX_INDEX, &ib, NULL, 256);
3690        if (r) {
3691                DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3692                goto free_scratch;
3693        }
3694        ib.ptr[0] = PACKET0(scratch, 0);
3695        ib.ptr[1] = 0xDEADBEEF;
3696        ib.ptr[2] = PACKET2(0);
3697        ib.ptr[3] = PACKET2(0);
3698        ib.ptr[4] = PACKET2(0);
3699        ib.ptr[5] = PACKET2(0);
3700        ib.ptr[6] = PACKET2(0);
3701        ib.ptr[7] = PACKET2(0);
3702        ib.length_dw = 8;
3703        r = radeon_ib_schedule(rdev, &ib, NULL, false);
3704        if (r) {
3705                DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3706                goto free_ib;
3707        }
3708        r = radeon_fence_wait(ib.fence, false);
3709        if (r) {
3710                DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3711                goto free_ib;
3712        }
3713        for (i = 0; i < rdev->usec_timeout; i++) {
3714                tmp = RREG32(scratch);
3715                if (tmp == 0xDEADBEEF) {
3716                        break;
3717                }
3718                DRM_UDELAY(1);
3719        }
3720        if (i < rdev->usec_timeout) {
3721                DRM_INFO("ib test succeeded in %u usecs\n", i);
3722        } else {
3723                DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3724                          scratch, tmp);
3725                r = -EINVAL;
3726        }
3727free_ib:
3728        radeon_ib_free(rdev, &ib);
3729free_scratch:
3730        radeon_scratch_free(rdev, scratch);
3731        return r;
3732}
3733
/**
 * r100_mc_stop - stop all memory-controller clients before MC reprogramming
 * @rdev: radeon device
 * @save: register save area, restored later by r100_mc_resume()
 *
 * Shuts down the CP and disables VGA RAM access, cursors, overlay and CRTC
 * memory requests so nothing touches VRAM while the MC aperture is moved.
 * The saved registers let r100_mc_resume() put the display back.
 */
void r100_mc_stop(struct radeon_device *rdev, struct r100_mc_save *save)
{
	/* Shutdown CP we shouldn't need to do that but better be safe than
	 * sorry
	 */
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
	WREG32(R_000740_CP_CSQ_CNTL, 0);

	/* Save few CRTC registers */
	save->GENMO_WT = RREG8(R_0003C2_GENMO_WT);
	save->CRTC_EXT_CNTL = RREG32(R_000054_CRTC_EXT_CNTL);
	save->CRTC_GEN_CNTL = RREG32(R_000050_CRTC_GEN_CNTL);
	save->CUR_OFFSET = RREG32(R_000260_CUR_OFFSET);
	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
		/* second CRTC exists only on dual-CRTC parts */
		save->CRTC2_GEN_CNTL = RREG32(R_0003F8_CRTC2_GEN_CNTL);
		save->CUR2_OFFSET = RREG32(R_000360_CUR2_OFFSET);
	}

	/* Disable VGA aperture access */
	WREG8(R_0003C2_GENMO_WT, C_0003C2_VGA_RAM_EN & save->GENMO_WT);
	/* Disable cursor, overlay, crtc */
	WREG32(R_000260_CUR_OFFSET, save->CUR_OFFSET | S_000260_CUR_LOCK(1));
	WREG32(R_000054_CRTC_EXT_CNTL, save->CRTC_EXT_CNTL |
					S_000054_CRTC_DISPLAY_DIS(1));
	WREG32(R_000050_CRTC_GEN_CNTL,
			(C_000050_CRTC_CUR_EN & save->CRTC_GEN_CNTL) |
			S_000050_CRTC_DISP_REQ_EN_B(1));
	WREG32(R_000420_OV0_SCALE_CNTL,
		C_000420_OV0_OVERLAY_EN & RREG32(R_000420_OV0_SCALE_CNTL));
	/* cursor offset is rewritten with the lock bit cleared */
	WREG32(R_000260_CUR_OFFSET, C_000260_CUR_LOCK & save->CUR_OFFSET);
	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
		WREG32(R_000360_CUR2_OFFSET, save->CUR2_OFFSET |
						S_000360_CUR2_LOCK(1));
		WREG32(R_0003F8_CRTC2_GEN_CNTL,
			(C_0003F8_CRTC2_CUR_EN & save->CRTC2_GEN_CNTL) |
			S_0003F8_CRTC2_DISPLAY_DIS(1) |
			S_0003F8_CRTC2_DISP_REQ_EN_B(1));
		WREG32(R_000360_CUR2_OFFSET,
			C_000360_CUR2_LOCK & save->CUR2_OFFSET);
	}
}
3775
/**
 * r100_mc_resume - restore display clients after r100_mc_stop()
 * @rdev: radeon device
 * @save: registers saved by the matching r100_mc_stop() call
 *
 * Repoints the CRTC scanout base(s) at the (possibly relocated) VRAM start,
 * then restores the saved VGA/CRTC control registers.
 */
void r100_mc_resume(struct radeon_device *rdev, struct r100_mc_save *save)
{
	/* Update base address for crtc */
	WREG32(R_00023C_DISPLAY_BASE_ADDR, rdev->mc.vram_start);
	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
		WREG32(R_00033C_CRTC2_DISPLAY_BASE_ADDR, rdev->mc.vram_start);
	}
	/* Restore CRTC registers */
	WREG8(R_0003C2_GENMO_WT, save->GENMO_WT);
	WREG32(R_000054_CRTC_EXT_CNTL, save->CRTC_EXT_CNTL);
	WREG32(R_000050_CRTC_GEN_CNTL, save->CRTC_GEN_CNTL);
	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
		WREG32(R_0003F8_CRTC2_GEN_CNTL, save->CRTC2_GEN_CNTL);
	}
}
3791
3792void r100_vga_render_disable(struct radeon_device *rdev)
3793{
3794        u32 tmp;
3795
3796        tmp = RREG8(R_0003C2_GENMO_WT);
3797        WREG8(R_0003C2_GENMO_WT, C_0003C2_VGA_RAM_EN & tmp);
3798}
3799
3800static void r100_debugfs(struct radeon_device *rdev)
3801{
3802        int r;
3803
3804        r = r100_debugfs_mc_info_init(rdev);
3805        if (r)
3806                dev_warn(rdev->dev, "Failed to create r100_mc debugfs file.\n");
3807}
3808
/**
 * r100_mc_program - program the memory-controller FB/AGP apertures
 * @rdev: radeon device
 *
 * Stops all MC clients, programs the AGP aperture (or parks it when not
 * on AGP), waits for the MC to go idle, programs the FB aperture, then
 * resumes the clients.
 */
static void r100_mc_program(struct radeon_device *rdev)
{
	struct r100_mc_save save;

	/* Stops all mc clients */
	r100_mc_stop(rdev, &save);
	if (rdev->flags & RADEON_IS_AGP) {
		WREG32(R_00014C_MC_AGP_LOCATION,
			S_00014C_MC_AGP_START(rdev->mc.gtt_start >> 16) |
			S_00014C_MC_AGP_TOP(rdev->mc.gtt_end >> 16));
		WREG32(R_000170_AGP_BASE, lower_32_bits(rdev->mc.agp_base));
		/* AGP_BASE_2 (upper address bits) only exists after RV200 */
		if (rdev->family > CHIP_RV200)
			WREG32(R_00015C_AGP_BASE_2,
				upper_32_bits(rdev->mc.agp_base) & 0xff);
	} else {
		/* park the AGP aperture out of the way */
		WREG32(R_00014C_MC_AGP_LOCATION, 0x0FFFFFFF);
		WREG32(R_000170_AGP_BASE, 0);
		if (rdev->family > CHIP_RV200)
			WREG32(R_00015C_AGP_BASE_2, 0);
	}
	/* Wait for mc idle */
	if (r100_mc_wait_for_idle(rdev))
		dev_warn(rdev->dev, "Wait for MC idle timeout.\n");
	/* Program MC, should be a 32bits limited address space */
	WREG32(R_000148_MC_FB_LOCATION,
		S_000148_MC_FB_START(rdev->mc.vram_start >> 16) |
		S_000148_MC_FB_TOP(rdev->mc.vram_end >> 16));
	r100_mc_resume(rdev, &save);
}
3838
/**
 * r100_clock_startup - set up clocks for engine startup
 * @rdev: radeon device
 *
 * Optionally enables legacy dynamic clock gating (controlled by the
 * radeon_dynclks module parameter), then forces the CP and VIP clocks on;
 * RV250/RV280 additionally need the display clocks forced.
 */
static void r100_clock_startup(struct radeon_device *rdev)
{
	u32 tmp;

	/* radeon_dynclks: -1 = driver default, 0 = off, nonzero = on */
	if (radeon_dynclks != -1 && radeon_dynclks)
		radeon_legacy_set_clock_gating(rdev, 1);
	/* We need to force on some of the block */
	tmp = RREG32_PLL(R_00000D_SCLK_CNTL);
	tmp |= S_00000D_FORCE_CP(1) | S_00000D_FORCE_VIP(1);
	if ((rdev->family == CHIP_RV250) || (rdev->family == CHIP_RV280))
		tmp |= S_00000D_FORCE_DISP1(1) | S_00000D_FORCE_DISP2(1);
	WREG32_PLL(R_00000D_SCLK_CNTL, tmp);
}
3852
/**
 * r100_startup - bring up the acceleration engines
 * @rdev: radeon device
 *
 * Common bring-up path used by both r100_init() and r100_resume():
 * common regs, MC, clocks, GART, write-back, fences, IRQ, CP ring and
 * the IB pool, in that order.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int r100_startup(struct radeon_device *rdev)
{
	int r;

	/* set common regs */
	r100_set_common_regs(rdev);
	/* program mc */
	r100_mc_program(rdev);
	/* Resume clock */
	r100_clock_startup(rdev);
	/* Initialize GART (initialize after TTM so we can allocate
	 * memory through TTM but finalize after TTM) */
	r100_enable_bm(rdev);
	if (rdev->flags & RADEON_IS_PCI) {
		r = r100_pci_gart_enable(rdev);
		if (r)
			return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r100_irq_set(rdev);
	rdev->config.r100.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
	/* 1M ring buffer */
	r = r100_cp_init(rdev, 1024 * 1024);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP (%d).\n", r);
		return r;
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	return 0;
}
3907
/**
 * r100_resume - resume the GPU after system suspend
 * @rdev: radeon device
 *
 * Disables the GART, resets and re-posts the ASIC, then runs the common
 * r100_startup() bring-up.  accel_working is cleared if startup fails.
 *
 * Returns 0 on success, negative error code on failure.
 */
int r100_resume(struct radeon_device *rdev)
{
	int r;

	/* Make sure GART is not working */
	if (rdev->flags & RADEON_IS_PCI)
		r100_pci_gart_disable(rdev);
	/* Resume clock before doing reset */
	r100_clock_startup(rdev);
	/* Reset gpu before posting otherwise ATOM will enter infinite loop */
	if (radeon_asic_reset(rdev)) {
		dev_warn(rdev->dev, "GPU reset failed ! (0xE40=0x%08X, 0x7C0=0x%08X)\n",
			RREG32(R_000E40_RBBM_STATUS),
			RREG32(R_0007C0_CP_STAT));
	}
	/* post */
	radeon_combios_asic_init(rdev->ddev);
	/* Resume clock after posting */
	r100_clock_startup(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);

	rdev->accel_working = true;
	r = r100_startup(rdev);
	if (r) {
		rdev->accel_working = false;
	}
	return r;
}
3937
/**
 * r100_suspend - quiesce the GPU before system suspend
 * @rdev: radeon device
 *
 * Suspends power management and disables the CP, write-back, interrupts
 * and (on PCI GART parts) the GART.  Always returns 0.
 */
int r100_suspend(struct radeon_device *rdev)
{
	radeon_pm_suspend(rdev);
	r100_cp_disable(rdev);
	radeon_wb_disable(rdev);
	r100_irq_disable(rdev);
	if (rdev->flags & RADEON_IS_PCI)
		r100_pci_gart_disable(rdev);
	return 0;
}
3948
/**
 * r100_fini - final driver teardown, mirror of r100_init()
 * @rdev: radeon device
 *
 * Tears down PM, CP, write-back, IB pool, GEM, GART, AGP, IRQ, fences,
 * the buffer manager and the BIOS copy.
 */
void r100_fini(struct radeon_device *rdev)
{
	radeon_pm_fini(rdev);
	r100_cp_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_gem_fini(rdev);
	if (rdev->flags & RADEON_IS_PCI)
		r100_pci_gart_fini(rdev);
	radeon_agp_fini(rdev);
	radeon_irq_kms_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}
3966
3967/*
3968 * Due to how kexec works, it can leave the hw fully initialised when it
3969 * boots the new kernel. However doing our init sequence with the CP and
3970 * WB stuff setup causes GPU hangs on the RN50 at least. So at startup
3971 * do some quick sanity checks and restore sane values to avoid this
3972 * problem.
3973 */
3974void r100_restore_sanity(struct radeon_device *rdev)
3975{
3976        u32 tmp;
3977
3978        tmp = RREG32(RADEON_CP_CSQ_CNTL);
3979        if (tmp) {
3980                WREG32(RADEON_CP_CSQ_CNTL, 0);
3981        }
3982        tmp = RREG32(RADEON_CP_RB_CNTL);
3983        if (tmp) {
3984                WREG32(RADEON_CP_RB_CNTL, 0);
3985        }
3986        tmp = RREG32(RADEON_SCRATCH_UMSK);
3987        if (tmp) {
3988                WREG32(RADEON_SCRATCH_UMSK, 0);
3989        }
3990}
3991
/**
 * r100_init - one-time driver initialization for R100-class ASICs
 * @rdev: radeon device
 *
 * Sets up debugfs, scratch/surface registers, reads the (combios) BIOS,
 * resets and posts the card, probes clocks/AGP/VRAM, initializes fences,
 * the buffer manager and GART, then runs r100_startup().  A startup
 * failure disables acceleration but still returns 0 so the KMS side
 * can come up without accel.
 *
 * Returns 0 on success (even with accel disabled), negative error code
 * on a fatal setup failure.
 */
int r100_init(struct radeon_device *rdev)
{
	int r;

	/* Register debugfs file specific to this group of asics */
	r100_debugfs(rdev);
	/* Disable VGA */
	r100_vga_render_disable(rdev);
	/* Initialize scratch registers */
	radeon_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* sanity check some register to avoid hangs like after kexec */
	r100_restore_sanity(rdev);
	/* TODO: disable VGA need to use VGA request */
	/* BIOS*/
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	if (rdev->is_atom_bios) {
		/* these ASICs must carry a combios, not an atombios */
		dev_err(rdev->dev, "Expecting combios for RS400/RS480 GPU\n");
		return -EINVAL;
	} else {
		r = radeon_combios_init(rdev);
		if (r)
			return r;
	}
	/* Reset gpu before posting otherwise ATOM will enter infinite loop */
	if (radeon_asic_reset(rdev)) {
		dev_warn(rdev->dev,
			"GPU reset failed ! (0xE40=0x%08X, 0x7C0=0x%08X)\n",
			RREG32(R_000E40_RBBM_STATUS),
			RREG32(R_0007C0_CP_STAT));
	}
	/* check if cards are posted or not */
	if (radeon_boot_test_post_card(rdev) == false)
		return -EINVAL;
	/* Set asic errata */
	r100_errata(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);
	/* initialize AGP */
	if (rdev->flags & RADEON_IS_AGP) {
		r = radeon_agp_init(rdev);
		if (r) {
			/* fall back to non-AGP operation on failure */
			radeon_agp_disable(rdev);
		}
	}
	/* initialize VRAM */
	r100_mc_init(rdev);
	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;
	if (rdev->flags & RADEON_IS_PCI) {
		r = r100_pci_gart_init(rdev);
		if (r)
			return r;
	}
	r100_set_safe_registers(rdev);

	/* Initialize power management */
	radeon_pm_init(rdev);

	rdev->accel_working = true;
	r = r100_startup(rdev);
	if (r) {
		/* Something went wrong with the accel init, stop accel */
		dev_err(rdev->dev, "Disabling GPU acceleration\n");
		r100_cp_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_irq_kms_fini(rdev);
		if (rdev->flags & RADEON_IS_PCI)
			r100_pci_gart_fini(rdev);
		rdev->accel_working = false;
	}
	return 0;
}
4076
4077u32 r100_io_rreg(struct radeon_device *rdev, u32 reg)
4078{
4079        if (reg < rdev->rio_mem_size)
4080                return ioread32(rdev->rio_mem + reg);
4081        else {
4082                iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
4083                return ioread32(rdev->rio_mem + RADEON_MM_DATA);
4084        }
4085}
4086
4087void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v)
4088{
4089        if (reg < rdev->rio_mem_size)
4090                iowrite32(v, rdev->rio_mem + reg);
4091        else {
4092                iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
4093                iowrite32(v, rdev->rio_mem + RADEON_MM_DATA);
4094        }
4095}
4096