linux/drivers/gpu/drm/radeon/r100.c
<<
>>
Prefs
   1/*
   2 * Copyright 2008 Advanced Micro Devices, Inc.
   3 * Copyright 2008 Red Hat Inc.
   4 * Copyright 2009 Jerome Glisse.
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a
   7 * copy of this software and associated documentation files (the "Software"),
   8 * to deal in the Software without restriction, including without limitation
   9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10 * and/or sell copies of the Software, and to permit persons to whom the
  11 * Software is furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  22 * OTHER DEALINGS IN THE SOFTWARE.
  23 *
  24 * Authors: Dave Airlie
  25 *          Alex Deucher
  26 *          Jerome Glisse
  27 */
  28
  29#include <linux/firmware.h>
  30#include <linux/module.h>
  31#include <linux/pci.h>
  32#include <linux/seq_file.h>
  33#include <linux/slab.h>
  34
  35#include <drm/drm_device.h>
  36#include <drm/drm_file.h>
  37#include <drm/drm_fourcc.h>
  38#include <drm/drm_vblank.h>
  39#include <drm/radeon_drm.h>
  40
  41#include "atom.h"
  42#include "r100_reg_safe.h"
  43#include "r100d.h"
  44#include "radeon.h"
  45#include "radeon_asic.h"
  46#include "radeon_reg.h"
  47#include "rn50_reg_safe.h"
  48#include "rs100d.h"
  49#include "rv200d.h"
  50#include "rv250d.h"
  51
  52/* Firmware Names */
  53#define FIRMWARE_R100           "radeon/R100_cp.bin"
  54#define FIRMWARE_R200           "radeon/R200_cp.bin"
  55#define FIRMWARE_R300           "radeon/R300_cp.bin"
  56#define FIRMWARE_R420           "radeon/R420_cp.bin"
  57#define FIRMWARE_RS690          "radeon/RS690_cp.bin"
  58#define FIRMWARE_RS600          "radeon/RS600_cp.bin"
  59#define FIRMWARE_R520           "radeon/R520_cp.bin"
  60
  61MODULE_FIRMWARE(FIRMWARE_R100);
  62MODULE_FIRMWARE(FIRMWARE_R200);
  63MODULE_FIRMWARE(FIRMWARE_R300);
  64MODULE_FIRMWARE(FIRMWARE_R420);
  65MODULE_FIRMWARE(FIRMWARE_RS690);
  66MODULE_FIRMWARE(FIRMWARE_RS600);
  67MODULE_FIRMWARE(FIRMWARE_R520);
  68
  69#include "r100_track.h"
  70
   71/* This file gathers functions specific to:
   72 * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280
   73 * and others in some cases.
   74 */
  75
  76static bool r100_is_in_vblank(struct radeon_device *rdev, int crtc)
  77{
  78        if (crtc == 0) {
  79                if (RREG32(RADEON_CRTC_STATUS) & RADEON_CRTC_VBLANK_CUR)
  80                        return true;
  81                else
  82                        return false;
  83        } else {
  84                if (RREG32(RADEON_CRTC2_STATUS) & RADEON_CRTC2_VBLANK_CUR)
  85                        return true;
  86                else
  87                        return false;
  88        }
  89}
  90
  91static bool r100_is_counter_moving(struct radeon_device *rdev, int crtc)
  92{
  93        u32 vline1, vline2;
  94
  95        if (crtc == 0) {
  96                vline1 = (RREG32(RADEON_CRTC_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
  97                vline2 = (RREG32(RADEON_CRTC_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
  98        } else {
  99                vline1 = (RREG32(RADEON_CRTC2_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
 100                vline2 = (RREG32(RADEON_CRTC2_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
 101        }
 102        if (vline1 != vline2)
 103                return true;
 104        else
 105                return false;
 106}
 107
 108/**
 109 * r100_wait_for_vblank - vblank wait asic callback.
 110 *
 111 * @rdev: radeon_device pointer
 112 * @crtc: crtc to wait for vblank on
 113 *
 114 * Wait for vblank on the requested crtc (r1xx-r4xx).
 115 */
 116void r100_wait_for_vblank(struct radeon_device *rdev, int crtc)
 117{
 118        unsigned i = 0;
 119
 120        if (crtc >= rdev->num_crtc)
 121                return;
 122
 123        if (crtc == 0) {
 124                if (!(RREG32(RADEON_CRTC_GEN_CNTL) & RADEON_CRTC_EN))
 125                        return;
 126        } else {
 127                if (!(RREG32(RADEON_CRTC2_GEN_CNTL) & RADEON_CRTC2_EN))
 128                        return;
 129        }
 130
 131        /* depending on when we hit vblank, we may be close to active; if so,
 132         * wait for another frame.
 133         */
 134        while (r100_is_in_vblank(rdev, crtc)) {
 135                if (i++ % 100 == 0) {
 136                        if (!r100_is_counter_moving(rdev, crtc))
 137                                break;
 138                }
 139        }
 140
 141        while (!r100_is_in_vblank(rdev, crtc)) {
 142                if (i++ % 100 == 0) {
 143                        if (!r100_is_counter_moving(rdev, crtc))
 144                                break;
 145                }
 146        }
 147}
 148
 149/**
 150 * r100_page_flip - pageflip callback.
 151 *
 152 * @rdev: radeon_device pointer
 153 * @crtc_id: crtc to cleanup pageflip on
 154 * @crtc_base: new address of the crtc (GPU MC address)
 155 * @async: asynchronous flip
 156 *
 157 * Does the actual pageflip (r1xx-r4xx).
 158 * During vblank we take the crtc lock and wait for the update_pending
 159 * bit to go high, when it does, we release the lock, and allow the
 160 * double buffered update to take place.
 161 */
 162void r100_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base, bool async)
 163{
 164        struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
 165        uint32_t crtc_pitch, pitch_pixels;
 166        struct drm_framebuffer *fb = radeon_crtc->base.primary->fb;
 167        u32 tmp = ((u32)crtc_base) | RADEON_CRTC_OFFSET__OFFSET_LOCK;
 168        int i;
 169
 170        /* Lock the graphics update lock */
 171        /* update the scanout addresses */
 172        WREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset, tmp);
 173
 174        /* update pitch */
 175        pitch_pixels = fb->pitches[0] / fb->format->cpp[0];
 176        crtc_pitch = DIV_ROUND_UP(pitch_pixels * fb->format->cpp[0] * 8,
 177                                  fb->format->cpp[0] * 8 * 8);
 178        crtc_pitch |= crtc_pitch << 16;
 179        WREG32(RADEON_CRTC_PITCH + radeon_crtc->crtc_offset, crtc_pitch);
 180
 181        /* Wait for update_pending to go high. */
 182        for (i = 0; i < rdev->usec_timeout; i++) {
 183                if (RREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset) & RADEON_CRTC_OFFSET__GUI_TRIG_OFFSET)
 184                        break;
 185                udelay(1);
 186        }
 187        DRM_DEBUG("Update pending now high. Unlocking vupdate_lock.\n");
 188
 189        /* Unlock the lock, so double-buffering can take place inside vblank */
 190        tmp &= ~RADEON_CRTC_OFFSET__OFFSET_LOCK;
 191        WREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset, tmp);
 192
 193}
 194
 195/**
 196 * r100_page_flip_pending - check if page flip is still pending
 197 *
 198 * @rdev: radeon_device pointer
 199 * @crtc_id: crtc to check
 200 *
 201 * Check if the last pagefilp is still pending (r1xx-r4xx).
 202 * Returns the current update pending status.
 203 */
 204bool r100_page_flip_pending(struct radeon_device *rdev, int crtc_id)
 205{
 206        struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
 207
 208        /* Return current update_pending status: */
 209        return !!(RREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset) &
 210                RADEON_CRTC_OFFSET__GUI_TRIG_OFFSET);
 211}
 212
 213/**
 214 * r100_pm_get_dynpm_state - look up dynpm power state callback.
 215 *
 216 * @rdev: radeon_device pointer
 217 *
 218 * Look up the optimal power state based on the
 219 * current state of the GPU (r1xx-r5xx).
 220 * Used for dynpm only.
 221 */
 222void r100_pm_get_dynpm_state(struct radeon_device *rdev)
 223{
 224        int i;
 225        rdev->pm.dynpm_can_upclock = true;
 226        rdev->pm.dynpm_can_downclock = true;
 227
 228        switch (rdev->pm.dynpm_planned_action) {
 229        case DYNPM_ACTION_MINIMUM:
 230                rdev->pm.requested_power_state_index = 0;
 231                rdev->pm.dynpm_can_downclock = false;
 232                break;
 233        case DYNPM_ACTION_DOWNCLOCK:
 234                if (rdev->pm.current_power_state_index == 0) {
 235                        rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
 236                        rdev->pm.dynpm_can_downclock = false;
 237                } else {
 238                        if (rdev->pm.active_crtc_count > 1) {
 239                                for (i = 0; i < rdev->pm.num_power_states; i++) {
 240                                        if (rdev->pm.power_state[i].flags & RADEON_PM_STATE_SINGLE_DISPLAY_ONLY)
 241                                                continue;
 242                                        else if (i >= rdev->pm.current_power_state_index) {
 243                                                rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
 244                                                break;
 245                                        } else {
 246                                                rdev->pm.requested_power_state_index = i;
 247                                                break;
 248                                        }
 249                                }
 250                        } else
 251                                rdev->pm.requested_power_state_index =
 252                                        rdev->pm.current_power_state_index - 1;
 253                }
 254                /* don't use the power state if crtcs are active and no display flag is set */
 255                if ((rdev->pm.active_crtc_count > 0) &&
 256                    (rdev->pm.power_state[rdev->pm.requested_power_state_index].clock_info[0].flags &
 257                     RADEON_PM_MODE_NO_DISPLAY)) {
 258                        rdev->pm.requested_power_state_index++;
 259                }
 260                break;
 261        case DYNPM_ACTION_UPCLOCK:
 262                if (rdev->pm.current_power_state_index == (rdev->pm.num_power_states - 1)) {
 263                        rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
 264                        rdev->pm.dynpm_can_upclock = false;
 265                } else {
 266                        if (rdev->pm.active_crtc_count > 1) {
 267                                for (i = (rdev->pm.num_power_states - 1); i >= 0; i--) {
 268                                        if (rdev->pm.power_state[i].flags & RADEON_PM_STATE_SINGLE_DISPLAY_ONLY)
 269                                                continue;
 270                                        else if (i <= rdev->pm.current_power_state_index) {
 271                                                rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
 272                                                break;
 273                                        } else {
 274                                                rdev->pm.requested_power_state_index = i;
 275                                                break;
 276                                        }
 277                                }
 278                        } else
 279                                rdev->pm.requested_power_state_index =
 280                                        rdev->pm.current_power_state_index + 1;
 281                }
 282                break;
 283        case DYNPM_ACTION_DEFAULT:
 284                rdev->pm.requested_power_state_index = rdev->pm.default_power_state_index;
 285                rdev->pm.dynpm_can_upclock = false;
 286                break;
 287        case DYNPM_ACTION_NONE:
 288        default:
 289                DRM_ERROR("Requested mode for not defined action\n");
 290                return;
 291        }
 292        /* only one clock mode per power state */
 293        rdev->pm.requested_clock_mode_index = 0;
 294
 295        DRM_DEBUG_DRIVER("Requested: e: %d m: %d p: %d\n",
 296                  rdev->pm.power_state[rdev->pm.requested_power_state_index].
 297                  clock_info[rdev->pm.requested_clock_mode_index].sclk,
 298                  rdev->pm.power_state[rdev->pm.requested_power_state_index].
 299                  clock_info[rdev->pm.requested_clock_mode_index].mclk,
 300                  rdev->pm.power_state[rdev->pm.requested_power_state_index].
 301                  pcie_lanes);
 302}
 303
 304/**
 305 * r100_pm_init_profile - Initialize power profiles callback.
 306 *
 307 * @rdev: radeon_device pointer
 308 *
 309 * Initialize the power states used in profile mode
 310 * (r1xx-r3xx).
 311 * Used for profile mode only.
 312 */
 313void r100_pm_init_profile(struct radeon_device *rdev)
 314{
 315        /* default */
 316        rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_off_ps_idx = rdev->pm.default_power_state_index;
 317        rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
 318        rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_off_cm_idx = 0;
 319        rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_on_cm_idx = 0;
 320        /* low sh */
 321        rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_off_ps_idx = 0;
 322        rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_on_ps_idx = 0;
 323        rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_off_cm_idx = 0;
 324        rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_on_cm_idx = 0;
 325        /* mid sh */
 326        rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_off_ps_idx = 0;
 327        rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_on_ps_idx = 0;
 328        rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_off_cm_idx = 0;
 329        rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_on_cm_idx = 0;
 330        /* high sh */
 331        rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_off_ps_idx = 0;
 332        rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
 333        rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_off_cm_idx = 0;
 334        rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_on_cm_idx = 0;
 335        /* low mh */
 336        rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_off_ps_idx = 0;
 337        rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
 338        rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_off_cm_idx = 0;
 339        rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_on_cm_idx = 0;
 340        /* mid mh */
 341        rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_off_ps_idx = 0;
 342        rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
 343        rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_off_cm_idx = 0;
 344        rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_on_cm_idx = 0;
 345        /* high mh */
 346        rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_off_ps_idx = 0;
 347        rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
 348        rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_off_cm_idx = 0;
 349        rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_on_cm_idx = 0;
 350}
 351
 352/**
 353 * r100_pm_misc - set additional pm hw parameters callback.
 354 *
 355 * @rdev: radeon_device pointer
 356 *
 357 * Set non-clock parameters associated with a power state
 358 * (voltage, pcie lanes, etc.) (r1xx-r4xx).
 359 */
 360void r100_pm_misc(struct radeon_device *rdev)
 361{
 362        int requested_index = rdev->pm.requested_power_state_index;
 363        struct radeon_power_state *ps = &rdev->pm.power_state[requested_index];
 364        struct radeon_voltage *voltage = &ps->clock_info[0].voltage;
 365        u32 tmp, sclk_cntl, sclk_cntl2, sclk_more_cntl;
 366
 367        if ((voltage->type == VOLTAGE_GPIO) && (voltage->gpio.valid)) {
 368                if (ps->misc & ATOM_PM_MISCINFO_VOLTAGE_DROP_SUPPORT) {
 369                        tmp = RREG32(voltage->gpio.reg);
 370                        if (voltage->active_high)
 371                                tmp |= voltage->gpio.mask;
 372                        else
 373                                tmp &= ~(voltage->gpio.mask);
 374                        WREG32(voltage->gpio.reg, tmp);
 375                        if (voltage->delay)
 376                                udelay(voltage->delay);
 377                } else {
 378                        tmp = RREG32(voltage->gpio.reg);
 379                        if (voltage->active_high)
 380                                tmp &= ~voltage->gpio.mask;
 381                        else
 382                                tmp |= voltage->gpio.mask;
 383                        WREG32(voltage->gpio.reg, tmp);
 384                        if (voltage->delay)
 385                                udelay(voltage->delay);
 386                }
 387        }
 388
 389        sclk_cntl = RREG32_PLL(SCLK_CNTL);
 390        sclk_cntl2 = RREG32_PLL(SCLK_CNTL2);
 391        sclk_cntl2 &= ~REDUCED_SPEED_SCLK_SEL(3);
 392        sclk_more_cntl = RREG32_PLL(SCLK_MORE_CNTL);
 393        sclk_more_cntl &= ~VOLTAGE_DELAY_SEL(3);
 394        if (ps->misc & ATOM_PM_MISCINFO_ASIC_REDUCED_SPEED_SCLK_EN) {
 395                sclk_more_cntl |= REDUCED_SPEED_SCLK_EN;
 396                if (ps->misc & ATOM_PM_MISCINFO_DYN_CLK_3D_IDLE)
 397                        sclk_cntl2 |= REDUCED_SPEED_SCLK_MODE;
 398                else
 399                        sclk_cntl2 &= ~REDUCED_SPEED_SCLK_MODE;
 400                if (ps->misc & ATOM_PM_MISCINFO_DYNAMIC_CLOCK_DIVIDER_BY_2)
 401                        sclk_cntl2 |= REDUCED_SPEED_SCLK_SEL(0);
 402                else if (ps->misc & ATOM_PM_MISCINFO_DYNAMIC_CLOCK_DIVIDER_BY_4)
 403                        sclk_cntl2 |= REDUCED_SPEED_SCLK_SEL(2);
 404        } else
 405                sclk_more_cntl &= ~REDUCED_SPEED_SCLK_EN;
 406
 407        if (ps->misc & ATOM_PM_MISCINFO_ASIC_DYNAMIC_VOLTAGE_EN) {
 408                sclk_more_cntl |= IO_CG_VOLTAGE_DROP;
 409                if (voltage->delay) {
 410                        sclk_more_cntl |= VOLTAGE_DROP_SYNC;
 411                        switch (voltage->delay) {
 412                        case 33:
 413                                sclk_more_cntl |= VOLTAGE_DELAY_SEL(0);
 414                                break;
 415                        case 66:
 416                                sclk_more_cntl |= VOLTAGE_DELAY_SEL(1);
 417                                break;
 418                        case 99:
 419                                sclk_more_cntl |= VOLTAGE_DELAY_SEL(2);
 420                                break;
 421                        case 132:
 422                                sclk_more_cntl |= VOLTAGE_DELAY_SEL(3);
 423                                break;
 424                        }
 425                } else
 426                        sclk_more_cntl &= ~VOLTAGE_DROP_SYNC;
 427        } else
 428                sclk_more_cntl &= ~IO_CG_VOLTAGE_DROP;
 429
 430        if (ps->misc & ATOM_PM_MISCINFO_DYNAMIC_HDP_BLOCK_EN)
 431                sclk_cntl &= ~FORCE_HDP;
 432        else
 433                sclk_cntl |= FORCE_HDP;
 434
 435        WREG32_PLL(SCLK_CNTL, sclk_cntl);
 436        WREG32_PLL(SCLK_CNTL2, sclk_cntl2);
 437        WREG32_PLL(SCLK_MORE_CNTL, sclk_more_cntl);
 438
 439        /* set pcie lanes */
 440        if ((rdev->flags & RADEON_IS_PCIE) &&
 441            !(rdev->flags & RADEON_IS_IGP) &&
 442            rdev->asic->pm.set_pcie_lanes &&
 443            (ps->pcie_lanes !=
 444             rdev->pm.power_state[rdev->pm.current_power_state_index].pcie_lanes)) {
 445                radeon_set_pcie_lanes(rdev,
 446                                      ps->pcie_lanes);
 447                DRM_DEBUG_DRIVER("Setting: p: %d\n", ps->pcie_lanes);
 448        }
 449}
 450
 451/**
 452 * r100_pm_prepare - pre-power state change callback.
 453 *
 454 * @rdev: radeon_device pointer
 455 *
 456 * Prepare for a power state change (r1xx-r4xx).
 457 */
 458void r100_pm_prepare(struct radeon_device *rdev)
 459{
 460        struct drm_device *ddev = rdev->ddev;
 461        struct drm_crtc *crtc;
 462        struct radeon_crtc *radeon_crtc;
 463        u32 tmp;
 464
 465        /* disable any active CRTCs */
 466        list_for_each_entry(crtc, &ddev->mode_config.crtc_list, head) {
 467                radeon_crtc = to_radeon_crtc(crtc);
 468                if (radeon_crtc->enabled) {
 469                        if (radeon_crtc->crtc_id) {
 470                                tmp = RREG32(RADEON_CRTC2_GEN_CNTL);
 471                                tmp |= RADEON_CRTC2_DISP_REQ_EN_B;
 472                                WREG32(RADEON_CRTC2_GEN_CNTL, tmp);
 473                        } else {
 474                                tmp = RREG32(RADEON_CRTC_GEN_CNTL);
 475                                tmp |= RADEON_CRTC_DISP_REQ_EN_B;
 476                                WREG32(RADEON_CRTC_GEN_CNTL, tmp);
 477                        }
 478                }
 479        }
 480}
 481
 482/**
 483 * r100_pm_finish - post-power state change callback.
 484 *
 485 * @rdev: radeon_device pointer
 486 *
 487 * Clean up after a power state change (r1xx-r4xx).
 488 */
 489void r100_pm_finish(struct radeon_device *rdev)
 490{
 491        struct drm_device *ddev = rdev->ddev;
 492        struct drm_crtc *crtc;
 493        struct radeon_crtc *radeon_crtc;
 494        u32 tmp;
 495
 496        /* enable any active CRTCs */
 497        list_for_each_entry(crtc, &ddev->mode_config.crtc_list, head) {
 498                radeon_crtc = to_radeon_crtc(crtc);
 499                if (radeon_crtc->enabled) {
 500                        if (radeon_crtc->crtc_id) {
 501                                tmp = RREG32(RADEON_CRTC2_GEN_CNTL);
 502                                tmp &= ~RADEON_CRTC2_DISP_REQ_EN_B;
 503                                WREG32(RADEON_CRTC2_GEN_CNTL, tmp);
 504                        } else {
 505                                tmp = RREG32(RADEON_CRTC_GEN_CNTL);
 506                                tmp &= ~RADEON_CRTC_DISP_REQ_EN_B;
 507                                WREG32(RADEON_CRTC_GEN_CNTL, tmp);
 508                        }
 509                }
 510        }
 511}
 512
 513/**
 514 * r100_gui_idle - gui idle callback.
 515 *
 516 * @rdev: radeon_device pointer
 517 *
 518 * Check of the GUI (2D/3D engines) are idle (r1xx-r5xx).
 519 * Returns true if idle, false if not.
 520 */
 521bool r100_gui_idle(struct radeon_device *rdev)
 522{
 523        if (RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_ACTIVE)
 524                return false;
 525        else
 526                return true;
 527}
 528
 529/* hpd for digital panel detect/disconnect */
 530/**
 531 * r100_hpd_sense - hpd sense callback.
 532 *
 533 * @rdev: radeon_device pointer
 534 * @hpd: hpd (hotplug detect) pin
 535 *
 536 * Checks if a digital monitor is connected (r1xx-r4xx).
 537 * Returns true if connected, false if not connected.
 538 */
 539bool r100_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd)
 540{
 541        bool connected = false;
 542
 543        switch (hpd) {
 544        case RADEON_HPD_1:
 545                if (RREG32(RADEON_FP_GEN_CNTL) & RADEON_FP_DETECT_SENSE)
 546                        connected = true;
 547                break;
 548        case RADEON_HPD_2:
 549                if (RREG32(RADEON_FP2_GEN_CNTL) & RADEON_FP2_DETECT_SENSE)
 550                        connected = true;
 551                break;
 552        default:
 553                break;
 554        }
 555        return connected;
 556}
 557
 558/**
 559 * r100_hpd_set_polarity - hpd set polarity callback.
 560 *
 561 * @rdev: radeon_device pointer
 562 * @hpd: hpd (hotplug detect) pin
 563 *
 564 * Set the polarity of the hpd pin (r1xx-r4xx).
 565 */
 566void r100_hpd_set_polarity(struct radeon_device *rdev,
 567                           enum radeon_hpd_id hpd)
 568{
 569        u32 tmp;
 570        bool connected = r100_hpd_sense(rdev, hpd);
 571
 572        switch (hpd) {
 573        case RADEON_HPD_1:
 574                tmp = RREG32(RADEON_FP_GEN_CNTL);
 575                if (connected)
 576                        tmp &= ~RADEON_FP_DETECT_INT_POL;
 577                else
 578                        tmp |= RADEON_FP_DETECT_INT_POL;
 579                WREG32(RADEON_FP_GEN_CNTL, tmp);
 580                break;
 581        case RADEON_HPD_2:
 582                tmp = RREG32(RADEON_FP2_GEN_CNTL);
 583                if (connected)
 584                        tmp &= ~RADEON_FP2_DETECT_INT_POL;
 585                else
 586                        tmp |= RADEON_FP2_DETECT_INT_POL;
 587                WREG32(RADEON_FP2_GEN_CNTL, tmp);
 588                break;
 589        default:
 590                break;
 591        }
 592}
 593
 594/**
 595 * r100_hpd_init - hpd setup callback.
 596 *
 597 * @rdev: radeon_device pointer
 598 *
 599 * Setup the hpd pins used by the card (r1xx-r4xx).
 600 * Set the polarity, and enable the hpd interrupts.
 601 */
 602void r100_hpd_init(struct radeon_device *rdev)
 603{
 604        struct drm_device *dev = rdev->ddev;
 605        struct drm_connector *connector;
 606        unsigned enable = 0;
 607
 608        list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
 609                struct radeon_connector *radeon_connector = to_radeon_connector(connector);
 610                if (radeon_connector->hpd.hpd != RADEON_HPD_NONE)
 611                        enable |= 1 << radeon_connector->hpd.hpd;
 612                radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd);
 613        }
 614        radeon_irq_kms_enable_hpd(rdev, enable);
 615}
 616
 617/**
 618 * r100_hpd_fini - hpd tear down callback.
 619 *
 620 * @rdev: radeon_device pointer
 621 *
 622 * Tear down the hpd pins used by the card (r1xx-r4xx).
 623 * Disable the hpd interrupts.
 624 */
 625void r100_hpd_fini(struct radeon_device *rdev)
 626{
 627        struct drm_device *dev = rdev->ddev;
 628        struct drm_connector *connector;
 629        unsigned disable = 0;
 630
 631        list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
 632                struct radeon_connector *radeon_connector = to_radeon_connector(connector);
 633                if (radeon_connector->hpd.hpd != RADEON_HPD_NONE)
 634                        disable |= 1 << radeon_connector->hpd.hpd;
 635        }
 636        radeon_irq_kms_disable_hpd(rdev, disable);
 637}
 638
 639/*
 640 * PCI GART
 641 */
/*
 * PCI GART TLB flush callback.  Currently a no-op: the body is empty.
 */
void r100_pci_gart_tlb_flush(struct radeon_device *rdev)
{
	/* TODO: can we do something here? */
	/* It seems the hw only caches one entry, so we should discard that
	 * entry; otherwise, if the first GPU GART read hits it, the access
	 * could end up at the wrong address. */
}
 649
 650int r100_pci_gart_init(struct radeon_device *rdev)
 651{
 652        int r;
 653
 654        if (rdev->gart.ptr) {
 655                WARN(1, "R100 PCI GART already initialized\n");
 656                return 0;
 657        }
 658        /* Initialize common gart structure */
 659        r = radeon_gart_init(rdev);
 660        if (r)
 661                return r;
 662        rdev->gart.table_size = rdev->gart.num_gpu_pages * 4;
 663        rdev->asic->gart.tlb_flush = &r100_pci_gart_tlb_flush;
 664        rdev->asic->gart.get_page_entry = &r100_pci_gart_get_page_entry;
 665        rdev->asic->gart.set_page = &r100_pci_gart_set_page;
 666        return radeon_gart_table_ram_alloc(rdev);
 667}
 668
 669int r100_pci_gart_enable(struct radeon_device *rdev)
 670{
 671        uint32_t tmp;
 672
 673        /* discard memory request outside of configured range */
 674        tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
 675        WREG32(RADEON_AIC_CNTL, tmp);
 676        /* set address range for PCI address translate */
 677        WREG32(RADEON_AIC_LO_ADDR, rdev->mc.gtt_start);
 678        WREG32(RADEON_AIC_HI_ADDR, rdev->mc.gtt_end);
 679        /* set PCI GART page-table base address */
 680        WREG32(RADEON_AIC_PT_BASE, rdev->gart.table_addr);
 681        tmp = RREG32(RADEON_AIC_CNTL) | RADEON_PCIGART_TRANSLATE_EN;
 682        WREG32(RADEON_AIC_CNTL, tmp);
 683        r100_pci_gart_tlb_flush(rdev);
 684        DRM_INFO("PCI GART of %uM enabled (table at 0x%016llX).\n",
 685                 (unsigned)(rdev->mc.gtt_size >> 20),
 686                 (unsigned long long)rdev->gart.table_addr);
 687        rdev->gart.ready = true;
 688        return 0;
 689}
 690
 691void r100_pci_gart_disable(struct radeon_device *rdev)
 692{
 693        uint32_t tmp;
 694
 695        /* discard memory request outside of configured range */
 696        tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
 697        WREG32(RADEON_AIC_CNTL, tmp & ~RADEON_PCIGART_TRANSLATE_EN);
 698        WREG32(RADEON_AIC_LO_ADDR, 0);
 699        WREG32(RADEON_AIC_HI_ADDR, 0);
 700}
 701
 702uint64_t r100_pci_gart_get_page_entry(uint64_t addr, uint32_t flags)
 703{
 704        return addr;
 705}
 706
 707void r100_pci_gart_set_page(struct radeon_device *rdev, unsigned i,
 708                            uint64_t entry)
 709{
 710        u32 *gtt = rdev->gart.ptr;
 711        gtt[i] = cpu_to_le32(lower_32_bits(entry));
 712}
 713
/*
 * Tear down the PCI GART: finalise the common gart structure, disable
 * address translation in the hardware, then free the RAM page table.
 */
void r100_pci_gart_fini(struct radeon_device *rdev)
{
	radeon_gart_fini(rdev);
	r100_pci_gart_disable(rdev);
	radeon_gart_table_ram_free(rdev);
}
 720
 721int r100_irq_set(struct radeon_device *rdev)
 722{
 723        uint32_t tmp = 0;
 724
 725        if (!rdev->irq.installed) {
 726                WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
 727                WREG32(R_000040_GEN_INT_CNTL, 0);
 728                return -EINVAL;
 729        }
 730        if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
 731                tmp |= RADEON_SW_INT_ENABLE;
 732        }
 733        if (rdev->irq.crtc_vblank_int[0] ||
 734            atomic_read(&rdev->irq.pflip[0])) {
 735                tmp |= RADEON_CRTC_VBLANK_MASK;
 736        }
 737        if (rdev->irq.crtc_vblank_int[1] ||
 738            atomic_read(&rdev->irq.pflip[1])) {
 739                tmp |= RADEON_CRTC2_VBLANK_MASK;
 740        }
 741        if (rdev->irq.hpd[0]) {
 742                tmp |= RADEON_FP_DETECT_MASK;
 743        }
 744        if (rdev->irq.hpd[1]) {
 745                tmp |= RADEON_FP2_DETECT_MASK;
 746        }
 747        WREG32(RADEON_GEN_INT_CNTL, tmp);
 748
 749        /* read back to post the write */
 750        RREG32(RADEON_GEN_INT_CNTL);
 751
 752        return 0;
 753}
 754
 755void r100_irq_disable(struct radeon_device *rdev)
 756{
 757        u32 tmp;
 758
 759        WREG32(R_000040_GEN_INT_CNTL, 0);
 760        /* Wait and acknowledge irq */
 761        mdelay(1);
 762        tmp = RREG32(R_000044_GEN_INT_STATUS);
 763        WREG32(R_000044_GEN_INT_STATUS, tmp);
 764}
 765
 766static uint32_t r100_irq_ack(struct radeon_device *rdev)
 767{
 768        uint32_t irqs = RREG32(RADEON_GEN_INT_STATUS);
 769        uint32_t irq_mask = RADEON_SW_INT_TEST |
 770                RADEON_CRTC_VBLANK_STAT | RADEON_CRTC2_VBLANK_STAT |
 771                RADEON_FP_DETECT_STAT | RADEON_FP2_DETECT_STAT;
 772
 773        if (irqs) {
 774                WREG32(RADEON_GEN_INT_STATUS, irqs);
 775        }
 776        return irqs & irq_mask;
 777}
 778
 779int r100_irq_process(struct radeon_device *rdev)
 780{
 781        uint32_t status, msi_rearm;
 782        bool queue_hotplug = false;
 783
 784        status = r100_irq_ack(rdev);
 785        if (!status) {
 786                return IRQ_NONE;
 787        }
 788        if (rdev->shutdown) {
 789                return IRQ_NONE;
 790        }
 791        while (status) {
 792                /* SW interrupt */
 793                if (status & RADEON_SW_INT_TEST) {
 794                        radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
 795                }
 796                /* Vertical blank interrupts */
 797                if (status & RADEON_CRTC_VBLANK_STAT) {
 798                        if (rdev->irq.crtc_vblank_int[0]) {
 799                                drm_handle_vblank(rdev->ddev, 0);
 800                                rdev->pm.vblank_sync = true;
 801                                wake_up(&rdev->irq.vblank_queue);
 802                        }
 803                        if (atomic_read(&rdev->irq.pflip[0]))
 804                                radeon_crtc_handle_vblank(rdev, 0);
 805                }
 806                if (status & RADEON_CRTC2_VBLANK_STAT) {
 807                        if (rdev->irq.crtc_vblank_int[1]) {
 808                                drm_handle_vblank(rdev->ddev, 1);
 809                                rdev->pm.vblank_sync = true;
 810                                wake_up(&rdev->irq.vblank_queue);
 811                        }
 812                        if (atomic_read(&rdev->irq.pflip[1]))
 813                                radeon_crtc_handle_vblank(rdev, 1);
 814                }
 815                if (status & RADEON_FP_DETECT_STAT) {
 816                        queue_hotplug = true;
 817                        DRM_DEBUG("HPD1\n");
 818                }
 819                if (status & RADEON_FP2_DETECT_STAT) {
 820                        queue_hotplug = true;
 821                        DRM_DEBUG("HPD2\n");
 822                }
 823                status = r100_irq_ack(rdev);
 824        }
 825        if (queue_hotplug)
 826                schedule_delayed_work(&rdev->hotplug_work, 0);
 827        if (rdev->msi_enabled) {
 828                switch (rdev->family) {
 829                case CHIP_RS400:
 830                case CHIP_RS480:
 831                        msi_rearm = RREG32(RADEON_AIC_CNTL) & ~RS400_MSI_REARM;
 832                        WREG32(RADEON_AIC_CNTL, msi_rearm);
 833                        WREG32(RADEON_AIC_CNTL, msi_rearm | RS400_MSI_REARM);
 834                        break;
 835                default:
 836                        WREG32(RADEON_MSI_REARM_EN, RV370_MSI_REARM_EN);
 837                        break;
 838                }
 839        }
 840        return IRQ_HANDLED;
 841}
 842
 843u32 r100_get_vblank_counter(struct radeon_device *rdev, int crtc)
 844{
 845        if (crtc == 0)
 846                return RREG32(RADEON_CRTC_CRNT_FRAME);
 847        else
 848                return RREG32(RADEON_CRTC2_CRNT_FRAME);
 849}
 850
 851/**
 852 * r100_ring_hdp_flush - flush Host Data Path via the ring buffer
 853 * @rdev: radeon device structure
 854 * @ring: ring buffer struct for emitting packets
 855 */
 856static void r100_ring_hdp_flush(struct radeon_device *rdev, struct radeon_ring *ring)
 857{
 858        radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
 859        radeon_ring_write(ring, rdev->config.r100.hdp_cntl |
 860                                RADEON_HDP_READ_BUFFER_INVALIDATE);
 861        radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
 862        radeon_ring_write(ring, rdev->config.r100.hdp_cntl);
 863}
 864
 865/* Who ever call radeon_fence_emit should call ring_lock and ask
 866 * for enough space (today caller are ib schedule and buffer move) */
 867void r100_fence_ring_emit(struct radeon_device *rdev,
 868                          struct radeon_fence *fence)
 869{
 870        struct radeon_ring *ring = &rdev->ring[fence->ring];
 871
 872        /* We have to make sure that caches are flushed before
 873         * CPU might read something from VRAM. */
 874        radeon_ring_write(ring, PACKET0(RADEON_RB3D_DSTCACHE_CTLSTAT, 0));
 875        radeon_ring_write(ring, RADEON_RB3D_DC_FLUSH_ALL);
 876        radeon_ring_write(ring, PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 0));
 877        radeon_ring_write(ring, RADEON_RB3D_ZC_FLUSH_ALL);
 878        /* Wait until IDLE & CLEAN */
 879        radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0));
 880        radeon_ring_write(ring, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN);
 881        r100_ring_hdp_flush(rdev, ring);
 882        /* Emit fence sequence & fire IRQ */
 883        radeon_ring_write(ring, PACKET0(rdev->fence_drv[fence->ring].scratch_reg, 0));
 884        radeon_ring_write(ring, fence->seq);
 885        radeon_ring_write(ring, PACKET0(RADEON_GEN_INT_STATUS, 0));
 886        radeon_ring_write(ring, RADEON_SW_INT_FIRE);
 887}
 888
/**
 * r100_semaphore_ring_emit - semaphore emit stub
 * @rdev: radeon device structure (unused)
 * @ring: ring buffer struct (unused)
 * @semaphore: semaphore object (unused)
 * @emit_wait: wait/signal selector (unused)
 *
 * This function must never be called: it hits BUG() unconditionally.
 * The trailing return only satisfies the function's bool return type.
 */
bool r100_semaphore_ring_emit(struct radeon_device *rdev,
			      struct radeon_ring *ring,
			      struct radeon_semaphore *semaphore,
			      bool emit_wait)
{
	/* Unused on older asics, since we don't have semaphores or multiple rings */
	BUG();
	return false;
}
 898
 899struct radeon_fence *r100_copy_blit(struct radeon_device *rdev,
 900                                    uint64_t src_offset,
 901                                    uint64_t dst_offset,
 902                                    unsigned num_gpu_pages,
 903                                    struct dma_resv *resv)
 904{
 905        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 906        struct radeon_fence *fence;
 907        uint32_t cur_pages;
 908        uint32_t stride_bytes = RADEON_GPU_PAGE_SIZE;
 909        uint32_t pitch;
 910        uint32_t stride_pixels;
 911        unsigned ndw;
 912        int num_loops;
 913        int r = 0;
 914
 915        /* radeon limited to 16k stride */
 916        stride_bytes &= 0x3fff;
 917        /* radeon pitch is /64 */
 918        pitch = stride_bytes / 64;
 919        stride_pixels = stride_bytes / 4;
 920        num_loops = DIV_ROUND_UP(num_gpu_pages, 8191);
 921
 922        /* Ask for enough room for blit + flush + fence */
 923        ndw = 64 + (10 * num_loops);
 924        r = radeon_ring_lock(rdev, ring, ndw);
 925        if (r) {
 926                DRM_ERROR("radeon: moving bo (%d) asking for %u dw.\n", r, ndw);
 927                return ERR_PTR(-EINVAL);
 928        }
 929        while (num_gpu_pages > 0) {
 930                cur_pages = num_gpu_pages;
 931                if (cur_pages > 8191) {
 932                        cur_pages = 8191;
 933                }
 934                num_gpu_pages -= cur_pages;
 935
 936                /* pages are in Y direction - height
 937                   page width in X direction - width */
 938                radeon_ring_write(ring, PACKET3(PACKET3_BITBLT_MULTI, 8));
 939                radeon_ring_write(ring,
 940                                  RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
 941                                  RADEON_GMC_DST_PITCH_OFFSET_CNTL |
 942                                  RADEON_GMC_SRC_CLIPPING |
 943                                  RADEON_GMC_DST_CLIPPING |
 944                                  RADEON_GMC_BRUSH_NONE |
 945                                  (RADEON_COLOR_FORMAT_ARGB8888 << 8) |
 946                                  RADEON_GMC_SRC_DATATYPE_COLOR |
 947                                  RADEON_ROP3_S |
 948                                  RADEON_DP_SRC_SOURCE_MEMORY |
 949                                  RADEON_GMC_CLR_CMP_CNTL_DIS |
 950                                  RADEON_GMC_WR_MSK_DIS);
 951                radeon_ring_write(ring, (pitch << 22) | (src_offset >> 10));
 952                radeon_ring_write(ring, (pitch << 22) | (dst_offset >> 10));
 953                radeon_ring_write(ring, (0x1fff) | (0x1fff << 16));
 954                radeon_ring_write(ring, 0);
 955                radeon_ring_write(ring, (0x1fff) | (0x1fff << 16));
 956                radeon_ring_write(ring, num_gpu_pages);
 957                radeon_ring_write(ring, num_gpu_pages);
 958                radeon_ring_write(ring, cur_pages | (stride_pixels << 16));
 959        }
 960        radeon_ring_write(ring, PACKET0(RADEON_DSTCACHE_CTLSTAT, 0));
 961        radeon_ring_write(ring, RADEON_RB2D_DC_FLUSH_ALL);
 962        radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0));
 963        radeon_ring_write(ring,
 964                          RADEON_WAIT_2D_IDLECLEAN |
 965                          RADEON_WAIT_HOST_IDLECLEAN |
 966                          RADEON_WAIT_DMA_GUI_IDLE);
 967        r = radeon_fence_emit(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX);
 968        if (r) {
 969                radeon_ring_unlock_undo(rdev, ring);
 970                return ERR_PTR(r);
 971        }
 972        radeon_ring_unlock_commit(rdev, ring, false);
 973        return fence;
 974}
 975
 976static int r100_cp_wait_for_idle(struct radeon_device *rdev)
 977{
 978        unsigned i;
 979        u32 tmp;
 980
 981        for (i = 0; i < rdev->usec_timeout; i++) {
 982                tmp = RREG32(R_000E40_RBBM_STATUS);
 983                if (!G_000E40_CP_CMDSTRM_BUSY(tmp)) {
 984                        return 0;
 985                }
 986                udelay(1);
 987        }
 988        return -1;
 989}
 990
 991void r100_ring_start(struct radeon_device *rdev, struct radeon_ring *ring)
 992{
 993        int r;
 994
 995        r = radeon_ring_lock(rdev, ring, 2);
 996        if (r) {
 997                return;
 998        }
 999        radeon_ring_write(ring, PACKET0(RADEON_ISYNC_CNTL, 0));
1000        radeon_ring_write(ring,
1001                          RADEON_ISYNC_ANY2D_IDLE3D |
1002                          RADEON_ISYNC_ANY3D_IDLE2D |
1003                          RADEON_ISYNC_WAIT_IDLEGUI |
1004                          RADEON_ISYNC_CPSCRATCH_IDLEGUI);
1005        radeon_ring_unlock_commit(rdev, ring, false);
1006}
1007
1008
1009/* Load the microcode for the CP */
1010static int r100_cp_init_microcode(struct radeon_device *rdev)
1011{
1012        const char *fw_name = NULL;
1013        int err;
1014
1015        DRM_DEBUG_KMS("\n");
1016
1017        if ((rdev->family == CHIP_R100) || (rdev->family == CHIP_RV100) ||
1018            (rdev->family == CHIP_RV200) || (rdev->family == CHIP_RS100) ||
1019            (rdev->family == CHIP_RS200)) {
1020                DRM_INFO("Loading R100 Microcode\n");
1021                fw_name = FIRMWARE_R100;
1022        } else if ((rdev->family == CHIP_R200) ||
1023                   (rdev->family == CHIP_RV250) ||
1024                   (rdev->family == CHIP_RV280) ||
1025                   (rdev->family == CHIP_RS300)) {
1026                DRM_INFO("Loading R200 Microcode\n");
1027                fw_name = FIRMWARE_R200;
1028        } else if ((rdev->family == CHIP_R300) ||
1029                   (rdev->family == CHIP_R350) ||
1030                   (rdev->family == CHIP_RV350) ||
1031                   (rdev->family == CHIP_RV380) ||
1032                   (rdev->family == CHIP_RS400) ||
1033                   (rdev->family == CHIP_RS480)) {
1034                DRM_INFO("Loading R300 Microcode\n");
1035                fw_name = FIRMWARE_R300;
1036        } else if ((rdev->family == CHIP_R420) ||
1037                   (rdev->family == CHIP_R423) ||
1038                   (rdev->family == CHIP_RV410)) {
1039                DRM_INFO("Loading R400 Microcode\n");
1040                fw_name = FIRMWARE_R420;
1041        } else if ((rdev->family == CHIP_RS690) ||
1042                   (rdev->family == CHIP_RS740)) {
1043                DRM_INFO("Loading RS690/RS740 Microcode\n");
1044                fw_name = FIRMWARE_RS690;
1045        } else if (rdev->family == CHIP_RS600) {
1046                DRM_INFO("Loading RS600 Microcode\n");
1047                fw_name = FIRMWARE_RS600;
1048        } else if ((rdev->family == CHIP_RV515) ||
1049                   (rdev->family == CHIP_R520) ||
1050                   (rdev->family == CHIP_RV530) ||
1051                   (rdev->family == CHIP_R580) ||
1052                   (rdev->family == CHIP_RV560) ||
1053                   (rdev->family == CHIP_RV570)) {
1054                DRM_INFO("Loading R500 Microcode\n");
1055                fw_name = FIRMWARE_R520;
1056        }
1057
1058        err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1059        if (err) {
1060                pr_err("radeon_cp: Failed to load firmware \"%s\"\n", fw_name);
1061        } else if (rdev->me_fw->size % 8) {
1062                pr_err("radeon_cp: Bogus length %zu in firmware \"%s\"\n",
1063                       rdev->me_fw->size, fw_name);
1064                err = -EINVAL;
1065                release_firmware(rdev->me_fw);
1066                rdev->me_fw = NULL;
1067        }
1068        return err;
1069}
1070
1071u32 r100_gfx_get_rptr(struct radeon_device *rdev,
1072                      struct radeon_ring *ring)
1073{
1074        u32 rptr;
1075
1076        if (rdev->wb.enabled)
1077                rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
1078        else
1079                rptr = RREG32(RADEON_CP_RB_RPTR);
1080
1081        return rptr;
1082}
1083
/**
 * r100_gfx_get_wptr - get the current write pointer of the GFX ring
 * @rdev: radeon device structure
 * @ring: ring buffer struct (not used; the value comes from the register)
 *
 * Returns the value of the RADEON_CP_RB_WPTR register.
 */
u32 r100_gfx_get_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	return RREG32(RADEON_CP_RB_WPTR);
}
1089
/**
 * r100_gfx_set_wptr - commit the write pointer of the GFX ring
 * @rdev: radeon device structure
 * @ring: ring whose wptr field is written to the hardware
 *
 * Writes ring->wptr to RADEON_CP_RB_WPTR and reads the register back.
 */
void r100_gfx_set_wptr(struct radeon_device *rdev,
		       struct radeon_ring *ring)
{
	WREG32(RADEON_CP_RB_WPTR, ring->wptr);
	/* Read back and discard; presumably to post the write — confirm. */
	(void)RREG32(RADEON_CP_RB_WPTR);
}
1096
1097static void r100_cp_load_microcode(struct radeon_device *rdev)
1098{
1099        const __be32 *fw_data;
1100        int i, size;
1101
1102        if (r100_gui_wait_for_idle(rdev)) {
1103                pr_warn("Failed to wait GUI idle while programming pipes. Bad things might happen.\n");
1104        }
1105
1106        if (rdev->me_fw) {
1107                size = rdev->me_fw->size / 4;
1108                fw_data = (const __be32 *)&rdev->me_fw->data[0];
1109                WREG32(RADEON_CP_ME_RAM_ADDR, 0);
1110                for (i = 0; i < size; i += 2) {
1111                        WREG32(RADEON_CP_ME_RAM_DATAH,
1112                               be32_to_cpup(&fw_data[i]));
1113                        WREG32(RADEON_CP_ME_RAM_DATAL,
1114                               be32_to_cpup(&fw_data[i + 1]));
1115                }
1116        }
1117}
1118
/**
 * r100_cp_init - load microcode, set up the GFX ring and start the CP
 * @rdev: radeon device structure
 * @ring_size: requested ring size; it is re-derived from
 *             order_base_2(ring_size / 8) before being used
 *
 * Fetches the CP microcode if rdev->me_fw is not already set, uploads it,
 * initializes the GFX ring, programs the CP ring-buffer and command-queue
 * registers, enables PCI bus mastering and then runs a ring test. On success
 * the ring is marked ready and the active VRAM size is set to
 * rdev->mc.real_vram_size.
 *
 * Returns 0 on success, or the error from firmware loading, ring
 * initialization or the ring test. Failing to obtain a scratch register for
 * saving the read pointer is only logged and does not fail the call.
 */
int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	unsigned rb_bufsz;
	unsigned rb_blksz;
	unsigned max_fetch;
	unsigned pre_write_timer;
	unsigned pre_write_limit;
	unsigned indirect2_start;
	unsigned indirect1_start;
	uint32_t tmp;
	int r;

	r100_debugfs_cp_init(rdev);
	/* Only fetch the firmware image if an earlier call has not done so. */
	if (!rdev->me_fw) {
		r = r100_cp_init_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load firmware!\n");
			return r;
		}
	}

	/* Align ring size */
	rb_bufsz = order_base_2(ring_size / 8);
	ring_size = (1 << (rb_bufsz + 1)) * 4;
	r100_cp_load_microcode(rdev);
	r = radeon_ring_init(rdev, ring, ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r) {
		return r;
	}
	/* Each time the cp read 1024 bytes (16 dword/quadword) update
	 * the rptr copy in system ram */
	rb_blksz = 9;
	/* cp will read 128bytes at a time (4 dwords) */
	max_fetch = 1;
	ring->align_mask = 16 - 1;
	/* Write to CP_RB_WPTR will be delayed for pre_write_timer clocks */
	pre_write_timer = 64;
	/* Force CP_RB_WPTR write if written more than one time before the
	 * delay expire
	 */
	pre_write_limit = 0;
	/* Setup the cp cache like this (cache size is 96 dwords) :
	 *	RING		0  to 15
	 *	INDIRECT1	16 to 79
	 *	INDIRECT2	80 to 95
	 * So ring cache size is 16dwords (> (2 * max_fetch = 2 * 4dwords))
	 *    indirect1 cache size is 64dwords (> (2 * max_fetch = 2 * 4dwords))
	 *    indirect2 cache size is 16dwords (> (2 * max_fetch = 2 * 4dwords))
	 * Idea being that most of the gpu cmd will be through indirect1 buffer
	 * so it gets the bigger cache.
	 */
	indirect2_start = 80;
	indirect1_start = 16;
	/* cp setup */
	/* NOTE(review): 0x718 is a raw register offset with no symbolic name
	 * here; it receives the pre-write timer and limit. */
	WREG32(0x718, pre_write_timer | (pre_write_limit << 28));
	tmp = (REG_SET(RADEON_RB_BUFSZ, rb_bufsz) |
	       REG_SET(RADEON_RB_BLKSZ, rb_blksz) |
	       REG_SET(RADEON_MAX_FETCH, max_fetch));
#ifdef __BIG_ENDIAN
	tmp |= RADEON_BUF_SWAP_32BIT;
#endif
	/* Keep rptr updates off while the ring is being (re)programmed. */
	WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_NO_UPDATE);

	/* Set ring address */
	DRM_INFO("radeon: ring at 0x%016lX\n", (unsigned long)ring->gpu_addr);
	WREG32(RADEON_CP_RB_BASE, ring->gpu_addr);
	/* Force read & write ptr to 0 */
	WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA | RADEON_RB_NO_UPDATE);
	WREG32(RADEON_CP_RB_RPTR_WR, 0);
	ring->wptr = 0;
	WREG32(RADEON_CP_RB_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(R_00070C_CP_RB_RPTR_ADDR,
		S_00070C_RB_RPTR_ADDR((rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) >> 2));
	WREG32(R_000774_SCRATCH_ADDR, rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET);

	/* With writeback enabled the scratch mask is set to 0xff; without it
	 * the mask is cleared and rptr updates stay disabled in the final
	 * CP_RB_CNTL value. */
	if (rdev->wb.enabled)
		WREG32(R_000770_SCRATCH_UMSK, 0xff);
	else {
		tmp |= RADEON_RB_NO_UPDATE;
		WREG32(R_000770_SCRATCH_UMSK, 0);
	}

	/* Final CP_RB_CNTL value: the rptr write-enable bit is dropped again. */
	WREG32(RADEON_CP_RB_CNTL, tmp);
	udelay(10);
	/* Set cp mode to bus mastering & enable cp*/
	WREG32(RADEON_CP_CSQ_MODE,
	       REG_SET(RADEON_INDIRECT2_START, indirect2_start) |
	       REG_SET(RADEON_INDIRECT1_START, indirect1_start));
	WREG32(RADEON_CP_RB_WPTR_DELAY, 0);
	/* NOTE(review): this overwrites the CSQ_MODE value written just above
	 * with a literal; whether both writes are needed is not evident from
	 * this function — confirm before changing. */
	WREG32(RADEON_CP_CSQ_MODE, 0x00004D4D);
	WREG32(RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIBM_INDBM);

	/* at this point everything should be setup correctly to enable master */
	pci_set_master(rdev->pdev);

	radeon_ring_start(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, ring);
	if (r) {
		DRM_ERROR("radeon: cp isn't working (%d).\n", r);
		return r;
	}
	ring->ready = true;
	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	/* Allocate the rptr-save scratch register once; a non-zero
	 * rptr_save_reg means it is already held. */
	if (!ring->rptr_save_reg /* not resuming from suspend */
	    && radeon_ring_supports_scratch_reg(rdev, ring)) {
		r = radeon_scratch_get(rdev, &ring->rptr_save_reg);
		if (r) {
			/* Not fatal: carry on without a saved read pointer. */
			DRM_ERROR("failed to get scratch reg for rptr save (%d).\n", r);
			ring->rptr_save_reg = 0;
		}
	}
	return 0;
}
1237
1238void r100_cp_fini(struct radeon_device *rdev)
1239{
1240        if (r100_cp_wait_for_idle(rdev)) {
1241                DRM_ERROR("Wait for CP idle timeout, shutting down CP.\n");
1242        }
1243        /* Disable ring */
1244        r100_cp_disable(rdev);
1245        radeon_scratch_free(rdev, rdev->ring[RADEON_RING_TYPE_GFX_INDEX].rptr_save_reg);
1246        radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
1247        DRM_INFO("radeon: cp finalized\n");
1248}
1249
1250void r100_cp_disable(struct radeon_device *rdev)
1251{
1252        /* Disable ring */
1253        radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1254        rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
1255        WREG32(RADEON_CP_CSQ_MODE, 0);
1256        WREG32(RADEON_CP_CSQ_CNTL, 0);
1257        WREG32(R_000770_SCRATCH_UMSK, 0);
1258        if (r100_gui_wait_for_idle(rdev)) {
1259                pr_warn("Failed to wait GUI idle while programming pipes. Bad things might happen.\n");
1260        }
1261}
1262
1263/*
1264 * CS functions
1265 */
1266int r100_reloc_pitch_offset(struct radeon_cs_parser *p,
1267                            struct radeon_cs_packet *pkt,
1268                            unsigned idx,
1269                            unsigned reg)
1270{
1271        int r;
1272        u32 tile_flags = 0;
1273        u32 tmp;
1274        struct radeon_bo_list *reloc;
1275        u32 value;
1276
1277        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1278        if (r) {
1279                DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1280                          idx, reg);
1281                radeon_cs_dump_packet(p, pkt);
1282                return r;
1283        }
1284
1285        value = radeon_get_ib_value(p, idx);
1286        tmp = value & 0x003fffff;
1287        tmp += (((u32)reloc->gpu_offset) >> 10);
1288
1289        if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1290                if (reloc->tiling_flags & RADEON_TILING_MACRO)
1291                        tile_flags |= RADEON_DST_TILE_MACRO;
1292                if (reloc->tiling_flags & RADEON_TILING_MICRO) {
1293                        if (reg == RADEON_SRC_PITCH_OFFSET) {
1294                                DRM_ERROR("Cannot src blit from microtiled surface\n");
1295                                radeon_cs_dump_packet(p, pkt);
1296                                return -EINVAL;
1297                        }
1298                        tile_flags |= RADEON_DST_TILE_MICRO;
1299                }
1300
1301                tmp |= tile_flags;
1302                p->ib.ptr[idx] = (value & 0x3fc00000) | tmp;
1303        } else
1304                p->ib.ptr[idx] = (value & 0xffc00000) | tmp;
1305        return 0;
1306}
1307
1308int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
1309                             struct radeon_cs_packet *pkt,
1310                             int idx)
1311{
1312        unsigned c, i;
1313        struct radeon_bo_list *reloc;
1314        struct r100_cs_track *track;
1315        int r = 0;
1316        volatile uint32_t *ib;
1317        u32 idx_value;
1318
1319        ib = p->ib.ptr;
1320        track = (struct r100_cs_track *)p->track;
1321        c = radeon_get_ib_value(p, idx++) & 0x1F;
1322        if (c > 16) {
1323            DRM_ERROR("Only 16 vertex buffers are allowed %d\n",
1324                      pkt->opcode);
1325            radeon_cs_dump_packet(p, pkt);
1326            return -EINVAL;
1327        }
1328        track->num_arrays = c;
1329        for (i = 0; i < (c - 1); i+=2, idx+=3) {
1330                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1331                if (r) {
1332                        DRM_ERROR("No reloc for packet3 %d\n",
1333                                  pkt->opcode);
1334                        radeon_cs_dump_packet(p, pkt);
1335                        return r;
1336                }
1337                idx_value = radeon_get_ib_value(p, idx);
1338                ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->gpu_offset);
1339
1340                track->arrays[i + 0].esize = idx_value >> 8;
1341                track->arrays[i + 0].robj = reloc->robj;
1342                track->arrays[i + 0].esize &= 0x7F;
1343                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1344                if (r) {
1345                        DRM_ERROR("No reloc for packet3 %d\n",
1346                                  pkt->opcode);
1347                        radeon_cs_dump_packet(p, pkt);
1348                        return r;
1349                }
1350                ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->gpu_offset);
1351                track->arrays[i + 1].robj = reloc->robj;
1352                track->arrays[i + 1].esize = idx_value >> 24;
1353                track->arrays[i + 1].esize &= 0x7F;
1354        }
1355        if (c & 1) {
1356                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1357                if (r) {
1358                        DRM_ERROR("No reloc for packet3 %d\n",
1359                                          pkt->opcode);
1360                        radeon_cs_dump_packet(p, pkt);
1361                        return r;
1362                }
1363                idx_value = radeon_get_ib_value(p, idx);
1364                ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->gpu_offset);
1365                track->arrays[i + 0].robj = reloc->robj;
1366                track->arrays[i + 0].esize = idx_value >> 8;
1367                track->arrays[i + 0].esize &= 0x7F;
1368        }
1369        return r;
1370}
1371
1372int r100_cs_parse_packet0(struct radeon_cs_parser *p,
1373                          struct radeon_cs_packet *pkt,
1374                          const unsigned *auth, unsigned n,
1375                          radeon_packet0_check_t check)
1376{
1377        unsigned reg;
1378        unsigned i, j, m;
1379        unsigned idx;
1380        int r;
1381
1382        idx = pkt->idx + 1;
1383        reg = pkt->reg;
1384        /* Check that register fall into register range
1385         * determined by the number of entry (n) in the
1386         * safe register bitmap.
1387         */
1388        if (pkt->one_reg_wr) {
1389                if ((reg >> 7) > n) {
1390                        return -EINVAL;
1391                }
1392        } else {
1393                if (((reg + (pkt->count << 2)) >> 7) > n) {
1394                        return -EINVAL;
1395                }
1396        }
1397        for (i = 0; i <= pkt->count; i++, idx++) {
1398                j = (reg >> 7);
1399                m = 1 << ((reg >> 2) & 31);
1400                if (auth[j] & m) {
1401                        r = check(p, pkt, idx, reg);
1402                        if (r) {
1403                                return r;
1404                        }
1405                }
1406                if (pkt->one_reg_wr) {
1407                        if (!(auth[j] & m)) {
1408                                break;
1409                        }
1410                } else {
1411                        reg += 4;
1412                }
1413        }
1414        return 0;
1415}
1416
/**
 * r100_cs_packet_parse_vline() - parse userspace VLINE packet
 * @p:		parser structure holding parsing context.
 *
 * Userspace sends a special sequence for VLINE waits.
 * PACKET0 - VLINE_START_END + value
 * PACKET0 - WAIT_UNTIL + value
 * RELOC (P3) - crtc_id in reloc.
 *
 * This function parses this and relocates the VLINE START END
 * and WAIT UNTIL packets to the correct crtc.
 * It also detects a switched off crtc and nulls out the
 * wait in that case.
 *
 * Returns 0 on success, -EINVAL if the sequence is malformed or the VLINE
 * register is not one that can be redirected, -ENOENT if the CRTC cannot be
 * found, or the error returned by radeon_cs_packet_parse().
 */
int r100_cs_packet_parse_vline(struct radeon_cs_parser *p)
{
	struct drm_crtc *crtc;
	struct radeon_crtc *radeon_crtc;
	struct radeon_cs_packet p3reloc, waitreloc;
	int crtc_id;
	int r;
	uint32_t header, h_idx, reg;
	volatile uint32_t *ib;

	ib = p->ib.ptr;

	/* parse the wait until */
	r = radeon_cs_packet_parse(p, &waitreloc, p->idx);
	if (r)
		return r;

	/* check its a wait until and only 1 count */
	if (waitreloc.reg != RADEON_WAIT_UNTIL ||
	    waitreloc.count != 0) {
		DRM_ERROR("vline wait had illegal wait until segment\n");
		return -EINVAL;
	}

	/* The wait must be exactly a CRTC vline wait, nothing else. */
	if (radeon_get_ib_value(p, waitreloc.idx + 1) != RADEON_WAIT_CRTC_VLINE) {
		DRM_ERROR("vline wait had illegal wait until\n");
		return -EINVAL;
	}

	/* jump over the NOP */
	r = radeon_cs_packet_parse(p, &p3reloc, p->idx + waitreloc.count + 2);
	if (r)
		return r;

	/* Index of the VLINE packet header; assumes p->idx currently points
	 * two dwords past it (header + value) — TODO confirm with caller. */
	h_idx = p->idx - 2;
	/* Consume the WAIT_UNTIL packet and the reloc packet. */
	p->idx += waitreloc.count + 2;
	p->idx += p3reloc.count + 2;

	header = radeon_get_ib_value(p, h_idx);
	/* Layout relative to h_idx: +0/+1 VLINE packet, +2/+3 WAIT_UNTIL
	 * packet, +4 reloc header, +5 crtc id supplied by userspace. */
	crtc_id = radeon_get_ib_value(p, h_idx + 5);
	reg = R100_CP_PACKET0_GET_REG(header);
	crtc = drm_crtc_find(p->rdev->ddev, p->filp, crtc_id);
	if (!crtc) {
		DRM_ERROR("cannot find crtc %d\n", crtc_id);
		return -ENOENT;
	}
	radeon_crtc = to_radeon_crtc(crtc);
	/* From here on crtc_id is the driver's CRTC index, not the id
	 * userspace passed in. */
	crtc_id = radeon_crtc->crtc_id;

	if (!crtc->enabled) {
		/* if the CRTC isn't enabled - we need to nop out the wait until */
		ib[h_idx + 2] = PACKET2(0);
		ib[h_idx + 3] = PACKET2(0);
	} else if (crtc_id == 1) {
		/* Second CRTC: rewrite the header to target its VLINE register. */
		switch (reg) {
		case AVIVO_D1MODE_VLINE_START_END:
			header &= ~R300_CP_PACKET0_REG_MASK;
			header |= AVIVO_D2MODE_VLINE_START_END >> 2;
			break;
		case RADEON_CRTC_GUI_TRIG_VLINE:
			header &= ~R300_CP_PACKET0_REG_MASK;
			header |= RADEON_CRTC2_GUI_TRIG_VLINE >> 2;
			break;
		default:
			DRM_ERROR("unknown crtc reloc\n");
			return -EINVAL;
		}
		ib[h_idx] = header;
		/* Also select the second display engine in the WAIT_UNTIL value. */
		ib[h_idx + 3] |= RADEON_ENG_DISPLAY_SELECT_CRTC1;
	}

	return 0;
}
1504
1505static int r100_get_vtx_size(uint32_t vtx_fmt)
1506{
1507        int vtx_size;
1508        vtx_size = 2;
1509        /* ordered according to bits in spec */
1510        if (vtx_fmt & RADEON_SE_VTX_FMT_W0)
1511                vtx_size++;
1512        if (vtx_fmt & RADEON_SE_VTX_FMT_FPCOLOR)
1513                vtx_size += 3;
1514        if (vtx_fmt & RADEON_SE_VTX_FMT_FPALPHA)
1515                vtx_size++;
1516        if (vtx_fmt & RADEON_SE_VTX_FMT_PKCOLOR)
1517                vtx_size++;
1518        if (vtx_fmt & RADEON_SE_VTX_FMT_FPSPEC)
1519                vtx_size += 3;
1520        if (vtx_fmt & RADEON_SE_VTX_FMT_FPFOG)
1521                vtx_size++;
1522        if (vtx_fmt & RADEON_SE_VTX_FMT_PKSPEC)
1523                vtx_size++;
1524        if (vtx_fmt & RADEON_SE_VTX_FMT_ST0)
1525                vtx_size += 2;
1526        if (vtx_fmt & RADEON_SE_VTX_FMT_ST1)
1527                vtx_size += 2;
1528        if (vtx_fmt & RADEON_SE_VTX_FMT_Q1)
1529                vtx_size++;
1530        if (vtx_fmt & RADEON_SE_VTX_FMT_ST2)
1531                vtx_size += 2;
1532        if (vtx_fmt & RADEON_SE_VTX_FMT_Q2)
1533                vtx_size++;
1534        if (vtx_fmt & RADEON_SE_VTX_FMT_ST3)
1535                vtx_size += 2;
1536        if (vtx_fmt & RADEON_SE_VTX_FMT_Q3)
1537                vtx_size++;
1538        if (vtx_fmt & RADEON_SE_VTX_FMT_Q0)
1539                vtx_size++;
1540        /* blend weight */
1541        if (vtx_fmt & (0x7 << 15))
1542                vtx_size += (vtx_fmt >> 15) & 0x7;
1543        if (vtx_fmt & RADEON_SE_VTX_FMT_N0)
1544                vtx_size += 3;
1545        if (vtx_fmt & RADEON_SE_VTX_FMT_XY1)
1546                vtx_size += 2;
1547        if (vtx_fmt & RADEON_SE_VTX_FMT_Z1)
1548                vtx_size++;
1549        if (vtx_fmt & RADEON_SE_VTX_FMT_W1)
1550                vtx_size++;
1551        if (vtx_fmt & RADEON_SE_VTX_FMT_N1)
1552                vtx_size++;
1553        if (vtx_fmt & RADEON_SE_VTX_FMT_Z)
1554                vtx_size++;
1555        return vtx_size;
1556}
1557
/*
 * r100_packet0_check - validate a single register write of a type-0 packet
 * @p:   command stream parser (IB, relocations and the r100_cs_track in p->track)
 * @pkt: packet being parsed, used for error dumps and pitch/offset relocation
 * @idx: index in the IB of the dword holding the value written to @reg
 * @reg: register offset the value is written to
 *
 * For registers that carry a buffer address the next relocation is consumed
 * and the GPU offset of the buffer object is added to the value in the IB.
 * For state registers the relevant fields are mirrored into the tracker and
 * the matching dirty flag is raised, so a later draw can be size-checked.
 *
 * Returns 0 on success, -EINVAL for a register that is not handled here or
 * for an invalid color buffer format, or the error of a failing helper.
 */
static int r100_packet0_check(struct radeon_cs_parser *p,
                              struct radeon_cs_packet *pkt,
                              unsigned idx, unsigned reg)
{
        struct radeon_bo_list *reloc;
        struct r100_cs_track *track;
        volatile uint32_t *ib;
        uint32_t tmp;
        int r;
        int i, face;
        u32 tile_flags = 0;
        u32 idx_value;

        ib = p->ib.ptr;
        track = (struct r100_cs_track *)p->track;

        /* value userspace wants to write to @reg */
        idx_value = radeon_get_ib_value(p, idx);

        switch (reg) {
        case RADEON_CRTC_GUI_TRIG_VLINE:
                r = r100_cs_packet_parse_vline(p);
                if (r) {
                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
                                  idx, reg);
                        radeon_cs_dump_packet(p, pkt);
                        return r;
                }
                break;
                /* FIXME: only allow PACKET3 blit? easier to check for out of
                 * range access */
        case RADEON_DST_PITCH_OFFSET:
        case RADEON_SRC_PITCH_OFFSET:
                r = r100_reloc_pitch_offset(p, pkt, idx, reg);
                if (r)
                        return r;
                break;
        case RADEON_RB3D_DEPTHOFFSET:
                /* depth buffer address: remember the BO and the user offset */
                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
                if (r) {
                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
                                  idx, reg);
                        radeon_cs_dump_packet(p, pkt);
                        return r;
                }
                track->zb.robj = reloc->robj;
                track->zb.offset = idx_value;
                track->zb_dirty = true;
                ib[idx] = idx_value + ((u32)reloc->gpu_offset);
                break;
        case RADEON_RB3D_COLOROFFSET:
                /* color buffer address: only cb[0] is tracked here */
                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
                if (r) {
                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
                                  idx, reg);
                        radeon_cs_dump_packet(p, pkt);
                        return r;
                }
                track->cb[0].robj = reloc->robj;
                track->cb[0].offset = idx_value;
                track->cb_dirty = true;
                ib[idx] = idx_value + ((u32)reloc->gpu_offset);
                break;
        case RADEON_PP_TXOFFSET_0:
        case RADEON_PP_TXOFFSET_1:
        case RADEON_PP_TXOFFSET_2:
                /* texture unit index derived from the register distance */
                i = (reg - RADEON_PP_TXOFFSET_0) / 24;
                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
                if (r) {
                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
                                  idx, reg);
                        radeon_cs_dump_packet(p, pkt);
                        return r;
                }
                /*
                 * Unless the submitter asked to keep its own tiling bits,
                 * replace bits 2..4 of the value with flags derived from
                 * the tiling of the buffer object.
                 */
                if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
                        if (reloc->tiling_flags & RADEON_TILING_MACRO)
                                tile_flags |= RADEON_TXO_MACRO_TILE;
                        if (reloc->tiling_flags & RADEON_TILING_MICRO)
                                tile_flags |= RADEON_TXO_MICRO_TILE_X2;

                        tmp = idx_value & ~(0x7 << 2);
                        tmp |= tile_flags;
                        ib[idx] = tmp + ((u32)reloc->gpu_offset);
                } else
                        ib[idx] = idx_value + ((u32)reloc->gpu_offset);
                track->textures[i].robj = reloc->robj;
                track->tex_dirty = true;
                break;
        case RADEON_PP_CUBIC_OFFSET_T0_0:
        case RADEON_PP_CUBIC_OFFSET_T0_1:
        case RADEON_PP_CUBIC_OFFSET_T0_2:
        case RADEON_PP_CUBIC_OFFSET_T0_3:
        case RADEON_PP_CUBIC_OFFSET_T0_4:
                /* cube face buffers of texture unit 0; i selects cube_info[] slot */
                i = (reg - RADEON_PP_CUBIC_OFFSET_T0_0) / 4;
                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
                if (r) {
                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
                                  idx, reg);
                        radeon_cs_dump_packet(p, pkt);
                        return r;
                }
                track->textures[0].cube_info[i].offset = idx_value;
                ib[idx] = idx_value + ((u32)reloc->gpu_offset);
                track->textures[0].cube_info[i].robj = reloc->robj;
                track->tex_dirty = true;
                break;
        case RADEON_PP_CUBIC_OFFSET_T1_0:
        case RADEON_PP_CUBIC_OFFSET_T1_1:
        case RADEON_PP_CUBIC_OFFSET_T1_2:
        case RADEON_PP_CUBIC_OFFSET_T1_3:
        case RADEON_PP_CUBIC_OFFSET_T1_4:
                /* same as above for texture unit 1 */
                i = (reg - RADEON_PP_CUBIC_OFFSET_T1_0) / 4;
                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
                if (r) {
                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
                                  idx, reg);
                        radeon_cs_dump_packet(p, pkt);
                        return r;
                }
                track->textures[1].cube_info[i].offset = idx_value;
                ib[idx] = idx_value + ((u32)reloc->gpu_offset);
                track->textures[1].cube_info[i].robj = reloc->robj;
                track->tex_dirty = true;
                break;
        case RADEON_PP_CUBIC_OFFSET_T2_0:
        case RADEON_PP_CUBIC_OFFSET_T2_1:
        case RADEON_PP_CUBIC_OFFSET_T2_2:
        case RADEON_PP_CUBIC_OFFSET_T2_3:
        case RADEON_PP_CUBIC_OFFSET_T2_4:
                /* same as above for texture unit 2 */
                i = (reg - RADEON_PP_CUBIC_OFFSET_T2_0) / 4;
                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
                if (r) {
                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
                                  idx, reg);
                        radeon_cs_dump_packet(p, pkt);
                        return r;
                }
                track->textures[2].cube_info[i].offset = idx_value;
                ib[idx] = idx_value + ((u32)reloc->gpu_offset);
                track->textures[2].cube_info[i].robj = reloc->robj;
                track->tex_dirty = true;
                break;
        case RADEON_RE_WIDTH_HEIGHT:
                /* maxy feeds both the color and the depth buffer size checks */
                track->maxy = ((idx_value >> 16) & 0x7FF);
                track->cb_dirty = true;
                track->zb_dirty = true;
                break;
        case RADEON_RB3D_COLORPITCH:
                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
                if (r) {
                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
                                  idx, reg);
                        radeon_cs_dump_packet(p, pkt);
                        return r;
                }
                /*
                 * The reloc is only used for its tiling flags here: bits
                 * 16..18 of the value are replaced, no address is added.
                 */
                if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
                        if (reloc->tiling_flags & RADEON_TILING_MACRO)
                                tile_flags |= RADEON_COLOR_TILE_ENABLE;
                        if (reloc->tiling_flags & RADEON_TILING_MICRO)
                                tile_flags |= RADEON_COLOR_MICROTILE_ENABLE;

                        tmp = idx_value & ~(0x7 << 16);
                        tmp |= tile_flags;
                        ib[idx] = tmp;
                } else
                        ib[idx] = idx_value;

                track->cb[0].pitch = idx_value & RADEON_COLORPITCH_MASK;
                track->cb_dirty = true;
                break;
        case RADEON_RB3D_DEPTHPITCH:
                track->zb.pitch = idx_value & RADEON_DEPTHPITCH_MASK;
                track->zb_dirty = true;
                break;
        case RADEON_RB3D_CNTL:
                /* map the 5-bit color format field to bytes per pixel */
                switch ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f) {
                case 7:
                case 8:
                case 9:
                case 11:
                case 12:
                        track->cb[0].cpp = 1;
                        break;
                case 3:
                case 4:
                case 15:
                        track->cb[0].cpp = 2;
                        break;
                case 6:
                        track->cb[0].cpp = 4;
                        break;
                default:
                        DRM_ERROR("Invalid color buffer format (%d) !\n",
                                  ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f));
                        return -EINVAL;
                }
                track->z_enabled = !!(idx_value & RADEON_Z_ENABLE);
                track->cb_dirty = true;
                track->zb_dirty = true;
                break;
        case RADEON_RB3D_ZSTENCILCNTL:
                /* low 4 bits select the depth format; unlisted values keep the old cpp */
                switch (idx_value & 0xf) {
                case 0:
                        track->zb.cpp = 2;
                        break;
                case 2:
                case 3:
                case 4:
                case 5:
                case 9:
                case 11:
                        track->zb.cpp = 4;
                        break;
                default:
                        break;
                }
                track->zb_dirty = true;
                break;
        case RADEON_RB3D_ZPASS_ADDR:
                /* address is relocated but not recorded in the tracker */
                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
                if (r) {
                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
                                  idx, reg);
                        radeon_cs_dump_packet(p, pkt);
                        return r;
                }
                ib[idx] = idx_value + ((u32)reloc->gpu_offset);
                break;
        case RADEON_PP_CNTL:
                {
                        /* per-unit enable bits start at bit 4 */
                        uint32_t temp = idx_value >> 4;
                        for (i = 0; i < track->num_texture; i++)
                                track->textures[i].enabled = !!(temp & (1 << i));
                        track->tex_dirty = true;
                }
                break;
        case RADEON_SE_VF_CNTL:
                track->vap_vf_cntl = idx_value;
                break;
        case RADEON_SE_VTX_FMT:
                track->vtx_size = r100_get_vtx_size(idx_value);
                break;
        case RADEON_PP_TEX_SIZE_0:
        case RADEON_PP_TEX_SIZE_1:
        case RADEON_PP_TEX_SIZE_2:
                /* register fields hold size minus one */
                i = (reg - RADEON_PP_TEX_SIZE_0) / 8;
                track->textures[i].width = (idx_value & RADEON_TEX_USIZE_MASK) + 1;
                track->textures[i].height = ((idx_value & RADEON_TEX_VSIZE_MASK) >> RADEON_TEX_VSIZE_SHIFT) + 1;
                track->tex_dirty = true;
                break;
        case RADEON_PP_TEX_PITCH_0:
        case RADEON_PP_TEX_PITCH_1:
        case RADEON_PP_TEX_PITCH_2:
                i = (reg - RADEON_PP_TEX_PITCH_0) / 8;
                track->textures[i].pitch = idx_value + 32;
                track->tex_dirty = true;
                break;
        case RADEON_PP_TXFILTER_0:
        case RADEON_PP_TXFILTER_1:
        case RADEON_PP_TXFILTER_2:
                i = (reg - RADEON_PP_TXFILTER_0) / 24;
                track->textures[i].num_levels = ((idx_value & RADEON_MAX_MIP_LEVEL_MASK)
                                                 >> RADEON_MAX_MIP_LEVEL_SHIFT);
                /*
                 * Values 2 and 6 in the 3-bit fields at bit 23 and bit 27
                 * turn off power-of-two rounding of width and height; the
                 * flags are never set back to true in this function.
                 */
                tmp = (idx_value >> 23) & 0x7;
                if (tmp == 2 || tmp == 6)
                        track->textures[i].roundup_w = false;
                tmp = (idx_value >> 27) & 0x7;
                if (tmp == 2 || tmp == 6)
                        track->textures[i].roundup_h = false;
                track->tex_dirty = true;
                break;
        case RADEON_PP_TXFORMAT_0:
        case RADEON_PP_TXFORMAT_1:
        case RADEON_PP_TXFORMAT_2:
                i = (reg - RADEON_PP_TXFORMAT_0) / 24;
                /* non power-of-two textures are sized by pitch, others by log2 fields */
                if (idx_value & RADEON_TXFORMAT_NON_POWER2) {
                        track->textures[i].use_pitch = true;
                } else {
                        track->textures[i].use_pitch = false;
                        track->textures[i].width = 1 << ((idx_value & RADEON_TXFORMAT_WIDTH_MASK) >> RADEON_TXFORMAT_WIDTH_SHIFT);
                        track->textures[i].height = 1 << ((idx_value & RADEON_TXFORMAT_HEIGHT_MASK) >> RADEON_TXFORMAT_HEIGHT_SHIFT);
                }
                /* coordinate type is only ever raised to 2 (cube) here, not reset */
                if (idx_value & RADEON_TXFORMAT_CUBIC_MAP_ENABLE)
                        track->textures[i].tex_coord_type = 2;
                /* formats not listed below leave cpp and compress_format untouched */
                switch ((idx_value & RADEON_TXFORMAT_FORMAT_MASK)) {
                case RADEON_TXFORMAT_I8:
                case RADEON_TXFORMAT_RGB332:
                case RADEON_TXFORMAT_Y8:
                        track->textures[i].cpp = 1;
                        track->textures[i].compress_format = R100_TRACK_COMP_NONE;
                        break;
                case RADEON_TXFORMAT_AI88:
                case RADEON_TXFORMAT_ARGB1555:
                case RADEON_TXFORMAT_RGB565:
                case RADEON_TXFORMAT_ARGB4444:
                case RADEON_TXFORMAT_VYUY422:
                case RADEON_TXFORMAT_YVYU422:
                case RADEON_TXFORMAT_SHADOW16:
                case RADEON_TXFORMAT_LDUDV655:
                case RADEON_TXFORMAT_DUDV88:
                        track->textures[i].cpp = 2;
                        track->textures[i].compress_format = R100_TRACK_COMP_NONE;
                        break;
                case RADEON_TXFORMAT_ARGB8888:
                case RADEON_TXFORMAT_RGBA8888:
                case RADEON_TXFORMAT_SHADOW32:
                case RADEON_TXFORMAT_LDUDUV8888:
                        track->textures[i].cpp = 4;
                        track->textures[i].compress_format = R100_TRACK_COMP_NONE;
                        break;
                case RADEON_TXFORMAT_DXT1:
                        track->textures[i].cpp = 1;
                        track->textures[i].compress_format = R100_TRACK_COMP_DXT1;
                        break;
                case RADEON_TXFORMAT_DXT23:
                case RADEON_TXFORMAT_DXT45:
                        track->textures[i].cpp = 1;
                        track->textures[i].compress_format = R100_TRACK_COMP_DXT35;
                        break;
                }
                /* the fifth cube_info slot takes its log2 size from bits 16..23 */
                track->textures[i].cube_info[4].width = 1 << ((idx_value >> 16) & 0xf);
                track->textures[i].cube_info[4].height = 1 << ((idx_value >> 20) & 0xf);
                track->tex_dirty = true;
                break;
        case RADEON_PP_CUBIC_FACES_0:
        case RADEON_PP_CUBIC_FACES_1:
        case RADEON_PP_CUBIC_FACES_2:
                tmp = idx_value;
                i = (reg - RADEON_PP_CUBIC_FACES_0) / 4;
                /* one byte per face: low nibble log2 width, high nibble log2 height */
                for (face = 0; face < 4; face++) {
                        track->textures[i].cube_info[face].width = 1 << ((tmp >> (face * 8)) & 0xf);
                        track->textures[i].cube_info[face].height = 1 << ((tmp >> ((face * 8) + 4)) & 0xf);
                }
                track->tex_dirty = true;
                break;
        default:
                /* anything not explicitly handled above is rejected */
                pr_err("Forbidden register 0x%04X in cs at %d\n", reg, idx);
                return -EINVAL;
        }
        return 0;
}
1898
1899int r100_cs_track_check_pkt3_indx_buffer(struct radeon_cs_parser *p,
1900                                         struct radeon_cs_packet *pkt,
1901                                         struct radeon_bo *robj)
1902{
1903        unsigned idx;
1904        u32 value;
1905        idx = pkt->idx + 1;
1906        value = radeon_get_ib_value(p, idx + 2);
1907        if ((value + 1) > radeon_bo_size(robj)) {
1908                DRM_ERROR("[drm] Buffer too small for PACKET3 INDX_BUFFER "
1909                          "(need %u have %lu) !\n",
1910                          value + 1,
1911                          radeon_bo_size(robj));
1912                return -EINVAL;
1913        }
1914        return 0;
1915}
1916
1917static int r100_packet3_check(struct radeon_cs_parser *p,
1918                              struct radeon_cs_packet *pkt)
1919{
1920        struct radeon_bo_list *reloc;
1921        struct r100_cs_track *track;
1922        unsigned idx;
1923        volatile uint32_t *ib;
1924        int r;
1925
1926        ib = p->ib.ptr;
1927        idx = pkt->idx + 1;
1928        track = (struct r100_cs_track *)p->track;
1929        switch (pkt->opcode) {
1930        case PACKET3_3D_LOAD_VBPNTR:
1931                r = r100_packet3_load_vbpntr(p, pkt, idx);
1932                if (r)
1933                        return r;
1934                break;
1935        case PACKET3_INDX_BUFFER:
1936                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1937                if (r) {
1938                        DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
1939                        radeon_cs_dump_packet(p, pkt);
1940                        return r;
1941                }
1942                ib[idx+1] = radeon_get_ib_value(p, idx+1) + ((u32)reloc->gpu_offset);
1943                r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj);
1944                if (r) {
1945                        return r;
1946                }
1947                break;
1948        case 0x23:
1949                /* 3D_RNDR_GEN_INDX_PRIM on r100/r200 */
1950                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1951                if (r) {
1952                        DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
1953                        radeon_cs_dump_packet(p, pkt);
1954                        return r;
1955                }
1956                ib[idx] = radeon_get_ib_value(p, idx) + ((u32)reloc->gpu_offset);
1957                track->num_arrays = 1;
1958                track->vtx_size = r100_get_vtx_size(radeon_get_ib_value(p, idx + 2));
1959
1960                track->arrays[0].robj = reloc->robj;
1961                track->arrays[0].esize = track->vtx_size;
1962
1963                track->max_indx = radeon_get_ib_value(p, idx+1);
1964
1965                track->vap_vf_cntl = radeon_get_ib_value(p, idx+3);
1966                track->immd_dwords = pkt->count - 1;
1967                r = r100_cs_track_check(p->rdev, track);
1968                if (r)
1969                        return r;
1970                break;
1971        case PACKET3_3D_DRAW_IMMD:
1972                if (((radeon_get_ib_value(p, idx + 1) >> 4) & 0x3) != 3) {
1973                        DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
1974                        return -EINVAL;
1975                }
1976                track->vtx_size = r100_get_vtx_size(radeon_get_ib_value(p, idx + 0));
1977                track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
1978                track->immd_dwords = pkt->count - 1;
1979                r = r100_cs_track_check(p->rdev, track);
1980                if (r)
1981                        return r;
1982                break;
1983                /* triggers drawing using in-packet vertex data */
1984        case PACKET3_3D_DRAW_IMMD_2:
1985                if (((radeon_get_ib_value(p, idx) >> 4) & 0x3) != 3) {
1986                        DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
1987                        return -EINVAL;
1988                }
1989                track->vap_vf_cntl = radeon_get_ib_value(p, idx);
1990                track->immd_dwords = pkt->count;
1991                r = r100_cs_track_check(p->rdev, track);
1992                if (r)
1993                        return r;
1994                break;
1995                /* triggers drawing using in-packet vertex data */
1996        case PACKET3_3D_DRAW_VBUF_2:
1997                track->vap_vf_cntl = radeon_get_ib_value(p, idx);
1998                r = r100_cs_track_check(p->rdev, track);
1999                if (r)
2000                        return r;
2001                break;
2002                /* triggers drawing of vertex buffers setup elsewhere */
2003        case PACKET3_3D_DRAW_INDX_2:
2004                track->vap_vf_cntl = radeon_get_ib_value(p, idx);
2005                r = r100_cs_track_check(p->rdev, track);
2006                if (r)
2007                        return r;
2008                break;
2009                /* triggers drawing using indices to vertex buffer */
2010        case PACKET3_3D_DRAW_VBUF:
2011                track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
2012                r = r100_cs_track_check(p->rdev, track);
2013                if (r)
2014                        return r;
2015                break;
2016                /* triggers drawing of vertex buffers setup elsewhere */
2017        case PACKET3_3D_DRAW_INDX:
2018                track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
2019                r = r100_cs_track_check(p->rdev, track);
2020                if (r)
2021                        return r;
2022                break;
2023                /* triggers drawing using indices to vertex buffer */
2024        case PACKET3_3D_CLEAR_HIZ:
2025        case PACKET3_3D_CLEAR_ZMASK:
2026                if (p->rdev->hyperz_filp != p->filp)
2027                        return -EINVAL;
2028                break;
2029        case PACKET3_NOP:
2030                break;
2031        default:
2032                DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
2033                return -EINVAL;
2034        }
2035        return 0;
2036}
2037
2038int r100_cs_parse(struct radeon_cs_parser *p)
2039{
2040        struct radeon_cs_packet pkt;
2041        struct r100_cs_track *track;
2042        int r;
2043
2044        track = kzalloc(sizeof(*track), GFP_KERNEL);
2045        if (!track)
2046                return -ENOMEM;
2047        r100_cs_track_clear(p->rdev, track);
2048        p->track = track;
2049        do {
2050                r = radeon_cs_packet_parse(p, &pkt, p->idx);
2051                if (r) {
2052                        return r;
2053                }
2054                p->idx += pkt.count + 2;
2055                switch (pkt.type) {
2056                case RADEON_PACKET_TYPE0:
2057                        if (p->rdev->family >= CHIP_R200)
2058                                r = r100_cs_parse_packet0(p, &pkt,
2059                                        p->rdev->config.r100.reg_safe_bm,
2060                                        p->rdev->config.r100.reg_safe_bm_size,
2061                                        &r200_packet0_check);
2062                        else
2063                                r = r100_cs_parse_packet0(p, &pkt,
2064                                        p->rdev->config.r100.reg_safe_bm,
2065                                        p->rdev->config.r100.reg_safe_bm_size,
2066                                        &r100_packet0_check);
2067                        break;
2068                case RADEON_PACKET_TYPE2:
2069                        break;
2070                case RADEON_PACKET_TYPE3:
2071                        r = r100_packet3_check(p, &pkt);
2072                        break;
2073                default:
2074                        DRM_ERROR("Unknown packet type %d !\n",
2075                                  pkt.type);
2076                        return -EINVAL;
2077                }
2078                if (r)
2079                        return r;
2080        } while (p->idx < p->chunk_ib->length_dw);
2081        return 0;
2082}
2083
2084static void r100_cs_track_texture_print(struct r100_cs_track_texture *t)
2085{
2086        DRM_ERROR("pitch                      %d\n", t->pitch);
2087        DRM_ERROR("use_pitch                  %d\n", t->use_pitch);
2088        DRM_ERROR("width                      %d\n", t->width);
2089        DRM_ERROR("width_11                   %d\n", t->width_11);
2090        DRM_ERROR("height                     %d\n", t->height);
2091        DRM_ERROR("height_11                  %d\n", t->height_11);
2092        DRM_ERROR("num levels                 %d\n", t->num_levels);
2093        DRM_ERROR("depth                      %d\n", t->txdepth);
2094        DRM_ERROR("bpp                        %d\n", t->cpp);
2095        DRM_ERROR("coordinate type            %d\n", t->tex_coord_type);
2096        DRM_ERROR("width round to power of 2  %d\n", t->roundup_w);
2097        DRM_ERROR("height round to power of 2 %d\n", t->roundup_h);
2098        DRM_ERROR("compress format            %d\n", t->compress_format);
2099}
2100
2101static int r100_track_compress_size(int compress_format, int w, int h)
2102{
2103        int block_width, block_height, block_bytes;
2104        int wblocks, hblocks;
2105        int min_wblocks;
2106        int sz;
2107
2108        block_width = 4;
2109        block_height = 4;
2110
2111        switch (compress_format) {
2112        case R100_TRACK_COMP_DXT1:
2113                block_bytes = 8;
2114                min_wblocks = 4;
2115                break;
2116        default:
2117        case R100_TRACK_COMP_DXT35:
2118                block_bytes = 16;
2119                min_wblocks = 2;
2120                break;
2121        }
2122
2123        hblocks = (h + block_height - 1) / block_height;
2124        wblocks = (w + block_width - 1) / block_width;
2125        if (wblocks < min_wblocks)
2126                wblocks = min_wblocks;
2127        sz = wblocks * hblocks * block_bytes;
2128        return sz;
2129}
2130
2131static int r100_cs_track_cube(struct radeon_device *rdev,
2132                              struct r100_cs_track *track, unsigned idx)
2133{
2134        unsigned face, w, h;
2135        struct radeon_bo *cube_robj;
2136        unsigned long size;
2137        unsigned compress_format = track->textures[idx].compress_format;
2138
2139        for (face = 0; face < 5; face++) {
2140                cube_robj = track->textures[idx].cube_info[face].robj;
2141                w = track->textures[idx].cube_info[face].width;
2142                h = track->textures[idx].cube_info[face].height;
2143
2144                if (compress_format) {
2145                        size = r100_track_compress_size(compress_format, w, h);
2146                } else
2147                        size = w * h;
2148                size *= track->textures[idx].cpp;
2149
2150                size += track->textures[idx].cube_info[face].offset;
2151
2152                if (size > radeon_bo_size(cube_robj)) {
2153                        DRM_ERROR("Cube texture offset greater than object size %lu %lu\n",
2154                                  size, radeon_bo_size(cube_robj));
2155                        r100_cs_track_texture_print(&track->textures[idx]);
2156                        return -1;
2157                }
2158        }
2159        return 0;
2160}
2161
2162static int r100_cs_track_texture_check(struct radeon_device *rdev,
2163                                       struct r100_cs_track *track)
2164{
2165        struct radeon_bo *robj;
2166        unsigned long size;
2167        unsigned u, i, w, h, d;
2168        int ret;
2169
2170        for (u = 0; u < track->num_texture; u++) {
2171                if (!track->textures[u].enabled)
2172                        continue;
2173                if (track->textures[u].lookup_disable)
2174                        continue;
2175                robj = track->textures[u].robj;
2176                if (robj == NULL) {
2177                        DRM_ERROR("No texture bound to unit %u\n", u);
2178                        return -EINVAL;
2179                }
2180                size = 0;
2181                for (i = 0; i <= track->textures[u].num_levels; i++) {
2182                        if (track->textures[u].use_pitch) {
2183                                if (rdev->family < CHIP_R300)
2184                                        w = (track->textures[u].pitch / track->textures[u].cpp) / (1 << i);
2185                                else
2186                                        w = track->textures[u].pitch / (1 << i);
2187                        } else {
2188                                w = track->textures[u].width;
2189                                if (rdev->family >= CHIP_RV515)
2190                                        w |= track->textures[u].width_11;
2191                                w = w / (1 << i);
2192                                if (track->textures[u].roundup_w)
2193                                        w = roundup_pow_of_two(w);
2194                        }
2195                        h = track->textures[u].height;
2196                        if (rdev->family >= CHIP_RV515)
2197                                h |= track->textures[u].height_11;
2198                        h = h / (1 << i);
2199                        if (track->textures[u].roundup_h)
2200                                h = roundup_pow_of_two(h);
2201                        if (track->textures[u].tex_coord_type == 1) {
2202                                d = (1 << track->textures[u].txdepth) / (1 << i);
2203                                if (!d)
2204                                        d = 1;
2205                        } else {
2206                                d = 1;
2207                        }
2208                        if (track->textures[u].compress_format) {
2209
2210                                size += r100_track_compress_size(track->textures[u].compress_format, w, h) * d;
2211                                /* compressed textures are block based */
2212                        } else
2213                                size += w * h * d;
2214                }
2215                size *= track->textures[u].cpp;
2216
2217                switch (track->textures[u].tex_coord_type) {
2218                case 0:
2219                case 1:
2220                        break;
2221                case 2:
2222                        if (track->separate_cube) {
2223                                ret = r100_cs_track_cube(rdev, track, u);
2224                                if (ret)
2225                                        return ret;
2226                        } else
2227                                size *= 6;
2228                        break;
2229                default:
2230                        DRM_ERROR("Invalid texture coordinate type %u for unit "
2231                                  "%u\n", track->textures[u].tex_coord_type, u);
2232                        return -EINVAL;
2233                }
2234                if (size > radeon_bo_size(robj)) {
2235                        DRM_ERROR("Texture of unit %u needs %lu bytes but is "
2236                                  "%lu\n", u, size, radeon_bo_size(robj));
2237                        r100_cs_track_texture_print(&track->textures[u]);
2238                        return -EINVAL;
2239                }
2240        }
2241        return 0;
2242}
2243
2244int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
2245{
2246        unsigned i;
2247        unsigned long size;
2248        unsigned prim_walk;
2249        unsigned nverts;
2250        unsigned num_cb = track->cb_dirty ? track->num_cb : 0;
2251
2252        if (num_cb && !track->zb_cb_clear && !track->color_channel_mask &&
2253            !track->blend_read_enable)
2254                num_cb = 0;
2255
2256        for (i = 0; i < num_cb; i++) {
2257                if (track->cb[i].robj == NULL) {
2258                        DRM_ERROR("[drm] No buffer for color buffer %d !\n", i);
2259                        return -EINVAL;
2260                }
2261                size = track->cb[i].pitch * track->cb[i].cpp * track->maxy;
2262                size += track->cb[i].offset;
2263                if (size > radeon_bo_size(track->cb[i].robj)) {
2264                        DRM_ERROR("[drm] Buffer too small for color buffer %d "
2265                                  "(need %lu have %lu) !\n", i, size,
2266                                  radeon_bo_size(track->cb[i].robj));
2267                        DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n",
2268                                  i, track->cb[i].pitch, track->cb[i].cpp,
2269                                  track->cb[i].offset, track->maxy);
2270                        return -EINVAL;
2271                }
2272        }
2273        track->cb_dirty = false;
2274
2275        if (track->zb_dirty && track->z_enabled) {
2276                if (track->zb.robj == NULL) {
2277                        DRM_ERROR("[drm] No buffer for z buffer !\n");
2278                        return -EINVAL;
2279                }
2280                size = track->zb.pitch * track->zb.cpp * track->maxy;
2281                size += track->zb.offset;
2282                if (size > radeon_bo_size(track->zb.robj)) {
2283                        DRM_ERROR("[drm] Buffer too small for z buffer "
2284                                  "(need %lu have %lu) !\n", size,
2285                                  radeon_bo_size(track->zb.robj));
2286                        DRM_ERROR("[drm] zbuffer (%u %u %u %u)\n",
2287                                  track->zb.pitch, track->zb.cpp,
2288                                  track->zb.offset, track->maxy);
2289                        return -EINVAL;
2290                }
2291        }
2292        track->zb_dirty = false;
2293
2294        if (track->aa_dirty && track->aaresolve) {
2295                if (track->aa.robj == NULL) {
2296                        DRM_ERROR("[drm] No buffer for AA resolve buffer %d !\n", i);
2297                        return -EINVAL;
2298                }
2299                /* I believe the format comes from colorbuffer0. */
2300                size = track->aa.pitch * track->cb[0].cpp * track->maxy;
2301                size += track->aa.offset;
2302                if (size > radeon_bo_size(track->aa.robj)) {
2303                        DRM_ERROR("[drm] Buffer too small for AA resolve buffer %d "
2304                                  "(need %lu have %lu) !\n", i, size,
2305                                  radeon_bo_size(track->aa.robj));
2306                        DRM_ERROR("[drm] AA resolve buffer %d (%u %u %u %u)\n",
2307                                  i, track->aa.pitch, track->cb[0].cpp,
2308                                  track->aa.offset, track->maxy);
2309                        return -EINVAL;
2310                }
2311        }
2312        track->aa_dirty = false;
2313
2314        prim_walk = (track->vap_vf_cntl >> 4) & 0x3;
2315        if (track->vap_vf_cntl & (1 << 14)) {
2316                nverts = track->vap_alt_nverts;
2317        } else {
2318                nverts = (track->vap_vf_cntl >> 16) & 0xFFFF;
2319        }
2320        switch (prim_walk) {
2321        case 1:
2322                for (i = 0; i < track->num_arrays; i++) {
2323                        size = track->arrays[i].esize * track->max_indx * 4;
2324                        if (track->arrays[i].robj == NULL) {
2325                                DRM_ERROR("(PW %u) Vertex array %u no buffer "
2326                                          "bound\n", prim_walk, i);
2327                                return -EINVAL;
2328                        }
2329                        if (size > radeon_bo_size(track->arrays[i].robj)) {
2330                                dev_err(rdev->dev, "(PW %u) Vertex array %u "
2331                                        "need %lu dwords have %lu dwords\n",
2332                                        prim_walk, i, size >> 2,
2333                                        radeon_bo_size(track->arrays[i].robj)
2334                                        >> 2);
2335                                DRM_ERROR("Max indices %u\n", track->max_indx);
2336                                return -EINVAL;
2337                        }
2338                }
2339                break;
2340        case 2:
2341                for (i = 0; i < track->num_arrays; i++) {
2342                        size = track->arrays[i].esize * (nverts - 1) * 4;
2343                        if (track->arrays[i].robj == NULL) {
2344                                DRM_ERROR("(PW %u) Vertex array %u no buffer "
2345                                          "bound\n", prim_walk, i);
2346                                return -EINVAL;
2347                        }
2348                        if (size > radeon_bo_size(track->arrays[i].robj)) {
2349                                dev_err(rdev->dev, "(PW %u) Vertex array %u "
2350                                        "need %lu dwords have %lu dwords\n",
2351                                        prim_walk, i, size >> 2,
2352                                        radeon_bo_size(track->arrays[i].robj)
2353                                        >> 2);
2354                                return -EINVAL;
2355                        }
2356                }
2357                break;
2358        case 3:
2359                size = track->vtx_size * nverts;
2360                if (size != track->immd_dwords) {
2361                        DRM_ERROR("IMMD draw %u dwors but needs %lu dwords\n",
2362                                  track->immd_dwords, size);
2363                        DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n",
2364                                  nverts, track->vtx_size);
2365                        return -EINVAL;
2366                }
2367                break;
2368        default:
2369                DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n",
2370                          prim_walk);
2371                return -EINVAL;
2372        }
2373
2374        if (track->tex_dirty) {
2375                track->tex_dirty = false;
2376                return r100_cs_track_texture_check(rdev, track);
2377        }
2378        return 0;
2379}
2380
2381void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track)
2382{
2383        unsigned i, face;
2384
2385        track->cb_dirty = true;
2386        track->zb_dirty = true;
2387        track->tex_dirty = true;
2388        track->aa_dirty = true;
2389
2390        if (rdev->family < CHIP_R300) {
2391                track->num_cb = 1;
2392                if (rdev->family <= CHIP_RS200)
2393                        track->num_texture = 3;
2394                else
2395                        track->num_texture = 6;
2396                track->maxy = 2048;
2397                track->separate_cube = true;
2398        } else {
2399                track->num_cb = 4;
2400                track->num_texture = 16;
2401                track->maxy = 4096;
2402                track->separate_cube = false;
2403                track->aaresolve = false;
2404                track->aa.robj = NULL;
2405        }
2406
2407        for (i = 0; i < track->num_cb; i++) {
2408                track->cb[i].robj = NULL;
2409                track->cb[i].pitch = 8192;
2410                track->cb[i].cpp = 16;
2411                track->cb[i].offset = 0;
2412        }
2413        track->z_enabled = true;
2414        track->zb.robj = NULL;
2415        track->zb.pitch = 8192;
2416        track->zb.cpp = 4;
2417        track->zb.offset = 0;
2418        track->vtx_size = 0x7F;
2419        track->immd_dwords = 0xFFFFFFFFUL;
2420        track->num_arrays = 11;
2421        track->max_indx = 0x00FFFFFFUL;
2422        for (i = 0; i < track->num_arrays; i++) {
2423                track->arrays[i].robj = NULL;
2424                track->arrays[i].esize = 0x7F;
2425        }
2426        for (i = 0; i < track->num_texture; i++) {
2427                track->textures[i].compress_format = R100_TRACK_COMP_NONE;
2428                track->textures[i].pitch = 16536;
2429                track->textures[i].width = 16536;
2430                track->textures[i].height = 16536;
2431                track->textures[i].width_11 = 1 << 11;
2432                track->textures[i].height_11 = 1 << 11;
2433                track->textures[i].num_levels = 12;
2434                if (rdev->family <= CHIP_RS200) {
2435                        track->textures[i].tex_coord_type = 0;
2436                        track->textures[i].txdepth = 0;
2437                } else {
2438                        track->textures[i].txdepth = 16;
2439                        track->textures[i].tex_coord_type = 1;
2440                }
2441                track->textures[i].cpp = 64;
2442                track->textures[i].robj = NULL;
2443                /* CS IB emission code makes sure texture unit are disabled */
2444                track->textures[i].enabled = false;
2445                track->textures[i].lookup_disable = false;
2446                track->textures[i].roundup_w = true;
2447                track->textures[i].roundup_h = true;
2448                if (track->separate_cube)
2449                        for (face = 0; face < 5; face++) {
2450                                track->textures[i].cube_info[face].robj = NULL;
2451                                track->textures[i].cube_info[face].width = 16536;
2452                                track->textures[i].cube_info[face].height = 16536;
2453                                track->textures[i].cube_info[face].offset = 0;
2454                        }
2455        }
2456}
2457
2458/*
2459 * Global GPU functions
2460 */
2461static void r100_errata(struct radeon_device *rdev)
2462{
2463        rdev->pll_errata = 0;
2464
2465        if (rdev->family == CHIP_RV200 || rdev->family == CHIP_RS200) {
2466                rdev->pll_errata |= CHIP_ERRATA_PLL_DUMMYREADS;
2467        }
2468
2469        if (rdev->family == CHIP_RV100 ||
2470            rdev->family == CHIP_RS100 ||
2471            rdev->family == CHIP_RS200) {
2472                rdev->pll_errata |= CHIP_ERRATA_PLL_DELAY;
2473        }
2474}
2475
2476static int r100_rbbm_fifo_wait_for_entry(struct radeon_device *rdev, unsigned n)
2477{
2478        unsigned i;
2479        uint32_t tmp;
2480
2481        for (i = 0; i < rdev->usec_timeout; i++) {
2482                tmp = RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK;
2483                if (tmp >= n) {
2484                        return 0;
2485                }
2486                udelay(1);
2487        }
2488        return -1;
2489}
2490
2491int r100_gui_wait_for_idle(struct radeon_device *rdev)
2492{
2493        unsigned i;
2494        uint32_t tmp;
2495
2496        if (r100_rbbm_fifo_wait_for_entry(rdev, 64)) {
2497                pr_warn("radeon: wait for empty RBBM fifo failed! Bad things might happen.\n");
2498        }
2499        for (i = 0; i < rdev->usec_timeout; i++) {
2500                tmp = RREG32(RADEON_RBBM_STATUS);
2501                if (!(tmp & RADEON_RBBM_ACTIVE)) {
2502                        return 0;
2503                }
2504                udelay(1);
2505        }
2506        return -1;
2507}
2508
2509int r100_mc_wait_for_idle(struct radeon_device *rdev)
2510{
2511        unsigned i;
2512        uint32_t tmp;
2513
2514        for (i = 0; i < rdev->usec_timeout; i++) {
2515                /* read MC_STATUS */
2516                tmp = RREG32(RADEON_MC_STATUS);
2517                if (tmp & RADEON_MC_IDLE) {
2518                        return 0;
2519                }
2520                udelay(1);
2521        }
2522        return -1;
2523}
2524
2525bool r100_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
2526{
2527        u32 rbbm_status;
2528
2529        rbbm_status = RREG32(R_000E40_RBBM_STATUS);
2530        if (!G_000E40_GUI_ACTIVE(rbbm_status)) {
2531                radeon_ring_lockup_update(rdev, ring);
2532                return false;
2533        }
2534        return radeon_ring_test_lockup(rdev, ring);
2535}
2536
2537/* required on r1xx, r2xx, r300, r(v)350, r420/r481, rs400/rs480 */
2538void r100_enable_bm(struct radeon_device *rdev)
2539{
2540        uint32_t tmp;
2541        /* Enable bus mastering */
2542        tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS;
2543        WREG32(RADEON_BUS_CNTL, tmp);
2544}
2545
2546void r100_bm_disable(struct radeon_device *rdev)
2547{
2548        u32 tmp;
2549
2550        /* disable bus mastering */
2551        tmp = RREG32(R_000030_BUS_CNTL);
2552        WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000044);
2553        mdelay(1);
2554        WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000042);
2555        mdelay(1);
2556        WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000040);
2557        tmp = RREG32(RADEON_BUS_CNTL);
2558        mdelay(1);
2559        pci_clear_master(rdev->pdev);
2560        mdelay(1);
2561}
2562
2563int r100_asic_reset(struct radeon_device *rdev, bool hard)
2564{
2565        struct r100_mc_save save;
2566        u32 status, tmp;
2567        int ret = 0;
2568
2569        status = RREG32(R_000E40_RBBM_STATUS);
2570        if (!G_000E40_GUI_ACTIVE(status)) {
2571                return 0;
2572        }
2573        r100_mc_stop(rdev, &save);
2574        status = RREG32(R_000E40_RBBM_STATUS);
2575        dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
2576        /* stop CP */
2577        WREG32(RADEON_CP_CSQ_CNTL, 0);
2578        tmp = RREG32(RADEON_CP_RB_CNTL);
2579        WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA);
2580        WREG32(RADEON_CP_RB_RPTR_WR, 0);
2581        WREG32(RADEON_CP_RB_WPTR, 0);
2582        WREG32(RADEON_CP_RB_CNTL, tmp);
2583        /* save PCI state */
2584        pci_save_state(rdev->pdev);
2585        /* disable bus mastering */
2586        r100_bm_disable(rdev);
2587        WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_SE(1) |
2588                                        S_0000F0_SOFT_RESET_RE(1) |
2589                                        S_0000F0_SOFT_RESET_PP(1) |
2590                                        S_0000F0_SOFT_RESET_RB(1));
2591        RREG32(R_0000F0_RBBM_SOFT_RESET);
2592        mdelay(500);
2593        WREG32(R_0000F0_RBBM_SOFT_RESET, 0);
2594        mdelay(1);
2595        status = RREG32(R_000E40_RBBM_STATUS);
2596        dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
2597        /* reset CP */
2598        WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_CP(1));
2599        RREG32(R_0000F0_RBBM_SOFT_RESET);
2600        mdelay(500);
2601        WREG32(R_0000F0_RBBM_SOFT_RESET, 0);
2602        mdelay(1);
2603        status = RREG32(R_000E40_RBBM_STATUS);
2604        dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
2605        /* restore PCI & busmastering */
2606        pci_restore_state(rdev->pdev);
2607        r100_enable_bm(rdev);
2608        /* Check if GPU is idle */
2609        if (G_000E40_SE_BUSY(status) || G_000E40_RE_BUSY(status) ||
2610                G_000E40_TAM_BUSY(status) || G_000E40_PB_BUSY(status)) {
2611                dev_err(rdev->dev, "failed to reset GPU\n");
2612                ret = -1;
2613        } else
2614                dev_info(rdev->dev, "GPU reset succeed\n");
2615        r100_mc_resume(rdev, &save);
2616        return ret;
2617}
2618
2619void r100_set_common_regs(struct radeon_device *rdev)
2620{
2621        bool force_dac2 = false;
2622        u32 tmp;
2623
2624        /* set these so they don't interfere with anything */
2625        WREG32(RADEON_OV0_SCALE_CNTL, 0);
2626        WREG32(RADEON_SUBPIC_CNTL, 0);
2627        WREG32(RADEON_VIPH_CONTROL, 0);
2628        WREG32(RADEON_I2C_CNTL_1, 0);
2629        WREG32(RADEON_DVI_I2C_CNTL_1, 0);
2630        WREG32(RADEON_CAP0_TRIG_CNTL, 0);
2631        WREG32(RADEON_CAP1_TRIG_CNTL, 0);
2632
2633        /* always set up dac2 on rn50 and some rv100 as lots
2634         * of servers seem to wire it up to a VGA port but
2635         * don't report it in the bios connector
2636         * table.
2637         */
2638        switch (rdev->pdev->device) {
2639                /* RN50 */
2640        case 0x515e:
2641        case 0x5969:
2642                force_dac2 = true;
2643                break;
2644                /* RV100*/
2645        case 0x5159:
2646        case 0x515a:
2647                /* DELL triple head servers */
2648                if ((rdev->pdev->subsystem_vendor == 0x1028 /* DELL */) &&
2649                    ((rdev->pdev->subsystem_device == 0x016c) ||
2650                     (rdev->pdev->subsystem_device == 0x016d) ||
2651                     (rdev->pdev->subsystem_device == 0x016e) ||
2652                     (rdev->pdev->subsystem_device == 0x016f) ||
2653                     (rdev->pdev->subsystem_device == 0x0170) ||
2654                     (rdev->pdev->subsystem_device == 0x017d) ||
2655                     (rdev->pdev->subsystem_device == 0x017e) ||
2656                     (rdev->pdev->subsystem_device == 0x0183) ||
2657                     (rdev->pdev->subsystem_device == 0x018a) ||
2658                     (rdev->pdev->subsystem_device == 0x019a)))
2659                        force_dac2 = true;
2660                break;
2661        }
2662
2663        if (force_dac2) {
2664                u32 disp_hw_debug = RREG32(RADEON_DISP_HW_DEBUG);
2665                u32 tv_dac_cntl = RREG32(RADEON_TV_DAC_CNTL);
2666                u32 dac2_cntl = RREG32(RADEON_DAC_CNTL2);
2667
2668                /* For CRT on DAC2, don't turn it on if BIOS didn't
2669                   enable it, even it's detected.
2670                */
2671
2672                /* force it to crtc0 */
2673                dac2_cntl &= ~RADEON_DAC2_DAC_CLK_SEL;
2674                dac2_cntl |= RADEON_DAC2_DAC2_CLK_SEL;
2675                disp_hw_debug |= RADEON_CRT2_DISP1_SEL;
2676
2677                /* set up the TV DAC */
2678                tv_dac_cntl &= ~(RADEON_TV_DAC_PEDESTAL |
2679                                 RADEON_TV_DAC_STD_MASK |
2680                                 RADEON_TV_DAC_RDACPD |
2681                                 RADEON_TV_DAC_GDACPD |
2682                                 RADEON_TV_DAC_BDACPD |
2683                                 RADEON_TV_DAC_BGADJ_MASK |
2684                                 RADEON_TV_DAC_DACADJ_MASK);
2685                tv_dac_cntl |= (RADEON_TV_DAC_NBLANK |
2686                                RADEON_TV_DAC_NHOLD |
2687                                RADEON_TV_DAC_STD_PS2 |
2688                                (0x58 << 16));
2689
2690                WREG32(RADEON_TV_DAC_CNTL, tv_dac_cntl);
2691                WREG32(RADEON_DISP_HW_DEBUG, disp_hw_debug);
2692                WREG32(RADEON_DAC_CNTL2, dac2_cntl);
2693        }
2694
2695        /* switch PM block to ACPI mode */
2696        tmp = RREG32_PLL(RADEON_PLL_PWRMGT_CNTL);
2697        tmp &= ~RADEON_PM_MODE_SEL;
2698        WREG32_PLL(RADEON_PLL_PWRMGT_CNTL, tmp);
2699
2700}
2701
2702/*
2703 * VRAM info
2704 */
2705static void r100_vram_get_type(struct radeon_device *rdev)
2706{
2707        uint32_t tmp;
2708
2709        rdev->mc.vram_is_ddr = false;
2710        if (rdev->flags & RADEON_IS_IGP)
2711                rdev->mc.vram_is_ddr = true;
2712        else if (RREG32(RADEON_MEM_SDRAM_MODE_REG) & RADEON_MEM_CFG_TYPE_DDR)
2713                rdev->mc.vram_is_ddr = true;
2714        if ((rdev->family == CHIP_RV100) ||
2715            (rdev->family == CHIP_RS100) ||
2716            (rdev->family == CHIP_RS200)) {
2717                tmp = RREG32(RADEON_MEM_CNTL);
2718                if (tmp & RV100_HALF_MODE) {
2719                        rdev->mc.vram_width = 32;
2720                } else {
2721                        rdev->mc.vram_width = 64;
2722                }
2723                if (rdev->flags & RADEON_SINGLE_CRTC) {
2724                        rdev->mc.vram_width /= 4;
2725                        rdev->mc.vram_is_ddr = true;
2726                }
2727        } else if (rdev->family <= CHIP_RV280) {
2728                tmp = RREG32(RADEON_MEM_CNTL);
2729                if (tmp & RADEON_MEM_NUM_CHANNELS_MASK) {
2730                        rdev->mc.vram_width = 128;
2731                } else {
2732                        rdev->mc.vram_width = 64;
2733                }
2734        } else {
2735                /* newer IGPs */
2736                rdev->mc.vram_width = 128;
2737        }
2738}
2739
2740static u32 r100_get_accessible_vram(struct radeon_device *rdev)
2741{
2742        u32 aper_size;
2743        u8 byte;
2744
2745        aper_size = RREG32(RADEON_CONFIG_APER_SIZE);
2746
2747        /* Set HDP_APER_CNTL only on cards that are known not to be broken,
2748         * that is has the 2nd generation multifunction PCI interface
2749         */
2750        if (rdev->family == CHIP_RV280 ||
2751            rdev->family >= CHIP_RV350) {
2752                WREG32_P(RADEON_HOST_PATH_CNTL, RADEON_HDP_APER_CNTL,
2753                       ~RADEON_HDP_APER_CNTL);
2754                DRM_INFO("Generation 2 PCI interface, using max accessible memory\n");
2755                return aper_size * 2;
2756        }
2757
2758        /* Older cards have all sorts of funny issues to deal with. First
2759         * check if it's a multifunction card by reading the PCI config
2760         * header type... Limit those to one aperture size
2761         */
2762        pci_read_config_byte(rdev->pdev, 0xe, &byte);
2763        if (byte & 0x80) {
2764                DRM_INFO("Generation 1 PCI interface in multifunction mode\n");
2765                DRM_INFO("Limiting VRAM to one aperture\n");
2766                return aper_size;
2767        }
2768
2769        /* Single function older card. We read HDP_APER_CNTL to see how the BIOS
2770         * have set it up. We don't write this as it's broken on some ASICs but
2771         * we expect the BIOS to have done the right thing (might be too optimistic...)
2772         */
2773        if (RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL)
2774                return aper_size * 2;
2775        return aper_size;
2776}
2777
2778void r100_vram_init_sizes(struct radeon_device *rdev)
2779{
2780        u64 config_aper_size;
2781
2782        /* work out accessible VRAM */
2783        rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
2784        rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
2785        rdev->mc.visible_vram_size = r100_get_accessible_vram(rdev);
2786        /* FIXME we don't use the second aperture yet when we could use it */
2787        if (rdev->mc.visible_vram_size > rdev->mc.aper_size)
2788                rdev->mc.visible_vram_size = rdev->mc.aper_size;
2789        config_aper_size = RREG32(RADEON_CONFIG_APER_SIZE);
2790        if (rdev->flags & RADEON_IS_IGP) {
2791                uint32_t tom;
2792                /* read NB_TOM to get the amount of ram stolen for the GPU */
2793                tom = RREG32(RADEON_NB_TOM);
2794                rdev->mc.real_vram_size = (((tom >> 16) - (tom & 0xffff) + 1) << 16);
2795                WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
2796                rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
2797        } else {
2798                rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
2799                /* Some production boards of m6 will report 0
2800                 * if it's 8 MB
2801                 */
2802                if (rdev->mc.real_vram_size == 0) {
2803                        rdev->mc.real_vram_size = 8192 * 1024;
2804                        WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
2805                }
2806                /* Fix for RN50, M6, M7 with 8/16/32(??) MBs of VRAM -
2807                 * Novell bug 204882 + along with lots of ubuntu ones
2808                 */
2809                if (rdev->mc.aper_size > config_aper_size)
2810                        config_aper_size = rdev->mc.aper_size;
2811
2812                if (config_aper_size > rdev->mc.real_vram_size)
2813                        rdev->mc.mc_vram_size = config_aper_size;
2814                else
2815                        rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
2816        }
2817}
2818
2819void r100_vga_set_state(struct radeon_device *rdev, bool state)
2820{
2821        uint32_t temp;
2822
2823        temp = RREG32(RADEON_CONFIG_CNTL);
2824        if (!state) {
2825                temp &= ~RADEON_CFG_VGA_RAM_EN;
2826                temp |= RADEON_CFG_VGA_IO_DIS;
2827        } else {
2828                temp &= ~RADEON_CFG_VGA_IO_DIS;
2829        }
2830        WREG32(RADEON_CONFIG_CNTL, temp);
2831}
2832
2833static void r100_mc_init(struct radeon_device *rdev)
2834{
2835        u64 base;
2836
2837        r100_vram_get_type(rdev);
2838        r100_vram_init_sizes(rdev);
2839        base = rdev->mc.aper_base;
2840        if (rdev->flags & RADEON_IS_IGP)
2841                base = (RREG32(RADEON_NB_TOM) & 0xffff) << 16;
2842        radeon_vram_location(rdev, &rdev->mc, base);
2843        rdev->mc.gtt_base_align = 0;
2844        if (!(rdev->flags & RADEON_IS_AGP))
2845                radeon_gtt_location(rdev, &rdev->mc);
2846        radeon_update_bandwidth_info(rdev);
2847}
2848
2849
2850/*
2851 * Indirect registers accessor
2852 */
2853void r100_pll_errata_after_index(struct radeon_device *rdev)
2854{
2855        if (rdev->pll_errata & CHIP_ERRATA_PLL_DUMMYREADS) {
2856                (void)RREG32(RADEON_CLOCK_CNTL_DATA);
2857                (void)RREG32(RADEON_CRTC_GEN_CNTL);
2858        }
2859}
2860
2861static void r100_pll_errata_after_data(struct radeon_device *rdev)
2862{
2863        /* This workarounds is necessary on RV100, RS100 and RS200 chips
2864         * or the chip could hang on a subsequent access
2865         */
2866        if (rdev->pll_errata & CHIP_ERRATA_PLL_DELAY) {
2867                mdelay(5);
2868        }
2869
2870        /* This function is required to workaround a hardware bug in some (all?)
2871         * revisions of the R300.  This workaround should be called after every
2872         * CLOCK_CNTL_INDEX register access.  If not, register reads afterward
2873         * may not be correct.
2874         */
2875        if (rdev->pll_errata & CHIP_ERRATA_R300_CG) {
2876                uint32_t save, tmp;
2877
2878                save = RREG32(RADEON_CLOCK_CNTL_INDEX);
2879                tmp = save & ~(0x3f | RADEON_PLL_WR_EN);
2880                WREG32(RADEON_CLOCK_CNTL_INDEX, tmp);
2881                tmp = RREG32(RADEON_CLOCK_CNTL_DATA);
2882                WREG32(RADEON_CLOCK_CNTL_INDEX, save);
2883        }
2884}
2885
2886uint32_t r100_pll_rreg(struct radeon_device *rdev, uint32_t reg)
2887{
2888        unsigned long flags;
2889        uint32_t data;
2890
2891        spin_lock_irqsave(&rdev->pll_idx_lock, flags);
2892        WREG8(RADEON_CLOCK_CNTL_INDEX, reg & 0x3f);
2893        r100_pll_errata_after_index(rdev);
2894        data = RREG32(RADEON_CLOCK_CNTL_DATA);
2895        r100_pll_errata_after_data(rdev);
2896        spin_unlock_irqrestore(&rdev->pll_idx_lock, flags);
2897        return data;
2898}
2899
2900void r100_pll_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
2901{
2902        unsigned long flags;
2903
2904        spin_lock_irqsave(&rdev->pll_idx_lock, flags);
2905        WREG8(RADEON_CLOCK_CNTL_INDEX, ((reg & 0x3f) | RADEON_PLL_WR_EN));
2906        r100_pll_errata_after_index(rdev);
2907        WREG32(RADEON_CLOCK_CNTL_DATA, v);
2908        r100_pll_errata_after_data(rdev);
2909        spin_unlock_irqrestore(&rdev->pll_idx_lock, flags);
2910}
2911
2912static void r100_set_safe_registers(struct radeon_device *rdev)
2913{
2914        if (ASIC_IS_RN50(rdev)) {
2915                rdev->config.r100.reg_safe_bm = rn50_reg_safe_bm;
2916                rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(rn50_reg_safe_bm);
2917        } else if (rdev->family < CHIP_R200) {
2918                rdev->config.r100.reg_safe_bm = r100_reg_safe_bm;
2919                rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(r100_reg_safe_bm);
2920        } else {
2921                r200_set_safe_registers(rdev);
2922        }
2923}
2924
2925/*
2926 * Debugfs info
2927 */
2928#if defined(CONFIG_DEBUG_FS)
2929static int r100_debugfs_rbbm_info_show(struct seq_file *m, void *unused)
2930{
2931        struct radeon_device *rdev = (struct radeon_device *)m->private;
2932        uint32_t reg, value;
2933        unsigned i;
2934
2935        seq_printf(m, "RBBM_STATUS 0x%08x\n", RREG32(RADEON_RBBM_STATUS));
2936        seq_printf(m, "RBBM_CMDFIFO_STAT 0x%08x\n", RREG32(0xE7C));
2937        seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
2938        for (i = 0; i < 64; i++) {
2939                WREG32(RADEON_RBBM_CMDFIFO_ADDR, i | 0x100);
2940                reg = (RREG32(RADEON_RBBM_CMDFIFO_DATA) - 1) >> 2;
2941                WREG32(RADEON_RBBM_CMDFIFO_ADDR, i);
2942                value = RREG32(RADEON_RBBM_CMDFIFO_DATA);
2943                seq_printf(m, "[0x%03X] 0x%04X=0x%08X\n", i, reg, value);
2944        }
2945        return 0;
2946}
2947
2948static int r100_debugfs_cp_ring_info_show(struct seq_file *m, void *unused)
2949{
2950        struct radeon_device *rdev = (struct radeon_device *)m->private;
2951        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2952        uint32_t rdp, wdp;
2953        unsigned count, i, j;
2954
2955        radeon_ring_free_size(rdev, ring);
2956        rdp = RREG32(RADEON_CP_RB_RPTR);
2957        wdp = RREG32(RADEON_CP_RB_WPTR);
2958        count = (rdp + ring->ring_size - wdp) & ring->ptr_mask;
2959        seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
2960        seq_printf(m, "CP_RB_WPTR 0x%08x\n", wdp);
2961        seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp);
2962        seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw);
2963        seq_printf(m, "%u dwords in ring\n", count);
2964        if (ring->ready) {
2965                for (j = 0; j <= count; j++) {
2966                        i = (rdp + j) & ring->ptr_mask;
2967                        seq_printf(m, "r[%04d]=0x%08x\n", i, ring->ring[i]);
2968                }
2969        }
2970        return 0;
2971}
2972
2973
2974static int r100_debugfs_cp_csq_fifo_show(struct seq_file *m, void *unused)
2975{
2976        struct radeon_device *rdev = (struct radeon_device *)m->private;
2977        uint32_t csq_stat, csq2_stat, tmp;
2978        unsigned r_rptr, r_wptr, ib1_rptr, ib1_wptr, ib2_rptr, ib2_wptr;
2979        unsigned i;
2980
2981        seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
2982        seq_printf(m, "CP_CSQ_MODE 0x%08x\n", RREG32(RADEON_CP_CSQ_MODE));
2983        csq_stat = RREG32(RADEON_CP_CSQ_STAT);
2984        csq2_stat = RREG32(RADEON_CP_CSQ2_STAT);
2985        r_rptr = (csq_stat >> 0) & 0x3ff;
2986        r_wptr = (csq_stat >> 10) & 0x3ff;
2987        ib1_rptr = (csq_stat >> 20) & 0x3ff;
2988        ib1_wptr = (csq2_stat >> 0) & 0x3ff;
2989        ib2_rptr = (csq2_stat >> 10) & 0x3ff;
2990        ib2_wptr = (csq2_stat >> 20) & 0x3ff;
2991        seq_printf(m, "CP_CSQ_STAT 0x%08x\n", csq_stat);
2992        seq_printf(m, "CP_CSQ2_STAT 0x%08x\n", csq2_stat);
2993        seq_printf(m, "Ring rptr %u\n", r_rptr);
2994        seq_printf(m, "Ring wptr %u\n", r_wptr);
2995        seq_printf(m, "Indirect1 rptr %u\n", ib1_rptr);
2996        seq_printf(m, "Indirect1 wptr %u\n", ib1_wptr);
2997        seq_printf(m, "Indirect2 rptr %u\n", ib2_rptr);
2998        seq_printf(m, "Indirect2 wptr %u\n", ib2_wptr);
2999        /* FIXME: 0, 128, 640 depends on fifo setup see cp_init_kms
3000         * 128 = indirect1_start * 8 & 640 = indirect2_start * 8 */
3001        seq_printf(m, "Ring fifo:\n");
3002        for (i = 0; i < 256; i++) {
3003                WREG32(RADEON_CP_CSQ_ADDR, i << 2);
3004                tmp = RREG32(RADEON_CP_CSQ_DATA);
3005                seq_printf(m, "rfifo[%04d]=0x%08X\n", i, tmp);
3006        }
3007        seq_printf(m, "Indirect1 fifo:\n");
3008        for (i = 256; i <= 512; i++) {
3009                WREG32(RADEON_CP_CSQ_ADDR, i << 2);
3010                tmp = RREG32(RADEON_CP_CSQ_DATA);
3011                seq_printf(m, "ib1fifo[%04d]=0x%08X\n", i, tmp);
3012        }
3013        seq_printf(m, "Indirect2 fifo:\n");
3014        for (i = 640; i < ib1_wptr; i++) {
3015                WREG32(RADEON_CP_CSQ_ADDR, i << 2);
3016                tmp = RREG32(RADEON_CP_CSQ_DATA);
3017                seq_printf(m, "ib2fifo[%04d]=0x%08X\n", i, tmp);
3018        }
3019        return 0;
3020}
3021
3022static int r100_debugfs_mc_info_show(struct seq_file *m, void *unused)
3023{
3024        struct radeon_device *rdev = (struct radeon_device *)m->private;
3025        uint32_t tmp;
3026
3027        tmp = RREG32(RADEON_CONFIG_MEMSIZE);
3028        seq_printf(m, "CONFIG_MEMSIZE 0x%08x\n", tmp);
3029        tmp = RREG32(RADEON_MC_FB_LOCATION);
3030        seq_printf(m, "MC_FB_LOCATION 0x%08x\n", tmp);
3031        tmp = RREG32(RADEON_BUS_CNTL);
3032        seq_printf(m, "BUS_CNTL 0x%08x\n", tmp);
3033        tmp = RREG32(RADEON_MC_AGP_LOCATION);
3034        seq_printf(m, "MC_AGP_LOCATION 0x%08x\n", tmp);
3035        tmp = RREG32(RADEON_AGP_BASE);
3036        seq_printf(m, "AGP_BASE 0x%08x\n", tmp);
3037        tmp = RREG32(RADEON_HOST_PATH_CNTL);
3038        seq_printf(m, "HOST_PATH_CNTL 0x%08x\n", tmp);
3039        tmp = RREG32(0x01D0);
3040        seq_printf(m, "AIC_CTRL 0x%08x\n", tmp);
3041        tmp = RREG32(RADEON_AIC_LO_ADDR);
3042        seq_printf(m, "AIC_LO_ADDR 0x%08x\n", tmp);
3043        tmp = RREG32(RADEON_AIC_HI_ADDR);
3044        seq_printf(m, "AIC_HI_ADDR 0x%08x\n", tmp);
3045        tmp = RREG32(0x01E4);
3046        seq_printf(m, "AIC_TLB_ADDR 0x%08x\n", tmp);
3047        return 0;
3048}
3049
/*
 * Instantiate the debugfs attributes for the *_show callbacks above. The
 * resulting r100_debugfs_*_fops objects are the ones handed to
 * debugfs_create_file() by the r100_debugfs_*_init() helpers.
 */
DEFINE_SHOW_ATTRIBUTE(r100_debugfs_rbbm_info);
DEFINE_SHOW_ATTRIBUTE(r100_debugfs_cp_ring_info);
DEFINE_SHOW_ATTRIBUTE(r100_debugfs_cp_csq_fifo);
DEFINE_SHOW_ATTRIBUTE(r100_debugfs_mc_info);
3054
3055#endif
3056
/*
 * Register the read-only "r100_rbbm_info" debugfs file under the primary DRM
 * minor's debugfs directory. Compiles to an empty function when
 * CONFIG_DEBUG_FS is not defined.
 */
void r100_debugfs_rbbm_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
        debugfs_create_file("r100_rbbm_info", 0444,
                            rdev->ddev->primary->debugfs_root, rdev,
                            &r100_debugfs_rbbm_info_fops);
#endif
}
3066
/*
 * Register the read-only "r100_cp_ring_info" and "r100_cp_csq_fifo" debugfs
 * files under the primary DRM minor's debugfs directory. Compiles to an
 * empty function when CONFIG_DEBUG_FS is not defined.
 */
void r100_debugfs_cp_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
        struct dentry *dir = rdev->ddev->primary->debugfs_root;

        debugfs_create_file("r100_cp_ring_info", 0444, dir, rdev,
                            &r100_debugfs_cp_ring_info_fops);

        debugfs_create_file("r100_cp_csq_fifo", 0444, dir, rdev,
                            &r100_debugfs_cp_csq_fifo_fops);
#endif
}
3078
/*
 * Register the read-only "r100_mc_info" debugfs file under the primary DRM
 * minor's debugfs directory. Compiles to an empty function when
 * CONFIG_DEBUG_FS is not defined.
 */
void r100_debugfs_mc_info_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
        debugfs_create_file("r100_mc_info", 0444,
                            rdev->ddev->primary->debugfs_root, rdev,
                            &r100_debugfs_mc_info_fops);
#endif
}
3088
3089int r100_set_surface_reg(struct radeon_device *rdev, int reg,
3090                         uint32_t tiling_flags, uint32_t pitch,
3091                         uint32_t offset, uint32_t obj_size)
3092{
3093        int surf_index = reg * 16;
3094        int flags = 0;
3095
3096        if (rdev->family <= CHIP_RS200) {
3097                if ((tiling_flags & (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
3098                                 == (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
3099                        flags |= RADEON_SURF_TILE_COLOR_BOTH;
3100                if (tiling_flags & RADEON_TILING_MACRO)
3101                        flags |= RADEON_SURF_TILE_COLOR_MACRO;
3102                /* setting pitch to 0 disables tiling */
3103                if ((tiling_flags & (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
3104                                == 0)
3105                        pitch = 0;
3106        } else if (rdev->family <= CHIP_RV280) {
3107                if (tiling_flags & (RADEON_TILING_MACRO))
3108                        flags |= R200_SURF_TILE_COLOR_MACRO;
3109                if (tiling_flags & RADEON_TILING_MICRO)
3110                        flags |= R200_SURF_TILE_COLOR_MICRO;
3111        } else {
3112                if (tiling_flags & RADEON_TILING_MACRO)
3113                        flags |= R300_SURF_TILE_MACRO;
3114                if (tiling_flags & RADEON_TILING_MICRO)
3115                        flags |= R300_SURF_TILE_MICRO;
3116        }
3117
3118        if (tiling_flags & RADEON_TILING_SWAP_16BIT)
3119                flags |= RADEON_SURF_AP0_SWP_16BPP | RADEON_SURF_AP1_SWP_16BPP;
3120        if (tiling_flags & RADEON_TILING_SWAP_32BIT)
3121                flags |= RADEON_SURF_AP0_SWP_32BPP | RADEON_SURF_AP1_SWP_32BPP;
3122
3123        /* r100/r200 divide by 16 */
3124        if (rdev->family < CHIP_R300)
3125                flags |= pitch / 16;
3126        else
3127                flags |= pitch / 8;
3128
3129
3130        DRM_DEBUG_KMS("writing surface %d %d %x %x\n", reg, flags, offset, offset+obj_size-1);
3131        WREG32(RADEON_SURFACE0_INFO + surf_index, flags);
3132        WREG32(RADEON_SURFACE0_LOWER_BOUND + surf_index, offset);
3133        WREG32(RADEON_SURFACE0_UPPER_BOUND + surf_index, offset + obj_size - 1);
3134        return 0;
3135}
3136
3137void r100_clear_surface_reg(struct radeon_device *rdev, int reg)
3138{
3139        int surf_index = reg * 16;
3140        WREG32(RADEON_SURFACE0_INFO + surf_index, 0);
3141}
3142
3143void r100_bandwidth_update(struct radeon_device *rdev)
3144{
3145        fixed20_12 trcd_ff, trp_ff, tras_ff, trbs_ff, tcas_ff;
3146        fixed20_12 sclk_ff, mclk_ff, sclk_eff_ff, sclk_delay_ff;
3147        fixed20_12 peak_disp_bw, mem_bw, pix_clk, pix_clk2, temp_ff;
3148        fixed20_12 crit_point_ff = {0};
3149        uint32_t temp, data, mem_trcd, mem_trp, mem_tras;
3150        fixed20_12 memtcas_ff[8] = {
3151                dfixed_init(1),
3152                dfixed_init(2),
3153                dfixed_init(3),
3154                dfixed_init(0),
3155                dfixed_init_half(1),
3156                dfixed_init_half(2),
3157                dfixed_init(0),
3158        };
3159        fixed20_12 memtcas_rs480_ff[8] = {
3160                dfixed_init(0),
3161                dfixed_init(1),
3162                dfixed_init(2),
3163                dfixed_init(3),
3164                dfixed_init(0),
3165                dfixed_init_half(1),
3166                dfixed_init_half(2),
3167                dfixed_init_half(3),
3168        };
3169        fixed20_12 memtcas2_ff[8] = {
3170                dfixed_init(0),
3171                dfixed_init(1),
3172                dfixed_init(2),
3173                dfixed_init(3),
3174                dfixed_init(4),
3175                dfixed_init(5),
3176                dfixed_init(6),
3177                dfixed_init(7),
3178        };
3179        fixed20_12 memtrbs[8] = {
3180                dfixed_init(1),
3181                dfixed_init_half(1),
3182                dfixed_init(2),
3183                dfixed_init_half(2),
3184                dfixed_init(3),
3185                dfixed_init_half(3),
3186                dfixed_init(4),
3187                dfixed_init_half(4)
3188        };
3189        fixed20_12 memtrbs_r4xx[8] = {
3190                dfixed_init(4),
3191                dfixed_init(5),
3192                dfixed_init(6),
3193                dfixed_init(7),
3194                dfixed_init(8),
3195                dfixed_init(9),
3196                dfixed_init(10),
3197                dfixed_init(11)
3198        };
3199        fixed20_12 min_mem_eff;
3200        fixed20_12 mc_latency_sclk, mc_latency_mclk, k1;
3201        fixed20_12 cur_latency_mclk, cur_latency_sclk;
3202        fixed20_12 disp_latency, disp_latency_overhead, disp_drain_rate = {0},
3203                disp_drain_rate2, read_return_rate;
3204        fixed20_12 time_disp1_drop_priority;
3205        int c;
3206        int cur_size = 16;       /* in octawords */
3207        int critical_point = 0, critical_point2;
3208/*      uint32_t read_return_rate, time_disp1_drop_priority; */
3209        int stop_req, max_stop_req;
3210        struct drm_display_mode *mode1 = NULL;
3211        struct drm_display_mode *mode2 = NULL;
3212        uint32_t pixel_bytes1 = 0;
3213        uint32_t pixel_bytes2 = 0;
3214
3215        /* Guess line buffer size to be 8192 pixels */
3216        u32 lb_size = 8192;
3217
3218        if (!rdev->mode_info.mode_config_initialized)
3219                return;
3220
3221        radeon_update_display_priority(rdev);
3222
3223        if (rdev->mode_info.crtcs[0]->base.enabled) {
3224                const struct drm_framebuffer *fb =
3225                        rdev->mode_info.crtcs[0]->base.primary->fb;
3226
3227                mode1 = &rdev->mode_info.crtcs[0]->base.mode;
3228                pixel_bytes1 = fb->format->cpp[0];
3229        }
3230        if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
3231                if (rdev->mode_info.crtcs[1]->base.enabled) {
3232                        const struct drm_framebuffer *fb =
3233                                rdev->mode_info.crtcs[1]->base.primary->fb;
3234
3235                        mode2 = &rdev->mode_info.crtcs[1]->base.mode;
3236                        pixel_bytes2 = fb->format->cpp[0];
3237                }
3238        }
3239
3240        min_mem_eff.full = dfixed_const_8(0);
3241        /* get modes */
3242        if ((rdev->disp_priority == 2) && ASIC_IS_R300(rdev)) {
3243                uint32_t mc_init_misc_lat_timer = RREG32(R300_MC_INIT_MISC_LAT_TIMER);
3244                mc_init_misc_lat_timer &= ~(R300_MC_DISP1R_INIT_LAT_MASK << R300_MC_DISP1R_INIT_LAT_SHIFT);
3245                mc_init_misc_lat_timer &= ~(R300_MC_DISP0R_INIT_LAT_MASK << R300_MC_DISP0R_INIT_LAT_SHIFT);
3246                /* check crtc enables */
3247                if (mode2)
3248                        mc_init_misc_lat_timer |= (1 << R300_MC_DISP1R_INIT_LAT_SHIFT);
3249                if (mode1)
3250                        mc_init_misc_lat_timer |= (1 << R300_MC_DISP0R_INIT_LAT_SHIFT);
3251                WREG32(R300_MC_INIT_MISC_LAT_TIMER, mc_init_misc_lat_timer);
3252        }
3253
3254        /*
3255         * determine is there is enough bw for current mode
3256         */
3257        sclk_ff = rdev->pm.sclk;
3258        mclk_ff = rdev->pm.mclk;
3259
3260        temp = (rdev->mc.vram_width / 8) * (rdev->mc.vram_is_ddr ? 2 : 1);
3261        temp_ff.full = dfixed_const(temp);
3262        mem_bw.full = dfixed_mul(mclk_ff, temp_ff);
3263
3264        pix_clk.full = 0;
3265        pix_clk2.full = 0;
3266        peak_disp_bw.full = 0;
3267        if (mode1) {
3268                temp_ff.full = dfixed_const(1000);
3269                pix_clk.full = dfixed_const(mode1->clock); /* convert to fixed point */
3270                pix_clk.full = dfixed_div(pix_clk, temp_ff);
3271                temp_ff.full = dfixed_const(pixel_bytes1);
3272                peak_disp_bw.full += dfixed_mul(pix_clk, temp_ff);
3273        }
3274        if (mode2) {
3275                temp_ff.full = dfixed_const(1000);
3276                pix_clk2.full = dfixed_const(mode2->clock); /* convert to fixed point */
3277                pix_clk2.full = dfixed_div(pix_clk2, temp_ff);
3278                temp_ff.full = dfixed_const(pixel_bytes2);
3279                peak_disp_bw.full += dfixed_mul(pix_clk2, temp_ff);
3280        }
3281
3282        mem_bw.full = dfixed_mul(mem_bw, min_mem_eff);
3283        if (peak_disp_bw.full >= mem_bw.full) {
3284                DRM_ERROR("You may not have enough display bandwidth for current mode\n"
3285                          "If you have flickering problem, try to lower resolution, refresh rate, or color depth\n");
3286        }
3287
3288        /*  Get values from the EXT_MEM_CNTL register...converting its contents. */
3289        temp = RREG32(RADEON_MEM_TIMING_CNTL);
3290        if ((rdev->family == CHIP_RV100) || (rdev->flags & RADEON_IS_IGP)) { /* RV100, M6, IGPs */
3291                mem_trcd = ((temp >> 2) & 0x3) + 1;
3292                mem_trp  = ((temp & 0x3)) + 1;
3293                mem_tras = ((temp & 0x70) >> 4) + 1;
3294        } else if (rdev->family == CHIP_R300 ||
3295                   rdev->family == CHIP_R350) { /* r300, r350 */
3296                mem_trcd = (temp & 0x7) + 1;
3297                mem_trp = ((temp >> 8) & 0x7) + 1;
3298                mem_tras = ((temp >> 11) & 0xf) + 4;
3299        } else if (rdev->family == CHIP_RV350 ||
3300                   rdev->family == CHIP_RV380) {
3301                /* rv3x0 */
3302                mem_trcd = (temp & 0x7) + 3;
3303                mem_trp = ((temp >> 8) & 0x7) + 3;
3304                mem_tras = ((temp >> 11) & 0xf) + 6;
3305        } else if (rdev->family == CHIP_R420 ||
3306                   rdev->family == CHIP_R423 ||
3307                   rdev->family == CHIP_RV410) {
3308                /* r4xx */
3309                mem_trcd = (temp & 0xf) + 3;
3310                if (mem_trcd > 15)
3311                        mem_trcd = 15;
3312                mem_trp = ((temp >> 8) & 0xf) + 3;
3313                if (mem_trp > 15)
3314                        mem_trp = 15;
3315                mem_tras = ((temp >> 12) & 0x1f) + 6;
3316                if (mem_tras > 31)
3317                        mem_tras = 31;
3318        } else { /* RV200, R200 */
3319                mem_trcd = (temp & 0x7) + 1;
3320                mem_trp = ((temp >> 8) & 0x7) + 1;
3321                mem_tras = ((temp >> 12) & 0xf) + 4;
3322        }
3323        /* convert to FF */
3324        trcd_ff.full = dfixed_const(mem_trcd);
3325        trp_ff.full = dfixed_const(mem_trp);
3326        tras_ff.full = dfixed_const(mem_tras);
3327
3328        /* Get values from the MEM_SDRAM_MODE_REG register...converting its */
3329        temp = RREG32(RADEON_MEM_SDRAM_MODE_REG);
3330        data = (temp & (7 << 20)) >> 20;
3331        if ((rdev->family == CHIP_RV100) || rdev->flags & RADEON_IS_IGP) {
3332                if (rdev->family == CHIP_RS480) /* don't think rs400 */
3333                        tcas_ff = memtcas_rs480_ff[data];
3334                else
3335                        tcas_ff = memtcas_ff[data];
3336        } else
3337                tcas_ff = memtcas2_ff[data];
3338
3339        if (rdev->family == CHIP_RS400 ||
3340            rdev->family == CHIP_RS480) {
3341                /* extra cas latency stored in bits 23-25 0-4 clocks */
3342                data = (temp >> 23) & 0x7;
3343                if (data < 5)
3344                        tcas_ff.full += dfixed_const(data);
3345        }
3346
3347        if (ASIC_IS_R300(rdev) && !(rdev->flags & RADEON_IS_IGP)) {
3348                /* on the R300, Tcas is included in Trbs.
3349                 */
3350                temp = RREG32(RADEON_MEM_CNTL);
3351                data = (R300_MEM_NUM_CHANNELS_MASK & temp);
3352                if (data == 1) {
3353                        if (R300_MEM_USE_CD_CH_ONLY & temp) {
3354                                temp = RREG32(R300_MC_IND_INDEX);
3355                                temp &= ~R300_MC_IND_ADDR_MASK;
3356                                temp |= R300_MC_READ_CNTL_CD_mcind;
3357                                WREG32(R300_MC_IND_INDEX, temp);
3358                                temp = RREG32(R300_MC_IND_DATA);
3359                                data = (R300_MEM_RBS_POSITION_C_MASK & temp);
3360                        } else {
3361                                temp = RREG32(R300_MC_READ_CNTL_AB);
3362                                data = (R300_MEM_RBS_POSITION_A_MASK & temp);
3363                        }
3364                } else {
3365                        temp = RREG32(R300_MC_READ_CNTL_AB);
3366                        data = (R300_MEM_RBS_POSITION_A_MASK & temp);
3367                }
3368                if (rdev->family == CHIP_RV410 ||
3369                    rdev->family == CHIP_R420 ||
3370                    rdev->family == CHIP_R423)
3371                        trbs_ff = memtrbs_r4xx[data];
3372                else
3373                        trbs_ff = memtrbs[data];
3374                tcas_ff.full += trbs_ff.full;
3375        }
3376
3377        sclk_eff_ff.full = sclk_ff.full;
3378
3379        if (rdev->flags & RADEON_IS_AGP) {
3380                fixed20_12 agpmode_ff;
3381                agpmode_ff.full = dfixed_const(radeon_agpmode);
3382                temp_ff.full = dfixed_const_666(16);
3383                sclk_eff_ff.full -= dfixed_mul(agpmode_ff, temp_ff);
3384        }
3385        /* TODO PCIE lanes may affect this - agpmode == 16?? */
3386
3387        if (ASIC_IS_R300(rdev)) {
3388                sclk_delay_ff.full = dfixed_const(250);
3389        } else {
3390                if ((rdev->family == CHIP_RV100) ||
3391                    rdev->flags & RADEON_IS_IGP) {
3392                        if (rdev->mc.vram_is_ddr)
3393                                sclk_delay_ff.full = dfixed_const(41);
3394                        else
3395                                sclk_delay_ff.full = dfixed_const(33);
3396                } else {
3397                        if (rdev->mc.vram_width == 128)
3398                                sclk_delay_ff.full = dfixed_const(57);
3399                        else
3400                                sclk_delay_ff.full = dfixed_const(41);
3401                }
3402        }
3403
3404        mc_latency_sclk.full = dfixed_div(sclk_delay_ff, sclk_eff_ff);
3405
3406        if (rdev->mc.vram_is_ddr) {
3407                if (rdev->mc.vram_width == 32) {
3408                        k1.full = dfixed_const(40);
3409                        c  = 3;
3410                } else {
3411                        k1.full = dfixed_const(20);
3412                        c  = 1;
3413                }
3414        } else {
3415                k1.full = dfixed_const(40);
3416                c  = 3;
3417        }
3418
3419        temp_ff.full = dfixed_const(2);
3420        mc_latency_mclk.full = dfixed_mul(trcd_ff, temp_ff);
3421        temp_ff.full = dfixed_const(c);
3422        mc_latency_mclk.full += dfixed_mul(tcas_ff, temp_ff);
3423        temp_ff.full = dfixed_const(4);
3424        mc_latency_mclk.full += dfixed_mul(tras_ff, temp_ff);
3425        mc_latency_mclk.full += dfixed_mul(trp_ff, temp_ff);
3426        mc_latency_mclk.full += k1.full;
3427
3428        mc_latency_mclk.full = dfixed_div(mc_latency_mclk, mclk_ff);
3429        mc_latency_mclk.full += dfixed_div(temp_ff, sclk_eff_ff);
3430
3431        /*
3432          HW cursor time assuming worst case of full size colour cursor.
3433        */
3434        temp_ff.full = dfixed_const((2 * (cur_size - (rdev->mc.vram_is_ddr + 1))));
3435        temp_ff.full += trcd_ff.full;
3436        if (temp_ff.full < tras_ff.full)
3437                temp_ff.full = tras_ff.full;
3438        cur_latency_mclk.full = dfixed_div(temp_ff, mclk_ff);
3439
3440        temp_ff.full = dfixed_const(cur_size);
3441        cur_latency_sclk.full = dfixed_div(temp_ff, sclk_eff_ff);
3442        /*
3443          Find the total latency for the display data.
3444        */
3445        disp_latency_overhead.full = dfixed_const(8);
3446        disp_latency_overhead.full = dfixed_div(disp_latency_overhead, sclk_ff);
3447        mc_latency_mclk.full += disp_latency_overhead.full + cur_latency_mclk.full;
3448        mc_latency_sclk.full += disp_latency_overhead.full + cur_latency_sclk.full;
3449
3450        if (mc_latency_mclk.full > mc_latency_sclk.full)
3451                disp_latency.full = mc_latency_mclk.full;
3452        else
3453                disp_latency.full = mc_latency_sclk.full;
3454
3455        /* setup Max GRPH_STOP_REQ default value */
3456        if (ASIC_IS_RV100(rdev))
3457                max_stop_req = 0x5c;
3458        else
3459                max_stop_req = 0x7c;
3460
3461        if (mode1) {
3462                /*  CRTC1
3463                    Set GRPH_BUFFER_CNTL register using h/w defined optimal values.
3464                    GRPH_STOP_REQ <= MIN[ 0x7C, (CRTC_H_DISP + 1) * (bit depth) / 0x10 ]
3465                */
3466                stop_req = mode1->hdisplay * pixel_bytes1 / 16;
3467
3468                if (stop_req > max_stop_req)
3469                        stop_req = max_stop_req;
3470
3471                /*
3472                  Find the drain rate of the display buffer.
3473                */
3474                temp_ff.full = dfixed_const((16/pixel_bytes1));
3475                disp_drain_rate.full = dfixed_div(pix_clk, temp_ff);
3476
3477                /*
3478                  Find the critical point of the display buffer.
3479                */
3480                crit_point_ff.full = dfixed_mul(disp_drain_rate, disp_latency);
3481                crit_point_ff.full += dfixed_const_half(0);
3482
3483                critical_point = dfixed_trunc(crit_point_ff);
3484
3485                if (rdev->disp_priority == 2) {
3486                        critical_point = 0;
3487                }
3488
3489                /*
3490                  The critical point should never be above max_stop_req-4.  Setting
3491                  GRPH_CRITICAL_CNTL = 0 will thus force high priority all the time.
3492                */
3493                if (max_stop_req - critical_point < 4)
3494                        critical_point = 0;
3495
3496                if (critical_point == 0 && mode2 && rdev->family == CHIP_R300) {
3497                        /* some R300 cards have problem with this set to 0, when CRTC2 is enabled.*/
3498                        critical_point = 0x10;
3499                }
3500
3501                temp = RREG32(RADEON_GRPH_BUFFER_CNTL);
3502                temp &= ~(RADEON_GRPH_STOP_REQ_MASK);
3503                temp |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT);
3504                temp &= ~(RADEON_GRPH_START_REQ_MASK);
3505                if ((rdev->family == CHIP_R350) &&
3506                    (stop_req > 0x15)) {
3507                        stop_req -= 0x10;
3508                }
3509                temp |= (stop_req << RADEON_GRPH_START_REQ_SHIFT);
3510                temp |= RADEON_GRPH_BUFFER_SIZE;
3511                temp &= ~(RADEON_GRPH_CRITICAL_CNTL   |
3512                          RADEON_GRPH_CRITICAL_AT_SOF |
3513                          RADEON_GRPH_STOP_CNTL);
3514                /*
3515                  Write the result into the register.
3516                */
3517                WREG32(RADEON_GRPH_BUFFER_CNTL, ((temp & ~RADEON_GRPH_CRITICAL_POINT_MASK) |
3518                                                       (critical_point << RADEON_GRPH_CRITICAL_POINT_SHIFT)));
3519
3520#if 0
3521                if ((rdev->family == CHIP_RS400) ||
3522                    (rdev->family == CHIP_RS480)) {
3523                        /* attempt to program RS400 disp regs correctly ??? */
3524                        temp = RREG32(RS400_DISP1_REG_CNTL);
3525                        temp &= ~(RS400_DISP1_START_REQ_LEVEL_MASK |
3526                                  RS400_DISP1_STOP_REQ_LEVEL_MASK);
3527                        WREG32(RS400_DISP1_REQ_CNTL1, (temp |
3528                                                       (critical_point << RS400_DISP1_START_REQ_LEVEL_SHIFT) |
3529                                                       (critical_point << RS400_DISP1_STOP_REQ_LEVEL_SHIFT)));
3530                        temp = RREG32(RS400_DMIF_MEM_CNTL1);
3531                        temp &= ~(RS400_DISP1_CRITICAL_POINT_START_MASK |
3532                                  RS400_DISP1_CRITICAL_POINT_STOP_MASK);
3533                        WREG32(RS400_DMIF_MEM_CNTL1, (temp |
3534                                                      (critical_point << RS400_DISP1_CRITICAL_POINT_START_SHIFT) |
3535                                                      (critical_point << RS400_DISP1_CRITICAL_POINT_STOP_SHIFT)));
3536                }
3537#endif
3538
3539                DRM_DEBUG_KMS("GRPH_BUFFER_CNTL from to %x\n",
3540                          /*      (unsigned int)info->SavedReg->grph_buffer_cntl, */
3541                          (unsigned int)RREG32(RADEON_GRPH_BUFFER_CNTL));
3542        }
3543
3544        if (mode2) {
3545                u32 grph2_cntl;
3546                stop_req = mode2->hdisplay * pixel_bytes2 / 16;
3547
3548                if (stop_req > max_stop_req)
3549                        stop_req = max_stop_req;
3550
3551                /*
3552                  Find the drain rate of the display buffer.
3553                */
3554                temp_ff.full = dfixed_const((16/pixel_bytes2));
3555                disp_drain_rate2.full = dfixed_div(pix_clk2, temp_ff);
3556
3557                grph2_cntl = RREG32(RADEON_GRPH2_BUFFER_CNTL);
3558                grph2_cntl &= ~(RADEON_GRPH_STOP_REQ_MASK);
3559                grph2_cntl |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT);
3560                grph2_cntl &= ~(RADEON_GRPH_START_REQ_MASK);
3561                if ((rdev->family == CHIP_R350) &&
3562                    (stop_req > 0x15)) {
3563                        stop_req -= 0x10;
3564                }
3565                grph2_cntl |= (stop_req << RADEON_GRPH_START_REQ_SHIFT);
3566                grph2_cntl |= RADEON_GRPH_BUFFER_SIZE;
3567                grph2_cntl &= ~(RADEON_GRPH_CRITICAL_CNTL   |
3568                          RADEON_GRPH_CRITICAL_AT_SOF |
3569                          RADEON_GRPH_STOP_CNTL);
3570
3571                if ((rdev->family == CHIP_RS100) ||
3572                    (rdev->family == CHIP_RS200))
3573                        critical_point2 = 0;
3574                else {
3575                        temp = (rdev->mc.vram_width * rdev->mc.vram_is_ddr + 1)/128;
3576                        temp_ff.full = dfixed_const(temp);
3577                        temp_ff.full = dfixed_mul(mclk_ff, temp_ff);
3578                        if (sclk_ff.full < temp_ff.full)
3579                                temp_ff.full = sclk_ff.full;
3580
3581                        read_return_rate.full = temp_ff.full;
3582
3583                        if (mode1) {
3584                                temp_ff.full = read_return_rate.full - disp_drain_rate.full;
3585                                time_disp1_drop_priority.full = dfixed_div(crit_point_ff, temp_ff);
3586                        } else {
3587                                time_disp1_drop_priority.full = 0;
3588                        }
3589                        crit_point_ff.full = disp_latency.full + time_disp1_drop_priority.full + disp_latency.full;
3590                        crit_point_ff.full = dfixed_mul(crit_point_ff, disp_drain_rate2);
3591                        crit_point_ff.full += dfixed_const_half(0);
3592
3593                        critical_point2 = dfixed_trunc(crit_point_ff);
3594
3595                        if (rdev->disp_priority == 2) {
3596                                critical_point2 = 0;
3597                        }
3598
3599                        if (max_stop_req - critical_point2 < 4)
3600                                critical_point2 = 0;
3601
3602                }
3603
3604                if (critical_point2 == 0 && rdev->family == CHIP_R300) {
3605                        /* some R300 cards have problem with this set to 0 */
3606                        critical_point2 = 0x10;
3607                }
3608
3609                WREG32(RADEON_GRPH2_BUFFER_CNTL, ((grph2_cntl & ~RADEON_GRPH_CRITICAL_POINT_MASK) |
3610                                                  (critical_point2 << RADEON_GRPH_CRITICAL_POINT_SHIFT)));
3611
3612                if ((rdev->family == CHIP_RS400) ||
3613                    (rdev->family == CHIP_RS480)) {
3614#if 0
3615                        /* attempt to program RS400 disp2 regs correctly ??? */
3616                        temp = RREG32(RS400_DISP2_REQ_CNTL1);
3617                        temp &= ~(RS400_DISP2_START_REQ_LEVEL_MASK |
3618                                  RS400_DISP2_STOP_REQ_LEVEL_MASK);
3619                        WREG32(RS400_DISP2_REQ_CNTL1, (temp |
3620                                                       (critical_point2 << RS400_DISP1_START_REQ_LEVEL_SHIFT) |
3621                                                       (critical_point2 << RS400_DISP1_STOP_REQ_LEVEL_SHIFT)));
3622                        temp = RREG32(RS400_DISP2_REQ_CNTL2);
3623                        temp &= ~(RS400_DISP2_CRITICAL_POINT_START_MASK |
3624                                  RS400_DISP2_CRITICAL_POINT_STOP_MASK);
3625                        WREG32(RS400_DISP2_REQ_CNTL2, (temp |
3626                                                       (critical_point2 << RS400_DISP2_CRITICAL_POINT_START_SHIFT) |
3627                                                       (critical_point2 << RS400_DISP2_CRITICAL_POINT_STOP_SHIFT)));
3628#endif
3629                        WREG32(RS400_DISP2_REQ_CNTL1, 0x105DC1CC);
3630                        WREG32(RS400_DISP2_REQ_CNTL2, 0x2749D000);
3631                        WREG32(RS400_DMIF_MEM_CNTL1,  0x29CA71DC);
3632                        WREG32(RS400_DISP1_REQ_CNTL1, 0x28FBC3AC);
3633                }
3634
3635                DRM_DEBUG_KMS("GRPH2_BUFFER_CNTL from to %x\n",
3636                          (unsigned int)RREG32(RADEON_GRPH2_BUFFER_CNTL));
3637        }
3638
3639        /* Save number of lines the linebuffer leads before the scanout */
3640        if (mode1)
3641            rdev->mode_info.crtcs[0]->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode1->crtc_hdisplay);
3642
3643        if (mode2)
3644            rdev->mode_info.crtcs[1]->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode2->crtc_hdisplay);
3645}
3646
3647int r100_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3648{
3649        uint32_t scratch;
3650        uint32_t tmp = 0;
3651        unsigned i;
3652        int r;
3653
3654        r = radeon_scratch_get(rdev, &scratch);
3655        if (r) {
3656                DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3657                return r;
3658        }
3659        WREG32(scratch, 0xCAFEDEAD);
3660        r = radeon_ring_lock(rdev, ring, 2);
3661        if (r) {
3662                DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3663                radeon_scratch_free(rdev, scratch);
3664                return r;
3665        }
3666        radeon_ring_write(ring, PACKET0(scratch, 0));
3667        radeon_ring_write(ring, 0xDEADBEEF);
3668        radeon_ring_unlock_commit(rdev, ring, false);
3669        for (i = 0; i < rdev->usec_timeout; i++) {
3670                tmp = RREG32(scratch);
3671                if (tmp == 0xDEADBEEF) {
3672                        break;
3673                }
3674                udelay(1);
3675        }
3676        if (i < rdev->usec_timeout) {
3677                DRM_INFO("ring test succeeded in %d usecs\n", i);
3678        } else {
3679                DRM_ERROR("radeon: ring test failed (scratch(0x%04X)=0x%08X)\n",
3680                          scratch, tmp);
3681                r = -EINVAL;
3682        }
3683        radeon_scratch_free(rdev, scratch);
3684        return r;
3685}
3686
3687void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3688{
3689        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3690
3691        if (ring->rptr_save_reg) {
3692                u32 next_rptr = ring->wptr + 2 + 3;
3693                radeon_ring_write(ring, PACKET0(ring->rptr_save_reg, 0));
3694                radeon_ring_write(ring, next_rptr);
3695        }
3696
3697        radeon_ring_write(ring, PACKET0(RADEON_CP_IB_BASE, 1));
3698        radeon_ring_write(ring, ib->gpu_addr);
3699        radeon_ring_write(ring, ib->length_dw);
3700}
3701
3702int r100_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3703{
3704        struct radeon_ib ib;
3705        uint32_t scratch;
3706        uint32_t tmp = 0;
3707        unsigned i;
3708        int r;
3709
3710        r = radeon_scratch_get(rdev, &scratch);
3711        if (r) {
3712                DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3713                return r;
3714        }
3715        WREG32(scratch, 0xCAFEDEAD);
3716        r = radeon_ib_get(rdev, RADEON_RING_TYPE_GFX_INDEX, &ib, NULL, 256);
3717        if (r) {
3718                DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3719                goto free_scratch;
3720        }
3721        ib.ptr[0] = PACKET0(scratch, 0);
3722        ib.ptr[1] = 0xDEADBEEF;
3723        ib.ptr[2] = PACKET2(0);
3724        ib.ptr[3] = PACKET2(0);
3725        ib.ptr[4] = PACKET2(0);
3726        ib.ptr[5] = PACKET2(0);
3727        ib.ptr[6] = PACKET2(0);
3728        ib.ptr[7] = PACKET2(0);
3729        ib.length_dw = 8;
3730        r = radeon_ib_schedule(rdev, &ib, NULL, false);
3731        if (r) {
3732                DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3733                goto free_ib;
3734        }
3735        r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3736                RADEON_USEC_IB_TEST_TIMEOUT));
3737        if (r < 0) {
3738                DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3739                goto free_ib;
3740        } else if (r == 0) {
3741                DRM_ERROR("radeon: fence wait timed out.\n");
3742                r = -ETIMEDOUT;
3743                goto free_ib;
3744        }
3745        r = 0;
3746        for (i = 0; i < rdev->usec_timeout; i++) {
3747                tmp = RREG32(scratch);
3748                if (tmp == 0xDEADBEEF) {
3749                        break;
3750                }
3751                udelay(1);
3752        }
3753        if (i < rdev->usec_timeout) {
3754                DRM_INFO("ib test succeeded in %u usecs\n", i);
3755        } else {
3756                DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3757                          scratch, tmp);
3758                r = -EINVAL;
3759        }
3760free_ib:
3761        radeon_ib_free(rdev, &ib);
3762free_scratch:
3763        radeon_scratch_free(rdev, scratch);
3764        return r;
3765}
3766
3767void r100_mc_stop(struct radeon_device *rdev, struct r100_mc_save *save)
3768{
3769        /* Shutdown CP we shouldn't need to do that but better be safe than
3770         * sorry
3771         */
3772        rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3773        WREG32(R_000740_CP_CSQ_CNTL, 0);
3774
3775        /* Save few CRTC registers */
3776        save->GENMO_WT = RREG8(R_0003C2_GENMO_WT);
3777        save->CRTC_EXT_CNTL = RREG32(R_000054_CRTC_EXT_CNTL);
3778        save->CRTC_GEN_CNTL = RREG32(R_000050_CRTC_GEN_CNTL);
3779        save->CUR_OFFSET = RREG32(R_000260_CUR_OFFSET);
3780        if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
3781                save->CRTC2_GEN_CNTL = RREG32(R_0003F8_CRTC2_GEN_CNTL);
3782                save->CUR2_OFFSET = RREG32(R_000360_CUR2_OFFSET);
3783        }
3784
3785        /* Disable VGA aperture access */
3786        WREG8(R_0003C2_GENMO_WT, C_0003C2_VGA_RAM_EN & save->GENMO_WT);
3787        /* Disable cursor, overlay, crtc */
3788        WREG32(R_000260_CUR_OFFSET, save->CUR_OFFSET | S_000260_CUR_LOCK(1));
3789        WREG32(R_000054_CRTC_EXT_CNTL, save->CRTC_EXT_CNTL |
3790                                        S_000054_CRTC_DISPLAY_DIS(1));
3791        WREG32(R_000050_CRTC_GEN_CNTL,
3792                        (C_000050_CRTC_CUR_EN & save->CRTC_GEN_CNTL) |
3793                        S_000050_CRTC_DISP_REQ_EN_B(1));
3794        WREG32(R_000420_OV0_SCALE_CNTL,
3795                C_000420_OV0_OVERLAY_EN & RREG32(R_000420_OV0_SCALE_CNTL));
3796        WREG32(R_000260_CUR_OFFSET, C_000260_CUR_LOCK & save->CUR_OFFSET);
3797        if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
3798                WREG32(R_000360_CUR2_OFFSET, save->CUR2_OFFSET |
3799                                                S_000360_CUR2_LOCK(1));
3800                WREG32(R_0003F8_CRTC2_GEN_CNTL,
3801                        (C_0003F8_CRTC2_CUR_EN & save->CRTC2_GEN_CNTL) |
3802                        S_0003F8_CRTC2_DISPLAY_DIS(1) |
3803                        S_0003F8_CRTC2_DISP_REQ_EN_B(1));
3804                WREG32(R_000360_CUR2_OFFSET,
3805                        C_000360_CUR2_LOCK & save->CUR2_OFFSET);
3806        }
3807}
3808
3809void r100_mc_resume(struct radeon_device *rdev, struct r100_mc_save *save)
3810{
3811        /* Update base address for crtc */
3812        WREG32(R_00023C_DISPLAY_BASE_ADDR, rdev->mc.vram_start);
3813        if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
3814                WREG32(R_00033C_CRTC2_DISPLAY_BASE_ADDR, rdev->mc.vram_start);
3815        }
3816        /* Restore CRTC registers */
3817        WREG8(R_0003C2_GENMO_WT, save->GENMO_WT);
3818        WREG32(R_000054_CRTC_EXT_CNTL, save->CRTC_EXT_CNTL);
3819        WREG32(R_000050_CRTC_GEN_CNTL, save->CRTC_GEN_CNTL);
3820        if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
3821                WREG32(R_0003F8_CRTC2_GEN_CNTL, save->CRTC2_GEN_CNTL);
3822        }
3823}
3824
3825void r100_vga_render_disable(struct radeon_device *rdev)
3826{
3827        u32 tmp;
3828
3829        tmp = RREG8(R_0003C2_GENMO_WT);
3830        WREG8(R_0003C2_GENMO_WT, C_0003C2_VGA_RAM_EN & tmp);
3831}
3832
3833static void r100_mc_program(struct radeon_device *rdev)
3834{
3835        struct r100_mc_save save;
3836
3837        /* Stops all mc clients */
3838        r100_mc_stop(rdev, &save);
3839        if (rdev->flags & RADEON_IS_AGP) {
3840                WREG32(R_00014C_MC_AGP_LOCATION,
3841                        S_00014C_MC_AGP_START(rdev->mc.gtt_start >> 16) |
3842                        S_00014C_MC_AGP_TOP(rdev->mc.gtt_end >> 16));
3843                WREG32(R_000170_AGP_BASE, lower_32_bits(rdev->mc.agp_base));
3844                if (rdev->family > CHIP_RV200)
3845                        WREG32(R_00015C_AGP_BASE_2,
3846                                upper_32_bits(rdev->mc.agp_base) & 0xff);
3847        } else {
3848                WREG32(R_00014C_MC_AGP_LOCATION, 0x0FFFFFFF);
3849                WREG32(R_000170_AGP_BASE, 0);
3850                if (rdev->family > CHIP_RV200)
3851                        WREG32(R_00015C_AGP_BASE_2, 0);
3852        }
3853        /* Wait for mc idle */
3854        if (r100_mc_wait_for_idle(rdev))
3855                dev_warn(rdev->dev, "Wait for MC idle timeout.\n");
3856        /* Program MC, should be a 32bits limited address space */
3857        WREG32(R_000148_MC_FB_LOCATION,
3858                S_000148_MC_FB_START(rdev->mc.vram_start >> 16) |
3859                S_000148_MC_FB_TOP(rdev->mc.vram_end >> 16));
3860        r100_mc_resume(rdev, &save);
3861}
3862
3863static void r100_clock_startup(struct radeon_device *rdev)
3864{
3865        u32 tmp;
3866
3867        if (radeon_dynclks != -1 && radeon_dynclks)
3868                radeon_legacy_set_clock_gating(rdev, 1);
3869        /* We need to force on some of the block */
3870        tmp = RREG32_PLL(R_00000D_SCLK_CNTL);
3871        tmp |= S_00000D_FORCE_CP(1) | S_00000D_FORCE_VIP(1);
3872        if ((rdev->family == CHIP_RV250) || (rdev->family == CHIP_RV280))
3873                tmp |= S_00000D_FORCE_DISP1(1) | S_00000D_FORCE_DISP2(1);
3874        WREG32_PLL(R_00000D_SCLK_CNTL, tmp);
3875}
3876
3877static int r100_startup(struct radeon_device *rdev)
3878{
3879        int r;
3880
3881        /* set common regs */
3882        r100_set_common_regs(rdev);
3883        /* program mc */
3884        r100_mc_program(rdev);
3885        /* Resume clock */
3886        r100_clock_startup(rdev);
3887        /* Initialize GART (initialize after TTM so we can allocate
3888         * memory through TTM but finalize after TTM) */
3889        r100_enable_bm(rdev);
3890        if (rdev->flags & RADEON_IS_PCI) {
3891                r = r100_pci_gart_enable(rdev);
3892                if (r)
3893                        return r;
3894        }
3895
3896        /* allocate wb buffer */
3897        r = radeon_wb_init(rdev);
3898        if (r)
3899                return r;
3900
3901        r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
3902        if (r) {
3903                dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
3904                return r;
3905        }
3906
3907        /* Enable IRQ */
3908        if (!rdev->irq.installed) {
3909                r = radeon_irq_kms_init(rdev);
3910                if (r)
3911                        return r;
3912        }
3913
3914        r100_irq_set(rdev);
3915        rdev->config.r100.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
3916        /* 1M ring buffer */
3917        r = r100_cp_init(rdev, 1024 * 1024);
3918        if (r) {
3919                dev_err(rdev->dev, "failed initializing CP (%d).\n", r);
3920                return r;
3921        }
3922
3923        r = radeon_ib_pool_init(rdev);
3924        if (r) {
3925                dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
3926                return r;
3927        }
3928
3929        return 0;
3930}
3931
3932int r100_resume(struct radeon_device *rdev)
3933{
3934        int r;
3935
3936        /* Make sur GART are not working */
3937        if (rdev->flags & RADEON_IS_PCI)
3938                r100_pci_gart_disable(rdev);
3939        /* Resume clock before doing reset */
3940        r100_clock_startup(rdev);
3941        /* Reset gpu before posting otherwise ATOM will enter infinite loop */
3942        if (radeon_asic_reset(rdev)) {
3943                dev_warn(rdev->dev, "GPU reset failed ! (0xE40=0x%08X, 0x7C0=0x%08X)\n",
3944                        RREG32(R_000E40_RBBM_STATUS),
3945                        RREG32(R_0007C0_CP_STAT));
3946        }
3947        /* post */
3948        radeon_combios_asic_init(rdev->ddev);
3949        /* Resume clock after posting */
3950        r100_clock_startup(rdev);
3951        /* Initialize surface registers */
3952        radeon_surface_init(rdev);
3953
3954        rdev->accel_working = true;
3955        r = r100_startup(rdev);
3956        if (r) {
3957                rdev->accel_working = false;
3958        }
3959        return r;
3960}
3961
3962int r100_suspend(struct radeon_device *rdev)
3963{
3964        radeon_pm_suspend(rdev);
3965        r100_cp_disable(rdev);
3966        radeon_wb_disable(rdev);
3967        r100_irq_disable(rdev);
3968        if (rdev->flags & RADEON_IS_PCI)
3969                r100_pci_gart_disable(rdev);
3970        return 0;
3971}
3972
3973void r100_fini(struct radeon_device *rdev)
3974{
3975        radeon_pm_fini(rdev);
3976        r100_cp_fini(rdev);
3977        radeon_wb_fini(rdev);
3978        radeon_ib_pool_fini(rdev);
3979        radeon_gem_fini(rdev);
3980        if (rdev->flags & RADEON_IS_PCI)
3981                r100_pci_gart_fini(rdev);
3982        radeon_agp_fini(rdev);
3983        radeon_irq_kms_fini(rdev);
3984        radeon_fence_driver_fini(rdev);
3985        radeon_bo_fini(rdev);
3986        radeon_atombios_fini(rdev);
3987        kfree(rdev->bios);
3988        rdev->bios = NULL;
3989}
3990
3991/*
3992 * Due to how kexec works, it can leave the hw fully initialised when it
3993 * boots the new kernel. However doing our init sequence with the CP and
3994 * WB stuff setup causes GPU hangs on the RN50 at least. So at startup
3995 * do some quick sanity checks and restore sane values to avoid this
3996 * problem.
3997 */
3998void r100_restore_sanity(struct radeon_device *rdev)
3999{
4000        u32 tmp;
4001
4002        tmp = RREG32(RADEON_CP_CSQ_CNTL);
4003        if (tmp) {
4004                WREG32(RADEON_CP_CSQ_CNTL, 0);
4005        }
4006        tmp = RREG32(RADEON_CP_RB_CNTL);
4007        if (tmp) {
4008                WREG32(RADEON_CP_RB_CNTL, 0);
4009        }
4010        tmp = RREG32(RADEON_SCRATCH_UMSK);
4011        if (tmp) {
4012                WREG32(RADEON_SCRATCH_UMSK, 0);
4013        }
4014}
4015
4016int r100_init(struct radeon_device *rdev)
4017{
4018        int r;
4019
4020        /* Register debugfs file specific to this group of asics */
4021        r100_debugfs_mc_info_init(rdev);
4022        /* Disable VGA */
4023        r100_vga_render_disable(rdev);
4024        /* Initialize scratch registers */
4025        radeon_scratch_init(rdev);
4026        /* Initialize surface registers */
4027        radeon_surface_init(rdev);
4028        /* sanity check some register to avoid hangs like after kexec */
4029        r100_restore_sanity(rdev);
4030        /* TODO: disable VGA need to use VGA request */
4031        /* BIOS*/
4032        if (!radeon_get_bios(rdev)) {
4033                if (ASIC_IS_AVIVO(rdev))
4034                        return -EINVAL;
4035        }
4036        if (rdev->is_atom_bios) {
4037                dev_err(rdev->dev, "Expecting combios for RS400/RS480 GPU\n");
4038                return -EINVAL;
4039        } else {
4040                r = radeon_combios_init(rdev);
4041                if (r)
4042                        return r;
4043        }
4044        /* Reset gpu before posting otherwise ATOM will enter infinite loop */
4045        if (radeon_asic_reset(rdev)) {
4046                dev_warn(rdev->dev,
4047                        "GPU reset failed ! (0xE40=0x%08X, 0x7C0=0x%08X)\n",
4048                        RREG32(R_000E40_RBBM_STATUS),
4049                        RREG32(R_0007C0_CP_STAT));
4050        }
4051        /* check if cards are posted or not */
4052        if (radeon_boot_test_post_card(rdev) == false)
4053                return -EINVAL;
4054        /* Set asic errata */
4055        r100_errata(rdev);
4056        /* Initialize clocks */
4057        radeon_get_clock_info(rdev->ddev);
4058        /* initialize AGP */
4059        if (rdev->flags & RADEON_IS_AGP) {
4060                r = radeon_agp_init(rdev);
4061                if (r) {
4062                        radeon_agp_disable(rdev);
4063                }
4064        }
4065        /* initialize VRAM */
4066        r100_mc_init(rdev);
4067        /* Fence driver */
4068        radeon_fence_driver_init(rdev);
4069        /* Memory manager */
4070        r = radeon_bo_init(rdev);
4071        if (r)
4072                return r;
4073        if (rdev->flags & RADEON_IS_PCI) {
4074                r = r100_pci_gart_init(rdev);
4075                if (r)
4076                        return r;
4077        }
4078        r100_set_safe_registers(rdev);
4079
4080        /* Initialize power management */
4081        radeon_pm_init(rdev);
4082
4083        rdev->accel_working = true;
4084        r = r100_startup(rdev);
4085        if (r) {
4086                /* Somethings want wront with the accel init stop accel */
4087                dev_err(rdev->dev, "Disabling GPU acceleration\n");
4088                r100_cp_fini(rdev);
4089                radeon_wb_fini(rdev);
4090                radeon_ib_pool_fini(rdev);
4091                radeon_irq_kms_fini(rdev);
4092                if (rdev->flags & RADEON_IS_PCI)
4093                        r100_pci_gart_fini(rdev);
4094                rdev->accel_working = false;
4095        }
4096        return 0;
4097}
4098
4099uint32_t r100_mm_rreg_slow(struct radeon_device *rdev, uint32_t reg)
4100{
4101        unsigned long flags;
4102        uint32_t ret;
4103
4104        spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
4105        writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
4106        ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
4107        spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
4108        return ret;
4109}
4110
4111void r100_mm_wreg_slow(struct radeon_device *rdev, uint32_t reg, uint32_t v)
4112{
4113        unsigned long flags;
4114
4115        spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
4116        writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
4117        writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
4118        spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
4119}
4120
4121u32 r100_io_rreg(struct radeon_device *rdev, u32 reg)
4122{
4123        if (reg < rdev->rio_mem_size)
4124                return ioread32(rdev->rio_mem + reg);
4125        else {
4126                iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
4127                return ioread32(rdev->rio_mem + RADEON_MM_DATA);
4128        }
4129}
4130
4131void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v)
4132{
4133        if (reg < rdev->rio_mem_size)
4134                iowrite32(v, rdev->rio_mem + reg);
4135        else {
4136                iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
4137                iowrite32(v, rdev->rio_mem + RADEON_MM_DATA);
4138        }
4139}
4140