linux/drivers/gpu/drm/radeon/r100.c
<<
>>
Prefs
   1/*
   2 * Copyright 2008 Advanced Micro Devices, Inc.
   3 * Copyright 2008 Red Hat Inc.
   4 * Copyright 2009 Jerome Glisse.
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a
   7 * copy of this software and associated documentation files (the "Software"),
   8 * to deal in the Software without restriction, including without limitation
   9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10 * and/or sell copies of the Software, and to permit persons to whom the
  11 * Software is furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  22 * OTHER DEALINGS IN THE SOFTWARE.
  23 *
  24 * Authors: Dave Airlie
  25 *          Alex Deucher
  26 *          Jerome Glisse
  27 */
  28
  29#include <linux/firmware.h>
  30#include <linux/module.h>
  31#include <linux/pci.h>
  32#include <linux/seq_file.h>
  33#include <linux/slab.h>
  34
  35#include <drm/drm_debugfs.h>
  36#include <drm/drm_device.h>
  37#include <drm/drm_file.h>
  38#include <drm/drm_fourcc.h>
  39#include <drm/drm_vblank.h>
  40#include <drm/radeon_drm.h>
  41
  42#include "atom.h"
  43#include "r100_reg_safe.h"
  44#include "r100d.h"
  45#include "radeon.h"
  46#include "radeon_asic.h"
  47#include "radeon_reg.h"
  48#include "rn50_reg_safe.h"
  49#include "rs100d.h"
  50#include "rv200d.h"
  51#include "rv250d.h"
  52
  53/* Firmware Names */
  54#define FIRMWARE_R100           "radeon/R100_cp.bin"
  55#define FIRMWARE_R200           "radeon/R200_cp.bin"
  56#define FIRMWARE_R300           "radeon/R300_cp.bin"
  57#define FIRMWARE_R420           "radeon/R420_cp.bin"
  58#define FIRMWARE_RS690          "radeon/RS690_cp.bin"
  59#define FIRMWARE_RS600          "radeon/RS600_cp.bin"
  60#define FIRMWARE_R520           "radeon/R520_cp.bin"
  61
  62MODULE_FIRMWARE(FIRMWARE_R100);
  63MODULE_FIRMWARE(FIRMWARE_R200);
  64MODULE_FIRMWARE(FIRMWARE_R300);
  65MODULE_FIRMWARE(FIRMWARE_R420);
  66MODULE_FIRMWARE(FIRMWARE_RS690);
  67MODULE_FIRMWARE(FIRMWARE_RS600);
  68MODULE_FIRMWARE(FIRMWARE_R520);
  69
  70#include "r100_track.h"
  71
/* This file gathers functions specific to:
 * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280
 * and others in some cases.
 */
  76
  77static bool r100_is_in_vblank(struct radeon_device *rdev, int crtc)
  78{
  79        if (crtc == 0) {
  80                if (RREG32(RADEON_CRTC_STATUS) & RADEON_CRTC_VBLANK_CUR)
  81                        return true;
  82                else
  83                        return false;
  84        } else {
  85                if (RREG32(RADEON_CRTC2_STATUS) & RADEON_CRTC2_VBLANK_CUR)
  86                        return true;
  87                else
  88                        return false;
  89        }
  90}
  91
  92static bool r100_is_counter_moving(struct radeon_device *rdev, int crtc)
  93{
  94        u32 vline1, vline2;
  95
  96        if (crtc == 0) {
  97                vline1 = (RREG32(RADEON_CRTC_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
  98                vline2 = (RREG32(RADEON_CRTC_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
  99        } else {
 100                vline1 = (RREG32(RADEON_CRTC2_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
 101                vline2 = (RREG32(RADEON_CRTC2_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
 102        }
 103        if (vline1 != vline2)
 104                return true;
 105        else
 106                return false;
 107}
 108
 109/**
 110 * r100_wait_for_vblank - vblank wait asic callback.
 111 *
 112 * @rdev: radeon_device pointer
 113 * @crtc: crtc to wait for vblank on
 114 *
 115 * Wait for vblank on the requested crtc (r1xx-r4xx).
 116 */
 117void r100_wait_for_vblank(struct radeon_device *rdev, int crtc)
 118{
 119        unsigned i = 0;
 120
 121        if (crtc >= rdev->num_crtc)
 122                return;
 123
 124        if (crtc == 0) {
 125                if (!(RREG32(RADEON_CRTC_GEN_CNTL) & RADEON_CRTC_EN))
 126                        return;
 127        } else {
 128                if (!(RREG32(RADEON_CRTC2_GEN_CNTL) & RADEON_CRTC2_EN))
 129                        return;
 130        }
 131
 132        /* depending on when we hit vblank, we may be close to active; if so,
 133         * wait for another frame.
 134         */
 135        while (r100_is_in_vblank(rdev, crtc)) {
 136                if (i++ % 100 == 0) {
 137                        if (!r100_is_counter_moving(rdev, crtc))
 138                                break;
 139                }
 140        }
 141
 142        while (!r100_is_in_vblank(rdev, crtc)) {
 143                if (i++ % 100 == 0) {
 144                        if (!r100_is_counter_moving(rdev, crtc))
 145                                break;
 146                }
 147        }
 148}
 149
 150/**
 151 * r100_page_flip - pageflip callback.
 152 *
 153 * @rdev: radeon_device pointer
 154 * @crtc_id: crtc to cleanup pageflip on
 155 * @crtc_base: new address of the crtc (GPU MC address)
 156 *
 157 * Does the actual pageflip (r1xx-r4xx).
 158 * During vblank we take the crtc lock and wait for the update_pending
 159 * bit to go high, when it does, we release the lock, and allow the
 160 * double buffered update to take place.
 161 */
 162void r100_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base, bool async)
 163{
 164        struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
 165        u32 tmp = ((u32)crtc_base) | RADEON_CRTC_OFFSET__OFFSET_LOCK;
 166        int i;
 167
 168        /* Lock the graphics update lock */
 169        /* update the scanout addresses */
 170        WREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset, tmp);
 171
 172        /* Wait for update_pending to go high. */
 173        for (i = 0; i < rdev->usec_timeout; i++) {
 174                if (RREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset) & RADEON_CRTC_OFFSET__GUI_TRIG_OFFSET)
 175                        break;
 176                udelay(1);
 177        }
 178        DRM_DEBUG("Update pending now high. Unlocking vupdate_lock.\n");
 179
 180        /* Unlock the lock, so double-buffering can take place inside vblank */
 181        tmp &= ~RADEON_CRTC_OFFSET__OFFSET_LOCK;
 182        WREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset, tmp);
 183
 184}
 185
 186/**
 187 * r100_page_flip_pending - check if page flip is still pending
 188 *
 189 * @rdev: radeon_device pointer
 190 * @crtc_id: crtc to check
 191 *
 192 * Check if the last pagefilp is still pending (r1xx-r4xx).
 193 * Returns the current update pending status.
 194 */
 195bool r100_page_flip_pending(struct radeon_device *rdev, int crtc_id)
 196{
 197        struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
 198
 199        /* Return current update_pending status: */
 200        return !!(RREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset) &
 201                RADEON_CRTC_OFFSET__GUI_TRIG_OFFSET);
 202}
 203
 204/**
 205 * r100_pm_get_dynpm_state - look up dynpm power state callback.
 206 *
 207 * @rdev: radeon_device pointer
 208 *
 209 * Look up the optimal power state based on the
 210 * current state of the GPU (r1xx-r5xx).
 211 * Used for dynpm only.
 212 */
 213void r100_pm_get_dynpm_state(struct radeon_device *rdev)
 214{
 215        int i;
 216        rdev->pm.dynpm_can_upclock = true;
 217        rdev->pm.dynpm_can_downclock = true;
 218
 219        switch (rdev->pm.dynpm_planned_action) {
 220        case DYNPM_ACTION_MINIMUM:
 221                rdev->pm.requested_power_state_index = 0;
 222                rdev->pm.dynpm_can_downclock = false;
 223                break;
 224        case DYNPM_ACTION_DOWNCLOCK:
 225                if (rdev->pm.current_power_state_index == 0) {
 226                        rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
 227                        rdev->pm.dynpm_can_downclock = false;
 228                } else {
 229                        if (rdev->pm.active_crtc_count > 1) {
 230                                for (i = 0; i < rdev->pm.num_power_states; i++) {
 231                                        if (rdev->pm.power_state[i].flags & RADEON_PM_STATE_SINGLE_DISPLAY_ONLY)
 232                                                continue;
 233                                        else if (i >= rdev->pm.current_power_state_index) {
 234                                                rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
 235                                                break;
 236                                        } else {
 237                                                rdev->pm.requested_power_state_index = i;
 238                                                break;
 239                                        }
 240                                }
 241                        } else
 242                                rdev->pm.requested_power_state_index =
 243                                        rdev->pm.current_power_state_index - 1;
 244                }
 245                /* don't use the power state if crtcs are active and no display flag is set */
 246                if ((rdev->pm.active_crtc_count > 0) &&
 247                    (rdev->pm.power_state[rdev->pm.requested_power_state_index].clock_info[0].flags &
 248                     RADEON_PM_MODE_NO_DISPLAY)) {
 249                        rdev->pm.requested_power_state_index++;
 250                }
 251                break;
 252        case DYNPM_ACTION_UPCLOCK:
 253                if (rdev->pm.current_power_state_index == (rdev->pm.num_power_states - 1)) {
 254                        rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
 255                        rdev->pm.dynpm_can_upclock = false;
 256                } else {
 257                        if (rdev->pm.active_crtc_count > 1) {
 258                                for (i = (rdev->pm.num_power_states - 1); i >= 0; i--) {
 259                                        if (rdev->pm.power_state[i].flags & RADEON_PM_STATE_SINGLE_DISPLAY_ONLY)
 260                                                continue;
 261                                        else if (i <= rdev->pm.current_power_state_index) {
 262                                                rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
 263                                                break;
 264                                        } else {
 265                                                rdev->pm.requested_power_state_index = i;
 266                                                break;
 267                                        }
 268                                }
 269                        } else
 270                                rdev->pm.requested_power_state_index =
 271                                        rdev->pm.current_power_state_index + 1;
 272                }
 273                break;
 274        case DYNPM_ACTION_DEFAULT:
 275                rdev->pm.requested_power_state_index = rdev->pm.default_power_state_index;
 276                rdev->pm.dynpm_can_upclock = false;
 277                break;
 278        case DYNPM_ACTION_NONE:
 279        default:
 280                DRM_ERROR("Requested mode for not defined action\n");
 281                return;
 282        }
 283        /* only one clock mode per power state */
 284        rdev->pm.requested_clock_mode_index = 0;
 285
 286        DRM_DEBUG_DRIVER("Requested: e: %d m: %d p: %d\n",
 287                  rdev->pm.power_state[rdev->pm.requested_power_state_index].
 288                  clock_info[rdev->pm.requested_clock_mode_index].sclk,
 289                  rdev->pm.power_state[rdev->pm.requested_power_state_index].
 290                  clock_info[rdev->pm.requested_clock_mode_index].mclk,
 291                  rdev->pm.power_state[rdev->pm.requested_power_state_index].
 292                  pcie_lanes);
 293}
 294
 295/**
 296 * r100_pm_init_profile - Initialize power profiles callback.
 297 *
 298 * @rdev: radeon_device pointer
 299 *
 300 * Initialize the power states used in profile mode
 301 * (r1xx-r3xx).
 302 * Used for profile mode only.
 303 */
 304void r100_pm_init_profile(struct radeon_device *rdev)
 305{
 306        /* default */
 307        rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_off_ps_idx = rdev->pm.default_power_state_index;
 308        rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
 309        rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_off_cm_idx = 0;
 310        rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_on_cm_idx = 0;
 311        /* low sh */
 312        rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_off_ps_idx = 0;
 313        rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_on_ps_idx = 0;
 314        rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_off_cm_idx = 0;
 315        rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_on_cm_idx = 0;
 316        /* mid sh */
 317        rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_off_ps_idx = 0;
 318        rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_on_ps_idx = 0;
 319        rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_off_cm_idx = 0;
 320        rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_on_cm_idx = 0;
 321        /* high sh */
 322        rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_off_ps_idx = 0;
 323        rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
 324        rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_off_cm_idx = 0;
 325        rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_on_cm_idx = 0;
 326        /* low mh */
 327        rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_off_ps_idx = 0;
 328        rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
 329        rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_off_cm_idx = 0;
 330        rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_on_cm_idx = 0;
 331        /* mid mh */
 332        rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_off_ps_idx = 0;
 333        rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
 334        rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_off_cm_idx = 0;
 335        rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_on_cm_idx = 0;
 336        /* high mh */
 337        rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_off_ps_idx = 0;
 338        rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
 339        rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_off_cm_idx = 0;
 340        rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_on_cm_idx = 0;
 341}
 342
 343/**
 344 * r100_pm_misc - set additional pm hw parameters callback.
 345 *
 346 * @rdev: radeon_device pointer
 347 *
 348 * Set non-clock parameters associated with a power state
 349 * (voltage, pcie lanes, etc.) (r1xx-r4xx).
 350 */
 351void r100_pm_misc(struct radeon_device *rdev)
 352{
 353        int requested_index = rdev->pm.requested_power_state_index;
 354        struct radeon_power_state *ps = &rdev->pm.power_state[requested_index];
 355        struct radeon_voltage *voltage = &ps->clock_info[0].voltage;
 356        u32 tmp, sclk_cntl, sclk_cntl2, sclk_more_cntl;
 357
 358        if ((voltage->type == VOLTAGE_GPIO) && (voltage->gpio.valid)) {
 359                if (ps->misc & ATOM_PM_MISCINFO_VOLTAGE_DROP_SUPPORT) {
 360                        tmp = RREG32(voltage->gpio.reg);
 361                        if (voltage->active_high)
 362                                tmp |= voltage->gpio.mask;
 363                        else
 364                                tmp &= ~(voltage->gpio.mask);
 365                        WREG32(voltage->gpio.reg, tmp);
 366                        if (voltage->delay)
 367                                udelay(voltage->delay);
 368                } else {
 369                        tmp = RREG32(voltage->gpio.reg);
 370                        if (voltage->active_high)
 371                                tmp &= ~voltage->gpio.mask;
 372                        else
 373                                tmp |= voltage->gpio.mask;
 374                        WREG32(voltage->gpio.reg, tmp);
 375                        if (voltage->delay)
 376                                udelay(voltage->delay);
 377                }
 378        }
 379
 380        sclk_cntl = RREG32_PLL(SCLK_CNTL);
 381        sclk_cntl2 = RREG32_PLL(SCLK_CNTL2);
 382        sclk_cntl2 &= ~REDUCED_SPEED_SCLK_SEL(3);
 383        sclk_more_cntl = RREG32_PLL(SCLK_MORE_CNTL);
 384        sclk_more_cntl &= ~VOLTAGE_DELAY_SEL(3);
 385        if (ps->misc & ATOM_PM_MISCINFO_ASIC_REDUCED_SPEED_SCLK_EN) {
 386                sclk_more_cntl |= REDUCED_SPEED_SCLK_EN;
 387                if (ps->misc & ATOM_PM_MISCINFO_DYN_CLK_3D_IDLE)
 388                        sclk_cntl2 |= REDUCED_SPEED_SCLK_MODE;
 389                else
 390                        sclk_cntl2 &= ~REDUCED_SPEED_SCLK_MODE;
 391                if (ps->misc & ATOM_PM_MISCINFO_DYNAMIC_CLOCK_DIVIDER_BY_2)
 392                        sclk_cntl2 |= REDUCED_SPEED_SCLK_SEL(0);
 393                else if (ps->misc & ATOM_PM_MISCINFO_DYNAMIC_CLOCK_DIVIDER_BY_4)
 394                        sclk_cntl2 |= REDUCED_SPEED_SCLK_SEL(2);
 395        } else
 396                sclk_more_cntl &= ~REDUCED_SPEED_SCLK_EN;
 397
 398        if (ps->misc & ATOM_PM_MISCINFO_ASIC_DYNAMIC_VOLTAGE_EN) {
 399                sclk_more_cntl |= IO_CG_VOLTAGE_DROP;
 400                if (voltage->delay) {
 401                        sclk_more_cntl |= VOLTAGE_DROP_SYNC;
 402                        switch (voltage->delay) {
 403                        case 33:
 404                                sclk_more_cntl |= VOLTAGE_DELAY_SEL(0);
 405                                break;
 406                        case 66:
 407                                sclk_more_cntl |= VOLTAGE_DELAY_SEL(1);
 408                                break;
 409                        case 99:
 410                                sclk_more_cntl |= VOLTAGE_DELAY_SEL(2);
 411                                break;
 412                        case 132:
 413                                sclk_more_cntl |= VOLTAGE_DELAY_SEL(3);
 414                                break;
 415                        }
 416                } else
 417                        sclk_more_cntl &= ~VOLTAGE_DROP_SYNC;
 418        } else
 419                sclk_more_cntl &= ~IO_CG_VOLTAGE_DROP;
 420
 421        if (ps->misc & ATOM_PM_MISCINFO_DYNAMIC_HDP_BLOCK_EN)
 422                sclk_cntl &= ~FORCE_HDP;
 423        else
 424                sclk_cntl |= FORCE_HDP;
 425
 426        WREG32_PLL(SCLK_CNTL, sclk_cntl);
 427        WREG32_PLL(SCLK_CNTL2, sclk_cntl2);
 428        WREG32_PLL(SCLK_MORE_CNTL, sclk_more_cntl);
 429
 430        /* set pcie lanes */
 431        if ((rdev->flags & RADEON_IS_PCIE) &&
 432            !(rdev->flags & RADEON_IS_IGP) &&
 433            rdev->asic->pm.set_pcie_lanes &&
 434            (ps->pcie_lanes !=
 435             rdev->pm.power_state[rdev->pm.current_power_state_index].pcie_lanes)) {
 436                radeon_set_pcie_lanes(rdev,
 437                                      ps->pcie_lanes);
 438                DRM_DEBUG_DRIVER("Setting: p: %d\n", ps->pcie_lanes);
 439        }
 440}
 441
 442/**
 443 * r100_pm_prepare - pre-power state change callback.
 444 *
 445 * @rdev: radeon_device pointer
 446 *
 447 * Prepare for a power state change (r1xx-r4xx).
 448 */
 449void r100_pm_prepare(struct radeon_device *rdev)
 450{
 451        struct drm_device *ddev = rdev->ddev;
 452        struct drm_crtc *crtc;
 453        struct radeon_crtc *radeon_crtc;
 454        u32 tmp;
 455
 456        /* disable any active CRTCs */
 457        list_for_each_entry(crtc, &ddev->mode_config.crtc_list, head) {
 458                radeon_crtc = to_radeon_crtc(crtc);
 459                if (radeon_crtc->enabled) {
 460                        if (radeon_crtc->crtc_id) {
 461                                tmp = RREG32(RADEON_CRTC2_GEN_CNTL);
 462                                tmp |= RADEON_CRTC2_DISP_REQ_EN_B;
 463                                WREG32(RADEON_CRTC2_GEN_CNTL, tmp);
 464                        } else {
 465                                tmp = RREG32(RADEON_CRTC_GEN_CNTL);
 466                                tmp |= RADEON_CRTC_DISP_REQ_EN_B;
 467                                WREG32(RADEON_CRTC_GEN_CNTL, tmp);
 468                        }
 469                }
 470        }
 471}
 472
 473/**
 474 * r100_pm_finish - post-power state change callback.
 475 *
 476 * @rdev: radeon_device pointer
 477 *
 478 * Clean up after a power state change (r1xx-r4xx).
 479 */
 480void r100_pm_finish(struct radeon_device *rdev)
 481{
 482        struct drm_device *ddev = rdev->ddev;
 483        struct drm_crtc *crtc;
 484        struct radeon_crtc *radeon_crtc;
 485        u32 tmp;
 486
 487        /* enable any active CRTCs */
 488        list_for_each_entry(crtc, &ddev->mode_config.crtc_list, head) {
 489                radeon_crtc = to_radeon_crtc(crtc);
 490                if (radeon_crtc->enabled) {
 491                        if (radeon_crtc->crtc_id) {
 492                                tmp = RREG32(RADEON_CRTC2_GEN_CNTL);
 493                                tmp &= ~RADEON_CRTC2_DISP_REQ_EN_B;
 494                                WREG32(RADEON_CRTC2_GEN_CNTL, tmp);
 495                        } else {
 496                                tmp = RREG32(RADEON_CRTC_GEN_CNTL);
 497                                tmp &= ~RADEON_CRTC_DISP_REQ_EN_B;
 498                                WREG32(RADEON_CRTC_GEN_CNTL, tmp);
 499                        }
 500                }
 501        }
 502}
 503
 504/**
 505 * r100_gui_idle - gui idle callback.
 506 *
 507 * @rdev: radeon_device pointer
 508 *
 509 * Check of the GUI (2D/3D engines) are idle (r1xx-r5xx).
 510 * Returns true if idle, false if not.
 511 */
 512bool r100_gui_idle(struct radeon_device *rdev)
 513{
 514        if (RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_ACTIVE)
 515                return false;
 516        else
 517                return true;
 518}
 519
 520/* hpd for digital panel detect/disconnect */
 521/**
 522 * r100_hpd_sense - hpd sense callback.
 523 *
 524 * @rdev: radeon_device pointer
 525 * @hpd: hpd (hotplug detect) pin
 526 *
 527 * Checks if a digital monitor is connected (r1xx-r4xx).
 528 * Returns true if connected, false if not connected.
 529 */
 530bool r100_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd)
 531{
 532        bool connected = false;
 533
 534        switch (hpd) {
 535        case RADEON_HPD_1:
 536                if (RREG32(RADEON_FP_GEN_CNTL) & RADEON_FP_DETECT_SENSE)
 537                        connected = true;
 538                break;
 539        case RADEON_HPD_2:
 540                if (RREG32(RADEON_FP2_GEN_CNTL) & RADEON_FP2_DETECT_SENSE)
 541                        connected = true;
 542                break;
 543        default:
 544                break;
 545        }
 546        return connected;
 547}
 548
 549/**
 550 * r100_hpd_set_polarity - hpd set polarity callback.
 551 *
 552 * @rdev: radeon_device pointer
 553 * @hpd: hpd (hotplug detect) pin
 554 *
 555 * Set the polarity of the hpd pin (r1xx-r4xx).
 556 */
 557void r100_hpd_set_polarity(struct radeon_device *rdev,
 558                           enum radeon_hpd_id hpd)
 559{
 560        u32 tmp;
 561        bool connected = r100_hpd_sense(rdev, hpd);
 562
 563        switch (hpd) {
 564        case RADEON_HPD_1:
 565                tmp = RREG32(RADEON_FP_GEN_CNTL);
 566                if (connected)
 567                        tmp &= ~RADEON_FP_DETECT_INT_POL;
 568                else
 569                        tmp |= RADEON_FP_DETECT_INT_POL;
 570                WREG32(RADEON_FP_GEN_CNTL, tmp);
 571                break;
 572        case RADEON_HPD_2:
 573                tmp = RREG32(RADEON_FP2_GEN_CNTL);
 574                if (connected)
 575                        tmp &= ~RADEON_FP2_DETECT_INT_POL;
 576                else
 577                        tmp |= RADEON_FP2_DETECT_INT_POL;
 578                WREG32(RADEON_FP2_GEN_CNTL, tmp);
 579                break;
 580        default:
 581                break;
 582        }
 583}
 584
 585/**
 586 * r100_hpd_init - hpd setup callback.
 587 *
 588 * @rdev: radeon_device pointer
 589 *
 590 * Setup the hpd pins used by the card (r1xx-r4xx).
 591 * Set the polarity, and enable the hpd interrupts.
 592 */
 593void r100_hpd_init(struct radeon_device *rdev)
 594{
 595        struct drm_device *dev = rdev->ddev;
 596        struct drm_connector *connector;
 597        unsigned enable = 0;
 598
 599        list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
 600                struct radeon_connector *radeon_connector = to_radeon_connector(connector);
 601                if (radeon_connector->hpd.hpd != RADEON_HPD_NONE)
 602                        enable |= 1 << radeon_connector->hpd.hpd;
 603                radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd);
 604        }
 605        radeon_irq_kms_enable_hpd(rdev, enable);
 606}
 607
 608/**
 609 * r100_hpd_fini - hpd tear down callback.
 610 *
 611 * @rdev: radeon_device pointer
 612 *
 613 * Tear down the hpd pins used by the card (r1xx-r4xx).
 614 * Disable the hpd interrupts.
 615 */
 616void r100_hpd_fini(struct radeon_device *rdev)
 617{
 618        struct drm_device *dev = rdev->ddev;
 619        struct drm_connector *connector;
 620        unsigned disable = 0;
 621
 622        list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
 623                struct radeon_connector *radeon_connector = to_radeon_connector(connector);
 624                if (radeon_connector->hpd.hpd != RADEON_HPD_NONE)
 625                        disable |= 1 << radeon_connector->hpd.hpd;
 626        }
 627        radeon_irq_kms_disable_hpd(rdev, disable);
 628}
 629
 630/*
 631 * PCI GART
 632 */
 633void r100_pci_gart_tlb_flush(struct radeon_device *rdev)
 634{
 635        /* TODO: can we do somethings here ? */
 636        /* It seems hw only cache one entry so we should discard this
 637         * entry otherwise if first GPU GART read hit this entry it
 638         * could end up in wrong address. */
 639}
 640
 641int r100_pci_gart_init(struct radeon_device *rdev)
 642{
 643        int r;
 644
 645        if (rdev->gart.ptr) {
 646                WARN(1, "R100 PCI GART already initialized\n");
 647                return 0;
 648        }
 649        /* Initialize common gart structure */
 650        r = radeon_gart_init(rdev);
 651        if (r)
 652                return r;
 653        rdev->gart.table_size = rdev->gart.num_gpu_pages * 4;
 654        rdev->asic->gart.tlb_flush = &r100_pci_gart_tlb_flush;
 655        rdev->asic->gart.get_page_entry = &r100_pci_gart_get_page_entry;
 656        rdev->asic->gart.set_page = &r100_pci_gart_set_page;
 657        return radeon_gart_table_ram_alloc(rdev);
 658}
 659
 660int r100_pci_gart_enable(struct radeon_device *rdev)
 661{
 662        uint32_t tmp;
 663
 664        /* discard memory request outside of configured range */
 665        tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
 666        WREG32(RADEON_AIC_CNTL, tmp);
 667        /* set address range for PCI address translate */
 668        WREG32(RADEON_AIC_LO_ADDR, rdev->mc.gtt_start);
 669        WREG32(RADEON_AIC_HI_ADDR, rdev->mc.gtt_end);
 670        /* set PCI GART page-table base address */
 671        WREG32(RADEON_AIC_PT_BASE, rdev->gart.table_addr);
 672        tmp = RREG32(RADEON_AIC_CNTL) | RADEON_PCIGART_TRANSLATE_EN;
 673        WREG32(RADEON_AIC_CNTL, tmp);
 674        r100_pci_gart_tlb_flush(rdev);
 675        DRM_INFO("PCI GART of %uM enabled (table at 0x%016llX).\n",
 676                 (unsigned)(rdev->mc.gtt_size >> 20),
 677                 (unsigned long long)rdev->gart.table_addr);
 678        rdev->gart.ready = true;
 679        return 0;
 680}
 681
 682void r100_pci_gart_disable(struct radeon_device *rdev)
 683{
 684        uint32_t tmp;
 685
 686        /* discard memory request outside of configured range */
 687        tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
 688        WREG32(RADEON_AIC_CNTL, tmp & ~RADEON_PCIGART_TRANSLATE_EN);
 689        WREG32(RADEON_AIC_LO_ADDR, 0);
 690        WREG32(RADEON_AIC_HI_ADDR, 0);
 691}
 692
 693uint64_t r100_pci_gart_get_page_entry(uint64_t addr, uint32_t flags)
 694{
 695        return addr;
 696}
 697
 698void r100_pci_gart_set_page(struct radeon_device *rdev, unsigned i,
 699                            uint64_t entry)
 700{
 701        u32 *gtt = rdev->gart.ptr;
 702        gtt[i] = cpu_to_le32(lower_32_bits(entry));
 703}
 704
 705void r100_pci_gart_fini(struct radeon_device *rdev)
 706{
 707        radeon_gart_fini(rdev);
 708        r100_pci_gart_disable(rdev);
 709        radeon_gart_table_ram_free(rdev);
 710}
 711
 712int r100_irq_set(struct radeon_device *rdev)
 713{
 714        uint32_t tmp = 0;
 715
 716        if (!rdev->irq.installed) {
 717                WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
 718                WREG32(R_000040_GEN_INT_CNTL, 0);
 719                return -EINVAL;
 720        }
 721        if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
 722                tmp |= RADEON_SW_INT_ENABLE;
 723        }
 724        if (rdev->irq.crtc_vblank_int[0] ||
 725            atomic_read(&rdev->irq.pflip[0])) {
 726                tmp |= RADEON_CRTC_VBLANK_MASK;
 727        }
 728        if (rdev->irq.crtc_vblank_int[1] ||
 729            atomic_read(&rdev->irq.pflip[1])) {
 730                tmp |= RADEON_CRTC2_VBLANK_MASK;
 731        }
 732        if (rdev->irq.hpd[0]) {
 733                tmp |= RADEON_FP_DETECT_MASK;
 734        }
 735        if (rdev->irq.hpd[1]) {
 736                tmp |= RADEON_FP2_DETECT_MASK;
 737        }
 738        WREG32(RADEON_GEN_INT_CNTL, tmp);
 739
 740        /* read back to post the write */
 741        RREG32(RADEON_GEN_INT_CNTL);
 742
 743        return 0;
 744}
 745
 746void r100_irq_disable(struct radeon_device *rdev)
 747{
 748        u32 tmp;
 749
 750        WREG32(R_000040_GEN_INT_CNTL, 0);
 751        /* Wait and acknowledge irq */
 752        mdelay(1);
 753        tmp = RREG32(R_000044_GEN_INT_STATUS);
 754        WREG32(R_000044_GEN_INT_STATUS, tmp);
 755}
 756
 757static uint32_t r100_irq_ack(struct radeon_device *rdev)
 758{
 759        uint32_t irqs = RREG32(RADEON_GEN_INT_STATUS);
 760        uint32_t irq_mask = RADEON_SW_INT_TEST |
 761                RADEON_CRTC_VBLANK_STAT | RADEON_CRTC2_VBLANK_STAT |
 762                RADEON_FP_DETECT_STAT | RADEON_FP2_DETECT_STAT;
 763
 764        if (irqs) {
 765                WREG32(RADEON_GEN_INT_STATUS, irqs);
 766        }
 767        return irqs & irq_mask;
 768}
 769
 770int r100_irq_process(struct radeon_device *rdev)
 771{
 772        uint32_t status, msi_rearm;
 773        bool queue_hotplug = false;
 774
 775        status = r100_irq_ack(rdev);
 776        if (!status) {
 777                return IRQ_NONE;
 778        }
 779        if (rdev->shutdown) {
 780                return IRQ_NONE;
 781        }
 782        while (status) {
 783                /* SW interrupt */
 784                if (status & RADEON_SW_INT_TEST) {
 785                        radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
 786                }
 787                /* Vertical blank interrupts */
 788                if (status & RADEON_CRTC_VBLANK_STAT) {
 789                        if (rdev->irq.crtc_vblank_int[0]) {
 790                                drm_handle_vblank(rdev->ddev, 0);
 791                                rdev->pm.vblank_sync = true;
 792                                wake_up(&rdev->irq.vblank_queue);
 793                        }
 794                        if (atomic_read(&rdev->irq.pflip[0]))
 795                                radeon_crtc_handle_vblank(rdev, 0);
 796                }
 797                if (status & RADEON_CRTC2_VBLANK_STAT) {
 798                        if (rdev->irq.crtc_vblank_int[1]) {
 799                                drm_handle_vblank(rdev->ddev, 1);
 800                                rdev->pm.vblank_sync = true;
 801                                wake_up(&rdev->irq.vblank_queue);
 802                        }
 803                        if (atomic_read(&rdev->irq.pflip[1]))
 804                                radeon_crtc_handle_vblank(rdev, 1);
 805                }
 806                if (status & RADEON_FP_DETECT_STAT) {
 807                        queue_hotplug = true;
 808                        DRM_DEBUG("HPD1\n");
 809                }
 810                if (status & RADEON_FP2_DETECT_STAT) {
 811                        queue_hotplug = true;
 812                        DRM_DEBUG("HPD2\n");
 813                }
 814                status = r100_irq_ack(rdev);
 815        }
 816        if (queue_hotplug)
 817                schedule_delayed_work(&rdev->hotplug_work, 0);
 818        if (rdev->msi_enabled) {
 819                switch (rdev->family) {
 820                case CHIP_RS400:
 821                case CHIP_RS480:
 822                        msi_rearm = RREG32(RADEON_AIC_CNTL) & ~RS400_MSI_REARM;
 823                        WREG32(RADEON_AIC_CNTL, msi_rearm);
 824                        WREG32(RADEON_AIC_CNTL, msi_rearm | RS400_MSI_REARM);
 825                        break;
 826                default:
 827                        WREG32(RADEON_MSI_REARM_EN, RV370_MSI_REARM_EN);
 828                        break;
 829                }
 830        }
 831        return IRQ_HANDLED;
 832}
 833
 834u32 r100_get_vblank_counter(struct radeon_device *rdev, int crtc)
 835{
 836        if (crtc == 0)
 837                return RREG32(RADEON_CRTC_CRNT_FRAME);
 838        else
 839                return RREG32(RADEON_CRTC2_CRNT_FRAME);
 840}
 841
 842/**
 843 * r100_ring_hdp_flush - flush Host Data Path via the ring buffer
 844 * rdev: radeon device structure
 845 * ring: ring buffer struct for emitting packets
 846 */
 847static void r100_ring_hdp_flush(struct radeon_device *rdev, struct radeon_ring *ring)
 848{
 849        radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
 850        radeon_ring_write(ring, rdev->config.r100.hdp_cntl |
 851                                RADEON_HDP_READ_BUFFER_INVALIDATE);
 852        radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
 853        radeon_ring_write(ring, rdev->config.r100.hdp_cntl);
 854}
 855
 856/* Who ever call radeon_fence_emit should call ring_lock and ask
 857 * for enough space (today caller are ib schedule and buffer move) */
 858void r100_fence_ring_emit(struct radeon_device *rdev,
 859                          struct radeon_fence *fence)
 860{
 861        struct radeon_ring *ring = &rdev->ring[fence->ring];
 862
 863        /* We have to make sure that caches are flushed before
 864         * CPU might read something from VRAM. */
 865        radeon_ring_write(ring, PACKET0(RADEON_RB3D_DSTCACHE_CTLSTAT, 0));
 866        radeon_ring_write(ring, RADEON_RB3D_DC_FLUSH_ALL);
 867        radeon_ring_write(ring, PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 0));
 868        radeon_ring_write(ring, RADEON_RB3D_ZC_FLUSH_ALL);
 869        /* Wait until IDLE & CLEAN */
 870        radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0));
 871        radeon_ring_write(ring, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN);
 872        r100_ring_hdp_flush(rdev, ring);
 873        /* Emit fence sequence & fire IRQ */
 874        radeon_ring_write(ring, PACKET0(rdev->fence_drv[fence->ring].scratch_reg, 0));
 875        radeon_ring_write(ring, fence->seq);
 876        radeon_ring_write(ring, PACKET0(RADEON_GEN_INT_STATUS, 0));
 877        radeon_ring_write(ring, RADEON_SW_INT_FIRE);
 878}
 879
 880bool r100_semaphore_ring_emit(struct radeon_device *rdev,
 881                              struct radeon_ring *ring,
 882                              struct radeon_semaphore *semaphore,
 883                              bool emit_wait)
 884{
 885        /* Unused on older asics, since we don't have semaphores or multiple rings */
 886        BUG();
 887        return false;
 888}
 889
 890struct radeon_fence *r100_copy_blit(struct radeon_device *rdev,
 891                                    uint64_t src_offset,
 892                                    uint64_t dst_offset,
 893                                    unsigned num_gpu_pages,
 894                                    struct dma_resv *resv)
 895{
 896        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 897        struct radeon_fence *fence;
 898        uint32_t cur_pages;
 899        uint32_t stride_bytes = RADEON_GPU_PAGE_SIZE;
 900        uint32_t pitch;
 901        uint32_t stride_pixels;
 902        unsigned ndw;
 903        int num_loops;
 904        int r = 0;
 905
 906        /* radeon limited to 16k stride */
 907        stride_bytes &= 0x3fff;
 908        /* radeon pitch is /64 */
 909        pitch = stride_bytes / 64;
 910        stride_pixels = stride_bytes / 4;
 911        num_loops = DIV_ROUND_UP(num_gpu_pages, 8191);
 912
 913        /* Ask for enough room for blit + flush + fence */
 914        ndw = 64 + (10 * num_loops);
 915        r = radeon_ring_lock(rdev, ring, ndw);
 916        if (r) {
 917                DRM_ERROR("radeon: moving bo (%d) asking for %u dw.\n", r, ndw);
 918                return ERR_PTR(-EINVAL);
 919        }
 920        while (num_gpu_pages > 0) {
 921                cur_pages = num_gpu_pages;
 922                if (cur_pages > 8191) {
 923                        cur_pages = 8191;
 924                }
 925                num_gpu_pages -= cur_pages;
 926
 927                /* pages are in Y direction - height
 928                   page width in X direction - width */
 929                radeon_ring_write(ring, PACKET3(PACKET3_BITBLT_MULTI, 8));
 930                radeon_ring_write(ring,
 931                                  RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
 932                                  RADEON_GMC_DST_PITCH_OFFSET_CNTL |
 933                                  RADEON_GMC_SRC_CLIPPING |
 934                                  RADEON_GMC_DST_CLIPPING |
 935                                  RADEON_GMC_BRUSH_NONE |
 936                                  (RADEON_COLOR_FORMAT_ARGB8888 << 8) |
 937                                  RADEON_GMC_SRC_DATATYPE_COLOR |
 938                                  RADEON_ROP3_S |
 939                                  RADEON_DP_SRC_SOURCE_MEMORY |
 940                                  RADEON_GMC_CLR_CMP_CNTL_DIS |
 941                                  RADEON_GMC_WR_MSK_DIS);
 942                radeon_ring_write(ring, (pitch << 22) | (src_offset >> 10));
 943                radeon_ring_write(ring, (pitch << 22) | (dst_offset >> 10));
 944                radeon_ring_write(ring, (0x1fff) | (0x1fff << 16));
 945                radeon_ring_write(ring, 0);
 946                radeon_ring_write(ring, (0x1fff) | (0x1fff << 16));
 947                radeon_ring_write(ring, num_gpu_pages);
 948                radeon_ring_write(ring, num_gpu_pages);
 949                radeon_ring_write(ring, cur_pages | (stride_pixels << 16));
 950        }
 951        radeon_ring_write(ring, PACKET0(RADEON_DSTCACHE_CTLSTAT, 0));
 952        radeon_ring_write(ring, RADEON_RB2D_DC_FLUSH_ALL);
 953        radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0));
 954        radeon_ring_write(ring,
 955                          RADEON_WAIT_2D_IDLECLEAN |
 956                          RADEON_WAIT_HOST_IDLECLEAN |
 957                          RADEON_WAIT_DMA_GUI_IDLE);
 958        r = radeon_fence_emit(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX);
 959        if (r) {
 960                radeon_ring_unlock_undo(rdev, ring);
 961                return ERR_PTR(r);
 962        }
 963        radeon_ring_unlock_commit(rdev, ring, false);
 964        return fence;
 965}
 966
 967static int r100_cp_wait_for_idle(struct radeon_device *rdev)
 968{
 969        unsigned i;
 970        u32 tmp;
 971
 972        for (i = 0; i < rdev->usec_timeout; i++) {
 973                tmp = RREG32(R_000E40_RBBM_STATUS);
 974                if (!G_000E40_CP_CMDSTRM_BUSY(tmp)) {
 975                        return 0;
 976                }
 977                udelay(1);
 978        }
 979        return -1;
 980}
 981
 982void r100_ring_start(struct radeon_device *rdev, struct radeon_ring *ring)
 983{
 984        int r;
 985
 986        r = radeon_ring_lock(rdev, ring, 2);
 987        if (r) {
 988                return;
 989        }
 990        radeon_ring_write(ring, PACKET0(RADEON_ISYNC_CNTL, 0));
 991        radeon_ring_write(ring,
 992                          RADEON_ISYNC_ANY2D_IDLE3D |
 993                          RADEON_ISYNC_ANY3D_IDLE2D |
 994                          RADEON_ISYNC_WAIT_IDLEGUI |
 995                          RADEON_ISYNC_CPSCRATCH_IDLEGUI);
 996        radeon_ring_unlock_commit(rdev, ring, false);
 997}
 998
 999
1000/* Load the microcode for the CP */
1001static int r100_cp_init_microcode(struct radeon_device *rdev)
1002{
1003        const char *fw_name = NULL;
1004        int err;
1005
1006        DRM_DEBUG_KMS("\n");
1007
1008        if ((rdev->family == CHIP_R100) || (rdev->family == CHIP_RV100) ||
1009            (rdev->family == CHIP_RV200) || (rdev->family == CHIP_RS100) ||
1010            (rdev->family == CHIP_RS200)) {
1011                DRM_INFO("Loading R100 Microcode\n");
1012                fw_name = FIRMWARE_R100;
1013        } else if ((rdev->family == CHIP_R200) ||
1014                   (rdev->family == CHIP_RV250) ||
1015                   (rdev->family == CHIP_RV280) ||
1016                   (rdev->family == CHIP_RS300)) {
1017                DRM_INFO("Loading R200 Microcode\n");
1018                fw_name = FIRMWARE_R200;
1019        } else if ((rdev->family == CHIP_R300) ||
1020                   (rdev->family == CHIP_R350) ||
1021                   (rdev->family == CHIP_RV350) ||
1022                   (rdev->family == CHIP_RV380) ||
1023                   (rdev->family == CHIP_RS400) ||
1024                   (rdev->family == CHIP_RS480)) {
1025                DRM_INFO("Loading R300 Microcode\n");
1026                fw_name = FIRMWARE_R300;
1027        } else if ((rdev->family == CHIP_R420) ||
1028                   (rdev->family == CHIP_R423) ||
1029                   (rdev->family == CHIP_RV410)) {
1030                DRM_INFO("Loading R400 Microcode\n");
1031                fw_name = FIRMWARE_R420;
1032        } else if ((rdev->family == CHIP_RS690) ||
1033                   (rdev->family == CHIP_RS740)) {
1034                DRM_INFO("Loading RS690/RS740 Microcode\n");
1035                fw_name = FIRMWARE_RS690;
1036        } else if (rdev->family == CHIP_RS600) {
1037                DRM_INFO("Loading RS600 Microcode\n");
1038                fw_name = FIRMWARE_RS600;
1039        } else if ((rdev->family == CHIP_RV515) ||
1040                   (rdev->family == CHIP_R520) ||
1041                   (rdev->family == CHIP_RV530) ||
1042                   (rdev->family == CHIP_R580) ||
1043                   (rdev->family == CHIP_RV560) ||
1044                   (rdev->family == CHIP_RV570)) {
1045                DRM_INFO("Loading R500 Microcode\n");
1046                fw_name = FIRMWARE_R520;
1047        }
1048
1049        err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1050        if (err) {
1051                pr_err("radeon_cp: Failed to load firmware \"%s\"\n", fw_name);
1052        } else if (rdev->me_fw->size % 8) {
1053                pr_err("radeon_cp: Bogus length %zu in firmware \"%s\"\n",
1054                       rdev->me_fw->size, fw_name);
1055                err = -EINVAL;
1056                release_firmware(rdev->me_fw);
1057                rdev->me_fw = NULL;
1058        }
1059        return err;
1060}
1061
1062u32 r100_gfx_get_rptr(struct radeon_device *rdev,
1063                      struct radeon_ring *ring)
1064{
1065        u32 rptr;
1066
1067        if (rdev->wb.enabled)
1068                rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
1069        else
1070                rptr = RREG32(RADEON_CP_RB_RPTR);
1071
1072        return rptr;
1073}
1074
1075u32 r100_gfx_get_wptr(struct radeon_device *rdev,
1076                      struct radeon_ring *ring)
1077{
1078        return RREG32(RADEON_CP_RB_WPTR);
1079}
1080
1081void r100_gfx_set_wptr(struct radeon_device *rdev,
1082                       struct radeon_ring *ring)
1083{
1084        WREG32(RADEON_CP_RB_WPTR, ring->wptr);
1085        (void)RREG32(RADEON_CP_RB_WPTR);
1086}
1087
1088static void r100_cp_load_microcode(struct radeon_device *rdev)
1089{
1090        const __be32 *fw_data;
1091        int i, size;
1092
1093        if (r100_gui_wait_for_idle(rdev)) {
1094                pr_warn("Failed to wait GUI idle while programming pipes. Bad things might happen.\n");
1095        }
1096
1097        if (rdev->me_fw) {
1098                size = rdev->me_fw->size / 4;
1099                fw_data = (const __be32 *)&rdev->me_fw->data[0];
1100                WREG32(RADEON_CP_ME_RAM_ADDR, 0);
1101                for (i = 0; i < size; i += 2) {
1102                        WREG32(RADEON_CP_ME_RAM_DATAH,
1103                               be32_to_cpup(&fw_data[i]));
1104                        WREG32(RADEON_CP_ME_RAM_DATAL,
1105                               be32_to_cpup(&fw_data[i + 1]));
1106                }
1107        }
1108}
1109
/*
 * Bring up the command processor on the GFX ring.
 *
 * Fetches the microcode if rdev->me_fw is not set yet, uploads it,
 * initialises the ring, programs the ring-buffer and write-back registers,
 * enables the CP and finally runs a ring test.
 *
 * @rdev:      radeon device
 * @ring_size: requested ring size in bytes; it is rounded before use
 *
 * Returns 0 on success, or the error from the microcode fetch,
 * radeon_ring_init() or radeon_ring_test(). Failure to register the
 * debugfs file or to obtain the rptr-save scratch register is only logged.
 */
int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
{
        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        unsigned rb_bufsz;
        unsigned rb_blksz;
        unsigned max_fetch;
        unsigned pre_write_timer;
        unsigned pre_write_limit;
        unsigned indirect2_start;
        unsigned indirect1_start;
        uint32_t tmp;
        int r;

        /* debugfs registration failure is reported but not fatal */
        if (r100_debugfs_cp_init(rdev)) {
                DRM_ERROR("Failed to register debugfs file for CP !\n");
        }
        /* Only fetch the microcode when no image is cached yet */
        if (!rdev->me_fw) {
                r = r100_cp_init_microcode(rdev);
                if (r) {
                        DRM_ERROR("Failed to load firmware!\n");
                        return r;
                }
        }

        /* Align ring size */
        /* rb_bufsz is order_base_2() of the size in 8-byte units; the ring
         * size is then recomputed as (1 << rb_bufsz) * 8 bytes */
        rb_bufsz = order_base_2(ring_size / 8);
        ring_size = (1 << (rb_bufsz + 1)) * 4;
        r100_cp_load_microcode(rdev);
        r = radeon_ring_init(rdev, ring, ring_size, RADEON_WB_CP_RPTR_OFFSET,
                             RADEON_CP_PACKET2);
        if (r) {
                return r;
        }
        /* Each time the cp read 1024 bytes (16 dword/quadword) update
         * the rptr copy in system ram */
        rb_blksz = 9;
        /* cp will read 128bytes at a time (4 dwords) */
        max_fetch = 1;
        ring->align_mask = 16 - 1;
        /* Write to CP_RB_WPTR will be delayed for pre_write_timer clocks */
        pre_write_timer = 64;
        /* Force CP_RB_WPTR write if written more than one time before the
         * delay expire
         */
        pre_write_limit = 0;
        /* Setup the cp cache like this (cache size is 96 dwords) :
         *      RING            0  to 15
         *      INDIRECT1       16 to 79
         *      INDIRECT2       80 to 95
         * So ring cache size is 16dwords (> (2 * max_fetch = 2 * 4dwords))
         *    indirect1 cache size is 64dwords (> (2 * max_fetch = 2 * 4dwords))
         *    indirect2 cache size is 16dwords (> (2 * max_fetch = 2 * 4dwords))
         * Idea being that most of the gpu cmd will be through indirect1 buffer
         * so it gets the bigger cache.
         */
        indirect2_start = 80;
        indirect1_start = 16;
        /* cp setup */
        /* NOTE(review): 0x718 is a raw register offset with no named
         * define here - presumably the CP write-pointer delay register,
         * confirm against the register headers */
        WREG32(0x718, pre_write_timer | (pre_write_limit << 28));
        tmp = (REG_SET(RADEON_RB_BUFSZ, rb_bufsz) |
               REG_SET(RADEON_RB_BLKSZ, rb_blksz) |
               REG_SET(RADEON_MAX_FETCH, max_fetch));
#ifdef __BIG_ENDIAN
        tmp |= RADEON_BUF_SWAP_32BIT;
#endif
        /* RB_NO_UPDATE is ORed in for this write only; tmp itself stays
         * without it until the write-back decision further down */
        WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_NO_UPDATE);

        /* Set ring address */
        DRM_INFO("radeon: ring at 0x%016lX\n", (unsigned long)ring->gpu_addr);
        WREG32(RADEON_CP_RB_BASE, ring->gpu_addr);
        /* Force read & write ptr to 0 */
        WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA | RADEON_RB_NO_UPDATE);
        WREG32(RADEON_CP_RB_RPTR_WR, 0);
        ring->wptr = 0;
        WREG32(RADEON_CP_RB_WPTR, ring->wptr);

        /* set the wb address whether it's enabled or not */
        WREG32(R_00070C_CP_RB_RPTR_ADDR,
                S_00070C_RB_RPTR_ADDR((rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) >> 2));
        WREG32(R_000774_SCRATCH_ADDR, rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET);

        /* With write-back enabled SCRATCH_UMSK is set to 0xff; without it
         * the mask is cleared and RB_NO_UPDATE is kept in RB_CNTL */
        if (rdev->wb.enabled)
                WREG32(R_000770_SCRATCH_UMSK, 0xff);
        else {
                tmp |= RADEON_RB_NO_UPDATE;
                WREG32(R_000770_SCRATCH_UMSK, 0);
        }

        WREG32(RADEON_CP_RB_CNTL, tmp);
        udelay(10);
        /* Set cp mode to bus mastering & enable cp*/
        WREG32(RADEON_CP_CSQ_MODE,
               REG_SET(RADEON_INDIRECT2_START, indirect2_start) |
               REG_SET(RADEON_INDIRECT1_START, indirect1_start));
        WREG32(RADEON_CP_RB_WPTR_DELAY, 0);
        /* NOTE(review): this second write to CP_CSQ_MODE replaces the value
         * written just above with a literal constant - confirm intended */
        WREG32(RADEON_CP_CSQ_MODE, 0x00004D4D);
        WREG32(RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIBM_INDBM);

        /* at this point everything should be setup correctly to enable master */
        pci_set_master(rdev->pdev);

        radeon_ring_start(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
        r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, ring);
        if (r) {
                DRM_ERROR("radeon: cp isn't working (%d).\n", r);
                return r;
        }
        ring->ready = true;
        radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

        /* A failed scratch register allocation is logged and leaves
         * rptr_save_reg at 0; it does not fail the init */
        if (!ring->rptr_save_reg /* not resuming from suspend */
            && radeon_ring_supports_scratch_reg(rdev, ring)) {
                r = radeon_scratch_get(rdev, &ring->rptr_save_reg);
                if (r) {
                        DRM_ERROR("failed to get scratch reg for rptr save (%d).\n", r);
                        ring->rptr_save_reg = 0;
                }
        }
        return 0;
}
1230
1231void r100_cp_fini(struct radeon_device *rdev)
1232{
1233        if (r100_cp_wait_for_idle(rdev)) {
1234                DRM_ERROR("Wait for CP idle timeout, shutting down CP.\n");
1235        }
1236        /* Disable ring */
1237        r100_cp_disable(rdev);
1238        radeon_scratch_free(rdev, rdev->ring[RADEON_RING_TYPE_GFX_INDEX].rptr_save_reg);
1239        radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
1240        DRM_INFO("radeon: cp finalized\n");
1241}
1242
1243void r100_cp_disable(struct radeon_device *rdev)
1244{
1245        /* Disable ring */
1246        radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1247        rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
1248        WREG32(RADEON_CP_CSQ_MODE, 0);
1249        WREG32(RADEON_CP_CSQ_CNTL, 0);
1250        WREG32(R_000770_SCRATCH_UMSK, 0);
1251        if (r100_gui_wait_for_idle(rdev)) {
1252                pr_warn("Failed to wait GUI idle while programming pipes. Bad things might happen.\n");
1253        }
1254}
1255
1256/*
1257 * CS functions
1258 */
1259int r100_reloc_pitch_offset(struct radeon_cs_parser *p,
1260                            struct radeon_cs_packet *pkt,
1261                            unsigned idx,
1262                            unsigned reg)
1263{
1264        int r;
1265        u32 tile_flags = 0;
1266        u32 tmp;
1267        struct radeon_bo_list *reloc;
1268        u32 value;
1269
1270        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1271        if (r) {
1272                DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1273                          idx, reg);
1274                radeon_cs_dump_packet(p, pkt);
1275                return r;
1276        }
1277
1278        value = radeon_get_ib_value(p, idx);
1279        tmp = value & 0x003fffff;
1280        tmp += (((u32)reloc->gpu_offset) >> 10);
1281
1282        if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1283                if (reloc->tiling_flags & RADEON_TILING_MACRO)
1284                        tile_flags |= RADEON_DST_TILE_MACRO;
1285                if (reloc->tiling_flags & RADEON_TILING_MICRO) {
1286                        if (reg == RADEON_SRC_PITCH_OFFSET) {
1287                                DRM_ERROR("Cannot src blit from microtiled surface\n");
1288                                radeon_cs_dump_packet(p, pkt);
1289                                return -EINVAL;
1290                        }
1291                        tile_flags |= RADEON_DST_TILE_MICRO;
1292                }
1293
1294                tmp |= tile_flags;
1295                p->ib.ptr[idx] = (value & 0x3fc00000) | tmp;
1296        } else
1297                p->ib.ptr[idx] = (value & 0xffc00000) | tmp;
1298        return 0;
1299}
1300
1301int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
1302                             struct radeon_cs_packet *pkt,
1303                             int idx)
1304{
1305        unsigned c, i;
1306        struct radeon_bo_list *reloc;
1307        struct r100_cs_track *track;
1308        int r = 0;
1309        volatile uint32_t *ib;
1310        u32 idx_value;
1311
1312        ib = p->ib.ptr;
1313        track = (struct r100_cs_track *)p->track;
1314        c = radeon_get_ib_value(p, idx++) & 0x1F;
1315        if (c > 16) {
1316            DRM_ERROR("Only 16 vertex buffers are allowed %d\n",
1317                      pkt->opcode);
1318            radeon_cs_dump_packet(p, pkt);
1319            return -EINVAL;
1320        }
1321        track->num_arrays = c;
1322        for (i = 0; i < (c - 1); i+=2, idx+=3) {
1323                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1324                if (r) {
1325                        DRM_ERROR("No reloc for packet3 %d\n",
1326                                  pkt->opcode);
1327                        radeon_cs_dump_packet(p, pkt);
1328                        return r;
1329                }
1330                idx_value = radeon_get_ib_value(p, idx);
1331                ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->gpu_offset);
1332
1333                track->arrays[i + 0].esize = idx_value >> 8;
1334                track->arrays[i + 0].robj = reloc->robj;
1335                track->arrays[i + 0].esize &= 0x7F;
1336                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1337                if (r) {
1338                        DRM_ERROR("No reloc for packet3 %d\n",
1339                                  pkt->opcode);
1340                        radeon_cs_dump_packet(p, pkt);
1341                        return r;
1342                }
1343                ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->gpu_offset);
1344                track->arrays[i + 1].robj = reloc->robj;
1345                track->arrays[i + 1].esize = idx_value >> 24;
1346                track->arrays[i + 1].esize &= 0x7F;
1347        }
1348        if (c & 1) {
1349                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1350                if (r) {
1351                        DRM_ERROR("No reloc for packet3 %d\n",
1352                                          pkt->opcode);
1353                        radeon_cs_dump_packet(p, pkt);
1354                        return r;
1355                }
1356                idx_value = radeon_get_ib_value(p, idx);
1357                ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->gpu_offset);
1358                track->arrays[i + 0].robj = reloc->robj;
1359                track->arrays[i + 0].esize = idx_value >> 8;
1360                track->arrays[i + 0].esize &= 0x7F;
1361        }
1362        return r;
1363}
1364
1365int r100_cs_parse_packet0(struct radeon_cs_parser *p,
1366                          struct radeon_cs_packet *pkt,
1367                          const unsigned *auth, unsigned n,
1368                          radeon_packet0_check_t check)
1369{
1370        unsigned reg;
1371        unsigned i, j, m;
1372        unsigned idx;
1373        int r;
1374
1375        idx = pkt->idx + 1;
1376        reg = pkt->reg;
1377        /* Check that register fall into register range
1378         * determined by the number of entry (n) in the
1379         * safe register bitmap.
1380         */
1381        if (pkt->one_reg_wr) {
1382                if ((reg >> 7) > n) {
1383                        return -EINVAL;
1384                }
1385        } else {
1386                if (((reg + (pkt->count << 2)) >> 7) > n) {
1387                        return -EINVAL;
1388                }
1389        }
1390        for (i = 0; i <= pkt->count; i++, idx++) {
1391                j = (reg >> 7);
1392                m = 1 << ((reg >> 2) & 31);
1393                if (auth[j] & m) {
1394                        r = check(p, pkt, idx, reg);
1395                        if (r) {
1396                                return r;
1397                        }
1398                }
1399                if (pkt->one_reg_wr) {
1400                        if (!(auth[j] & m)) {
1401                                break;
1402                        }
1403                } else {
1404                        reg += 4;
1405                }
1406        }
1407        return 0;
1408}
1409
1410/**
1411 * r100_cs_packet_next_vline() - parse userspace VLINE packet
1412 * @parser:             parser structure holding parsing context.
1413 *
1414 * Userspace sends a special sequence for VLINE waits.
1415 * PACKET0 - VLINE_START_END + value
1416 * PACKET0 - WAIT_UNTIL +_value
1417 * RELOC (P3) - crtc_id in reloc.
1418 *
1419 * This function parses this and relocates the VLINE START END
1420 * and WAIT UNTIL packets to the correct crtc.
1421 * It also detects a switched off crtc and nulls out the
1422 * wait in that case.
1423 */
1424int r100_cs_packet_parse_vline(struct radeon_cs_parser *p)
1425{
1426        struct drm_crtc *crtc;
1427        struct radeon_crtc *radeon_crtc;
1428        struct radeon_cs_packet p3reloc, waitreloc;
1429        int crtc_id;
1430        int r;
1431        uint32_t header, h_idx, reg;
1432        volatile uint32_t *ib;
1433
1434        ib = p->ib.ptr;
1435
1436        /* parse the wait until */
1437        r = radeon_cs_packet_parse(p, &waitreloc, p->idx);
1438        if (r)
1439                return r;
1440
1441        /* check its a wait until and only 1 count */
1442        if (waitreloc.reg != RADEON_WAIT_UNTIL ||
1443            waitreloc.count != 0) {
1444                DRM_ERROR("vline wait had illegal wait until segment\n");
1445                return -EINVAL;
1446        }
1447
1448        if (radeon_get_ib_value(p, waitreloc.idx + 1) != RADEON_WAIT_CRTC_VLINE) {
1449                DRM_ERROR("vline wait had illegal wait until\n");
1450                return -EINVAL;
1451        }
1452
1453        /* jump over the NOP */
1454        r = radeon_cs_packet_parse(p, &p3reloc, p->idx + waitreloc.count + 2);
1455        if (r)
1456                return r;
1457
1458        h_idx = p->idx - 2;
1459        p->idx += waitreloc.count + 2;
1460        p->idx += p3reloc.count + 2;
1461
1462        header = radeon_get_ib_value(p, h_idx);
1463        crtc_id = radeon_get_ib_value(p, h_idx + 5);
1464        reg = R100_CP_PACKET0_GET_REG(header);
1465        crtc = drm_crtc_find(p->rdev->ddev, p->filp, crtc_id);
1466        if (!crtc) {
1467                DRM_ERROR("cannot find crtc %d\n", crtc_id);
1468                return -ENOENT;
1469        }
1470        radeon_crtc = to_radeon_crtc(crtc);
1471        crtc_id = radeon_crtc->crtc_id;
1472
1473        if (!crtc->enabled) {
1474                /* if the CRTC isn't enabled - we need to nop out the wait until */
1475                ib[h_idx + 2] = PACKET2(0);
1476                ib[h_idx + 3] = PACKET2(0);
1477        } else if (crtc_id == 1) {
1478                switch (reg) {
1479                case AVIVO_D1MODE_VLINE_START_END:
1480                        header &= ~R300_CP_PACKET0_REG_MASK;
1481                        header |= AVIVO_D2MODE_VLINE_START_END >> 2;
1482                        break;
1483                case RADEON_CRTC_GUI_TRIG_VLINE:
1484                        header &= ~R300_CP_PACKET0_REG_MASK;
1485                        header |= RADEON_CRTC2_GUI_TRIG_VLINE >> 2;
1486                        break;
1487                default:
1488                        DRM_ERROR("unknown crtc reloc\n");
1489                        return -EINVAL;
1490                }
1491                ib[h_idx] = header;
1492                ib[h_idx + 3] |= RADEON_ENG_DISPLAY_SELECT_CRTC1;
1493        }
1494
1495        return 0;
1496}
1497
1498static int r100_get_vtx_size(uint32_t vtx_fmt)
1499{
1500        int vtx_size;
1501        vtx_size = 2;
1502        /* ordered according to bits in spec */
1503        if (vtx_fmt & RADEON_SE_VTX_FMT_W0)
1504                vtx_size++;
1505        if (vtx_fmt & RADEON_SE_VTX_FMT_FPCOLOR)
1506                vtx_size += 3;
1507        if (vtx_fmt & RADEON_SE_VTX_FMT_FPALPHA)
1508                vtx_size++;
1509        if (vtx_fmt & RADEON_SE_VTX_FMT_PKCOLOR)
1510                vtx_size++;
1511        if (vtx_fmt & RADEON_SE_VTX_FMT_FPSPEC)
1512                vtx_size += 3;
1513        if (vtx_fmt & RADEON_SE_VTX_FMT_FPFOG)
1514                vtx_size++;
1515        if (vtx_fmt & RADEON_SE_VTX_FMT_PKSPEC)
1516                vtx_size++;
1517        if (vtx_fmt & RADEON_SE_VTX_FMT_ST0)
1518                vtx_size += 2;
1519        if (vtx_fmt & RADEON_SE_VTX_FMT_ST1)
1520                vtx_size += 2;
1521        if (vtx_fmt & RADEON_SE_VTX_FMT_Q1)
1522                vtx_size++;
1523        if (vtx_fmt & RADEON_SE_VTX_FMT_ST2)
1524                vtx_size += 2;
1525        if (vtx_fmt & RADEON_SE_VTX_FMT_Q2)
1526                vtx_size++;
1527        if (vtx_fmt & RADEON_SE_VTX_FMT_ST3)
1528                vtx_size += 2;
1529        if (vtx_fmt & RADEON_SE_VTX_FMT_Q3)
1530                vtx_size++;
1531        if (vtx_fmt & RADEON_SE_VTX_FMT_Q0)
1532                vtx_size++;
1533        /* blend weight */
1534        if (vtx_fmt & (0x7 << 15))
1535                vtx_size += (vtx_fmt >> 15) & 0x7;
1536        if (vtx_fmt & RADEON_SE_VTX_FMT_N0)
1537                vtx_size += 3;
1538        if (vtx_fmt & RADEON_SE_VTX_FMT_XY1)
1539                vtx_size += 2;
1540        if (vtx_fmt & RADEON_SE_VTX_FMT_Z1)
1541                vtx_size++;
1542        if (vtx_fmt & RADEON_SE_VTX_FMT_W1)
1543                vtx_size++;
1544        if (vtx_fmt & RADEON_SE_VTX_FMT_N1)
1545                vtx_size++;
1546        if (vtx_fmt & RADEON_SE_VTX_FMT_Z)
1547                vtx_size++;
1548        return vtx_size;
1549}
1550
1551static int r100_packet0_check(struct radeon_cs_parser *p,
1552                              struct radeon_cs_packet *pkt,
1553                              unsigned idx, unsigned reg)
1554{
1555        struct radeon_bo_list *reloc;
1556        struct r100_cs_track *track;
1557        volatile uint32_t *ib;
1558        uint32_t tmp;
1559        int r;
1560        int i, face;
1561        u32 tile_flags = 0;
1562        u32 idx_value;
1563
1564        ib = p->ib.ptr;
1565        track = (struct r100_cs_track *)p->track;
1566
1567        idx_value = radeon_get_ib_value(p, idx);
1568
1569        switch (reg) {
1570        case RADEON_CRTC_GUI_TRIG_VLINE:
1571                r = r100_cs_packet_parse_vline(p);
1572                if (r) {
1573                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1574                                  idx, reg);
1575                        radeon_cs_dump_packet(p, pkt);
1576                        return r;
1577                }
1578                break;
1579                /* FIXME: only allow PACKET3 blit? easier to check for out of
1580                 * range access */
1581        case RADEON_DST_PITCH_OFFSET:
1582        case RADEON_SRC_PITCH_OFFSET:
1583                r = r100_reloc_pitch_offset(p, pkt, idx, reg);
1584                if (r)
1585                        return r;
1586                break;
1587        case RADEON_RB3D_DEPTHOFFSET:
1588                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1589                if (r) {
1590                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1591                                  idx, reg);
1592                        radeon_cs_dump_packet(p, pkt);
1593                        return r;
1594                }
1595                track->zb.robj = reloc->robj;
1596                track->zb.offset = idx_value;
1597                track->zb_dirty = true;
1598                ib[idx] = idx_value + ((u32)reloc->gpu_offset);
1599                break;
1600        case RADEON_RB3D_COLOROFFSET:
1601                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1602                if (r) {
1603                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1604                                  idx, reg);
1605                        radeon_cs_dump_packet(p, pkt);
1606                        return r;
1607                }
1608                track->cb[0].robj = reloc->robj;
1609                track->cb[0].offset = idx_value;
1610                track->cb_dirty = true;
1611                ib[idx] = idx_value + ((u32)reloc->gpu_offset);
1612                break;
1613        case RADEON_PP_TXOFFSET_0:
1614        case RADEON_PP_TXOFFSET_1:
1615        case RADEON_PP_TXOFFSET_2:
1616                i = (reg - RADEON_PP_TXOFFSET_0) / 24;
1617                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1618                if (r) {
1619                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1620                                  idx, reg);
1621                        radeon_cs_dump_packet(p, pkt);
1622                        return r;
1623                }
1624                if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1625                        if (reloc->tiling_flags & RADEON_TILING_MACRO)
1626                                tile_flags |= RADEON_TXO_MACRO_TILE;
1627                        if (reloc->tiling_flags & RADEON_TILING_MICRO)
1628                                tile_flags |= RADEON_TXO_MICRO_TILE_X2;
1629
1630                        tmp = idx_value & ~(0x7 << 2);
1631                        tmp |= tile_flags;
1632                        ib[idx] = tmp + ((u32)reloc->gpu_offset);
1633                } else
1634                        ib[idx] = idx_value + ((u32)reloc->gpu_offset);
1635                track->textures[i].robj = reloc->robj;
1636                track->tex_dirty = true;
1637                break;
1638        case RADEON_PP_CUBIC_OFFSET_T0_0:
1639        case RADEON_PP_CUBIC_OFFSET_T0_1:
1640        case RADEON_PP_CUBIC_OFFSET_T0_2:
1641        case RADEON_PP_CUBIC_OFFSET_T0_3:
1642        case RADEON_PP_CUBIC_OFFSET_T0_4:
1643                i = (reg - RADEON_PP_CUBIC_OFFSET_T0_0) / 4;
1644                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1645                if (r) {
1646                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1647                                  idx, reg);
1648                        radeon_cs_dump_packet(p, pkt);
1649                        return r;
1650                }
1651                track->textures[0].cube_info[i].offset = idx_value;
1652                ib[idx] = idx_value + ((u32)reloc->gpu_offset);
1653                track->textures[0].cube_info[i].robj = reloc->robj;
1654                track->tex_dirty = true;
1655                break;
1656        case RADEON_PP_CUBIC_OFFSET_T1_0:
1657        case RADEON_PP_CUBIC_OFFSET_T1_1:
1658        case RADEON_PP_CUBIC_OFFSET_T1_2:
1659        case RADEON_PP_CUBIC_OFFSET_T1_3:
1660        case RADEON_PP_CUBIC_OFFSET_T1_4:
1661                i = (reg - RADEON_PP_CUBIC_OFFSET_T1_0) / 4;
1662                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1663                if (r) {
1664                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1665                                  idx, reg);
1666                        radeon_cs_dump_packet(p, pkt);
1667                        return r;
1668                }
1669                track->textures[1].cube_info[i].offset = idx_value;
1670                ib[idx] = idx_value + ((u32)reloc->gpu_offset);
1671                track->textures[1].cube_info[i].robj = reloc->robj;
1672                track->tex_dirty = true;
1673                break;
1674        case RADEON_PP_CUBIC_OFFSET_T2_0:
1675        case RADEON_PP_CUBIC_OFFSET_T2_1:
1676        case RADEON_PP_CUBIC_OFFSET_T2_2:
1677        case RADEON_PP_CUBIC_OFFSET_T2_3:
1678        case RADEON_PP_CUBIC_OFFSET_T2_4:
1679                i = (reg - RADEON_PP_CUBIC_OFFSET_T2_0) / 4;
1680                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1681                if (r) {
1682                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1683                                  idx, reg);
1684                        radeon_cs_dump_packet(p, pkt);
1685                        return r;
1686                }
1687                track->textures[2].cube_info[i].offset = idx_value;
1688                ib[idx] = idx_value + ((u32)reloc->gpu_offset);
1689                track->textures[2].cube_info[i].robj = reloc->robj;
1690                track->tex_dirty = true;
1691                break;
1692        case RADEON_RE_WIDTH_HEIGHT:
1693                track->maxy = ((idx_value >> 16) & 0x7FF);
1694                track->cb_dirty = true;
1695                track->zb_dirty = true;
1696                break;
1697        case RADEON_RB3D_COLORPITCH:
1698                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1699                if (r) {
1700                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1701                                  idx, reg);
1702                        radeon_cs_dump_packet(p, pkt);
1703                        return r;
1704                }
1705                if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1706                        if (reloc->tiling_flags & RADEON_TILING_MACRO)
1707                                tile_flags |= RADEON_COLOR_TILE_ENABLE;
1708                        if (reloc->tiling_flags & RADEON_TILING_MICRO)
1709                                tile_flags |= RADEON_COLOR_MICROTILE_ENABLE;
1710
1711                        tmp = idx_value & ~(0x7 << 16);
1712                        tmp |= tile_flags;
1713                        ib[idx] = tmp;
1714                } else
1715                        ib[idx] = idx_value;
1716
1717                track->cb[0].pitch = idx_value & RADEON_COLORPITCH_MASK;
1718                track->cb_dirty = true;
1719                break;
1720        case RADEON_RB3D_DEPTHPITCH:
1721                track->zb.pitch = idx_value & RADEON_DEPTHPITCH_MASK;
1722                track->zb_dirty = true;
1723                break;
1724        case RADEON_RB3D_CNTL:
1725                switch ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f) {
1726                case 7:
1727                case 8:
1728                case 9:
1729                case 11:
1730                case 12:
1731                        track->cb[0].cpp = 1;
1732                        break;
1733                case 3:
1734                case 4:
1735                case 15:
1736                        track->cb[0].cpp = 2;
1737                        break;
1738                case 6:
1739                        track->cb[0].cpp = 4;
1740                        break;
1741                default:
1742                        DRM_ERROR("Invalid color buffer format (%d) !\n",
1743                                  ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f));
1744                        return -EINVAL;
1745                }
1746                track->z_enabled = !!(idx_value & RADEON_Z_ENABLE);
1747                track->cb_dirty = true;
1748                track->zb_dirty = true;
1749                break;
1750        case RADEON_RB3D_ZSTENCILCNTL:
1751                switch (idx_value & 0xf) {
1752                case 0:
1753                        track->zb.cpp = 2;
1754                        break;
1755                case 2:
1756                case 3:
1757                case 4:
1758                case 5:
1759                case 9:
1760                case 11:
1761                        track->zb.cpp = 4;
1762                        break;
1763                default:
1764                        break;
1765                }
1766                track->zb_dirty = true;
1767                break;
1768        case RADEON_RB3D_ZPASS_ADDR:
1769                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1770                if (r) {
1771                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1772                                  idx, reg);
1773                        radeon_cs_dump_packet(p, pkt);
1774                        return r;
1775                }
1776                ib[idx] = idx_value + ((u32)reloc->gpu_offset);
1777                break;
1778        case RADEON_PP_CNTL:
1779                {
1780                        uint32_t temp = idx_value >> 4;
1781                        for (i = 0; i < track->num_texture; i++)
1782                                track->textures[i].enabled = !!(temp & (1 << i));
1783                        track->tex_dirty = true;
1784                }
1785                break;
1786        case RADEON_SE_VF_CNTL:
1787                track->vap_vf_cntl = idx_value;
1788                break;
1789        case RADEON_SE_VTX_FMT:
1790                track->vtx_size = r100_get_vtx_size(idx_value);
1791                break;
1792        case RADEON_PP_TEX_SIZE_0:
1793        case RADEON_PP_TEX_SIZE_1:
1794        case RADEON_PP_TEX_SIZE_2:
1795                i = (reg - RADEON_PP_TEX_SIZE_0) / 8;
1796                track->textures[i].width = (idx_value & RADEON_TEX_USIZE_MASK) + 1;
1797                track->textures[i].height = ((idx_value & RADEON_TEX_VSIZE_MASK) >> RADEON_TEX_VSIZE_SHIFT) + 1;
1798                track->tex_dirty = true;
1799                break;
1800        case RADEON_PP_TEX_PITCH_0:
1801        case RADEON_PP_TEX_PITCH_1:
1802        case RADEON_PP_TEX_PITCH_2:
1803                i = (reg - RADEON_PP_TEX_PITCH_0) / 8;
1804                track->textures[i].pitch = idx_value + 32;
1805                track->tex_dirty = true;
1806                break;
1807        case RADEON_PP_TXFILTER_0:
1808        case RADEON_PP_TXFILTER_1:
1809        case RADEON_PP_TXFILTER_2:
1810                i = (reg - RADEON_PP_TXFILTER_0) / 24;
1811                track->textures[i].num_levels = ((idx_value & RADEON_MAX_MIP_LEVEL_MASK)
1812                                                 >> RADEON_MAX_MIP_LEVEL_SHIFT);
1813                tmp = (idx_value >> 23) & 0x7;
1814                if (tmp == 2 || tmp == 6)
1815                        track->textures[i].roundup_w = false;
1816                tmp = (idx_value >> 27) & 0x7;
1817                if (tmp == 2 || tmp == 6)
1818                        track->textures[i].roundup_h = false;
1819                track->tex_dirty = true;
1820                break;
1821        case RADEON_PP_TXFORMAT_0:
1822        case RADEON_PP_TXFORMAT_1:
1823        case RADEON_PP_TXFORMAT_2:
1824                i = (reg - RADEON_PP_TXFORMAT_0) / 24;
1825                if (idx_value & RADEON_TXFORMAT_NON_POWER2) {
1826                        track->textures[i].use_pitch = true;
1827                } else {
1828                        track->textures[i].use_pitch = false;
1829                        track->textures[i].width = 1 << ((idx_value & RADEON_TXFORMAT_WIDTH_MASK) >> RADEON_TXFORMAT_WIDTH_SHIFT);
1830                        track->textures[i].height = 1 << ((idx_value & RADEON_TXFORMAT_HEIGHT_MASK) >> RADEON_TXFORMAT_HEIGHT_SHIFT);
1831                }
1832                if (idx_value & RADEON_TXFORMAT_CUBIC_MAP_ENABLE)
1833                        track->textures[i].tex_coord_type = 2;
1834                switch ((idx_value & RADEON_TXFORMAT_FORMAT_MASK)) {
1835                case RADEON_TXFORMAT_I8:
1836                case RADEON_TXFORMAT_RGB332:
1837                case RADEON_TXFORMAT_Y8:
1838                        track->textures[i].cpp = 1;
1839                        track->textures[i].compress_format = R100_TRACK_COMP_NONE;
1840                        break;
1841                case RADEON_TXFORMAT_AI88:
1842                case RADEON_TXFORMAT_ARGB1555:
1843                case RADEON_TXFORMAT_RGB565:
1844                case RADEON_TXFORMAT_ARGB4444:
1845                case RADEON_TXFORMAT_VYUY422:
1846                case RADEON_TXFORMAT_YVYU422:
1847                case RADEON_TXFORMAT_SHADOW16:
1848                case RADEON_TXFORMAT_LDUDV655:
1849                case RADEON_TXFORMAT_DUDV88:
1850                        track->textures[i].cpp = 2;
1851                        track->textures[i].compress_format = R100_TRACK_COMP_NONE;
1852                        break;
1853                case RADEON_TXFORMAT_ARGB8888:
1854                case RADEON_TXFORMAT_RGBA8888:
1855                case RADEON_TXFORMAT_SHADOW32:
1856                case RADEON_TXFORMAT_LDUDUV8888:
1857                        track->textures[i].cpp = 4;
1858                        track->textures[i].compress_format = R100_TRACK_COMP_NONE;
1859                        break;
1860                case RADEON_TXFORMAT_DXT1:
1861                        track->textures[i].cpp = 1;
1862                        track->textures[i].compress_format = R100_TRACK_COMP_DXT1;
1863                        break;
1864                case RADEON_TXFORMAT_DXT23:
1865                case RADEON_TXFORMAT_DXT45:
1866                        track->textures[i].cpp = 1;
1867                        track->textures[i].compress_format = R100_TRACK_COMP_DXT35;
1868                        break;
1869                }
1870                track->textures[i].cube_info[4].width = 1 << ((idx_value >> 16) & 0xf);
1871                track->textures[i].cube_info[4].height = 1 << ((idx_value >> 20) & 0xf);
1872                track->tex_dirty = true;
1873                break;
1874        case RADEON_PP_CUBIC_FACES_0:
1875        case RADEON_PP_CUBIC_FACES_1:
1876        case RADEON_PP_CUBIC_FACES_2:
1877                tmp = idx_value;
1878                i = (reg - RADEON_PP_CUBIC_FACES_0) / 4;
1879                for (face = 0; face < 4; face++) {
1880                        track->textures[i].cube_info[face].width = 1 << ((tmp >> (face * 8)) & 0xf);
1881                        track->textures[i].cube_info[face].height = 1 << ((tmp >> ((face * 8) + 4)) & 0xf);
1882                }
1883                track->tex_dirty = true;
1884                break;
1885        default:
1886                pr_err("Forbidden register 0x%04X in cs at %d\n", reg, idx);
1887                return -EINVAL;
1888        }
1889        return 0;
1890}
1891
1892int r100_cs_track_check_pkt3_indx_buffer(struct radeon_cs_parser *p,
1893                                         struct radeon_cs_packet *pkt,
1894                                         struct radeon_bo *robj)
1895{
1896        unsigned idx;
1897        u32 value;
1898        idx = pkt->idx + 1;
1899        value = radeon_get_ib_value(p, idx + 2);
1900        if ((value + 1) > radeon_bo_size(robj)) {
1901                DRM_ERROR("[drm] Buffer too small for PACKET3 INDX_BUFFER "
1902                          "(need %u have %lu) !\n",
1903                          value + 1,
1904                          radeon_bo_size(robj));
1905                return -EINVAL;
1906        }
1907        return 0;
1908}
1909
1910static int r100_packet3_check(struct radeon_cs_parser *p,
1911                              struct radeon_cs_packet *pkt)
1912{
1913        struct radeon_bo_list *reloc;
1914        struct r100_cs_track *track;
1915        unsigned idx;
1916        volatile uint32_t *ib;
1917        int r;
1918
1919        ib = p->ib.ptr;
1920        idx = pkt->idx + 1;
1921        track = (struct r100_cs_track *)p->track;
1922        switch (pkt->opcode) {
1923        case PACKET3_3D_LOAD_VBPNTR:
1924                r = r100_packet3_load_vbpntr(p, pkt, idx);
1925                if (r)
1926                        return r;
1927                break;
1928        case PACKET3_INDX_BUFFER:
1929                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1930                if (r) {
1931                        DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
1932                        radeon_cs_dump_packet(p, pkt);
1933                        return r;
1934                }
1935                ib[idx+1] = radeon_get_ib_value(p, idx+1) + ((u32)reloc->gpu_offset);
1936                r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj);
1937                if (r) {
1938                        return r;
1939                }
1940                break;
1941        case 0x23:
1942                /* 3D_RNDR_GEN_INDX_PRIM on r100/r200 */
1943                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1944                if (r) {
1945                        DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
1946                        radeon_cs_dump_packet(p, pkt);
1947                        return r;
1948                }
1949                ib[idx] = radeon_get_ib_value(p, idx) + ((u32)reloc->gpu_offset);
1950                track->num_arrays = 1;
1951                track->vtx_size = r100_get_vtx_size(radeon_get_ib_value(p, idx + 2));
1952
1953                track->arrays[0].robj = reloc->robj;
1954                track->arrays[0].esize = track->vtx_size;
1955
1956                track->max_indx = radeon_get_ib_value(p, idx+1);
1957
1958                track->vap_vf_cntl = radeon_get_ib_value(p, idx+3);
1959                track->immd_dwords = pkt->count - 1;
1960                r = r100_cs_track_check(p->rdev, track);
1961                if (r)
1962                        return r;
1963                break;
1964        case PACKET3_3D_DRAW_IMMD:
1965                if (((radeon_get_ib_value(p, idx + 1) >> 4) & 0x3) != 3) {
1966                        DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
1967                        return -EINVAL;
1968                }
1969                track->vtx_size = r100_get_vtx_size(radeon_get_ib_value(p, idx + 0));
1970                track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
1971                track->immd_dwords = pkt->count - 1;
1972                r = r100_cs_track_check(p->rdev, track);
1973                if (r)
1974                        return r;
1975                break;
1976                /* triggers drawing using in-packet vertex data */
1977        case PACKET3_3D_DRAW_IMMD_2:
1978                if (((radeon_get_ib_value(p, idx) >> 4) & 0x3) != 3) {
1979                        DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
1980                        return -EINVAL;
1981                }
1982                track->vap_vf_cntl = radeon_get_ib_value(p, idx);
1983                track->immd_dwords = pkt->count;
1984                r = r100_cs_track_check(p->rdev, track);
1985                if (r)
1986                        return r;
1987                break;
1988                /* triggers drawing using in-packet vertex data */
1989        case PACKET3_3D_DRAW_VBUF_2:
1990                track->vap_vf_cntl = radeon_get_ib_value(p, idx);
1991                r = r100_cs_track_check(p->rdev, track);
1992                if (r)
1993                        return r;
1994                break;
1995                /* triggers drawing of vertex buffers setup elsewhere */
1996        case PACKET3_3D_DRAW_INDX_2:
1997                track->vap_vf_cntl = radeon_get_ib_value(p, idx);
1998                r = r100_cs_track_check(p->rdev, track);
1999                if (r)
2000                        return r;
2001                break;
2002                /* triggers drawing using indices to vertex buffer */
2003        case PACKET3_3D_DRAW_VBUF:
2004                track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
2005                r = r100_cs_track_check(p->rdev, track);
2006                if (r)
2007                        return r;
2008                break;
2009                /* triggers drawing of vertex buffers setup elsewhere */
2010        case PACKET3_3D_DRAW_INDX:
2011                track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
2012                r = r100_cs_track_check(p->rdev, track);
2013                if (r)
2014                        return r;
2015                break;
2016                /* triggers drawing using indices to vertex buffer */
2017        case PACKET3_3D_CLEAR_HIZ:
2018        case PACKET3_3D_CLEAR_ZMASK:
2019                if (p->rdev->hyperz_filp != p->filp)
2020                        return -EINVAL;
2021                break;
2022        case PACKET3_NOP:
2023                break;
2024        default:
2025                DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
2026                return -EINVAL;
2027        }
2028        return 0;
2029}
2030
2031int r100_cs_parse(struct radeon_cs_parser *p)
2032{
2033        struct radeon_cs_packet pkt;
2034        struct r100_cs_track *track;
2035        int r;
2036
2037        track = kzalloc(sizeof(*track), GFP_KERNEL);
2038        if (!track)
2039                return -ENOMEM;
2040        r100_cs_track_clear(p->rdev, track);
2041        p->track = track;
2042        do {
2043                r = radeon_cs_packet_parse(p, &pkt, p->idx);
2044                if (r) {
2045                        return r;
2046                }
2047                p->idx += pkt.count + 2;
2048                switch (pkt.type) {
2049                case RADEON_PACKET_TYPE0:
2050                        if (p->rdev->family >= CHIP_R200)
2051                                r = r100_cs_parse_packet0(p, &pkt,
2052                                        p->rdev->config.r100.reg_safe_bm,
2053                                        p->rdev->config.r100.reg_safe_bm_size,
2054                                        &r200_packet0_check);
2055                        else
2056                                r = r100_cs_parse_packet0(p, &pkt,
2057                                        p->rdev->config.r100.reg_safe_bm,
2058                                        p->rdev->config.r100.reg_safe_bm_size,
2059                                        &r100_packet0_check);
2060                        break;
2061                case RADEON_PACKET_TYPE2:
2062                        break;
2063                case RADEON_PACKET_TYPE3:
2064                        r = r100_packet3_check(p, &pkt);
2065                        break;
2066                default:
2067                        DRM_ERROR("Unknown packet type %d !\n",
2068                                  pkt.type);
2069                        return -EINVAL;
2070                }
2071                if (r)
2072                        return r;
2073        } while (p->idx < p->chunk_ib->length_dw);
2074        return 0;
2075}
2076
2077static void r100_cs_track_texture_print(struct r100_cs_track_texture *t)
2078{
2079        DRM_ERROR("pitch                      %d\n", t->pitch);
2080        DRM_ERROR("use_pitch                  %d\n", t->use_pitch);
2081        DRM_ERROR("width                      %d\n", t->width);
2082        DRM_ERROR("width_11                   %d\n", t->width_11);
2083        DRM_ERROR("height                     %d\n", t->height);
2084        DRM_ERROR("height_11                  %d\n", t->height_11);
2085        DRM_ERROR("num levels                 %d\n", t->num_levels);
2086        DRM_ERROR("depth                      %d\n", t->txdepth);
2087        DRM_ERROR("bpp                        %d\n", t->cpp);
2088        DRM_ERROR("coordinate type            %d\n", t->tex_coord_type);
2089        DRM_ERROR("width round to power of 2  %d\n", t->roundup_w);
2090        DRM_ERROR("height round to power of 2 %d\n", t->roundup_h);
2091        DRM_ERROR("compress format            %d\n", t->compress_format);
2092}
2093
2094static int r100_track_compress_size(int compress_format, int w, int h)
2095{
2096        int block_width, block_height, block_bytes;
2097        int wblocks, hblocks;
2098        int min_wblocks;
2099        int sz;
2100
2101        block_width = 4;
2102        block_height = 4;
2103
2104        switch (compress_format) {
2105        case R100_TRACK_COMP_DXT1:
2106                block_bytes = 8;
2107                min_wblocks = 4;
2108                break;
2109        default:
2110        case R100_TRACK_COMP_DXT35:
2111                block_bytes = 16;
2112                min_wblocks = 2;
2113                break;
2114        }
2115
2116        hblocks = (h + block_height - 1) / block_height;
2117        wblocks = (w + block_width - 1) / block_width;
2118        if (wblocks < min_wblocks)
2119                wblocks = min_wblocks;
2120        sz = wblocks * hblocks * block_bytes;
2121        return sz;
2122}
2123
2124static int r100_cs_track_cube(struct radeon_device *rdev,
2125                              struct r100_cs_track *track, unsigned idx)
2126{
2127        unsigned face, w, h;
2128        struct radeon_bo *cube_robj;
2129        unsigned long size;
2130        unsigned compress_format = track->textures[idx].compress_format;
2131
2132        for (face = 0; face < 5; face++) {
2133                cube_robj = track->textures[idx].cube_info[face].robj;
2134                w = track->textures[idx].cube_info[face].width;
2135                h = track->textures[idx].cube_info[face].height;
2136
2137                if (compress_format) {
2138                        size = r100_track_compress_size(compress_format, w, h);
2139                } else
2140                        size = w * h;
2141                size *= track->textures[idx].cpp;
2142
2143                size += track->textures[idx].cube_info[face].offset;
2144
2145                if (size > radeon_bo_size(cube_robj)) {
2146                        DRM_ERROR("Cube texture offset greater than object size %lu %lu\n",
2147                                  size, radeon_bo_size(cube_robj));
2148                        r100_cs_track_texture_print(&track->textures[idx]);
2149                        return -1;
2150                }
2151        }
2152        return 0;
2153}
2154
2155static int r100_cs_track_texture_check(struct radeon_device *rdev,
2156                                       struct r100_cs_track *track)
2157{
2158        struct radeon_bo *robj;
2159        unsigned long size;
2160        unsigned u, i, w, h, d;
2161        int ret;
2162
2163        for (u = 0; u < track->num_texture; u++) {
2164                if (!track->textures[u].enabled)
2165                        continue;
2166                if (track->textures[u].lookup_disable)
2167                        continue;
2168                robj = track->textures[u].robj;
2169                if (robj == NULL) {
2170                        DRM_ERROR("No texture bound to unit %u\n", u);
2171                        return -EINVAL;
2172                }
2173                size = 0;
2174                for (i = 0; i <= track->textures[u].num_levels; i++) {
2175                        if (track->textures[u].use_pitch) {
2176                                if (rdev->family < CHIP_R300)
2177                                        w = (track->textures[u].pitch / track->textures[u].cpp) / (1 << i);
2178                                else
2179                                        w = track->textures[u].pitch / (1 << i);
2180                        } else {
2181                                w = track->textures[u].width;
2182                                if (rdev->family >= CHIP_RV515)
2183                                        w |= track->textures[u].width_11;
2184                                w = w / (1 << i);
2185                                if (track->textures[u].roundup_w)
2186                                        w = roundup_pow_of_two(w);
2187                        }
2188                        h = track->textures[u].height;
2189                        if (rdev->family >= CHIP_RV515)
2190                                h |= track->textures[u].height_11;
2191                        h = h / (1 << i);
2192                        if (track->textures[u].roundup_h)
2193                                h = roundup_pow_of_two(h);
2194                        if (track->textures[u].tex_coord_type == 1) {
2195                                d = (1 << track->textures[u].txdepth) / (1 << i);
2196                                if (!d)
2197                                        d = 1;
2198                        } else {
2199                                d = 1;
2200                        }
2201                        if (track->textures[u].compress_format) {
2202
2203                                size += r100_track_compress_size(track->textures[u].compress_format, w, h) * d;
2204                                /* compressed textures are block based */
2205                        } else
2206                                size += w * h * d;
2207                }
2208                size *= track->textures[u].cpp;
2209
2210                switch (track->textures[u].tex_coord_type) {
2211                case 0:
2212                case 1:
2213                        break;
2214                case 2:
2215                        if (track->separate_cube) {
2216                                ret = r100_cs_track_cube(rdev, track, u);
2217                                if (ret)
2218                                        return ret;
2219                        } else
2220                                size *= 6;
2221                        break;
2222                default:
2223                        DRM_ERROR("Invalid texture coordinate type %u for unit "
2224                                  "%u\n", track->textures[u].tex_coord_type, u);
2225                        return -EINVAL;
2226                }
2227                if (size > radeon_bo_size(robj)) {
2228                        DRM_ERROR("Texture of unit %u needs %lu bytes but is "
2229                                  "%lu\n", u, size, radeon_bo_size(robj));
2230                        r100_cs_track_texture_print(&track->textures[u]);
2231                        return -EINVAL;
2232                }
2233        }
2234        return 0;
2235}
2236
2237int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
2238{
2239        unsigned i;
2240        unsigned long size;
2241        unsigned prim_walk;
2242        unsigned nverts;
2243        unsigned num_cb = track->cb_dirty ? track->num_cb : 0;
2244
2245        if (num_cb && !track->zb_cb_clear && !track->color_channel_mask &&
2246            !track->blend_read_enable)
2247                num_cb = 0;
2248
2249        for (i = 0; i < num_cb; i++) {
2250                if (track->cb[i].robj == NULL) {
2251                        DRM_ERROR("[drm] No buffer for color buffer %d !\n", i);
2252                        return -EINVAL;
2253                }
2254                size = track->cb[i].pitch * track->cb[i].cpp * track->maxy;
2255                size += track->cb[i].offset;
2256                if (size > radeon_bo_size(track->cb[i].robj)) {
2257                        DRM_ERROR("[drm] Buffer too small for color buffer %d "
2258                                  "(need %lu have %lu) !\n", i, size,
2259                                  radeon_bo_size(track->cb[i].robj));
2260                        DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n",
2261                                  i, track->cb[i].pitch, track->cb[i].cpp,
2262                                  track->cb[i].offset, track->maxy);
2263                        return -EINVAL;
2264                }
2265        }
2266        track->cb_dirty = false;
2267
2268        if (track->zb_dirty && track->z_enabled) {
2269                if (track->zb.robj == NULL) {
2270                        DRM_ERROR("[drm] No buffer for z buffer !\n");
2271                        return -EINVAL;
2272                }
2273                size = track->zb.pitch * track->zb.cpp * track->maxy;
2274                size += track->zb.offset;
2275                if (size > radeon_bo_size(track->zb.robj)) {
2276                        DRM_ERROR("[drm] Buffer too small for z buffer "
2277                                  "(need %lu have %lu) !\n", size,
2278                                  radeon_bo_size(track->zb.robj));
2279                        DRM_ERROR("[drm] zbuffer (%u %u %u %u)\n",
2280                                  track->zb.pitch, track->zb.cpp,
2281                                  track->zb.offset, track->maxy);
2282                        return -EINVAL;
2283                }
2284        }
2285        track->zb_dirty = false;
2286
2287        if (track->aa_dirty && track->aaresolve) {
2288                if (track->aa.robj == NULL) {
2289                        DRM_ERROR("[drm] No buffer for AA resolve buffer %d !\n", i);
2290                        return -EINVAL;
2291                }
2292                /* I believe the format comes from colorbuffer0. */
2293                size = track->aa.pitch * track->cb[0].cpp * track->maxy;
2294                size += track->aa.offset;
2295                if (size > radeon_bo_size(track->aa.robj)) {
2296                        DRM_ERROR("[drm] Buffer too small for AA resolve buffer %d "
2297                                  "(need %lu have %lu) !\n", i, size,
2298                                  radeon_bo_size(track->aa.robj));
2299                        DRM_ERROR("[drm] AA resolve buffer %d (%u %u %u %u)\n",
2300                                  i, track->aa.pitch, track->cb[0].cpp,
2301                                  track->aa.offset, track->maxy);
2302                        return -EINVAL;
2303                }
2304        }
2305        track->aa_dirty = false;
2306
2307        prim_walk = (track->vap_vf_cntl >> 4) & 0x3;
2308        if (track->vap_vf_cntl & (1 << 14)) {
2309                nverts = track->vap_alt_nverts;
2310        } else {
2311                nverts = (track->vap_vf_cntl >> 16) & 0xFFFF;
2312        }
2313        switch (prim_walk) {
2314        case 1:
2315                for (i = 0; i < track->num_arrays; i++) {
2316                        size = track->arrays[i].esize * track->max_indx * 4;
2317                        if (track->arrays[i].robj == NULL) {
2318                                DRM_ERROR("(PW %u) Vertex array %u no buffer "
2319                                          "bound\n", prim_walk, i);
2320                                return -EINVAL;
2321                        }
2322                        if (size > radeon_bo_size(track->arrays[i].robj)) {
2323                                dev_err(rdev->dev, "(PW %u) Vertex array %u "
2324                                        "need %lu dwords have %lu dwords\n",
2325                                        prim_walk, i, size >> 2,
2326                                        radeon_bo_size(track->arrays[i].robj)
2327                                        >> 2);
2328                                DRM_ERROR("Max indices %u\n", track->max_indx);
2329                                return -EINVAL;
2330                        }
2331                }
2332                break;
2333        case 2:
2334                for (i = 0; i < track->num_arrays; i++) {
2335                        size = track->arrays[i].esize * (nverts - 1) * 4;
2336                        if (track->arrays[i].robj == NULL) {
2337                                DRM_ERROR("(PW %u) Vertex array %u no buffer "
2338                                          "bound\n", prim_walk, i);
2339                                return -EINVAL;
2340                        }
2341                        if (size > radeon_bo_size(track->arrays[i].robj)) {
2342                                dev_err(rdev->dev, "(PW %u) Vertex array %u "
2343                                        "need %lu dwords have %lu dwords\n",
2344                                        prim_walk, i, size >> 2,
2345                                        radeon_bo_size(track->arrays[i].robj)
2346                                        >> 2);
2347                                return -EINVAL;
2348                        }
2349                }
2350                break;
2351        case 3:
2352                size = track->vtx_size * nverts;
2353                if (size != track->immd_dwords) {
2354                        DRM_ERROR("IMMD draw %u dwors but needs %lu dwords\n",
2355                                  track->immd_dwords, size);
2356                        DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n",
2357                                  nverts, track->vtx_size);
2358                        return -EINVAL;
2359                }
2360                break;
2361        default:
2362                DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n",
2363                          prim_walk);
2364                return -EINVAL;
2365        }
2366
2367        if (track->tex_dirty) {
2368                track->tex_dirty = false;
2369                return r100_cs_track_texture_check(rdev, track);
2370        }
2371        return 0;
2372}
2373
2374void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track)
2375{
2376        unsigned i, face;
2377
2378        track->cb_dirty = true;
2379        track->zb_dirty = true;
2380        track->tex_dirty = true;
2381        track->aa_dirty = true;
2382
2383        if (rdev->family < CHIP_R300) {
2384                track->num_cb = 1;
2385                if (rdev->family <= CHIP_RS200)
2386                        track->num_texture = 3;
2387                else
2388                        track->num_texture = 6;
2389                track->maxy = 2048;
2390                track->separate_cube = true;
2391        } else {
2392                track->num_cb = 4;
2393                track->num_texture = 16;
2394                track->maxy = 4096;
2395                track->separate_cube = false;
2396                track->aaresolve = false;
2397                track->aa.robj = NULL;
2398        }
2399
2400        for (i = 0; i < track->num_cb; i++) {
2401                track->cb[i].robj = NULL;
2402                track->cb[i].pitch = 8192;
2403                track->cb[i].cpp = 16;
2404                track->cb[i].offset = 0;
2405        }
2406        track->z_enabled = true;
2407        track->zb.robj = NULL;
2408        track->zb.pitch = 8192;
2409        track->zb.cpp = 4;
2410        track->zb.offset = 0;
2411        track->vtx_size = 0x7F;
2412        track->immd_dwords = 0xFFFFFFFFUL;
2413        track->num_arrays = 11;
2414        track->max_indx = 0x00FFFFFFUL;
2415        for (i = 0; i < track->num_arrays; i++) {
2416                track->arrays[i].robj = NULL;
2417                track->arrays[i].esize = 0x7F;
2418        }
2419        for (i = 0; i < track->num_texture; i++) {
2420                track->textures[i].compress_format = R100_TRACK_COMP_NONE;
2421                track->textures[i].pitch = 16536;
2422                track->textures[i].width = 16536;
2423                track->textures[i].height = 16536;
2424                track->textures[i].width_11 = 1 << 11;
2425                track->textures[i].height_11 = 1 << 11;
2426                track->textures[i].num_levels = 12;
2427                if (rdev->family <= CHIP_RS200) {
2428                        track->textures[i].tex_coord_type = 0;
2429                        track->textures[i].txdepth = 0;
2430                } else {
2431                        track->textures[i].txdepth = 16;
2432                        track->textures[i].tex_coord_type = 1;
2433                }
2434                track->textures[i].cpp = 64;
2435                track->textures[i].robj = NULL;
2436                /* CS IB emission code makes sure texture unit are disabled */
2437                track->textures[i].enabled = false;
2438                track->textures[i].lookup_disable = false;
2439                track->textures[i].roundup_w = true;
2440                track->textures[i].roundup_h = true;
2441                if (track->separate_cube)
2442                        for (face = 0; face < 5; face++) {
2443                                track->textures[i].cube_info[face].robj = NULL;
2444                                track->textures[i].cube_info[face].width = 16536;
2445                                track->textures[i].cube_info[face].height = 16536;
2446                                track->textures[i].cube_info[face].offset = 0;
2447                        }
2448        }
2449}
2450
2451/*
2452 * Global GPU functions
2453 */
2454static void r100_errata(struct radeon_device *rdev)
2455{
2456        rdev->pll_errata = 0;
2457
2458        if (rdev->family == CHIP_RV200 || rdev->family == CHIP_RS200) {
2459                rdev->pll_errata |= CHIP_ERRATA_PLL_DUMMYREADS;
2460        }
2461
2462        if (rdev->family == CHIP_RV100 ||
2463            rdev->family == CHIP_RS100 ||
2464            rdev->family == CHIP_RS200) {
2465                rdev->pll_errata |= CHIP_ERRATA_PLL_DELAY;
2466        }
2467}
2468
2469static int r100_rbbm_fifo_wait_for_entry(struct radeon_device *rdev, unsigned n)
2470{
2471        unsigned i;
2472        uint32_t tmp;
2473
2474        for (i = 0; i < rdev->usec_timeout; i++) {
2475                tmp = RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK;
2476                if (tmp >= n) {
2477                        return 0;
2478                }
2479                udelay(1);
2480        }
2481        return -1;
2482}
2483
2484int r100_gui_wait_for_idle(struct radeon_device *rdev)
2485{
2486        unsigned i;
2487        uint32_t tmp;
2488
2489        if (r100_rbbm_fifo_wait_for_entry(rdev, 64)) {
2490                pr_warn("radeon: wait for empty RBBM fifo failed! Bad things might happen.\n");
2491        }
2492        for (i = 0; i < rdev->usec_timeout; i++) {
2493                tmp = RREG32(RADEON_RBBM_STATUS);
2494                if (!(tmp & RADEON_RBBM_ACTIVE)) {
2495                        return 0;
2496                }
2497                udelay(1);
2498        }
2499        return -1;
2500}
2501
2502int r100_mc_wait_for_idle(struct radeon_device *rdev)
2503{
2504        unsigned i;
2505        uint32_t tmp;
2506
2507        for (i = 0; i < rdev->usec_timeout; i++) {
2508                /* read MC_STATUS */
2509                tmp = RREG32(RADEON_MC_STATUS);
2510                if (tmp & RADEON_MC_IDLE) {
2511                        return 0;
2512                }
2513                udelay(1);
2514        }
2515        return -1;
2516}
2517
2518bool r100_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
2519{
2520        u32 rbbm_status;
2521
2522        rbbm_status = RREG32(R_000E40_RBBM_STATUS);
2523        if (!G_000E40_GUI_ACTIVE(rbbm_status)) {
2524                radeon_ring_lockup_update(rdev, ring);
2525                return false;
2526        }
2527        return radeon_ring_test_lockup(rdev, ring);
2528}
2529
2530/* required on r1xx, r2xx, r300, r(v)350, r420/r481, rs400/rs480 */
2531void r100_enable_bm(struct radeon_device *rdev)
2532{
2533        uint32_t tmp;
2534        /* Enable bus mastering */
2535        tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS;
2536        WREG32(RADEON_BUS_CNTL, tmp);
2537}
2538
2539void r100_bm_disable(struct radeon_device *rdev)
2540{
2541        u32 tmp;
2542
2543        /* disable bus mastering */
2544        tmp = RREG32(R_000030_BUS_CNTL);
2545        WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000044);
2546        mdelay(1);
2547        WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000042);
2548        mdelay(1);
2549        WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000040);
2550        tmp = RREG32(RADEON_BUS_CNTL);
2551        mdelay(1);
2552        pci_clear_master(rdev->pdev);
2553        mdelay(1);
2554}
2555
2556int r100_asic_reset(struct radeon_device *rdev, bool hard)
2557{
2558        struct r100_mc_save save;
2559        u32 status, tmp;
2560        int ret = 0;
2561
2562        status = RREG32(R_000E40_RBBM_STATUS);
2563        if (!G_000E40_GUI_ACTIVE(status)) {
2564                return 0;
2565        }
2566        r100_mc_stop(rdev, &save);
2567        status = RREG32(R_000E40_RBBM_STATUS);
2568        dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
2569        /* stop CP */
2570        WREG32(RADEON_CP_CSQ_CNTL, 0);
2571        tmp = RREG32(RADEON_CP_RB_CNTL);
2572        WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA);
2573        WREG32(RADEON_CP_RB_RPTR_WR, 0);
2574        WREG32(RADEON_CP_RB_WPTR, 0);
2575        WREG32(RADEON_CP_RB_CNTL, tmp);
2576        /* save PCI state */
2577        pci_save_state(rdev->pdev);
2578        /* disable bus mastering */
2579        r100_bm_disable(rdev);
2580        WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_SE(1) |
2581                                        S_0000F0_SOFT_RESET_RE(1) |
2582                                        S_0000F0_SOFT_RESET_PP(1) |
2583                                        S_0000F0_SOFT_RESET_RB(1));
2584        RREG32(R_0000F0_RBBM_SOFT_RESET);
2585        mdelay(500);
2586        WREG32(R_0000F0_RBBM_SOFT_RESET, 0);
2587        mdelay(1);
2588        status = RREG32(R_000E40_RBBM_STATUS);
2589        dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
2590        /* reset CP */
2591        WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_CP(1));
2592        RREG32(R_0000F0_RBBM_SOFT_RESET);
2593        mdelay(500);
2594        WREG32(R_0000F0_RBBM_SOFT_RESET, 0);
2595        mdelay(1);
2596        status = RREG32(R_000E40_RBBM_STATUS);
2597        dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
2598        /* restore PCI & busmastering */
2599        pci_restore_state(rdev->pdev);
2600        r100_enable_bm(rdev);
2601        /* Check if GPU is idle */
2602        if (G_000E40_SE_BUSY(status) || G_000E40_RE_BUSY(status) ||
2603                G_000E40_TAM_BUSY(status) || G_000E40_PB_BUSY(status)) {
2604                dev_err(rdev->dev, "failed to reset GPU\n");
2605                ret = -1;
2606        } else
2607                dev_info(rdev->dev, "GPU reset succeed\n");
2608        r100_mc_resume(rdev, &save);
2609        return ret;
2610}
2611
2612void r100_set_common_regs(struct radeon_device *rdev)
2613{
2614        struct drm_device *dev = rdev->ddev;
2615        bool force_dac2 = false;
2616        u32 tmp;
2617
2618        /* set these so they don't interfere with anything */
2619        WREG32(RADEON_OV0_SCALE_CNTL, 0);
2620        WREG32(RADEON_SUBPIC_CNTL, 0);
2621        WREG32(RADEON_VIPH_CONTROL, 0);
2622        WREG32(RADEON_I2C_CNTL_1, 0);
2623        WREG32(RADEON_DVI_I2C_CNTL_1, 0);
2624        WREG32(RADEON_CAP0_TRIG_CNTL, 0);
2625        WREG32(RADEON_CAP1_TRIG_CNTL, 0);
2626
2627        /* always set up dac2 on rn50 and some rv100 as lots
2628         * of servers seem to wire it up to a VGA port but
2629         * don't report it in the bios connector
2630         * table.
2631         */
2632        switch (dev->pdev->device) {
2633                /* RN50 */
2634        case 0x515e:
2635        case 0x5969:
2636                force_dac2 = true;
2637                break;
2638                /* RV100*/
2639        case 0x5159:
2640        case 0x515a:
2641                /* DELL triple head servers */
2642                if ((dev->pdev->subsystem_vendor == 0x1028 /* DELL */) &&
2643                    ((dev->pdev->subsystem_device == 0x016c) ||
2644                     (dev->pdev->subsystem_device == 0x016d) ||
2645                     (dev->pdev->subsystem_device == 0x016e) ||
2646                     (dev->pdev->subsystem_device == 0x016f) ||
2647                     (dev->pdev->subsystem_device == 0x0170) ||
2648                     (dev->pdev->subsystem_device == 0x017d) ||
2649                     (dev->pdev->subsystem_device == 0x017e) ||
2650                     (dev->pdev->subsystem_device == 0x0183) ||
2651                     (dev->pdev->subsystem_device == 0x018a) ||
2652                     (dev->pdev->subsystem_device == 0x019a)))
2653                        force_dac2 = true;
2654                break;
2655        }
2656
2657        if (force_dac2) {
2658                u32 disp_hw_debug = RREG32(RADEON_DISP_HW_DEBUG);
2659                u32 tv_dac_cntl = RREG32(RADEON_TV_DAC_CNTL);
2660                u32 dac2_cntl = RREG32(RADEON_DAC_CNTL2);
2661
2662                /* For CRT on DAC2, don't turn it on if BIOS didn't
2663                   enable it, even it's detected.
2664                */
2665
2666                /* force it to crtc0 */
2667                dac2_cntl &= ~RADEON_DAC2_DAC_CLK_SEL;
2668                dac2_cntl |= RADEON_DAC2_DAC2_CLK_SEL;
2669                disp_hw_debug |= RADEON_CRT2_DISP1_SEL;
2670
2671                /* set up the TV DAC */
2672                tv_dac_cntl &= ~(RADEON_TV_DAC_PEDESTAL |
2673                                 RADEON_TV_DAC_STD_MASK |
2674                                 RADEON_TV_DAC_RDACPD |
2675                                 RADEON_TV_DAC_GDACPD |
2676                                 RADEON_TV_DAC_BDACPD |
2677                                 RADEON_TV_DAC_BGADJ_MASK |
2678                                 RADEON_TV_DAC_DACADJ_MASK);
2679                tv_dac_cntl |= (RADEON_TV_DAC_NBLANK |
2680                                RADEON_TV_DAC_NHOLD |
2681                                RADEON_TV_DAC_STD_PS2 |
2682                                (0x58 << 16));
2683
2684                WREG32(RADEON_TV_DAC_CNTL, tv_dac_cntl);
2685                WREG32(RADEON_DISP_HW_DEBUG, disp_hw_debug);
2686                WREG32(RADEON_DAC_CNTL2, dac2_cntl);
2687        }
2688
2689        /* switch PM block to ACPI mode */
2690        tmp = RREG32_PLL(RADEON_PLL_PWRMGT_CNTL);
2691        tmp &= ~RADEON_PM_MODE_SEL;
2692        WREG32_PLL(RADEON_PLL_PWRMGT_CNTL, tmp);
2693
2694}
2695
2696/*
2697 * VRAM info
2698 */
2699static void r100_vram_get_type(struct radeon_device *rdev)
2700{
2701        uint32_t tmp;
2702
2703        rdev->mc.vram_is_ddr = false;
2704        if (rdev->flags & RADEON_IS_IGP)
2705                rdev->mc.vram_is_ddr = true;
2706        else if (RREG32(RADEON_MEM_SDRAM_MODE_REG) & RADEON_MEM_CFG_TYPE_DDR)
2707                rdev->mc.vram_is_ddr = true;
2708        if ((rdev->family == CHIP_RV100) ||
2709            (rdev->family == CHIP_RS100) ||
2710            (rdev->family == CHIP_RS200)) {
2711                tmp = RREG32(RADEON_MEM_CNTL);
2712                if (tmp & RV100_HALF_MODE) {
2713                        rdev->mc.vram_width = 32;
2714                } else {
2715                        rdev->mc.vram_width = 64;
2716                }
2717                if (rdev->flags & RADEON_SINGLE_CRTC) {
2718                        rdev->mc.vram_width /= 4;
2719                        rdev->mc.vram_is_ddr = true;
2720                }
2721        } else if (rdev->family <= CHIP_RV280) {
2722                tmp = RREG32(RADEON_MEM_CNTL);
2723                if (tmp & RADEON_MEM_NUM_CHANNELS_MASK) {
2724                        rdev->mc.vram_width = 128;
2725                } else {
2726                        rdev->mc.vram_width = 64;
2727                }
2728        } else {
2729                /* newer IGPs */
2730                rdev->mc.vram_width = 128;
2731        }
2732}
2733
2734static u32 r100_get_accessible_vram(struct radeon_device *rdev)
2735{
2736        u32 aper_size;
2737        u8 byte;
2738
2739        aper_size = RREG32(RADEON_CONFIG_APER_SIZE);
2740
2741        /* Set HDP_APER_CNTL only on cards that are known not to be broken,
2742         * that is has the 2nd generation multifunction PCI interface
2743         */
2744        if (rdev->family == CHIP_RV280 ||
2745            rdev->family >= CHIP_RV350) {
2746                WREG32_P(RADEON_HOST_PATH_CNTL, RADEON_HDP_APER_CNTL,
2747                       ~RADEON_HDP_APER_CNTL);
2748                DRM_INFO("Generation 2 PCI interface, using max accessible memory\n");
2749                return aper_size * 2;
2750        }
2751
2752        /* Older cards have all sorts of funny issues to deal with. First
2753         * check if it's a multifunction card by reading the PCI config
2754         * header type... Limit those to one aperture size
2755         */
2756        pci_read_config_byte(rdev->pdev, 0xe, &byte);
2757        if (byte & 0x80) {
2758                DRM_INFO("Generation 1 PCI interface in multifunction mode\n");
2759                DRM_INFO("Limiting VRAM to one aperture\n");
2760                return aper_size;
2761        }
2762
2763        /* Single function older card. We read HDP_APER_CNTL to see how the BIOS
2764         * have set it up. We don't write this as it's broken on some ASICs but
2765         * we expect the BIOS to have done the right thing (might be too optimistic...)
2766         */
2767        if (RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL)
2768                return aper_size * 2;
2769        return aper_size;
2770}
2771
2772void r100_vram_init_sizes(struct radeon_device *rdev)
2773{
2774        u64 config_aper_size;
2775
2776        /* work out accessible VRAM */
2777        rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
2778        rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
2779        rdev->mc.visible_vram_size = r100_get_accessible_vram(rdev);
2780        /* FIXME we don't use the second aperture yet when we could use it */
2781        if (rdev->mc.visible_vram_size > rdev->mc.aper_size)
2782                rdev->mc.visible_vram_size = rdev->mc.aper_size;
2783        config_aper_size = RREG32(RADEON_CONFIG_APER_SIZE);
2784        if (rdev->flags & RADEON_IS_IGP) {
2785                uint32_t tom;
2786                /* read NB_TOM to get the amount of ram stolen for the GPU */
2787                tom = RREG32(RADEON_NB_TOM);
2788                rdev->mc.real_vram_size = (((tom >> 16) - (tom & 0xffff) + 1) << 16);
2789                WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
2790                rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
2791        } else {
2792                rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
2793                /* Some production boards of m6 will report 0
2794                 * if it's 8 MB
2795                 */
2796                if (rdev->mc.real_vram_size == 0) {
2797                        rdev->mc.real_vram_size = 8192 * 1024;
2798                        WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
2799                }
2800                /* Fix for RN50, M6, M7 with 8/16/32(??) MBs of VRAM - 
2801                 * Novell bug 204882 + along with lots of ubuntu ones
2802                 */
2803                if (rdev->mc.aper_size > config_aper_size)
2804                        config_aper_size = rdev->mc.aper_size;
2805
2806                if (config_aper_size > rdev->mc.real_vram_size)
2807                        rdev->mc.mc_vram_size = config_aper_size;
2808                else
2809                        rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
2810        }
2811}
2812
2813void r100_vga_set_state(struct radeon_device *rdev, bool state)
2814{
2815        uint32_t temp;
2816
2817        temp = RREG32(RADEON_CONFIG_CNTL);
2818        if (!state) {
2819                temp &= ~RADEON_CFG_VGA_RAM_EN;
2820                temp |= RADEON_CFG_VGA_IO_DIS;
2821        } else {
2822                temp &= ~RADEON_CFG_VGA_IO_DIS;
2823        }
2824        WREG32(RADEON_CONFIG_CNTL, temp);
2825}
2826
2827static void r100_mc_init(struct radeon_device *rdev)
2828{
2829        u64 base;
2830
2831        r100_vram_get_type(rdev);
2832        r100_vram_init_sizes(rdev);
2833        base = rdev->mc.aper_base;
2834        if (rdev->flags & RADEON_IS_IGP)
2835                base = (RREG32(RADEON_NB_TOM) & 0xffff) << 16;
2836        radeon_vram_location(rdev, &rdev->mc, base);
2837        rdev->mc.gtt_base_align = 0;
2838        if (!(rdev->flags & RADEON_IS_AGP))
2839                radeon_gtt_location(rdev, &rdev->mc);
2840        radeon_update_bandwidth_info(rdev);
2841}
2842
2843
2844/*
2845 * Indirect registers accessor
2846 */
2847void r100_pll_errata_after_index(struct radeon_device *rdev)
2848{
2849        if (rdev->pll_errata & CHIP_ERRATA_PLL_DUMMYREADS) {
2850                (void)RREG32(RADEON_CLOCK_CNTL_DATA);
2851                (void)RREG32(RADEON_CRTC_GEN_CNTL);
2852        }
2853}
2854
2855static void r100_pll_errata_after_data(struct radeon_device *rdev)
2856{
2857        /* This workarounds is necessary on RV100, RS100 and RS200 chips
2858         * or the chip could hang on a subsequent access
2859         */
2860        if (rdev->pll_errata & CHIP_ERRATA_PLL_DELAY) {
2861                mdelay(5);
2862        }
2863
2864        /* This function is required to workaround a hardware bug in some (all?)
2865         * revisions of the R300.  This workaround should be called after every
2866         * CLOCK_CNTL_INDEX register access.  If not, register reads afterward
2867         * may not be correct.
2868         */
2869        if (rdev->pll_errata & CHIP_ERRATA_R300_CG) {
2870                uint32_t save, tmp;
2871
2872                save = RREG32(RADEON_CLOCK_CNTL_INDEX);
2873                tmp = save & ~(0x3f | RADEON_PLL_WR_EN);
2874                WREG32(RADEON_CLOCK_CNTL_INDEX, tmp);
2875                tmp = RREG32(RADEON_CLOCK_CNTL_DATA);
2876                WREG32(RADEON_CLOCK_CNTL_INDEX, save);
2877        }
2878}
2879
2880uint32_t r100_pll_rreg(struct radeon_device *rdev, uint32_t reg)
2881{
2882        unsigned long flags;
2883        uint32_t data;
2884
2885        spin_lock_irqsave(&rdev->pll_idx_lock, flags);
2886        WREG8(RADEON_CLOCK_CNTL_INDEX, reg & 0x3f);
2887        r100_pll_errata_after_index(rdev);
2888        data = RREG32(RADEON_CLOCK_CNTL_DATA);
2889        r100_pll_errata_after_data(rdev);
2890        spin_unlock_irqrestore(&rdev->pll_idx_lock, flags);
2891        return data;
2892}
2893
2894void r100_pll_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
2895{
2896        unsigned long flags;
2897
2898        spin_lock_irqsave(&rdev->pll_idx_lock, flags);
2899        WREG8(RADEON_CLOCK_CNTL_INDEX, ((reg & 0x3f) | RADEON_PLL_WR_EN));
2900        r100_pll_errata_after_index(rdev);
2901        WREG32(RADEON_CLOCK_CNTL_DATA, v);
2902        r100_pll_errata_after_data(rdev);
2903        spin_unlock_irqrestore(&rdev->pll_idx_lock, flags);
2904}
2905
2906static void r100_set_safe_registers(struct radeon_device *rdev)
2907{
2908        if (ASIC_IS_RN50(rdev)) {
2909                rdev->config.r100.reg_safe_bm = rn50_reg_safe_bm;
2910                rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(rn50_reg_safe_bm);
2911        } else if (rdev->family < CHIP_R200) {
2912                rdev->config.r100.reg_safe_bm = r100_reg_safe_bm;
2913                rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(r100_reg_safe_bm);
2914        } else {
2915                r200_set_safe_registers(rdev);
2916        }
2917}
2918
2919/*
2920 * Debugfs info
2921 */
2922#if defined(CONFIG_DEBUG_FS)
2923static int r100_debugfs_rbbm_info(struct seq_file *m, void *data)
2924{
2925        struct drm_info_node *node = (struct drm_info_node *) m->private;
2926        struct drm_device *dev = node->minor->dev;
2927        struct radeon_device *rdev = dev->dev_private;
2928        uint32_t reg, value;
2929        unsigned i;
2930
2931        seq_printf(m, "RBBM_STATUS 0x%08x\n", RREG32(RADEON_RBBM_STATUS));
2932        seq_printf(m, "RBBM_CMDFIFO_STAT 0x%08x\n", RREG32(0xE7C));
2933        seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
2934        for (i = 0; i < 64; i++) {
2935                WREG32(RADEON_RBBM_CMDFIFO_ADDR, i | 0x100);
2936                reg = (RREG32(RADEON_RBBM_CMDFIFO_DATA) - 1) >> 2;
2937                WREG32(RADEON_RBBM_CMDFIFO_ADDR, i);
2938                value = RREG32(RADEON_RBBM_CMDFIFO_DATA);
2939                seq_printf(m, "[0x%03X] 0x%04X=0x%08X\n", i, reg, value);
2940        }
2941        return 0;
2942}
2943
2944static int r100_debugfs_cp_ring_info(struct seq_file *m, void *data)
2945{
2946        struct drm_info_node *node = (struct drm_info_node *) m->private;
2947        struct drm_device *dev = node->minor->dev;
2948        struct radeon_device *rdev = dev->dev_private;
2949        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2950        uint32_t rdp, wdp;
2951        unsigned count, i, j;
2952
2953        radeon_ring_free_size(rdev, ring);
2954        rdp = RREG32(RADEON_CP_RB_RPTR);
2955        wdp = RREG32(RADEON_CP_RB_WPTR);
2956        count = (rdp + ring->ring_size - wdp) & ring->ptr_mask;
2957        seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
2958        seq_printf(m, "CP_RB_WPTR 0x%08x\n", wdp);
2959        seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp);
2960        seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw);
2961        seq_printf(m, "%u dwords in ring\n", count);
2962        if (ring->ready) {
2963                for (j = 0; j <= count; j++) {
2964                        i = (rdp + j) & ring->ptr_mask;
2965                        seq_printf(m, "r[%04d]=0x%08x\n", i, ring->ring[i]);
2966                }
2967        }
2968        return 0;
2969}
2970
2971
2972static int r100_debugfs_cp_csq_fifo(struct seq_file *m, void *data)
2973{
2974        struct drm_info_node *node = (struct drm_info_node *) m->private;
2975        struct drm_device *dev = node->minor->dev;
2976        struct radeon_device *rdev = dev->dev_private;
2977        uint32_t csq_stat, csq2_stat, tmp;
2978        unsigned r_rptr, r_wptr, ib1_rptr, ib1_wptr, ib2_rptr, ib2_wptr;
2979        unsigned i;
2980
2981        seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
2982        seq_printf(m, "CP_CSQ_MODE 0x%08x\n", RREG32(RADEON_CP_CSQ_MODE));
2983        csq_stat = RREG32(RADEON_CP_CSQ_STAT);
2984        csq2_stat = RREG32(RADEON_CP_CSQ2_STAT);
2985        r_rptr = (csq_stat >> 0) & 0x3ff;
2986        r_wptr = (csq_stat >> 10) & 0x3ff;
2987        ib1_rptr = (csq_stat >> 20) & 0x3ff;
2988        ib1_wptr = (csq2_stat >> 0) & 0x3ff;
2989        ib2_rptr = (csq2_stat >> 10) & 0x3ff;
2990        ib2_wptr = (csq2_stat >> 20) & 0x3ff;
2991        seq_printf(m, "CP_CSQ_STAT 0x%08x\n", csq_stat);
2992        seq_printf(m, "CP_CSQ2_STAT 0x%08x\n", csq2_stat);
2993        seq_printf(m, "Ring rptr %u\n", r_rptr);
2994        seq_printf(m, "Ring wptr %u\n", r_wptr);
2995        seq_printf(m, "Indirect1 rptr %u\n", ib1_rptr);
2996        seq_printf(m, "Indirect1 wptr %u\n", ib1_wptr);
2997        seq_printf(m, "Indirect2 rptr %u\n", ib2_rptr);
2998        seq_printf(m, "Indirect2 wptr %u\n", ib2_wptr);
2999        /* FIXME: 0, 128, 640 depends on fifo setup see cp_init_kms
3000         * 128 = indirect1_start * 8 & 640 = indirect2_start * 8 */
3001        seq_printf(m, "Ring fifo:\n");
3002        for (i = 0; i < 256; i++) {
3003                WREG32(RADEON_CP_CSQ_ADDR, i << 2);
3004                tmp = RREG32(RADEON_CP_CSQ_DATA);
3005                seq_printf(m, "rfifo[%04d]=0x%08X\n", i, tmp);
3006        }
3007        seq_printf(m, "Indirect1 fifo:\n");
3008        for (i = 256; i <= 512; i++) {
3009                WREG32(RADEON_CP_CSQ_ADDR, i << 2);
3010                tmp = RREG32(RADEON_CP_CSQ_DATA);
3011                seq_printf(m, "ib1fifo[%04d]=0x%08X\n", i, tmp);
3012        }
3013        seq_printf(m, "Indirect2 fifo:\n");
3014        for (i = 640; i < ib1_wptr; i++) {
3015                WREG32(RADEON_CP_CSQ_ADDR, i << 2);
3016                tmp = RREG32(RADEON_CP_CSQ_DATA);
3017                seq_printf(m, "ib2fifo[%04d]=0x%08X\n", i, tmp);
3018        }
3019        return 0;
3020}
3021
3022static int r100_debugfs_mc_info(struct seq_file *m, void *data)
3023{
3024        struct drm_info_node *node = (struct drm_info_node *) m->private;
3025        struct drm_device *dev = node->minor->dev;
3026        struct radeon_device *rdev = dev->dev_private;
3027        uint32_t tmp;
3028
3029        tmp = RREG32(RADEON_CONFIG_MEMSIZE);
3030        seq_printf(m, "CONFIG_MEMSIZE 0x%08x\n", tmp);
3031        tmp = RREG32(RADEON_MC_FB_LOCATION);
3032        seq_printf(m, "MC_FB_LOCATION 0x%08x\n", tmp);
3033        tmp = RREG32(RADEON_BUS_CNTL);
3034        seq_printf(m, "BUS_CNTL 0x%08x\n", tmp);
3035        tmp = RREG32(RADEON_MC_AGP_LOCATION);
3036        seq_printf(m, "MC_AGP_LOCATION 0x%08x\n", tmp);
3037        tmp = RREG32(RADEON_AGP_BASE);
3038        seq_printf(m, "AGP_BASE 0x%08x\n", tmp);
3039        tmp = RREG32(RADEON_HOST_PATH_CNTL);
3040        seq_printf(m, "HOST_PATH_CNTL 0x%08x\n", tmp);
3041        tmp = RREG32(0x01D0);
3042        seq_printf(m, "AIC_CTRL 0x%08x\n", tmp);
3043        tmp = RREG32(RADEON_AIC_LO_ADDR);
3044        seq_printf(m, "AIC_LO_ADDR 0x%08x\n", tmp);
3045        tmp = RREG32(RADEON_AIC_HI_ADDR);
3046        seq_printf(m, "AIC_HI_ADDR 0x%08x\n", tmp);
3047        tmp = RREG32(0x01E4);
3048        seq_printf(m, "AIC_TLB_ADDR 0x%08x\n", tmp);
3049        return 0;
3050}
3051
/*
 * debugfs file tables.  Each entry pairs a file name with the show
 * callback that fills it; the tables are handed to
 * radeon_debugfs_add_files() by the r100_debugfs_*_init() helpers below.
 */

/* RBBM state dump. */
static struct drm_info_list r100_debugfs_rbbm_list[] = {
        {"r100_rbbm_info", r100_debugfs_rbbm_info, 0, NULL},
};

/* CP dumps: ring contents and CSQ fifo contents. */
static struct drm_info_list r100_debugfs_cp_list[] = {
        {"r100_cp_ring_info", r100_debugfs_cp_ring_info, 0, NULL},
        {"r100_cp_csq_fifo", r100_debugfs_cp_csq_fifo, 0, NULL},
};

/* Memory controller register dump. */
static struct drm_info_list r100_debugfs_mc_info_list[] = {
        {"r100_mc_info", r100_debugfs_mc_info, 0, NULL},
};
3064#endif
3065
/*
 * Register the RBBM debugfs file(s) for @rdev.
 *
 * The entry count is derived from the table itself so that adding an
 * entry to r100_debugfs_rbbm_list cannot silently be missed here.
 *
 * Returns the result of radeon_debugfs_add_files(), or 0 when the kernel
 * is built without CONFIG_DEBUG_FS.
 */
int r100_debugfs_rbbm_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
        return radeon_debugfs_add_files(rdev, r100_debugfs_rbbm_list,
                                        ARRAY_SIZE(r100_debugfs_rbbm_list));
#else
        return 0;
#endif
}
3074
/*
 * Register the CP debugfs files (ring info and CSQ fifo) for @rdev.
 *
 * The entry count is derived from the table itself so that adding an
 * entry to r100_debugfs_cp_list cannot silently be missed here.
 *
 * Returns the result of radeon_debugfs_add_files(), or 0 when the kernel
 * is built without CONFIG_DEBUG_FS.
 */
int r100_debugfs_cp_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
        return radeon_debugfs_add_files(rdev, r100_debugfs_cp_list,
                                        ARRAY_SIZE(r100_debugfs_cp_list));
#else
        return 0;
#endif
}
3083
/*
 * Register the memory-controller debugfs file(s) for @rdev.
 *
 * The entry count is derived from the table itself so that adding an
 * entry to r100_debugfs_mc_info_list cannot silently be missed here.
 *
 * Returns the result of radeon_debugfs_add_files(), or 0 when the kernel
 * is built without CONFIG_DEBUG_FS.
 */
int r100_debugfs_mc_info_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
        return radeon_debugfs_add_files(rdev, r100_debugfs_mc_info_list,
                                        ARRAY_SIZE(r100_debugfs_mc_info_list));
#else
        return 0;
#endif
}
3092
3093int r100_set_surface_reg(struct radeon_device *rdev, int reg,
3094                         uint32_t tiling_flags, uint32_t pitch,
3095                         uint32_t offset, uint32_t obj_size)
3096{
3097        int surf_index = reg * 16;
3098        int flags = 0;
3099
3100        if (rdev->family <= CHIP_RS200) {
3101                if ((tiling_flags & (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
3102                                 == (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
3103                        flags |= RADEON_SURF_TILE_COLOR_BOTH;
3104                if (tiling_flags & RADEON_TILING_MACRO)
3105                        flags |= RADEON_SURF_TILE_COLOR_MACRO;
3106                /* setting pitch to 0 disables tiling */
3107                if ((tiling_flags & (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
3108                                == 0)
3109                        pitch = 0;
3110        } else if (rdev->family <= CHIP_RV280) {
3111                if (tiling_flags & (RADEON_TILING_MACRO))
3112                        flags |= R200_SURF_TILE_COLOR_MACRO;
3113                if (tiling_flags & RADEON_TILING_MICRO)
3114                        flags |= R200_SURF_TILE_COLOR_MICRO;
3115        } else {
3116                if (tiling_flags & RADEON_TILING_MACRO)
3117                        flags |= R300_SURF_TILE_MACRO;
3118                if (tiling_flags & RADEON_TILING_MICRO)
3119                        flags |= R300_SURF_TILE_MICRO;
3120        }
3121
3122        if (tiling_flags & RADEON_TILING_SWAP_16BIT)
3123                flags |= RADEON_SURF_AP0_SWP_16BPP | RADEON_SURF_AP1_SWP_16BPP;
3124        if (tiling_flags & RADEON_TILING_SWAP_32BIT)
3125                flags |= RADEON_SURF_AP0_SWP_32BPP | RADEON_SURF_AP1_SWP_32BPP;
3126
3127        /* r100/r200 divide by 16 */
3128        if (rdev->family < CHIP_R300)
3129                flags |= pitch / 16;
3130        else
3131                flags |= pitch / 8;
3132
3133
3134        DRM_DEBUG_KMS("writing surface %d %d %x %x\n", reg, flags, offset, offset+obj_size-1);
3135        WREG32(RADEON_SURFACE0_INFO + surf_index, flags);
3136        WREG32(RADEON_SURFACE0_LOWER_BOUND + surf_index, offset);
3137        WREG32(RADEON_SURFACE0_UPPER_BOUND + surf_index, offset + obj_size - 1);
3138        return 0;
3139}
3140
3141void r100_clear_surface_reg(struct radeon_device *rdev, int reg)
3142{
3143        int surf_index = reg * 16;
3144        WREG32(RADEON_SURFACE0_INFO + surf_index, 0);
3145}
3146
/*
 * r100_bandwidth_update - reprogram the display request/priority settings
 * for the currently enabled CRTCs.
 *
 * Estimates a worst-case display latency from the memory timing registers
 * and the sclk/mclk values in rdev->pm, then derives the GRPH_STOP_REQ /
 * GRPH_START_REQ and critical-point fields that are written to
 * GRPH_BUFFER_CNTL (CRTC 0) and GRPH2_BUFFER_CNTL (CRTC 1).  On RS400/RS480
 * with the second CRTC enabled, four RS400 display request registers are
 * loaded with fixed values.  Finally lb_vblank_lead_lines is recorded for
 * every enabled CRTC.
 *
 * Returns without touching the hardware if
 * rdev->mode_info.mode_config_initialized is not set.
 */
void r100_bandwidth_update(struct radeon_device *rdev)
{
        fixed20_12 trcd_ff, trp_ff, tras_ff, trbs_ff, tcas_ff;
        fixed20_12 sclk_ff, mclk_ff, sclk_eff_ff, sclk_delay_ff;
        fixed20_12 peak_disp_bw, mem_bw, pix_clk, pix_clk2, temp_ff;
        fixed20_12 crit_point_ff = {0};
        uint32_t temp, data, mem_trcd, mem_trp, mem_tras;
        /*
         * CAS latency lookup tables, indexed by the 3-bit field extracted
         * from bits 20-22 of MEM_SDRAM_MODE_REG further down.
         * memtcas_ff: used for RV100 and IGPs other than RS480.  Only seven
         * initializers are listed, so the eighth entry is zero-initialized.
         */
        fixed20_12 memtcas_ff[8] = {
                dfixed_init(1),
                dfixed_init(2),
                dfixed_init(3),
                dfixed_init(0),
                dfixed_init_half(1),
                dfixed_init_half(2),
                dfixed_init(0),
        };
        /* CAS latency table used for RS480. */
        fixed20_12 memtcas_rs480_ff[8] = {
                dfixed_init(0),
                dfixed_init(1),
                dfixed_init(2),
                dfixed_init(3),
                dfixed_init(0),
                dfixed_init_half(1),
                dfixed_init_half(2),
                dfixed_init_half(3),
        };
        /* CAS latency table used for everything that is not RV100/IGP. */
        fixed20_12 memtcas2_ff[8] = {
                dfixed_init(0),
                dfixed_init(1),
                dfixed_init(2),
                dfixed_init(3),
                dfixed_init(4),
                dfixed_init(5),
                dfixed_init(6),
                dfixed_init(7),
        };
        /*
         * Trbs lookup tables, indexed by the RBS position field read from
         * the R300 memory controller below.  memtrbs_r4xx is selected for
         * RV410/R420/R423, memtrbs for the other discrete R300-class parts.
         */
        fixed20_12 memtrbs[8] = {
                dfixed_init(1),
                dfixed_init_half(1),
                dfixed_init(2),
                dfixed_init_half(2),
                dfixed_init(3),
                dfixed_init_half(3),
                dfixed_init(4),
                dfixed_init_half(4)
        };
        fixed20_12 memtrbs_r4xx[8] = {
                dfixed_init(4),
                dfixed_init(5),
                dfixed_init(6),
                dfixed_init(7),
                dfixed_init(8),
                dfixed_init(9),
                dfixed_init(10),
                dfixed_init(11)
        };
        fixed20_12 min_mem_eff;
        fixed20_12 mc_latency_sclk, mc_latency_mclk, k1;
        fixed20_12 cur_latency_mclk, cur_latency_sclk;
        fixed20_12 disp_latency, disp_latency_overhead, disp_drain_rate = {0},
                disp_drain_rate2, read_return_rate;
        fixed20_12 time_disp1_drop_priority;
        int c;
        int cur_size = 16;       /* in octawords */
        int critical_point = 0, critical_point2;
/*      uint32_t read_return_rate, time_disp1_drop_priority; */
        int stop_req, max_stop_req;
        /* mode1/mode2 stay NULL when the corresponding CRTC is not enabled */
        struct drm_display_mode *mode1 = NULL;
        struct drm_display_mode *mode2 = NULL;
        /* bytes per pixel of the framebuffer bound to each enabled CRTC */
        uint32_t pixel_bytes1 = 0;
        uint32_t pixel_bytes2 = 0;

        /* Guess line buffer size to be 8192 pixels */
        u32 lb_size = 8192;

        if (!rdev->mode_info.mode_config_initialized)
                return;

        radeon_update_display_priority(rdev);

        /* Collect mode and bytes-per-pixel for each enabled CRTC. */
        if (rdev->mode_info.crtcs[0]->base.enabled) {
                const struct drm_framebuffer *fb =
                        rdev->mode_info.crtcs[0]->base.primary->fb;

                mode1 = &rdev->mode_info.crtcs[0]->base.mode;
                pixel_bytes1 = fb->format->cpp[0];
        }
        /* the second CRTC is only looked at on parts that have one */
        if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
                if (rdev->mode_info.crtcs[1]->base.enabled) {
                        const struct drm_framebuffer *fb =
                                rdev->mode_info.crtcs[1]->base.primary->fb;

                        mode2 = &rdev->mode_info.crtcs[1]->base.mode;
                        pixel_bytes2 = fb->format->cpp[0];
                }
        }

        /* NOTE(review): presumably 0.8 in 20.12 fixed point - confirm against dfixed_const_8() */
        min_mem_eff.full = dfixed_const_8(0);
        /*
         * With display priority 2 on R300-class parts, set the initial
         * latency timer field to 1 for each enabled display and clear it
         * for disabled ones.
         */
        if ((rdev->disp_priority == 2) && ASIC_IS_R300(rdev)) {
                uint32_t mc_init_misc_lat_timer = RREG32(R300_MC_INIT_MISC_LAT_TIMER);
                mc_init_misc_lat_timer &= ~(R300_MC_DISP1R_INIT_LAT_MASK << R300_MC_DISP1R_INIT_LAT_SHIFT);
                mc_init_misc_lat_timer &= ~(R300_MC_DISP0R_INIT_LAT_MASK << R300_MC_DISP0R_INIT_LAT_SHIFT);
                /* check crtc enables */
                if (mode2)
                        mc_init_misc_lat_timer |= (1 << R300_MC_DISP1R_INIT_LAT_SHIFT);
                if (mode1)
                        mc_init_misc_lat_timer |= (1 << R300_MC_DISP0R_INIT_LAT_SHIFT);
                WREG32(R300_MC_INIT_MISC_LAT_TIMER, mc_init_misc_lat_timer);
        }

        /*
         * Determine whether there is enough bandwidth for the current mode.
         */
        sclk_ff = rdev->pm.sclk;
        mclk_ff = rdev->pm.mclk;

        /* memory bandwidth = mclk * bus width in bytes (doubled for DDR) */
        temp = (rdev->mc.vram_width / 8) * (rdev->mc.vram_is_ddr ? 2 : 1);
        temp_ff.full = dfixed_const(temp);
        mem_bw.full = dfixed_mul(mclk_ff, temp_ff);

        /* peak display bandwidth = sum over CRTCs of (clock / 1000) * bytes per pixel */
        pix_clk.full = 0;
        pix_clk2.full = 0;
        peak_disp_bw.full = 0;
        if (mode1) {
                temp_ff.full = dfixed_const(1000);
                pix_clk.full = dfixed_const(mode1->clock); /* convert to fixed point */
                pix_clk.full = dfixed_div(pix_clk, temp_ff);
                temp_ff.full = dfixed_const(pixel_bytes1);
                peak_disp_bw.full += dfixed_mul(pix_clk, temp_ff);
        }
        if (mode2) {
                temp_ff.full = dfixed_const(1000);
                pix_clk2.full = dfixed_const(mode2->clock); /* convert to fixed point */
                pix_clk2.full = dfixed_div(pix_clk2, temp_ff);
                temp_ff.full = dfixed_const(pixel_bytes2);
                peak_disp_bw.full += dfixed_mul(pix_clk2, temp_ff);
        }

        /* derate the raw memory bandwidth before comparing; only a warning is issued */
        mem_bw.full = dfixed_mul(mem_bw, min_mem_eff);
        if (peak_disp_bw.full >= mem_bw.full) {
                DRM_ERROR("You may not have enough display bandwidth for current mode\n"
                          "If you have flickering problem, try to lower resolution, refresh rate, or color depth\n");
        }

        /*
         * Decode tRCD, tRP and tRAS from MEM_TIMING_CNTL.  Field positions,
         * widths and the added base values differ per chip family.
         */
        temp = RREG32(RADEON_MEM_TIMING_CNTL);
        if ((rdev->family == CHIP_RV100) || (rdev->flags & RADEON_IS_IGP)) { /* RV100, M6, IGPs */
                mem_trcd = ((temp >> 2) & 0x3) + 1;
                mem_trp  = ((temp & 0x3)) + 1;
                mem_tras = ((temp & 0x70) >> 4) + 1;
        } else if (rdev->family == CHIP_R300 ||
                   rdev->family == CHIP_R350) { /* r300, r350 */
                mem_trcd = (temp & 0x7) + 1;
                mem_trp = ((temp >> 8) & 0x7) + 1;
                mem_tras = ((temp >> 11) & 0xf) + 4;
        } else if (rdev->family == CHIP_RV350 ||
                   rdev->family == CHIP_RV380) {
                /* rv3x0 */
                mem_trcd = (temp & 0x7) + 3;
                mem_trp = ((temp >> 8) & 0x7) + 3;
                mem_tras = ((temp >> 11) & 0xf) + 6;
        } else if (rdev->family == CHIP_R420 ||
                   rdev->family == CHIP_R423 ||
                   rdev->family == CHIP_RV410) {
                /* r4xx: values are clamped to 15 / 15 / 31 */
                mem_trcd = (temp & 0xf) + 3;
                if (mem_trcd > 15)
                        mem_trcd = 15;
                mem_trp = ((temp >> 8) & 0xf) + 3;
                if (mem_trp > 15)
                        mem_trp = 15;
                mem_tras = ((temp >> 12) & 0x1f) + 6;
                if (mem_tras > 31)
                        mem_tras = 31;
        } else { /* RV200, R200 */
                mem_trcd = (temp & 0x7) + 1;
                mem_trp = ((temp >> 8) & 0x7) + 1;
                mem_tras = ((temp >> 12) & 0xf) + 4;
        }
        /* convert to FF */
        trcd_ff.full = dfixed_const(mem_trcd);
        trp_ff.full = dfixed_const(mem_trp);
        tras_ff.full = dfixed_const(mem_tras);

        /* Look up the CAS latency from bits 20-22 of MEM_SDRAM_MODE_REG. */
        temp = RREG32(RADEON_MEM_SDRAM_MODE_REG);
        data = (temp & (7 << 20)) >> 20;
        if ((rdev->family == CHIP_RV100) || rdev->flags & RADEON_IS_IGP) {
                if (rdev->family == CHIP_RS480) /* don't think rs400 */
                        tcas_ff = memtcas_rs480_ff[data];
                else
                        tcas_ff = memtcas_ff[data];
        } else
                tcas_ff = memtcas2_ff[data];

        if (rdev->family == CHIP_RS400 ||
            rdev->family == CHIP_RS480) {
                /* extra cas latency stored in bits 23-25 0-4 clocks */
                data = (temp >> 23) & 0x7;
                if (data < 5)
                        tcas_ff.full += dfixed_const(data);
        }

        if (ASIC_IS_R300(rdev) && !(rdev->flags & RADEON_IS_IGP)) {
                /* on the R300, Tcas is included in Trbs.
                 */
                temp = RREG32(RADEON_MEM_CNTL);
                data = (R300_MEM_NUM_CHANNELS_MASK & temp);
                if (data == 1) {
                        if (R300_MEM_USE_CD_CH_ONLY & temp) {
                                /* CD channels only: fetch READ_CNTL_CD through the indirect MC index/data pair */
                                temp = RREG32(R300_MC_IND_INDEX);
                                temp &= ~R300_MC_IND_ADDR_MASK;
                                temp |= R300_MC_READ_CNTL_CD_mcind;
                                WREG32(R300_MC_IND_INDEX, temp);
                                temp = RREG32(R300_MC_IND_DATA);
                                data = (R300_MEM_RBS_POSITION_C_MASK & temp);
                        } else {
                                temp = RREG32(R300_MC_READ_CNTL_AB);
                                data = (R300_MEM_RBS_POSITION_A_MASK & temp);
                        }
                } else {
                        temp = RREG32(R300_MC_READ_CNTL_AB);
                        data = (R300_MEM_RBS_POSITION_A_MASK & temp);
                }
                if (rdev->family == CHIP_RV410 ||
                    rdev->family == CHIP_R420 ||
                    rdev->family == CHIP_R423)
                        trbs_ff = memtrbs_r4xx[data];
                else
                        trbs_ff = memtrbs[data];
                tcas_ff.full += trbs_ff.full;
        }

        /* effective sclk starts as sclk and is reduced on AGP parts */
        sclk_eff_ff.full = sclk_ff.full;

        if (rdev->flags & RADEON_IS_AGP) {
                fixed20_12 agpmode_ff;
                agpmode_ff.full = dfixed_const(radeon_agpmode);
                /* NOTE(review): presumably 16.666 in fixed point - confirm against dfixed_const_666() */
                temp_ff.full = dfixed_const_666(16);
                sclk_eff_ff.full -= dfixed_mul(agpmode_ff, temp_ff);
        }
        /* TODO PCIE lanes may affect this - agpmode == 16?? */

        /* fixed sclk-domain delay, chosen by family, memory type and bus width */
        if (ASIC_IS_R300(rdev)) {
                sclk_delay_ff.full = dfixed_const(250);
        } else {
                if ((rdev->family == CHIP_RV100) ||
                    rdev->flags & RADEON_IS_IGP) {
                        if (rdev->mc.vram_is_ddr)
                                sclk_delay_ff.full = dfixed_const(41);
                        else
                                sclk_delay_ff.full = dfixed_const(33);
                } else {
                        if (rdev->mc.vram_width == 128)
                                sclk_delay_ff.full = dfixed_const(57);
                        else
                                sclk_delay_ff.full = dfixed_const(41);
                }
        }

        mc_latency_sclk.full = dfixed_div(sclk_delay_ff, sclk_eff_ff);

        /* k1 and c are constants of the mclk latency formula below */
        if (rdev->mc.vram_is_ddr) {
                if (rdev->mc.vram_width == 32) {
                        k1.full = dfixed_const(40);
                        c  = 3;
                } else {
                        k1.full = dfixed_const(20);
                        c  = 1;
                }
        } else {
                k1.full = dfixed_const(40);
                c  = 3;
        }

        /*
         * mc_latency_mclk = (2*trcd + c*tcas + 4*tras + 4*trp + k1) / mclk
         *                   + 4 / sclk_eff
         * (temp_ff still holds 4 when the sclk term is added).
         */
        temp_ff.full = dfixed_const(2);
        mc_latency_mclk.full = dfixed_mul(trcd_ff, temp_ff);
        temp_ff.full = dfixed_const(c);
        mc_latency_mclk.full += dfixed_mul(tcas_ff, temp_ff);
        temp_ff.full = dfixed_const(4);
        mc_latency_mclk.full += dfixed_mul(tras_ff, temp_ff);
        mc_latency_mclk.full += dfixed_mul(trp_ff, temp_ff);
        mc_latency_mclk.full += k1.full;

        mc_latency_mclk.full = dfixed_div(mc_latency_mclk, mclk_ff);
        mc_latency_mclk.full += dfixed_div(temp_ff, sclk_eff_ff);

        /*
          HW cursor time assuming worst case of full size colour cursor.
          The mclk term is max(2 * (cur_size - (is_ddr + 1)) + trcd, tras) / mclk.
        */
        temp_ff.full = dfixed_const((2 * (cur_size - (rdev->mc.vram_is_ddr + 1))));
        temp_ff.full += trcd_ff.full;
        if (temp_ff.full < tras_ff.full)
                temp_ff.full = tras_ff.full;
        cur_latency_mclk.full = dfixed_div(temp_ff, mclk_ff);

        temp_ff.full = dfixed_const(cur_size);
        cur_latency_sclk.full = dfixed_div(temp_ff, sclk_eff_ff);
        /*
          Find the total latency for the display data: add a fixed overhead
          of 8 / sclk plus the cursor latency to both estimates and keep the
          larger of the two.
        */
        disp_latency_overhead.full = dfixed_const(8);
        disp_latency_overhead.full = dfixed_div(disp_latency_overhead, sclk_ff);
        mc_latency_mclk.full += disp_latency_overhead.full + cur_latency_mclk.full;
        mc_latency_sclk.full += disp_latency_overhead.full + cur_latency_sclk.full;

        if (mc_latency_mclk.full > mc_latency_sclk.full)
                disp_latency.full = mc_latency_mclk.full;
        else
                disp_latency.full = mc_latency_sclk.full;

        /* setup Max GRPH_STOP_REQ default value */
        if (ASIC_IS_RV100(rdev))
                max_stop_req = 0x5c;
        else
                max_stop_req = 0x7c;

        if (mode1) {
                /*  CRTC1
                    Set GRPH_BUFFER_CNTL register using h/w defined optimal values.
                    GRPH_STOP_REQ <= MIN[ 0x7C, (CRTC_H_DISP + 1) * (bit depth) / 0x10 ]
                */
                stop_req = mode1->hdisplay * pixel_bytes1 / 16;

                if (stop_req > max_stop_req)
                        stop_req = max_stop_req;

                /*
                  Find the drain rate of the display buffer:
                  pixel clock divided by (16 / bytes per pixel).
                */
                temp_ff.full = dfixed_const((16/pixel_bytes1));
                disp_drain_rate.full = dfixed_div(pix_clk, temp_ff);

                /*
                  Find the critical point of the display buffer:
                  drain rate * latency, truncated after adding
                  dfixed_const_half(0) (presumably rounds to nearest - confirm).
                */
                crit_point_ff.full = dfixed_mul(disp_drain_rate, disp_latency);
                crit_point_ff.full += dfixed_const_half(0);

                critical_point = dfixed_trunc(crit_point_ff);

                if (rdev->disp_priority == 2) {
                        critical_point = 0;
                }

                /*
                  The critical point should never be above max_stop_req-4.  Setting
                  GRPH_CRITICAL_CNTL = 0 will thus force high priority all the time.
                */
                if (max_stop_req - critical_point < 4)
                        critical_point = 0;

                if (critical_point == 0 && mode2 && rdev->family == CHIP_R300) {
                        /* some R300 cards have problem with this set to 0, when CRTC2 is enabled.*/
                        critical_point = 0x10;
                }

                temp = RREG32(RADEON_GRPH_BUFFER_CNTL);
                temp &= ~(RADEON_GRPH_STOP_REQ_MASK);
                temp |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT);
                temp &= ~(RADEON_GRPH_START_REQ_MASK);
                /* on R350 the start request is set 0x10 below the stop request when it exceeds 0x15 */
                if ((rdev->family == CHIP_R350) &&
                    (stop_req > 0x15)) {
                        stop_req -= 0x10;
                }
                temp |= (stop_req << RADEON_GRPH_START_REQ_SHIFT);
                temp |= RADEON_GRPH_BUFFER_SIZE;
                temp &= ~(RADEON_GRPH_CRITICAL_CNTL   |
                          RADEON_GRPH_CRITICAL_AT_SOF |
                          RADEON_GRPH_STOP_CNTL);
                /*
                  Write the result into the register.
                */
                WREG32(RADEON_GRPH_BUFFER_CNTL, ((temp & ~RADEON_GRPH_CRITICAL_POINT_MASK) |
                                                       (critical_point << RADEON_GRPH_CRITICAL_POINT_SHIFT)));

#if 0
                if ((rdev->family == CHIP_RS400) ||
                    (rdev->family == CHIP_RS480)) {
                        /* attempt to program RS400 disp regs correctly ??? */
                        temp = RREG32(RS400_DISP1_REG_CNTL);
                        temp &= ~(RS400_DISP1_START_REQ_LEVEL_MASK |
                                  RS400_DISP1_STOP_REQ_LEVEL_MASK);
                        WREG32(RS400_DISP1_REQ_CNTL1, (temp |
                                                       (critical_point << RS400_DISP1_START_REQ_LEVEL_SHIFT) |
                                                       (critical_point << RS400_DISP1_STOP_REQ_LEVEL_SHIFT)));
                        temp = RREG32(RS400_DMIF_MEM_CNTL1);
                        temp &= ~(RS400_DISP1_CRITICAL_POINT_START_MASK |
                                  RS400_DISP1_CRITICAL_POINT_STOP_MASK);
                        WREG32(RS400_DMIF_MEM_CNTL1, (temp |
                                                      (critical_point << RS400_DISP1_CRITICAL_POINT_START_SHIFT) |
                                                      (critical_point << RS400_DISP1_CRITICAL_POINT_STOP_SHIFT)));
                }
#endif

                /* only the value read back after the write is logged */
                DRM_DEBUG_KMS("GRPH_BUFFER_CNTL from to %x\n",
                          /*      (unsigned int)info->SavedReg->grph_buffer_cntl, */
                          (unsigned int)RREG32(RADEON_GRPH_BUFFER_CNTL));
        }

        if (mode2) {
                /* CRTC2: same scheme as above, programmed into GRPH2_BUFFER_CNTL */
                u32 grph2_cntl;
                stop_req = mode2->hdisplay * pixel_bytes2 / 16;

                if (stop_req > max_stop_req)
                        stop_req = max_stop_req;

                /*
                  Find the drain rate of the display buffer.
                */
                temp_ff.full = dfixed_const((16/pixel_bytes2));
                disp_drain_rate2.full = dfixed_div(pix_clk2, temp_ff);

                grph2_cntl = RREG32(RADEON_GRPH2_BUFFER_CNTL);
                grph2_cntl &= ~(RADEON_GRPH_STOP_REQ_MASK);
                grph2_cntl |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT);
                grph2_cntl &= ~(RADEON_GRPH_START_REQ_MASK);
                if ((rdev->family == CHIP_R350) &&
                    (stop_req > 0x15)) {
                        stop_req -= 0x10;
                }
                grph2_cntl |= (stop_req << RADEON_GRPH_START_REQ_SHIFT);
                grph2_cntl |= RADEON_GRPH_BUFFER_SIZE;
                grph2_cntl &= ~(RADEON_GRPH_CRITICAL_CNTL   |
                          RADEON_GRPH_CRITICAL_AT_SOF |
                          RADEON_GRPH_STOP_CNTL);

                /* RS100/RS200 always use a critical point of 0 for CRTC2 */
                if ((rdev->family == CHIP_RS100) ||
                    (rdev->family == CHIP_RS200))
                        critical_point2 = 0;
                else {
                        /* read return rate = min(sclk, mclk * scaled bus width) */
                        temp = (rdev->mc.vram_width * rdev->mc.vram_is_ddr + 1)/128;
                        temp_ff.full = dfixed_const(temp);
                        temp_ff.full = dfixed_mul(mclk_ff, temp_ff);
                        if (sclk_ff.full < temp_ff.full)
                                temp_ff.full = sclk_ff.full;

                        read_return_rate.full = temp_ff.full;

                        /*
                         * With CRTC1 active, its critical point (crit_point_ff
                         * from the mode1 block) is divided by the margin between
                         * read return rate and CRTC1 drain rate.
                         */
                        if (mode1) {
                                temp_ff.full = read_return_rate.full - disp_drain_rate.full;
                                time_disp1_drop_priority.full = dfixed_div(crit_point_ff, temp_ff);
                        } else {
                                time_disp1_drop_priority.full = 0;
                        }
                        /* (2 * disp_latency + time_disp1_drop_priority) * CRTC2 drain rate */
                        crit_point_ff.full = disp_latency.full + time_disp1_drop_priority.full + disp_latency.full;
                        crit_point_ff.full = dfixed_mul(crit_point_ff, disp_drain_rate2);
                        crit_point_ff.full += dfixed_const_half(0);

                        critical_point2 = dfixed_trunc(crit_point_ff);

                        if (rdev->disp_priority == 2) {
                                critical_point2 = 0;
                        }

                        if (max_stop_req - critical_point2 < 4)
                                critical_point2 = 0;

                }

                if (critical_point2 == 0 && rdev->family == CHIP_R300) {
                        /* some R300 cards have problem with this set to 0 */
                        critical_point2 = 0x10;
                }

                WREG32(RADEON_GRPH2_BUFFER_CNTL, ((grph2_cntl & ~RADEON_GRPH_CRITICAL_POINT_MASK) |
                                                  (critical_point2 << RADEON_GRPH_CRITICAL_POINT_SHIFT)));

                if ((rdev->family == CHIP_RS400) ||
                    (rdev->family == CHIP_RS480)) {
#if 0
                        /* attempt to program RS400 disp2 regs correctly ??? */
                        temp = RREG32(RS400_DISP2_REQ_CNTL1);
                        temp &= ~(RS400_DISP2_START_REQ_LEVEL_MASK |
                                  RS400_DISP2_STOP_REQ_LEVEL_MASK);
                        WREG32(RS400_DISP2_REQ_CNTL1, (temp |
                                                       (critical_point2 << RS400_DISP1_START_REQ_LEVEL_SHIFT) |
                                                       (critical_point2 << RS400_DISP1_STOP_REQ_LEVEL_SHIFT)));
                        temp = RREG32(RS400_DISP2_REQ_CNTL2);
                        temp &= ~(RS400_DISP2_CRITICAL_POINT_START_MASK |
                                  RS400_DISP2_CRITICAL_POINT_STOP_MASK);
                        WREG32(RS400_DISP2_REQ_CNTL2, (temp |
                                                       (critical_point2 << RS400_DISP2_CRITICAL_POINT_START_SHIFT) |
                                                       (critical_point2 << RS400_DISP2_CRITICAL_POINT_STOP_SHIFT)));
#endif
                        /* fixed values are written instead of the computed ones above */
                        WREG32(RS400_DISP2_REQ_CNTL1, 0x105DC1CC);
                        WREG32(RS400_DISP2_REQ_CNTL2, 0x2749D000);
                        WREG32(RS400_DMIF_MEM_CNTL1,  0x29CA71DC);
                        WREG32(RS400_DISP1_REQ_CNTL1, 0x28FBC3AC);
                }

                DRM_DEBUG_KMS("GRPH2_BUFFER_CNTL from to %x\n",
                          (unsigned int)RREG32(RADEON_GRPH2_BUFFER_CNTL));
        }

        /* Save number of lines the linebuffer leads before the scanout */
        if (mode1)
            rdev->mode_info.crtcs[0]->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode1->crtc_hdisplay);

        if (mode2)
            rdev->mode_info.crtcs[1]->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode2->crtc_hdisplay);
}
3650
3651int r100_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3652{
3653        uint32_t scratch;
3654        uint32_t tmp = 0;
3655        unsigned i;
3656        int r;
3657
3658        r = radeon_scratch_get(rdev, &scratch);
3659        if (r) {
3660                DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3661                return r;
3662        }
3663        WREG32(scratch, 0xCAFEDEAD);
3664        r = radeon_ring_lock(rdev, ring, 2);
3665        if (r) {
3666                DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3667                radeon_scratch_free(rdev, scratch);
3668                return r;
3669        }
3670        radeon_ring_write(ring, PACKET0(scratch, 0));
3671        radeon_ring_write(ring, 0xDEADBEEF);
3672        radeon_ring_unlock_commit(rdev, ring, false);
3673        for (i = 0; i < rdev->usec_timeout; i++) {
3674                tmp = RREG32(scratch);
3675                if (tmp == 0xDEADBEEF) {
3676                        break;
3677                }
3678                udelay(1);
3679        }
3680        if (i < rdev->usec_timeout) {
3681                DRM_INFO("ring test succeeded in %d usecs\n", i);
3682        } else {
3683                DRM_ERROR("radeon: ring test failed (scratch(0x%04X)=0x%08X)\n",
3684                          scratch, tmp);
3685                r = -EINVAL;
3686        }
3687        radeon_scratch_free(rdev, scratch);
3688        return r;
3689}
3690
3691void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3692{
3693        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3694
3695        if (ring->rptr_save_reg) {
3696                u32 next_rptr = ring->wptr + 2 + 3;
3697                radeon_ring_write(ring, PACKET0(ring->rptr_save_reg, 0));
3698                radeon_ring_write(ring, next_rptr);
3699        }
3700
3701        radeon_ring_write(ring, PACKET0(RADEON_CP_IB_BASE, 1));
3702        radeon_ring_write(ring, ib->gpu_addr);
3703        radeon_ring_write(ring, ib->length_dw);
3704}
3705
3706int r100_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3707{
3708        struct radeon_ib ib;
3709        uint32_t scratch;
3710        uint32_t tmp = 0;
3711        unsigned i;
3712        int r;
3713
3714        r = radeon_scratch_get(rdev, &scratch);
3715        if (r) {
3716                DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3717                return r;
3718        }
3719        WREG32(scratch, 0xCAFEDEAD);
3720        r = radeon_ib_get(rdev, RADEON_RING_TYPE_GFX_INDEX, &ib, NULL, 256);
3721        if (r) {
3722                DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3723                goto free_scratch;
3724        }
3725        ib.ptr[0] = PACKET0(scratch, 0);
3726        ib.ptr[1] = 0xDEADBEEF;
3727        ib.ptr[2] = PACKET2(0);
3728        ib.ptr[3] = PACKET2(0);
3729        ib.ptr[4] = PACKET2(0);
3730        ib.ptr[5] = PACKET2(0);
3731        ib.ptr[6] = PACKET2(0);
3732        ib.ptr[7] = PACKET2(0);
3733        ib.length_dw = 8;
3734        r = radeon_ib_schedule(rdev, &ib, NULL, false);
3735        if (r) {
3736                DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3737                goto free_ib;
3738        }
3739        r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3740                RADEON_USEC_IB_TEST_TIMEOUT));
3741        if (r < 0) {
3742                DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3743                goto free_ib;
3744        } else if (r == 0) {
3745                DRM_ERROR("radeon: fence wait timed out.\n");
3746                r = -ETIMEDOUT;
3747                goto free_ib;
3748        }
3749        r = 0;
3750        for (i = 0; i < rdev->usec_timeout; i++) {
3751                tmp = RREG32(scratch);
3752                if (tmp == 0xDEADBEEF) {
3753                        break;
3754                }
3755                udelay(1);
3756        }
3757        if (i < rdev->usec_timeout) {
3758                DRM_INFO("ib test succeeded in %u usecs\n", i);
3759        } else {
3760                DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3761                          scratch, tmp);
3762                r = -EINVAL;
3763        }
3764free_ib:
3765        radeon_ib_free(rdev, &ib);
3766free_scratch:
3767        radeon_scratch_free(rdev, scratch);
3768        return r;
3769}
3770
3771void r100_mc_stop(struct radeon_device *rdev, struct r100_mc_save *save)
3772{
3773        /* Shutdown CP we shouldn't need to do that but better be safe than
3774         * sorry
3775         */
3776        rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3777        WREG32(R_000740_CP_CSQ_CNTL, 0);
3778
3779        /* Save few CRTC registers */
3780        save->GENMO_WT = RREG8(R_0003C2_GENMO_WT);
3781        save->CRTC_EXT_CNTL = RREG32(R_000054_CRTC_EXT_CNTL);
3782        save->CRTC_GEN_CNTL = RREG32(R_000050_CRTC_GEN_CNTL);
3783        save->CUR_OFFSET = RREG32(R_000260_CUR_OFFSET);
3784        if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
3785                save->CRTC2_GEN_CNTL = RREG32(R_0003F8_CRTC2_GEN_CNTL);
3786                save->CUR2_OFFSET = RREG32(R_000360_CUR2_OFFSET);
3787        }
3788
3789        /* Disable VGA aperture access */
3790        WREG8(R_0003C2_GENMO_WT, C_0003C2_VGA_RAM_EN & save->GENMO_WT);
3791        /* Disable cursor, overlay, crtc */
3792        WREG32(R_000260_CUR_OFFSET, save->CUR_OFFSET | S_000260_CUR_LOCK(1));
3793        WREG32(R_000054_CRTC_EXT_CNTL, save->CRTC_EXT_CNTL |
3794                                        S_000054_CRTC_DISPLAY_DIS(1));
3795        WREG32(R_000050_CRTC_GEN_CNTL,
3796                        (C_000050_CRTC_CUR_EN & save->CRTC_GEN_CNTL) |
3797                        S_000050_CRTC_DISP_REQ_EN_B(1));
3798        WREG32(R_000420_OV0_SCALE_CNTL,
3799                C_000420_OV0_OVERLAY_EN & RREG32(R_000420_OV0_SCALE_CNTL));
3800        WREG32(R_000260_CUR_OFFSET, C_000260_CUR_LOCK & save->CUR_OFFSET);
3801        if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
3802                WREG32(R_000360_CUR2_OFFSET, save->CUR2_OFFSET |
3803                                                S_000360_CUR2_LOCK(1));
3804                WREG32(R_0003F8_CRTC2_GEN_CNTL,
3805                        (C_0003F8_CRTC2_CUR_EN & save->CRTC2_GEN_CNTL) |
3806                        S_0003F8_CRTC2_DISPLAY_DIS(1) |
3807                        S_0003F8_CRTC2_DISP_REQ_EN_B(1));
3808                WREG32(R_000360_CUR2_OFFSET,
3809                        C_000360_CUR2_LOCK & save->CUR2_OFFSET);
3810        }
3811}
3812
3813void r100_mc_resume(struct radeon_device *rdev, struct r100_mc_save *save)
3814{
3815        /* Update base address for crtc */
3816        WREG32(R_00023C_DISPLAY_BASE_ADDR, rdev->mc.vram_start);
3817        if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
3818                WREG32(R_00033C_CRTC2_DISPLAY_BASE_ADDR, rdev->mc.vram_start);
3819        }
3820        /* Restore CRTC registers */
3821        WREG8(R_0003C2_GENMO_WT, save->GENMO_WT);
3822        WREG32(R_000054_CRTC_EXT_CNTL, save->CRTC_EXT_CNTL);
3823        WREG32(R_000050_CRTC_GEN_CNTL, save->CRTC_GEN_CNTL);
3824        if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
3825                WREG32(R_0003F8_CRTC2_GEN_CNTL, save->CRTC2_GEN_CNTL);
3826        }
3827}
3828
3829void r100_vga_render_disable(struct radeon_device *rdev)
3830{
3831        u32 tmp;
3832
3833        tmp = RREG8(R_0003C2_GENMO_WT);
3834        WREG8(R_0003C2_GENMO_WT, C_0003C2_VGA_RAM_EN & tmp);
3835}
3836
3837static void r100_debugfs(struct radeon_device *rdev)
3838{
3839        int r;
3840
3841        r = r100_debugfs_mc_info_init(rdev);
3842        if (r)
3843                dev_warn(rdev->dev, "Failed to create r100_mc debugfs file.\n");
3844}
3845
3846static void r100_mc_program(struct radeon_device *rdev)
3847{
3848        struct r100_mc_save save;
3849
3850        /* Stops all mc clients */
3851        r100_mc_stop(rdev, &save);
3852        if (rdev->flags & RADEON_IS_AGP) {
3853                WREG32(R_00014C_MC_AGP_LOCATION,
3854                        S_00014C_MC_AGP_START(rdev->mc.gtt_start >> 16) |
3855                        S_00014C_MC_AGP_TOP(rdev->mc.gtt_end >> 16));
3856                WREG32(R_000170_AGP_BASE, lower_32_bits(rdev->mc.agp_base));
3857                if (rdev->family > CHIP_RV200)
3858                        WREG32(R_00015C_AGP_BASE_2,
3859                                upper_32_bits(rdev->mc.agp_base) & 0xff);
3860        } else {
3861                WREG32(R_00014C_MC_AGP_LOCATION, 0x0FFFFFFF);
3862                WREG32(R_000170_AGP_BASE, 0);
3863                if (rdev->family > CHIP_RV200)
3864                        WREG32(R_00015C_AGP_BASE_2, 0);
3865        }
3866        /* Wait for mc idle */
3867        if (r100_mc_wait_for_idle(rdev))
3868                dev_warn(rdev->dev, "Wait for MC idle timeout.\n");
3869        /* Program MC, should be a 32bits limited address space */
3870        WREG32(R_000148_MC_FB_LOCATION,
3871                S_000148_MC_FB_START(rdev->mc.vram_start >> 16) |
3872                S_000148_MC_FB_TOP(rdev->mc.vram_end >> 16));
3873        r100_mc_resume(rdev, &save);
3874}
3875
3876static void r100_clock_startup(struct radeon_device *rdev)
3877{
3878        u32 tmp;
3879
3880        if (radeon_dynclks != -1 && radeon_dynclks)
3881                radeon_legacy_set_clock_gating(rdev, 1);
3882        /* We need to force on some of the block */
3883        tmp = RREG32_PLL(R_00000D_SCLK_CNTL);
3884        tmp |= S_00000D_FORCE_CP(1) | S_00000D_FORCE_VIP(1);
3885        if ((rdev->family == CHIP_RV250) || (rdev->family == CHIP_RV280))
3886                tmp |= S_00000D_FORCE_DISP1(1) | S_00000D_FORCE_DISP2(1);
3887        WREG32_PLL(R_00000D_SCLK_CNTL, tmp);
3888}
3889
3890static int r100_startup(struct radeon_device *rdev)
3891{
3892        int r;
3893
3894        /* set common regs */
3895        r100_set_common_regs(rdev);
3896        /* program mc */
3897        r100_mc_program(rdev);
3898        /* Resume clock */
3899        r100_clock_startup(rdev);
3900        /* Initialize GART (initialize after TTM so we can allocate
3901         * memory through TTM but finalize after TTM) */
3902        r100_enable_bm(rdev);
3903        if (rdev->flags & RADEON_IS_PCI) {
3904                r = r100_pci_gart_enable(rdev);
3905                if (r)
3906                        return r;
3907        }
3908
3909        /* allocate wb buffer */
3910        r = radeon_wb_init(rdev);
3911        if (r)
3912                return r;
3913
3914        r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
3915        if (r) {
3916                dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
3917                return r;
3918        }
3919
3920        /* Enable IRQ */
3921        if (!rdev->irq.installed) {
3922                r = radeon_irq_kms_init(rdev);
3923                if (r)
3924                        return r;
3925        }
3926
3927        r100_irq_set(rdev);
3928        rdev->config.r100.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
3929        /* 1M ring buffer */
3930        r = r100_cp_init(rdev, 1024 * 1024);
3931        if (r) {
3932                dev_err(rdev->dev, "failed initializing CP (%d).\n", r);
3933                return r;
3934        }
3935
3936        r = radeon_ib_pool_init(rdev);
3937        if (r) {
3938                dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
3939                return r;
3940        }
3941
3942        return 0;
3943}
3944
3945int r100_resume(struct radeon_device *rdev)
3946{
3947        int r;
3948
3949        /* Make sur GART are not working */
3950        if (rdev->flags & RADEON_IS_PCI)
3951                r100_pci_gart_disable(rdev);
3952        /* Resume clock before doing reset */
3953        r100_clock_startup(rdev);
3954        /* Reset gpu before posting otherwise ATOM will enter infinite loop */
3955        if (radeon_asic_reset(rdev)) {
3956                dev_warn(rdev->dev, "GPU reset failed ! (0xE40=0x%08X, 0x7C0=0x%08X)\n",
3957                        RREG32(R_000E40_RBBM_STATUS),
3958                        RREG32(R_0007C0_CP_STAT));
3959        }
3960        /* post */
3961        radeon_combios_asic_init(rdev->ddev);
3962        /* Resume clock after posting */
3963        r100_clock_startup(rdev);
3964        /* Initialize surface registers */
3965        radeon_surface_init(rdev);
3966
3967        rdev->accel_working = true;
3968        r = r100_startup(rdev);
3969        if (r) {
3970                rdev->accel_working = false;
3971        }
3972        return r;
3973}
3974
3975int r100_suspend(struct radeon_device *rdev)
3976{
3977        radeon_pm_suspend(rdev);
3978        r100_cp_disable(rdev);
3979        radeon_wb_disable(rdev);
3980        r100_irq_disable(rdev);
3981        if (rdev->flags & RADEON_IS_PCI)
3982                r100_pci_gart_disable(rdev);
3983        return 0;
3984}
3985
3986void r100_fini(struct radeon_device *rdev)
3987{
3988        radeon_pm_fini(rdev);
3989        r100_cp_fini(rdev);
3990        radeon_wb_fini(rdev);
3991        radeon_ib_pool_fini(rdev);
3992        radeon_gem_fini(rdev);
3993        if (rdev->flags & RADEON_IS_PCI)
3994                r100_pci_gart_fini(rdev);
3995        radeon_agp_fini(rdev);
3996        radeon_irq_kms_fini(rdev);
3997        radeon_fence_driver_fini(rdev);
3998        radeon_bo_fini(rdev);
3999        radeon_atombios_fini(rdev);
4000        kfree(rdev->bios);
4001        rdev->bios = NULL;
4002}
4003
4004/*
4005 * Due to how kexec works, it can leave the hw fully initialised when it
4006 * boots the new kernel. However doing our init sequence with the CP and
4007 * WB stuff setup causes GPU hangs on the RN50 at least. So at startup
4008 * do some quick sanity checks and restore sane values to avoid this
4009 * problem.
4010 */
4011void r100_restore_sanity(struct radeon_device *rdev)
4012{
4013        u32 tmp;
4014
4015        tmp = RREG32(RADEON_CP_CSQ_CNTL);
4016        if (tmp) {
4017                WREG32(RADEON_CP_CSQ_CNTL, 0);
4018        }
4019        tmp = RREG32(RADEON_CP_RB_CNTL);
4020        if (tmp) {
4021                WREG32(RADEON_CP_RB_CNTL, 0);
4022        }
4023        tmp = RREG32(RADEON_SCRATCH_UMSK);
4024        if (tmp) {
4025                WREG32(RADEON_SCRATCH_UMSK, 0);
4026        }
4027}
4028
4029int r100_init(struct radeon_device *rdev)
4030{
4031        int r;
4032
4033        /* Register debugfs file specific to this group of asics */
4034        r100_debugfs(rdev);
4035        /* Disable VGA */
4036        r100_vga_render_disable(rdev);
4037        /* Initialize scratch registers */
4038        radeon_scratch_init(rdev);
4039        /* Initialize surface registers */
4040        radeon_surface_init(rdev);
4041        /* sanity check some register to avoid hangs like after kexec */
4042        r100_restore_sanity(rdev);
4043        /* TODO: disable VGA need to use VGA request */
4044        /* BIOS*/
4045        if (!radeon_get_bios(rdev)) {
4046                if (ASIC_IS_AVIVO(rdev))
4047                        return -EINVAL;
4048        }
4049        if (rdev->is_atom_bios) {
4050                dev_err(rdev->dev, "Expecting combios for RS400/RS480 GPU\n");
4051                return -EINVAL;
4052        } else {
4053                r = radeon_combios_init(rdev);
4054                if (r)
4055                        return r;
4056        }
4057        /* Reset gpu before posting otherwise ATOM will enter infinite loop */
4058        if (radeon_asic_reset(rdev)) {
4059                dev_warn(rdev->dev,
4060                        "GPU reset failed ! (0xE40=0x%08X, 0x7C0=0x%08X)\n",
4061                        RREG32(R_000E40_RBBM_STATUS),
4062                        RREG32(R_0007C0_CP_STAT));
4063        }
4064        /* check if cards are posted or not */
4065        if (radeon_boot_test_post_card(rdev) == false)
4066                return -EINVAL;
4067        /* Set asic errata */
4068        r100_errata(rdev);
4069        /* Initialize clocks */
4070        radeon_get_clock_info(rdev->ddev);
4071        /* initialize AGP */
4072        if (rdev->flags & RADEON_IS_AGP) {
4073                r = radeon_agp_init(rdev);
4074                if (r) {
4075                        radeon_agp_disable(rdev);
4076                }
4077        }
4078        /* initialize VRAM */
4079        r100_mc_init(rdev);
4080        /* Fence driver */
4081        r = radeon_fence_driver_init(rdev);
4082        if (r)
4083                return r;
4084        /* Memory manager */
4085        r = radeon_bo_init(rdev);
4086        if (r)
4087                return r;
4088        if (rdev->flags & RADEON_IS_PCI) {
4089                r = r100_pci_gart_init(rdev);
4090                if (r)
4091                        return r;
4092        }
4093        r100_set_safe_registers(rdev);
4094
4095        /* Initialize power management */
4096        radeon_pm_init(rdev);
4097
4098        rdev->accel_working = true;
4099        r = r100_startup(rdev);
4100        if (r) {
4101                /* Somethings want wront with the accel init stop accel */
4102                dev_err(rdev->dev, "Disabling GPU acceleration\n");
4103                r100_cp_fini(rdev);
4104                radeon_wb_fini(rdev);
4105                radeon_ib_pool_fini(rdev);
4106                radeon_irq_kms_fini(rdev);
4107                if (rdev->flags & RADEON_IS_PCI)
4108                        r100_pci_gart_fini(rdev);
4109                rdev->accel_working = false;
4110        }
4111        return 0;
4112}
4113
4114uint32_t r100_mm_rreg_slow(struct radeon_device *rdev, uint32_t reg)
4115{
4116        unsigned long flags;
4117        uint32_t ret;
4118
4119        spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
4120        writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
4121        ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
4122        spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
4123        return ret;
4124}
4125
4126void r100_mm_wreg_slow(struct radeon_device *rdev, uint32_t reg, uint32_t v)
4127{
4128        unsigned long flags;
4129
4130        spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
4131        writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
4132        writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
4133        spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
4134}
4135
4136u32 r100_io_rreg(struct radeon_device *rdev, u32 reg)
4137{
4138        if (reg < rdev->rio_mem_size)
4139                return ioread32(rdev->rio_mem + reg);
4140        else {
4141                iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
4142                return ioread32(rdev->rio_mem + RADEON_MM_DATA);
4143        }
4144}
4145
4146void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v)
4147{
4148        if (reg < rdev->rio_mem_size)
4149                iowrite32(v, rdev->rio_mem + reg);
4150        else {
4151                iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
4152                iowrite32(v, rdev->rio_mem + RADEON_MM_DATA);
4153        }
4154}
4155