/* linux/drivers/gpu/drm/radeon/r600_cp.c */
   1/*
   2 * Copyright 2008-2009 Advanced Micro Devices, Inc.
   3 * Copyright 2008 Red Hat Inc.
   4 *
   5 * Permission is hereby granted, free of charge, to any person obtaining a
   6 * copy of this software and associated documentation files (the "Software"),
   7 * to deal in the Software without restriction, including without limitation
   8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   9 * and/or sell copies of the Software, and to permit persons to whom the
  10 * Software is furnished to do so, subject to the following conditions:
  11 *
  12 * The above copyright notice and this permission notice (including the next
  13 * paragraph) shall be included in all copies or substantial portions of the
  14 * Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19 * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  22 * DEALINGS IN THE SOFTWARE.
  23 *
  24 * Authors:
  25 *     Dave Airlie <airlied@redhat.com>
  26 *     Alex Deucher <alexander.deucher@amd.com>
  27 */
  28
  29#include "drmP.h"
  30#include "drm.h"
  31#include "radeon_drm.h"
  32#include "radeon_drv.h"
  33
  34#define PFP_UCODE_SIZE 576
  35#define PM4_UCODE_SIZE 1792
  36#define R700_PFP_UCODE_SIZE 848
  37#define R700_PM4_UCODE_SIZE 1360
  38
  39/* Firmware Names */
  40MODULE_FIRMWARE("radeon/R600_pfp.bin");
  41MODULE_FIRMWARE("radeon/R600_me.bin");
  42MODULE_FIRMWARE("radeon/RV610_pfp.bin");
  43MODULE_FIRMWARE("radeon/RV610_me.bin");
  44MODULE_FIRMWARE("radeon/RV630_pfp.bin");
  45MODULE_FIRMWARE("radeon/RV630_me.bin");
  46MODULE_FIRMWARE("radeon/RV620_pfp.bin");
  47MODULE_FIRMWARE("radeon/RV620_me.bin");
  48MODULE_FIRMWARE("radeon/RV635_pfp.bin");
  49MODULE_FIRMWARE("radeon/RV635_me.bin");
  50MODULE_FIRMWARE("radeon/RV670_pfp.bin");
  51MODULE_FIRMWARE("radeon/RV670_me.bin");
  52MODULE_FIRMWARE("radeon/RS780_pfp.bin");
  53MODULE_FIRMWARE("radeon/RS780_me.bin");
  54MODULE_FIRMWARE("radeon/RV770_pfp.bin");
  55MODULE_FIRMWARE("radeon/RV770_me.bin");
  56MODULE_FIRMWARE("radeon/RV730_pfp.bin");
  57MODULE_FIRMWARE("radeon/RV730_me.bin");
  58MODULE_FIRMWARE("radeon/RV710_pfp.bin");
  59MODULE_FIRMWARE("radeon/RV710_me.bin");
  60
  61
  62int r600_cs_legacy(struct drm_device *dev, void *data, struct drm_file *filp,
  63                        unsigned family, u32 *ib, int *l);
  64void r600_cs_legacy_init(void);
  65
  66
  67# define ATI_PCIGART_PAGE_SIZE          4096    /**< PCI GART page size */
  68# define ATI_PCIGART_PAGE_MASK          (~(ATI_PCIGART_PAGE_SIZE-1))
  69
  70#define R600_PTE_VALID     (1 << 0)
  71#define R600_PTE_SYSTEM    (1 << 1)
  72#define R600_PTE_SNOOPED   (1 << 2)
  73#define R600_PTE_READABLE  (1 << 5)
  74#define R600_PTE_WRITEABLE (1 << 6)
  75
  76/* MAX values used for gfx init */
  77#define R6XX_MAX_SH_GPRS           256
  78#define R6XX_MAX_TEMP_GPRS         16
  79#define R6XX_MAX_SH_THREADS        256
  80#define R6XX_MAX_SH_STACK_ENTRIES  4096
  81#define R6XX_MAX_BACKENDS          8
  82#define R6XX_MAX_BACKENDS_MASK     0xff
  83#define R6XX_MAX_SIMDS             8
  84#define R6XX_MAX_SIMDS_MASK        0xff
  85#define R6XX_MAX_PIPES             8
  86#define R6XX_MAX_PIPES_MASK        0xff
  87
  88#define R7XX_MAX_SH_GPRS           256
  89#define R7XX_MAX_TEMP_GPRS         16
  90#define R7XX_MAX_SH_THREADS        256
  91#define R7XX_MAX_SH_STACK_ENTRIES  4096
  92#define R7XX_MAX_BACKENDS          8
  93#define R7XX_MAX_BACKENDS_MASK     0xff
  94#define R7XX_MAX_SIMDS             16
  95#define R7XX_MAX_SIMDS_MASK        0xffff
  96#define R7XX_MAX_PIPES             8
  97#define R7XX_MAX_PIPES_MASK        0xff
  98
  99static int r600_do_wait_for_fifo(drm_radeon_private_t *dev_priv, int entries)
 100{
 101        int i;
 102
 103        dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
 104
 105        for (i = 0; i < dev_priv->usec_timeout; i++) {
 106                int slots;
 107                if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770)
 108                        slots = (RADEON_READ(R600_GRBM_STATUS)
 109                                 & R700_CMDFIFO_AVAIL_MASK);
 110                else
 111                        slots = (RADEON_READ(R600_GRBM_STATUS)
 112                                 & R600_CMDFIFO_AVAIL_MASK);
 113                if (slots >= entries)
 114                        return 0;
 115                DRM_UDELAY(1);
 116        }
 117        DRM_INFO("wait for fifo failed status : 0x%08X 0x%08X\n",
 118                 RADEON_READ(R600_GRBM_STATUS),
 119                 RADEON_READ(R600_GRBM_STATUS2));
 120
 121        return -EBUSY;
 122}
 123
 124static int r600_do_wait_for_idle(drm_radeon_private_t *dev_priv)
 125{
 126        int i, ret;
 127
 128        dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
 129
 130        if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770)
 131                ret = r600_do_wait_for_fifo(dev_priv, 8);
 132        else
 133                ret = r600_do_wait_for_fifo(dev_priv, 16);
 134        if (ret)
 135                return ret;
 136        for (i = 0; i < dev_priv->usec_timeout; i++) {
 137                if (!(RADEON_READ(R600_GRBM_STATUS) & R600_GUI_ACTIVE))
 138                        return 0;
 139                DRM_UDELAY(1);
 140        }
 141        DRM_INFO("wait idle failed status : 0x%08X 0x%08X\n",
 142                 RADEON_READ(R600_GRBM_STATUS),
 143                 RADEON_READ(R600_GRBM_STATUS2));
 144
 145        return -EBUSY;
 146}
 147
 148void r600_page_table_cleanup(struct drm_device *dev, struct drm_ati_pcigart_info *gart_info)
 149{
 150        struct drm_sg_mem *entry = dev->sg;
 151        int max_pages;
 152        int pages;
 153        int i;
 154
 155        if (!entry)
 156                return;
 157
 158        if (gart_info->bus_addr) {
 159                max_pages = (gart_info->table_size / sizeof(u64));
 160                pages = (entry->pages <= max_pages)
 161                  ? entry->pages : max_pages;
 162
 163                for (i = 0; i < pages; i++) {
 164                        if (!entry->busaddr[i])
 165                                break;
 166                        pci_unmap_page(dev->pdev, entry->busaddr[i],
 167                                       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
 168                }
 169                if (gart_info->gart_table_location == DRM_ATI_GART_MAIN)
 170                        gart_info->bus_addr = 0;
 171        }
 172}
 173
/* R600 has page table setup */
/* Populate the GART page table: DMA-map each scatter/gather page and write
 * one PTE per ATI_PCIGART_PAGE_SIZE (4K) chunk of it (one CPU PAGE_SIZE
 * page may span several GART pages).  Returns 1 on success, 0 on failure;
 * partial mappings are undone via r600_page_table_cleanup().
 */
int r600_page_table_init(struct drm_device *dev)
{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        struct drm_ati_pcigart_info *gart_info = &dev_priv->gart_info;
        struct drm_local_map *map = &gart_info->mapping;
        struct drm_sg_mem *entry = dev->sg;
        int ret = 0;
        int i, j;
        int pages;
        u64 page_base;
        dma_addr_t entry_addr;
        int max_ati_pages, max_real_pages, gart_idx;

        /* okay page table is available - lets rock */
        /* table capacity in 64-bit PTEs, and in whole CPU pages */
        max_ati_pages = (gart_info->table_size / sizeof(u64));
        max_real_pages = max_ati_pages / (PAGE_SIZE / ATI_PCIGART_PAGE_SIZE);

        pages = (entry->pages <= max_real_pages) ?
                entry->pages : max_real_pages;

        /* the table lives in io memory - clear it before filling */
        memset_io((void __iomem *)map->handle, 0, max_ati_pages * sizeof(u64));

        gart_idx = 0;
        for (i = 0; i < pages; i++) {
                entry->busaddr[i] = pci_map_page(dev->pdev,
                                                 entry->pagelist[i], 0,
                                                 PAGE_SIZE,
                                                 PCI_DMA_BIDIRECTIONAL);
                if (pci_dma_mapping_error(dev->pdev, entry->busaddr[i])) {
                        DRM_ERROR("unable to map PCIGART pages!\n");
                        r600_page_table_cleanup(dev, gart_info);
                        goto done;
                }
                entry_addr = entry->busaddr[i];
                /* one PTE per 4K GART page inside this CPU page */
                for (j = 0; j < (PAGE_SIZE / ATI_PCIGART_PAGE_SIZE); j++) {
                        /* 4K-aligned bus address plus valid/system/rw flags */
                        page_base = (u64) entry_addr & ATI_PCIGART_PAGE_MASK;
                        page_base |= R600_PTE_VALID | R600_PTE_SYSTEM | R600_PTE_SNOOPED;
                        page_base |= R600_PTE_READABLE | R600_PTE_WRITEABLE;

                        DRM_WRITE64(map, gart_idx * sizeof(u64), page_base);

                        gart_idx++;

                        /* throttle debug output to every 128th CPU page */
                        if ((i % 128) == 0)
                                DRM_DEBUG("page entry %d: 0x%016llx\n",
                                    i, (unsigned long long)page_base);
                        entry_addr += ATI_PCIGART_PAGE_SIZE;
                }
        }
        ret = 1;
done:
        return ret;
}
 228
 229static void r600_vm_flush_gart_range(struct drm_device *dev)
 230{
 231        drm_radeon_private_t *dev_priv = dev->dev_private;
 232        u32 resp, countdown = 1000;
 233        RADEON_WRITE(R600_VM_CONTEXT0_INVALIDATION_LOW_ADDR, dev_priv->gart_vm_start >> 12);
 234        RADEON_WRITE(R600_VM_CONTEXT0_INVALIDATION_HIGH_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);
 235        RADEON_WRITE(R600_VM_CONTEXT0_REQUEST_RESPONSE, 2);
 236
 237        do {
 238                resp = RADEON_READ(R600_VM_CONTEXT0_REQUEST_RESPONSE);
 239                countdown--;
 240                DRM_UDELAY(1);
 241        } while (((resp & 0xf0) == 0) && countdown);
 242}
 243
/* Program the R6xx VM/GART hardware: point the system aperture and VM
 * context 0 at the flat page table built by r600_page_table_init(),
 * configure every MC TLB client and the VM L2 cache, then flush the TLB
 * for the GART range.
 */
static void r600_vm_init(struct drm_device *dev)
{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        /* initialise the VM to use the page table we constructed up there */
        u32 vm_c0, i;
        u32 mc_rd_a;
        u32 vm_l2_cntl, vm_l2_cntl3;
        /* okay set up the PCIE aperture type thingo */
        RADEON_WRITE(R600_MC_VM_SYSTEM_APERTURE_LOW_ADDR, dev_priv->gart_vm_start >> 12);
        RADEON_WRITE(R600_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);
        RADEON_WRITE(R600_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 0);

        /* setup MC RD a */
        mc_rd_a = R600_MCD_L1_TLB | R600_MCD_L1_FRAG_PROC | R600_MCD_SYSTEM_ACCESS_MODE_IN_SYS |
                R600_MCD_SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU | R600_MCD_EFFECTIVE_L1_TLB_SIZE(5) |
                R600_MCD_EFFECTIVE_L1_QUEUE_SIZE(5) | R600_MCD_WAIT_L2_QUERY;

        /* the same L1 TLB setup is replicated to every MC client */
        RADEON_WRITE(R600_MCD_RD_A_CNTL, mc_rd_a);
        RADEON_WRITE(R600_MCD_RD_B_CNTL, mc_rd_a);

        RADEON_WRITE(R600_MCD_WR_A_CNTL, mc_rd_a);
        RADEON_WRITE(R600_MCD_WR_B_CNTL, mc_rd_a);

        RADEON_WRITE(R600_MCD_RD_GFX_CNTL, mc_rd_a);
        RADEON_WRITE(R600_MCD_WR_GFX_CNTL, mc_rd_a);

        RADEON_WRITE(R600_MCD_RD_SYS_CNTL, mc_rd_a);
        RADEON_WRITE(R600_MCD_WR_SYS_CNTL, mc_rd_a);

        /* HDP read client additionally gets strict ordering */
        RADEON_WRITE(R600_MCD_RD_HDP_CNTL, mc_rd_a | R600_MCD_L1_STRICT_ORDERING);
        RADEON_WRITE(R600_MCD_WR_HDP_CNTL, mc_rd_a /*| R600_MCD_L1_STRICT_ORDERING*/);

        RADEON_WRITE(R600_MCD_RD_PDMA_CNTL, mc_rd_a);
        RADEON_WRITE(R600_MCD_WR_PDMA_CNTL, mc_rd_a);

        RADEON_WRITE(R600_MCD_RD_SEM_CNTL, mc_rd_a | R600_MCD_SEMAPHORE_MODE);
        RADEON_WRITE(R600_MCD_WR_SEM_CNTL, mc_rd_a);

        vm_l2_cntl = R600_VM_L2_CACHE_EN | R600_VM_L2_FRAG_PROC | R600_VM_ENABLE_PTE_CACHE_LRU_W;
        vm_l2_cntl |= R600_VM_L2_CNTL_QUEUE_SIZE(7);
        RADEON_WRITE(R600_VM_L2_CNTL, vm_l2_cntl);

        RADEON_WRITE(R600_VM_L2_CNTL2, 0);
        vm_l2_cntl3 = (R600_VM_L2_CNTL3_BANK_SELECT_0(0) |
                       R600_VM_L2_CNTL3_BANK_SELECT_1(1) |
                       R600_VM_L2_CNTL3_CACHE_UPDATE_MODE(2));
        RADEON_WRITE(R600_VM_L2_CNTL3, vm_l2_cntl3);

        /* context 0 walks a flat (single level) page table */
        vm_c0 = R600_VM_ENABLE_CONTEXT | R600_VM_PAGE_TABLE_DEPTH_FLAT;

        RADEON_WRITE(R600_VM_CONTEXT0_CNTL, vm_c0);

        vm_c0 &= ~R600_VM_ENABLE_CONTEXT;

        /* disable all other contexts */
        for (i = 1; i < 8; i++)
                RADEON_WRITE(R600_VM_CONTEXT0_CNTL + (i * 4), vm_c0);

        /* page table location and the GART range it translates */
        RADEON_WRITE(R600_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, dev_priv->gart_info.bus_addr >> 12);
        RADEON_WRITE(R600_VM_CONTEXT0_PAGE_TABLE_START_ADDR, dev_priv->gart_vm_start >> 12);
        RADEON_WRITE(R600_VM_CONTEXT0_PAGE_TABLE_END_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);

        r600_vm_flush_gart_range(dev);
}
 308
/* Fetch the family-specific PFP and ME (PM4) CP microcode images via the
 * firmware loader and validate their sizes.
 * Returns 0 on success or a negative error code; on any failure both
 * firmware blobs are released and the dev_priv pointers cleared.
 */
static int r600_cp_init_microcode(drm_radeon_private_t *dev_priv)
{
        struct platform_device *pdev;
        const char *chip_name;
        size_t pfp_req_size, me_req_size;
        char fw_name[30];
        int err;

        /* request_firmware() needs a struct device to hang the request on */
        pdev = platform_device_register_simple("r600_cp", 0, NULL, 0);
        err = IS_ERR(pdev);
        if (err) {
                printk(KERN_ERR "r600_cp: Failed to register firmware\n");
                return -EINVAL;
        }

        /* map the ASIC family to its firmware file prefix */
        switch (dev_priv->flags & RADEON_FAMILY_MASK) {
        case CHIP_R600:  chip_name = "R600";  break;
        case CHIP_RV610: chip_name = "RV610"; break;
        case CHIP_RV630: chip_name = "RV630"; break;
        case CHIP_RV620: chip_name = "RV620"; break;
        case CHIP_RV635: chip_name = "RV635"; break;
        case CHIP_RV670: chip_name = "RV670"; break;
        case CHIP_RS780:
        case CHIP_RS880: chip_name = "RS780"; break;
        case CHIP_RV770: chip_name = "RV770"; break;
        case CHIP_RV730:
        case CHIP_RV740: chip_name = "RV730"; break;
        case CHIP_RV710: chip_name = "RV710"; break;
        default:         BUG();
        }

        /* expected image sizes in bytes; the r6xx ME image is 3 dwords per
         * PM4 instruction (hence * 12), matching the PM4_UCODE_SIZE * 3
         * upload loop in r600_cp_load_microcode() */
        if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770) {
                pfp_req_size = R700_PFP_UCODE_SIZE * 4;
                me_req_size = R700_PM4_UCODE_SIZE * 4;
        } else {
                pfp_req_size = PFP_UCODE_SIZE * 4;
                me_req_size = PM4_UCODE_SIZE * 12;
        }

        DRM_INFO("Loading %s CP Microcode\n", chip_name);

        snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
        err = request_firmware(&dev_priv->pfp_fw, fw_name, &pdev->dev);
        if (err)
                goto out;
        if (dev_priv->pfp_fw->size != pfp_req_size) {
                printk(KERN_ERR
                       "r600_cp: Bogus length %zu in firmware \"%s\"\n",
                       dev_priv->pfp_fw->size, fw_name);
                err = -EINVAL;
                goto out;
        }

        snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
        err = request_firmware(&dev_priv->me_fw, fw_name, &pdev->dev);
        if (err)
                goto out;
        if (dev_priv->me_fw->size != me_req_size) {
                printk(KERN_ERR
                       "r600_cp: Bogus length %zu in firmware \"%s\"\n",
                       dev_priv->me_fw->size, fw_name);
                err = -EINVAL;
        }
out:
        /* the dummy device was only needed for the requests */
        platform_device_unregister(pdev);

        if (err) {
                if (err != -EINVAL)
                        printk(KERN_ERR
                               "r600_cp: Failed to load firmware \"%s\"\n",
                               fw_name);
                /* release_firmware(NULL) is a no-op, so this is safe even
                 * when only the first request succeeded */
                release_firmware(dev_priv->pfp_fw);
                dev_priv->pfp_fw = NULL;
                release_firmware(dev_priv->me_fw);
                dev_priv->me_fw = NULL;
        }
        return err;
}
 387
/* Upload the previously fetched CP microcode into the R6xx ucode RAMs:
 * stop the CP, soft-reset it, then write the ME image into CP_ME_RAM and
 * the PFP image into the PFP ucode RAM.  Silently does nothing if the
 * firmware was never fetched.
 */
static void r600_cp_load_microcode(drm_radeon_private_t *dev_priv)
{
        const __be32 *fw_data;
        int i;

        if (!dev_priv->me_fw || !dev_priv->pfp_fw)
                return;

        r600_do_cp_stop(dev_priv);

        /* ring buffer: no rptr writeback while the CP is being reloaded */
        RADEON_WRITE(R600_CP_RB_CNTL,
#ifdef __BIG_ENDIAN
                     R600_BUF_SWAP_32BIT |
#endif
                     R600_RB_NO_UPDATE |
                     R600_RB_BLKSZ(15) |
                     R600_RB_BUFSZ(3));

        /* soft-reset only the CP block and give it time to settle */
        RADEON_WRITE(R600_GRBM_SOFT_RESET, R600_SOFT_RESET_CP);
        RADEON_READ(R600_GRBM_SOFT_RESET);
        DRM_UDELAY(15000);
        RADEON_WRITE(R600_GRBM_SOFT_RESET, 0);

        /* firmware images are stored as big-endian dwords */
        fw_data = (const __be32 *)dev_priv->me_fw->data;
        RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);
        for (i = 0; i < PM4_UCODE_SIZE * 3; i++)
                RADEON_WRITE(R600_CP_ME_RAM_DATA,
                             be32_to_cpup(fw_data++));

        fw_data = (const __be32 *)dev_priv->pfp_fw->data;
        RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
        for (i = 0; i < PFP_UCODE_SIZE; i++)
                RADEON_WRITE(R600_CP_PFP_UCODE_DATA,
                             be32_to_cpup(fw_data++));

        /* leave the ucode address registers back at 0 */
        RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
        RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);
        RADEON_WRITE(R600_CP_ME_RAM_RADDR, 0);

}
 428
/* Program the R7xx VM/GART hardware: system aperture, per-client L1 TLBs,
 * VM L2 cache, and VM context 0 pointing at the flat page table, then
 * flush the TLB for the GART range.
 */
static void r700_vm_init(struct drm_device *dev)
{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        /* initialise the VM to use the page table we constructed up there */
        u32 vm_c0, i;
        u32 mc_vm_md_l1;
        u32 vm_l2_cntl, vm_l2_cntl3;
        /* okay set up the PCIE aperture type thingo */
        RADEON_WRITE(R700_MC_VM_SYSTEM_APERTURE_LOW_ADDR, dev_priv->gart_vm_start >> 12);
        RADEON_WRITE(R700_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);
        RADEON_WRITE(R700_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 0);

        mc_vm_md_l1 = R700_ENABLE_L1_TLB |
            R700_ENABLE_L1_FRAGMENT_PROCESSING |
            R700_SYSTEM_ACCESS_MODE_IN_SYS |
            R700_SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU |
            R700_EFFECTIVE_L1_TLB_SIZE(5) |
            R700_EFFECTIVE_L1_QUEUE_SIZE(5);

        /* identical L1 TLB setup for every MD/MB client TLB */
        RADEON_WRITE(R700_MC_VM_MD_L1_TLB0_CNTL, mc_vm_md_l1);
        RADEON_WRITE(R700_MC_VM_MD_L1_TLB1_CNTL, mc_vm_md_l1);
        RADEON_WRITE(R700_MC_VM_MD_L1_TLB2_CNTL, mc_vm_md_l1);
        RADEON_WRITE(R700_MC_VM_MB_L1_TLB0_CNTL, mc_vm_md_l1);
        RADEON_WRITE(R700_MC_VM_MB_L1_TLB1_CNTL, mc_vm_md_l1);
        RADEON_WRITE(R700_MC_VM_MB_L1_TLB2_CNTL, mc_vm_md_l1);
        RADEON_WRITE(R700_MC_VM_MB_L1_TLB3_CNTL, mc_vm_md_l1);

        vm_l2_cntl = R600_VM_L2_CACHE_EN | R600_VM_L2_FRAG_PROC | R600_VM_ENABLE_PTE_CACHE_LRU_W;
        vm_l2_cntl |= R700_VM_L2_CNTL_QUEUE_SIZE(7);
        RADEON_WRITE(R600_VM_L2_CNTL, vm_l2_cntl);

        RADEON_WRITE(R600_VM_L2_CNTL2, 0);
        vm_l2_cntl3 = R700_VM_L2_CNTL3_BANK_SELECT(0) | R700_VM_L2_CNTL3_CACHE_UPDATE_MODE(2);
        RADEON_WRITE(R600_VM_L2_CNTL3, vm_l2_cntl3);

        /* context 0 walks a flat (single level) page table */
        vm_c0 = R600_VM_ENABLE_CONTEXT | R600_VM_PAGE_TABLE_DEPTH_FLAT;

        RADEON_WRITE(R600_VM_CONTEXT0_CNTL, vm_c0);

        vm_c0 &= ~R600_VM_ENABLE_CONTEXT;

        /* disable all other contexts */
        for (i = 1; i < 8; i++)
                RADEON_WRITE(R600_VM_CONTEXT0_CNTL + (i * 4), vm_c0);

        /* page table location and the GART range it translates */
        RADEON_WRITE(R700_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, dev_priv->gart_info.bus_addr >> 12);
        RADEON_WRITE(R700_VM_CONTEXT0_PAGE_TABLE_START_ADDR, dev_priv->gart_vm_start >> 12);
        RADEON_WRITE(R700_VM_CONTEXT0_PAGE_TABLE_END_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);

        r600_vm_flush_gart_range(dev);
}
 480
/* Upload the previously fetched CP microcode into the R7xx ucode RAMs:
 * stop the CP, soft-reset it, then write the PFP image followed by the ME
 * image.  Silently does nothing if the firmware was never fetched.
 */
static void r700_cp_load_microcode(drm_radeon_private_t *dev_priv)
{
        const __be32 *fw_data;
        int i;

        if (!dev_priv->me_fw || !dev_priv->pfp_fw)
                return;

        r600_do_cp_stop(dev_priv);

        /* ring buffer: no rptr writeback while the CP is being reloaded */
        RADEON_WRITE(R600_CP_RB_CNTL,
#ifdef __BIG_ENDIAN
                     R600_BUF_SWAP_32BIT |
#endif
                     R600_RB_NO_UPDATE |
                     R600_RB_BLKSZ(15) |
                     R600_RB_BUFSZ(3));

        /* soft-reset only the CP block and give it time to settle */
        RADEON_WRITE(R600_GRBM_SOFT_RESET, R600_SOFT_RESET_CP);
        RADEON_READ(R600_GRBM_SOFT_RESET);
        DRM_UDELAY(15000);
        RADEON_WRITE(R600_GRBM_SOFT_RESET, 0);

        /* firmware images are stored as big-endian dwords */
        fw_data = (const __be32 *)dev_priv->pfp_fw->data;
        RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
        for (i = 0; i < R700_PFP_UCODE_SIZE; i++)
                RADEON_WRITE(R600_CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
        RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);

        fw_data = (const __be32 *)dev_priv->me_fw->data;
        RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);
        for (i = 0; i < R700_PM4_UCODE_SIZE; i++)
                RADEON_WRITE(R600_CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
        RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);

        /* leave the ucode address registers back at 0 */
        RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
        RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);
        RADEON_WRITE(R600_CP_ME_RAM_RADDR, 0);

}
 521
/* Probe whether CP scratch-register writeback to the ring rptr area
 * actually lands in memory.  Sets dev_priv->writeback_works and disables
 * writeback in hardware (RB_NO_UPDATE, clear SCRATCH_UMSK) when it does
 * not work or when forced off via radeon_no_wb.
 */
static void r600_test_writeback(drm_radeon_private_t *dev_priv)
{
        u32 tmp;

        /* Start with assuming that writeback doesn't work */
        dev_priv->writeback_works = 0;

        /* Writeback doesn't seem to work everywhere, test it here and possibly
         * enable it if it appears to work
         */
        radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(1), 0);

        RADEON_WRITE(R600_SCRATCH_REG1, 0xdeadbeef);

        /* poll (up to usec_timeout iterations) for the magic value to
         * show up in the writeback area */
        for (tmp = 0; tmp < dev_priv->usec_timeout; tmp++) {
                u32 val;

                val = radeon_read_ring_rptr(dev_priv, R600_SCRATCHOFF(1));
                if (val == 0xdeadbeef)
                        break;
                DRM_UDELAY(1);
        }

        if (tmp < dev_priv->usec_timeout) {
                dev_priv->writeback_works = 1;
                DRM_INFO("writeback test succeeded in %d usecs\n", tmp);
        } else {
                dev_priv->writeback_works = 0;
                DRM_INFO("writeback test failed\n");
        }
        /* radeon_no_wb (driver option) overrides the probe result */
        if (radeon_no_wb == 1) {
                dev_priv->writeback_works = 0;
                DRM_INFO("writeback forced off\n");
        }

        if (!dev_priv->writeback_works) {
                /* Disable writeback to avoid unnecessary bus master transfer */
                RADEON_WRITE(R600_CP_RB_CNTL,
#ifdef __BIG_ENDIAN
                             R600_BUF_SWAP_32BIT |
#endif
                             RADEON_READ(R600_CP_RB_CNTL) |
                             R600_RB_NO_UPDATE);
                RADEON_WRITE(R600_SCRATCH_UMSK, 0);
        }
}
 568
/* Full engine reset for R6xx/R7xx: save the ring write pointer and ME
 * control state, soft-reset the GRBM blocks, restore the ring pointers,
 * then reset the CP and the driver's buffer freelist.  Always returns 0.
 */
int r600_do_engine_reset(struct drm_device *dev)
{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        u32 cp_ptr, cp_me_cntl, cp_rb_cntl;

        DRM_INFO("Resetting GPU\n");

        /* save ring state and halt the micro engine before resetting */
        cp_ptr = RADEON_READ(R600_CP_RB_WPTR);
        cp_me_cntl = RADEON_READ(R600_CP_ME_CNTL);
        RADEON_WRITE(R600_CP_ME_CNTL, R600_CP_ME_HALT);

        /* 0x7fff: presumably all GRBM soft-reset bits — reset everything */
        RADEON_WRITE(R600_GRBM_SOFT_RESET, 0x7fff);
        RADEON_READ(R600_GRBM_SOFT_RESET);
        DRM_UDELAY(50);
        RADEON_WRITE(R600_GRBM_SOFT_RESET, 0);
        RADEON_READ(R600_GRBM_SOFT_RESET);

        /* temporarily allow rptr writes so both ring pointers can be put
         * back where they were before the reset */
        RADEON_WRITE(R600_CP_RB_WPTR_DELAY, 0);
        cp_rb_cntl = RADEON_READ(R600_CP_RB_CNTL);
        RADEON_WRITE(R600_CP_RB_CNTL,
#ifdef __BIG_ENDIAN
                     R600_BUF_SWAP_32BIT |
#endif
                     R600_RB_RPTR_WR_ENA);

        RADEON_WRITE(R600_CP_RB_RPTR_WR, cp_ptr);
        RADEON_WRITE(R600_CP_RB_WPTR, cp_ptr);
        RADEON_WRITE(R600_CP_RB_CNTL, cp_rb_cntl);
        RADEON_WRITE(R600_CP_ME_CNTL, cp_me_cntl);

        /* Reset the CP ring */
        r600_do_cp_reset(dev_priv);

        /* The CP is no longer running after an engine reset */
        dev_priv->cp_running = 0;

        /* Reset any pending vertex, indirect buffers */
        radeon_freelist_reset(dev);

        return 0;

}
 611
 612static u32 r600_get_tile_pipe_to_backend_map(u32 num_tile_pipes,
 613                                             u32 num_backends,
 614                                             u32 backend_disable_mask)
 615{
 616        u32 backend_map = 0;
 617        u32 enabled_backends_mask;
 618        u32 enabled_backends_count;
 619        u32 cur_pipe;
 620        u32 swizzle_pipe[R6XX_MAX_PIPES];
 621        u32 cur_backend;
 622        u32 i;
 623
 624        if (num_tile_pipes > R6XX_MAX_PIPES)
 625                num_tile_pipes = R6XX_MAX_PIPES;
 626        if (num_tile_pipes < 1)
 627                num_tile_pipes = 1;
 628        if (num_backends > R6XX_MAX_BACKENDS)
 629                num_backends = R6XX_MAX_BACKENDS;
 630        if (num_backends < 1)
 631                num_backends = 1;
 632
 633        enabled_backends_mask = 0;
 634        enabled_backends_count = 0;
 635        for (i = 0; i < R6XX_MAX_BACKENDS; ++i) {
 636                if (((backend_disable_mask >> i) & 1) == 0) {
 637                        enabled_backends_mask |= (1 << i);
 638                        ++enabled_backends_count;
 639                }
 640                if (enabled_backends_count == num_backends)
 641                        break;
 642        }
 643
 644        if (enabled_backends_count == 0) {
 645                enabled_backends_mask = 1;
 646                enabled_backends_count = 1;
 647        }
 648
 649        if (enabled_backends_count != num_backends)
 650                num_backends = enabled_backends_count;
 651
 652        memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * R6XX_MAX_PIPES);
 653        switch (num_tile_pipes) {
 654        case 1:
 655                swizzle_pipe[0] = 0;
 656                break;
 657        case 2:
 658                swizzle_pipe[0] = 0;
 659                swizzle_pipe[1] = 1;
 660                break;
 661        case 3:
 662                swizzle_pipe[0] = 0;
 663                swizzle_pipe[1] = 1;
 664                swizzle_pipe[2] = 2;
 665                break;
 666        case 4:
 667                swizzle_pipe[0] = 0;
 668                swizzle_pipe[1] = 1;
 669                swizzle_pipe[2] = 2;
 670                swizzle_pipe[3] = 3;
 671                break;
 672        case 5:
 673                swizzle_pipe[0] = 0;
 674                swizzle_pipe[1] = 1;
 675                swizzle_pipe[2] = 2;
 676                swizzle_pipe[3] = 3;
 677                swizzle_pipe[4] = 4;
 678                break;
 679        case 6:
 680                swizzle_pipe[0] = 0;
 681                swizzle_pipe[1] = 2;
 682                swizzle_pipe[2] = 4;
 683                swizzle_pipe[3] = 5;
 684                swizzle_pipe[4] = 1;
 685                swizzle_pipe[5] = 3;
 686                break;
 687        case 7:
 688                swizzle_pipe[0] = 0;
 689                swizzle_pipe[1] = 2;
 690                swizzle_pipe[2] = 4;
 691                swizzle_pipe[3] = 6;
 692                swizzle_pipe[4] = 1;
 693                swizzle_pipe[5] = 3;
 694                swizzle_pipe[6] = 5;
 695                break;
 696        case 8:
 697                swizzle_pipe[0] = 0;
 698                swizzle_pipe[1] = 2;
 699                swizzle_pipe[2] = 4;
 700                swizzle_pipe[3] = 6;
 701                swizzle_pipe[4] = 1;
 702                swizzle_pipe[5] = 3;
 703                swizzle_pipe[6] = 5;
 704                swizzle_pipe[7] = 7;
 705                break;
 706        }
 707
 708        cur_backend = 0;
 709        for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) {
 710                while (((1 << cur_backend) & enabled_backends_mask) == 0)
 711                        cur_backend = (cur_backend + 1) % R6XX_MAX_BACKENDS;
 712
 713                backend_map |= (u32)(((cur_backend & 3) << (swizzle_pipe[cur_pipe] * 2)));
 714
 715                cur_backend = (cur_backend + 1) % R6XX_MAX_BACKENDS;
 716        }
 717
 718        return backend_map;
 719}
 720
 721static int r600_count_pipe_bits(uint32_t val)
 722{
 723        int i, ret = 0;
 724        for (i = 0; i < 32; i++) {
 725                ret += val & 1;
 726                val >>= 1;
 727        }
 728        return ret;
 729}
 730
 731static void r600_gfx_init(struct drm_device *dev,
 732                          drm_radeon_private_t *dev_priv)
 733{
 734        int i, j, num_qd_pipes;
 735        u32 sx_debug_1;
 736        u32 tc_cntl;
 737        u32 arb_pop;
 738        u32 num_gs_verts_per_thread;
 739        u32 vgt_gs_per_es;
 740        u32 gs_prim_buffer_depth = 0;
 741        u32 sq_ms_fifo_sizes;
 742        u32 sq_config;
 743        u32 sq_gpr_resource_mgmt_1 = 0;
 744        u32 sq_gpr_resource_mgmt_2 = 0;
 745        u32 sq_thread_resource_mgmt = 0;
 746        u32 sq_stack_resource_mgmt_1 = 0;
 747        u32 sq_stack_resource_mgmt_2 = 0;
 748        u32 hdp_host_path_cntl;
 749        u32 backend_map;
 750        u32 gb_tiling_config = 0;
 751        u32 cc_rb_backend_disable;
 752        u32 cc_gc_shader_pipe_config;
 753        u32 ramcfg;
 754
 755        /* setup chip specs */
 756        switch (dev_priv->flags & RADEON_FAMILY_MASK) {
 757        case CHIP_R600:
 758                dev_priv->r600_max_pipes = 4;
 759                dev_priv->r600_max_tile_pipes = 8;
 760                dev_priv->r600_max_simds = 4;
 761                dev_priv->r600_max_backends = 4;
 762                dev_priv->r600_max_gprs = 256;
 763                dev_priv->r600_max_threads = 192;
 764                dev_priv->r600_max_stack_entries = 256;
 765                dev_priv->r600_max_hw_contexts = 8;
 766                dev_priv->r600_max_gs_threads = 16;
 767                dev_priv->r600_sx_max_export_size = 128;
 768                dev_priv->r600_sx_max_export_pos_size = 16;
 769                dev_priv->r600_sx_max_export_smx_size = 128;
 770                dev_priv->r600_sq_num_cf_insts = 2;
 771                break;
 772        case CHIP_RV630:
 773        case CHIP_RV635:
 774                dev_priv->r600_max_pipes = 2;
 775                dev_priv->r600_max_tile_pipes = 2;
 776                dev_priv->r600_max_simds = 3;
 777                dev_priv->r600_max_backends = 1;
 778                dev_priv->r600_max_gprs = 128;
 779                dev_priv->r600_max_threads = 192;
 780                dev_priv->r600_max_stack_entries = 128;
 781                dev_priv->r600_max_hw_contexts = 8;
 782                dev_priv->r600_max_gs_threads = 4;
 783                dev_priv->r600_sx_max_export_size = 128;
 784                dev_priv->r600_sx_max_export_pos_size = 16;
 785                dev_priv->r600_sx_max_export_smx_size = 128;
 786                dev_priv->r600_sq_num_cf_insts = 2;
 787                break;
 788        case CHIP_RV610:
 789        case CHIP_RS780:
 790        case CHIP_RS880:
 791        case CHIP_RV620:
 792                dev_priv->r600_max_pipes = 1;
 793                dev_priv->r600_max_tile_pipes = 1;
 794                dev_priv->r600_max_simds = 2;
 795                dev_priv->r600_max_backends = 1;
 796                dev_priv->r600_max_gprs = 128;
 797                dev_priv->r600_max_threads = 192;
 798                dev_priv->r600_max_stack_entries = 128;
 799                dev_priv->r600_max_hw_contexts = 4;
 800                dev_priv->r600_max_gs_threads = 4;
 801                dev_priv->r600_sx_max_export_size = 128;
 802                dev_priv->r600_sx_max_export_pos_size = 16;
 803                dev_priv->r600_sx_max_export_smx_size = 128;
 804                dev_priv->r600_sq_num_cf_insts = 1;
 805                break;
 806        case CHIP_RV670:
 807                dev_priv->r600_max_pipes = 4;
 808                dev_priv->r600_max_tile_pipes = 4;
 809                dev_priv->r600_max_simds = 4;
 810                dev_priv->r600_max_backends = 4;
 811                dev_priv->r600_max_gprs = 192;
 812                dev_priv->r600_max_threads = 192;
 813                dev_priv->r600_max_stack_entries = 256;
 814                dev_priv->r600_max_hw_contexts = 8;
 815                dev_priv->r600_max_gs_threads = 16;
 816                dev_priv->r600_sx_max_export_size = 128;
 817                dev_priv->r600_sx_max_export_pos_size = 16;
 818                dev_priv->r600_sx_max_export_smx_size = 128;
 819                dev_priv->r600_sq_num_cf_insts = 2;
 820                break;
 821        default:
 822                break;
 823        }
 824
 825        /* Initialize HDP */
 826        j = 0;
 827        for (i = 0; i < 32; i++) {
 828                RADEON_WRITE((0x2c14 + j), 0x00000000);
 829                RADEON_WRITE((0x2c18 + j), 0x00000000);
 830                RADEON_WRITE((0x2c1c + j), 0x00000000);
 831                RADEON_WRITE((0x2c20 + j), 0x00000000);
 832                RADEON_WRITE((0x2c24 + j), 0x00000000);
 833                j += 0x18;
 834        }
 835
 836        RADEON_WRITE(R600_GRBM_CNTL, R600_GRBM_READ_TIMEOUT(0xff));
 837
 838        /* setup tiling, simd, pipe config */
 839        ramcfg = RADEON_READ(R600_RAMCFG);
 840
 841        switch (dev_priv->r600_max_tile_pipes) {
 842        case 1:
 843                gb_tiling_config |= R600_PIPE_TILING(0);
 844                break;
 845        case 2:
 846                gb_tiling_config |= R600_PIPE_TILING(1);
 847                break;
 848        case 4:
 849                gb_tiling_config |= R600_PIPE_TILING(2);
 850                break;
 851        case 8:
 852                gb_tiling_config |= R600_PIPE_TILING(3);
 853                break;
 854        default:
 855                break;
 856        }
 857
 858        gb_tiling_config |= R600_BANK_TILING((ramcfg >> R600_NOOFBANK_SHIFT) & R600_NOOFBANK_MASK);
 859
 860        gb_tiling_config |= R600_GROUP_SIZE(0);
 861
 862        if (((ramcfg >> R600_NOOFROWS_SHIFT) & R600_NOOFROWS_MASK) > 3) {
 863                gb_tiling_config |= R600_ROW_TILING(3);
 864                gb_tiling_config |= R600_SAMPLE_SPLIT(3);
 865        } else {
 866                gb_tiling_config |=
 867                        R600_ROW_TILING(((ramcfg >> R600_NOOFROWS_SHIFT) & R600_NOOFROWS_MASK));
 868                gb_tiling_config |=
 869                        R600_SAMPLE_SPLIT(((ramcfg >> R600_NOOFROWS_SHIFT) & R600_NOOFROWS_MASK));
 870        }
 871
 872        gb_tiling_config |= R600_BANK_SWAPS(1);
 873
 874        cc_rb_backend_disable = RADEON_READ(R600_CC_RB_BACKEND_DISABLE) & 0x00ff0000;
 875        cc_rb_backend_disable |=
 876                R600_BACKEND_DISABLE((R6XX_MAX_BACKENDS_MASK << dev_priv->r600_max_backends) & R6XX_MAX_BACKENDS_MASK);
 877
 878        cc_gc_shader_pipe_config = RADEON_READ(R600_CC_GC_SHADER_PIPE_CONFIG) & 0xffffff00;
 879        cc_gc_shader_pipe_config |=
 880                R600_INACTIVE_QD_PIPES((R6XX_MAX_PIPES_MASK << dev_priv->r600_max_pipes) & R6XX_MAX_PIPES_MASK);
 881        cc_gc_shader_pipe_config |=
 882                R600_INACTIVE_SIMDS((R6XX_MAX_SIMDS_MASK << dev_priv->r600_max_simds) & R6XX_MAX_SIMDS_MASK);
 883
 884        backend_map = r600_get_tile_pipe_to_backend_map(dev_priv->r600_max_tile_pipes,
 885                                                        (R6XX_MAX_BACKENDS -
 886                                                         r600_count_pipe_bits((cc_rb_backend_disable &
 887                                                                               R6XX_MAX_BACKENDS_MASK) >> 16)),
 888                                                        (cc_rb_backend_disable >> 16));
 889        gb_tiling_config |= R600_BACKEND_MAP(backend_map);
 890
 891        RADEON_WRITE(R600_GB_TILING_CONFIG,      gb_tiling_config);
 892        RADEON_WRITE(R600_DCP_TILING_CONFIG,    (gb_tiling_config & 0xffff));
 893        RADEON_WRITE(R600_HDP_TILING_CONFIG,    (gb_tiling_config & 0xffff));
 894        if (gb_tiling_config & 0xc0) {
 895                dev_priv->r600_group_size = 512;
 896        } else {
 897                dev_priv->r600_group_size = 256;
 898        }
 899        dev_priv->r600_npipes = 1 << ((gb_tiling_config >> 1) & 0x7);
 900        if (gb_tiling_config & 0x30) {
 901                dev_priv->r600_nbanks = 8;
 902        } else {
 903                dev_priv->r600_nbanks = 4;
 904        }
 905
 906        RADEON_WRITE(R600_CC_RB_BACKEND_DISABLE,      cc_rb_backend_disable);
 907        RADEON_WRITE(R600_CC_GC_SHADER_PIPE_CONFIG,   cc_gc_shader_pipe_config);
 908        RADEON_WRITE(R600_GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);
 909
 910        num_qd_pipes =
 911                R6XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & R600_INACTIVE_QD_PIPES_MASK) >> 8);
 912        RADEON_WRITE(R600_VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & R600_DEALLOC_DIST_MASK);
 913        RADEON_WRITE(R600_VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & R600_VTX_REUSE_DEPTH_MASK);
 914
 915        /* set HW defaults for 3D engine */
 916        RADEON_WRITE(R600_CP_QUEUE_THRESHOLDS, (R600_ROQ_IB1_START(0x16) |
 917                                                R600_ROQ_IB2_START(0x2b)));
 918
 919        RADEON_WRITE(R600_CP_MEQ_THRESHOLDS, (R600_MEQ_END(0x40) |
 920                                              R600_ROQ_END(0x40)));
 921
 922        RADEON_WRITE(R600_TA_CNTL_AUX, (R600_DISABLE_CUBE_ANISO |
 923                                        R600_SYNC_GRADIENT |
 924                                        R600_SYNC_WALKER |
 925                                        R600_SYNC_ALIGNER));
 926
 927        if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV670)
 928                RADEON_WRITE(R600_ARB_GDEC_RD_CNTL, 0x00000021);
 929
 930        sx_debug_1 = RADEON_READ(R600_SX_DEBUG_1);
 931        sx_debug_1 |= R600_SMX_EVENT_RELEASE;
 932        if (((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_R600))
 933                sx_debug_1 |= R600_ENABLE_NEW_SMX_ADDRESS;
 934        RADEON_WRITE(R600_SX_DEBUG_1, sx_debug_1);
 935
 936        if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R600) ||
 937            ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV630) ||
 938            ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
 939            ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
 940            ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
 941            ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880))
 942                RADEON_WRITE(R600_DB_DEBUG, R600_PREZ_MUST_WAIT_FOR_POSTZ_DONE);
 943        else
 944                RADEON_WRITE(R600_DB_DEBUG, 0);
 945
 946        RADEON_WRITE(R600_DB_WATERMARKS, (R600_DEPTH_FREE(4) |
 947                                          R600_DEPTH_FLUSH(16) |
 948                                          R600_DEPTH_PENDING_FREE(4) |
 949                                          R600_DEPTH_CACHELINE_FREE(16)));
 950        RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0);
 951        RADEON_WRITE(R600_VGT_NUM_INSTANCES, 0);
 952
 953        RADEON_WRITE(R600_SPI_CONFIG_CNTL, R600_GPR_WRITE_PRIORITY(0));
 954        RADEON_WRITE(R600_SPI_CONFIG_CNTL_1, R600_VTX_DONE_DELAY(0));
 955
 956        sq_ms_fifo_sizes = RADEON_READ(R600_SQ_MS_FIFO_SIZES);
 957        if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
 958            ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
 959            ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
 960            ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880)) {
 961                sq_ms_fifo_sizes = (R600_CACHE_FIFO_SIZE(0xa) |
 962                                    R600_FETCH_FIFO_HIWATER(0xa) |
 963                                    R600_DONE_FIFO_HIWATER(0xe0) |
 964                                    R600_ALU_UPDATE_FIFO_HIWATER(0x8));
 965        } else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R600) ||
 966                   ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV630)) {
 967                sq_ms_fifo_sizes &= ~R600_DONE_FIFO_HIWATER(0xff);
 968                sq_ms_fifo_sizes |= R600_DONE_FIFO_HIWATER(0x4);
 969        }
 970        RADEON_WRITE(R600_SQ_MS_FIFO_SIZES, sq_ms_fifo_sizes);
 971
 972        /* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT
 973         * should be adjusted as needed by the 2D/3D drivers.  This just sets default values
 974         */
 975        sq_config = RADEON_READ(R600_SQ_CONFIG);
 976        sq_config &= ~(R600_PS_PRIO(3) |
 977                       R600_VS_PRIO(3) |
 978                       R600_GS_PRIO(3) |
 979                       R600_ES_PRIO(3));
 980        sq_config |= (R600_DX9_CONSTS |
 981                      R600_VC_ENABLE |
 982                      R600_PS_PRIO(0) |
 983                      R600_VS_PRIO(1) |
 984                      R600_GS_PRIO(2) |
 985                      R600_ES_PRIO(3));
 986
 987        if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R600) {
 988                sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(124) |
 989                                          R600_NUM_VS_GPRS(124) |
 990                                          R600_NUM_CLAUSE_TEMP_GPRS(4));
 991                sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(0) |
 992                                          R600_NUM_ES_GPRS(0));
 993                sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(136) |
 994                                           R600_NUM_VS_THREADS(48) |
 995                                           R600_NUM_GS_THREADS(4) |
 996                                           R600_NUM_ES_THREADS(4));
 997                sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(128) |
 998                                            R600_NUM_VS_STACK_ENTRIES(128));
 999                sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(0) |
1000                                            R600_NUM_ES_STACK_ENTRIES(0));
1001        } else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
1002                   ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
1003                   ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
1004                   ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880)) {
1005                /* no vertex cache */
1006                sq_config &= ~R600_VC_ENABLE;
1007
1008                sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(44) |
1009                                          R600_NUM_VS_GPRS(44) |
1010                                          R600_NUM_CLAUSE_TEMP_GPRS(2));
1011                sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(17) |
1012                                          R600_NUM_ES_GPRS(17));
1013                sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(79) |
1014                                           R600_NUM_VS_THREADS(78) |
1015                                           R600_NUM_GS_THREADS(4) |
1016                                           R600_NUM_ES_THREADS(31));
1017                sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(40) |
1018                                            R600_NUM_VS_STACK_ENTRIES(40));
1019                sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(32) |
1020                                            R600_NUM_ES_STACK_ENTRIES(16));
1021        } else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV630) ||
1022                   ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV635)) {
1023                sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(44) |
1024                                          R600_NUM_VS_GPRS(44) |
1025                                          R600_NUM_CLAUSE_TEMP_GPRS(2));
1026                sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(18) |
1027                                          R600_NUM_ES_GPRS(18));
1028                sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(79) |
1029                                           R600_NUM_VS_THREADS(78) |
1030                                           R600_NUM_GS_THREADS(4) |
1031                                           R600_NUM_ES_THREADS(31));
1032                sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(40) |
1033                                            R600_NUM_VS_STACK_ENTRIES(40));
1034                sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(32) |
1035                                            R600_NUM_ES_STACK_ENTRIES(16));
1036        } else if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV670) {
1037                sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(44) |
1038                                          R600_NUM_VS_GPRS(44) |
1039                                          R600_NUM_CLAUSE_TEMP_GPRS(2));
1040                sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(17) |
1041                                          R600_NUM_ES_GPRS(17));
1042                sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(79) |
1043                                           R600_NUM_VS_THREADS(78) |
1044                                           R600_NUM_GS_THREADS(4) |
1045                                           R600_NUM_ES_THREADS(31));
1046                sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(64) |
1047                                            R600_NUM_VS_STACK_ENTRIES(64));
1048                sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(64) |
1049                                            R600_NUM_ES_STACK_ENTRIES(64));
1050        }
1051
1052        RADEON_WRITE(R600_SQ_CONFIG, sq_config);
1053        RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_1,  sq_gpr_resource_mgmt_1);
1054        RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_2,  sq_gpr_resource_mgmt_2);
1055        RADEON_WRITE(R600_SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt);
1056        RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_1, sq_stack_resource_mgmt_1);
1057        RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_2, sq_stack_resource_mgmt_2);
1058
1059        if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
1060            ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
1061            ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
1062            ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880))
1063                RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, R600_CACHE_INVALIDATION(R600_TC_ONLY));
1064        else
1065                RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, R600_CACHE_INVALIDATION(R600_VC_AND_TC));
1066
1067        RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_2S, (R600_S0_X(0xc) |
1068                                                    R600_S0_Y(0x4) |
1069                                                    R600_S1_X(0x4) |
1070                                                    R600_S1_Y(0xc)));
1071        RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_4S, (R600_S0_X(0xe) |
1072                                                    R600_S0_Y(0xe) |
1073                                                    R600_S1_X(0x2) |
1074                                                    R600_S1_Y(0x2) |
1075                                                    R600_S2_X(0xa) |
1076                                                    R600_S2_Y(0x6) |
1077                                                    R600_S3_X(0x6) |
1078                                                    R600_S3_Y(0xa)));
1079        RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_8S_WD0, (R600_S0_X(0xe) |
1080                                                        R600_S0_Y(0xb) |
1081                                                        R600_S1_X(0x4) |
1082                                                        R600_S1_Y(0xc) |
1083                                                        R600_S2_X(0x1) |
1084                                                        R600_S2_Y(0x6) |
1085                                                        R600_S3_X(0xa) |
1086                                                        R600_S3_Y(0xe)));
1087        RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_8S_WD1, (R600_S4_X(0x6) |
1088                                                        R600_S4_Y(0x1) |
1089                                                        R600_S5_X(0x0) |
1090                                                        R600_S5_Y(0x0) |
1091                                                        R600_S6_X(0xb) |
1092                                                        R600_S6_Y(0x4) |
1093                                                        R600_S7_X(0x7) |
1094                                                        R600_S7_Y(0x8)));
1095
1096
1097        switch (dev_priv->flags & RADEON_FAMILY_MASK) {
1098        case CHIP_R600:
1099        case CHIP_RV630:
1100        case CHIP_RV635:
1101                gs_prim_buffer_depth = 0;
1102                break;
1103        case CHIP_RV610:
1104        case CHIP_RS780:
1105        case CHIP_RS880:
1106        case CHIP_RV620:
1107                gs_prim_buffer_depth = 32;
1108                break;
1109        case CHIP_RV670:
1110                gs_prim_buffer_depth = 128;
1111                break;
1112        default:
1113                break;
1114        }
1115
1116        num_gs_verts_per_thread = dev_priv->r600_max_pipes * 16;
1117        vgt_gs_per_es = gs_prim_buffer_depth + num_gs_verts_per_thread;
1118        /* Max value for this is 256 */
1119        if (vgt_gs_per_es > 256)
1120                vgt_gs_per_es = 256;
1121
1122        RADEON_WRITE(R600_VGT_ES_PER_GS, 128);
1123        RADEON_WRITE(R600_VGT_GS_PER_ES, vgt_gs_per_es);
1124        RADEON_WRITE(R600_VGT_GS_PER_VS, 2);
1125        RADEON_WRITE(R600_VGT_GS_VERTEX_REUSE, 16);
1126
1127        /* more default values. 2D/3D driver should adjust as needed */
1128        RADEON_WRITE(R600_PA_SC_LINE_STIPPLE_STATE, 0);
1129        RADEON_WRITE(R600_VGT_STRMOUT_EN, 0);
1130        RADEON_WRITE(R600_SX_MISC, 0);
1131        RADEON_WRITE(R600_PA_SC_MODE_CNTL, 0);
1132        RADEON_WRITE(R600_PA_SC_AA_CONFIG, 0);
1133        RADEON_WRITE(R600_PA_SC_LINE_STIPPLE, 0);
1134        RADEON_WRITE(R600_SPI_INPUT_Z, 0);
1135        RADEON_WRITE(R600_SPI_PS_IN_CONTROL_0, R600_NUM_INTERP(2));
1136        RADEON_WRITE(R600_CB_COLOR7_FRAG, 0);
1137
1138        /* clear render buffer base addresses */
1139        RADEON_WRITE(R600_CB_COLOR0_BASE, 0);
1140        RADEON_WRITE(R600_CB_COLOR1_BASE, 0);
1141        RADEON_WRITE(R600_CB_COLOR2_BASE, 0);
1142        RADEON_WRITE(R600_CB_COLOR3_BASE, 0);
1143        RADEON_WRITE(R600_CB_COLOR4_BASE, 0);
1144        RADEON_WRITE(R600_CB_COLOR5_BASE, 0);
1145        RADEON_WRITE(R600_CB_COLOR6_BASE, 0);
1146        RADEON_WRITE(R600_CB_COLOR7_BASE, 0);
1147
1148        switch (dev_priv->flags & RADEON_FAMILY_MASK) {
1149        case CHIP_RV610:
1150        case CHIP_RS780:
1151        case CHIP_RS880:
1152        case CHIP_RV620:
1153                tc_cntl = R600_TC_L2_SIZE(8);
1154                break;
1155        case CHIP_RV630:
1156        case CHIP_RV635:
1157                tc_cntl = R600_TC_L2_SIZE(4);
1158                break;
1159        case CHIP_R600:
1160                tc_cntl = R600_TC_L2_SIZE(0) | R600_L2_DISABLE_LATE_HIT;
1161                break;
1162        default:
1163                tc_cntl = R600_TC_L2_SIZE(0);
1164                break;
1165        }
1166
1167        RADEON_WRITE(R600_TC_CNTL, tc_cntl);
1168
1169        hdp_host_path_cntl = RADEON_READ(R600_HDP_HOST_PATH_CNTL);
1170        RADEON_WRITE(R600_HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
1171
1172        arb_pop = RADEON_READ(R600_ARB_POP);
1173        arb_pop |= R600_ENABLE_TC128;
1174        RADEON_WRITE(R600_ARB_POP, arb_pop);
1175
1176        RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0);
1177        RADEON_WRITE(R600_PA_CL_ENHANCE, (R600_CLIP_VTX_REORDER_ENA |
1178                                          R600_NUM_CLIP_SEQ(3)));
1179        RADEON_WRITE(R600_PA_SC_ENHANCE, R600_FORCE_EOV_MAX_CLK_CNT(4095));
1180
1181}
1182
1183static u32 r700_get_tile_pipe_to_backend_map(drm_radeon_private_t *dev_priv,
1184                                             u32 num_tile_pipes,
1185                                             u32 num_backends,
1186                                             u32 backend_disable_mask)
1187{
1188        u32 backend_map = 0;
1189        u32 enabled_backends_mask;
1190        u32 enabled_backends_count;
1191        u32 cur_pipe;
1192        u32 swizzle_pipe[R7XX_MAX_PIPES];
1193        u32 cur_backend;
1194        u32 i;
1195        bool force_no_swizzle;
1196
1197        if (num_tile_pipes > R7XX_MAX_PIPES)
1198                num_tile_pipes = R7XX_MAX_PIPES;
1199        if (num_tile_pipes < 1)
1200                num_tile_pipes = 1;
1201        if (num_backends > R7XX_MAX_BACKENDS)
1202                num_backends = R7XX_MAX_BACKENDS;
1203        if (num_backends < 1)
1204                num_backends = 1;
1205
1206        enabled_backends_mask = 0;
1207        enabled_backends_count = 0;
1208        for (i = 0; i < R7XX_MAX_BACKENDS; ++i) {
1209                if (((backend_disable_mask >> i) & 1) == 0) {
1210                        enabled_backends_mask |= (1 << i);
1211                        ++enabled_backends_count;
1212                }
1213                if (enabled_backends_count == num_backends)
1214                        break;
1215        }
1216
1217        if (enabled_backends_count == 0) {
1218                enabled_backends_mask = 1;
1219                enabled_backends_count = 1;
1220        }
1221
1222        if (enabled_backends_count != num_backends)
1223                num_backends = enabled_backends_count;
1224
1225        switch (dev_priv->flags & RADEON_FAMILY_MASK) {
1226        case CHIP_RV770:
1227        case CHIP_RV730:
1228                force_no_swizzle = false;
1229                break;
1230        case CHIP_RV710:
1231        case CHIP_RV740:
1232        default:
1233                force_no_swizzle = true;
1234                break;
1235        }
1236
1237        memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * R7XX_MAX_PIPES);
1238        switch (num_tile_pipes) {
1239        case 1:
1240                swizzle_pipe[0] = 0;
1241                break;
1242        case 2:
1243                swizzle_pipe[0] = 0;
1244                swizzle_pipe[1] = 1;
1245                break;
1246        case 3:
1247                if (force_no_swizzle) {
1248                        swizzle_pipe[0] = 0;
1249                        swizzle_pipe[1] = 1;
1250                        swizzle_pipe[2] = 2;
1251                } else {
1252                        swizzle_pipe[0] = 0;
1253                        swizzle_pipe[1] = 2;
1254                        swizzle_pipe[2] = 1;
1255                }
1256                break;
1257        case 4:
1258                if (force_no_swizzle) {
1259                        swizzle_pipe[0] = 0;
1260                        swizzle_pipe[1] = 1;
1261                        swizzle_pipe[2] = 2;
1262                        swizzle_pipe[3] = 3;
1263                } else {
1264                        swizzle_pipe[0] = 0;
1265                        swizzle_pipe[1] = 2;
1266                        swizzle_pipe[2] = 3;
1267                        swizzle_pipe[3] = 1;
1268                }
1269                break;
1270        case 5:
1271                if (force_no_swizzle) {
1272                        swizzle_pipe[0] = 0;
1273                        swizzle_pipe[1] = 1;
1274                        swizzle_pipe[2] = 2;
1275                        swizzle_pipe[3] = 3;
1276                        swizzle_pipe[4] = 4;
1277                } else {
1278                        swizzle_pipe[0] = 0;
1279                        swizzle_pipe[1] = 2;
1280                        swizzle_pipe[2] = 4;
1281                        swizzle_pipe[3] = 1;
1282                        swizzle_pipe[4] = 3;
1283                }
1284                break;
1285        case 6:
1286                if (force_no_swizzle) {
1287                        swizzle_pipe[0] = 0;
1288                        swizzle_pipe[1] = 1;
1289                        swizzle_pipe[2] = 2;
1290                        swizzle_pipe[3] = 3;
1291                        swizzle_pipe[4] = 4;
1292                        swizzle_pipe[5] = 5;
1293                } else {
1294                        swizzle_pipe[0] = 0;
1295                        swizzle_pipe[1] = 2;
1296                        swizzle_pipe[2] = 4;
1297                        swizzle_pipe[3] = 5;
1298                        swizzle_pipe[4] = 3;
1299                        swizzle_pipe[5] = 1;
1300                }
1301                break;
1302        case 7:
1303                if (force_no_swizzle) {
1304                        swizzle_pipe[0] = 0;
1305                        swizzle_pipe[1] = 1;
1306                        swizzle_pipe[2] = 2;
1307                        swizzle_pipe[3] = 3;
1308                        swizzle_pipe[4] = 4;
1309                        swizzle_pipe[5] = 5;
1310                        swizzle_pipe[6] = 6;
1311                } else {
1312                        swizzle_pipe[0] = 0;
1313                        swizzle_pipe[1] = 2;
1314                        swizzle_pipe[2] = 4;
1315                        swizzle_pipe[3] = 6;
1316                        swizzle_pipe[4] = 3;
1317                        swizzle_pipe[5] = 1;
1318                        swizzle_pipe[6] = 5;
1319                }
1320                break;
1321        case 8:
1322                if (force_no_swizzle) {
1323                        swizzle_pipe[0] = 0;
1324                        swizzle_pipe[1] = 1;
1325                        swizzle_pipe[2] = 2;
1326                        swizzle_pipe[3] = 3;
1327                        swizzle_pipe[4] = 4;
1328                        swizzle_pipe[5] = 5;
1329                        swizzle_pipe[6] = 6;
1330                        swizzle_pipe[7] = 7;
1331                } else {
1332                        swizzle_pipe[0] = 0;
1333                        swizzle_pipe[1] = 2;
1334                        swizzle_pipe[2] = 4;
1335                        swizzle_pipe[3] = 6;
1336                        swizzle_pipe[4] = 3;
1337                        swizzle_pipe[5] = 1;
1338                        swizzle_pipe[6] = 7;
1339                        swizzle_pipe[7] = 5;
1340                }
1341                break;
1342        }
1343
1344        cur_backend = 0;
1345        for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) {
1346                while (((1 << cur_backend) & enabled_backends_mask) == 0)
1347                        cur_backend = (cur_backend + 1) % R7XX_MAX_BACKENDS;
1348
1349                backend_map |= (u32)(((cur_backend & 3) << (swizzle_pipe[cur_pipe] * 2)));
1350
1351                cur_backend = (cur_backend + 1) % R7XX_MAX_BACKENDS;
1352        }
1353
1354        return backend_map;
1355}
1356
1357static void r700_gfx_init(struct drm_device *dev,
1358                          drm_radeon_private_t *dev_priv)
1359{
1360        int i, j, num_qd_pipes;
1361        u32 ta_aux_cntl;
1362        u32 sx_debug_1;
1363        u32 smx_dc_ctl0;
1364        u32 db_debug3;
1365        u32 num_gs_verts_per_thread;
1366        u32 vgt_gs_per_es;
1367        u32 gs_prim_buffer_depth = 0;
1368        u32 sq_ms_fifo_sizes;
1369        u32 sq_config;
1370        u32 sq_thread_resource_mgmt;
1371        u32 hdp_host_path_cntl;
1372        u32 sq_dyn_gpr_size_simd_ab_0;
1373        u32 backend_map;
1374        u32 gb_tiling_config = 0;
1375        u32 cc_rb_backend_disable;
1376        u32 cc_gc_shader_pipe_config;
1377        u32 mc_arb_ramcfg;
1378        u32 db_debug4;
1379
1380        /* setup chip specs */
1381        switch (dev_priv->flags & RADEON_FAMILY_MASK) {
1382        case CHIP_RV770:
1383                dev_priv->r600_max_pipes = 4;
1384                dev_priv->r600_max_tile_pipes = 8;
1385                dev_priv->r600_max_simds = 10;
1386                dev_priv->r600_max_backends = 4;
1387                dev_priv->r600_max_gprs = 256;
1388                dev_priv->r600_max_threads = 248;
1389                dev_priv->r600_max_stack_entries = 512;
1390                dev_priv->r600_max_hw_contexts = 8;
1391                dev_priv->r600_max_gs_threads = 16 * 2;
1392                dev_priv->r600_sx_max_export_size = 128;
1393                dev_priv->r600_sx_max_export_pos_size = 16;
1394                dev_priv->r600_sx_max_export_smx_size = 112;
1395                dev_priv->r600_sq_num_cf_insts = 2;
1396
1397                dev_priv->r700_sx_num_of_sets = 7;
1398                dev_priv->r700_sc_prim_fifo_size = 0xF9;
1399                dev_priv->r700_sc_hiz_tile_fifo_size = 0x30;
1400                dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130;
1401                break;
1402        case CHIP_RV730:
1403                dev_priv->r600_max_pipes = 2;
1404                dev_priv->r600_max_tile_pipes = 4;
1405                dev_priv->r600_max_simds = 8;
1406                dev_priv->r600_max_backends = 2;
1407                dev_priv->r600_max_gprs = 128;
1408                dev_priv->r600_max_threads = 248;
1409                dev_priv->r600_max_stack_entries = 256;
1410                dev_priv->r600_max_hw_contexts = 8;
1411                dev_priv->r600_max_gs_threads = 16 * 2;
1412                dev_priv->r600_sx_max_export_size = 256;
1413                dev_priv->r600_sx_max_export_pos_size = 32;
1414                dev_priv->r600_sx_max_export_smx_size = 224;
1415                dev_priv->r600_sq_num_cf_insts = 2;
1416
1417                dev_priv->r700_sx_num_of_sets = 7;
1418                dev_priv->r700_sc_prim_fifo_size = 0xf9;
1419                dev_priv->r700_sc_hiz_tile_fifo_size = 0x30;
1420                dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130;
1421                if (dev_priv->r600_sx_max_export_pos_size > 16) {
1422                        dev_priv->r600_sx_max_export_pos_size -= 16;
1423                        dev_priv->r600_sx_max_export_smx_size += 16;
1424                }
1425                break;
1426        case CHIP_RV710:
1427                dev_priv->r600_max_pipes = 2;
1428                dev_priv->r600_max_tile_pipes = 2;
1429                dev_priv->r600_max_simds = 2;
1430                dev_priv->r600_max_backends = 1;
1431                dev_priv->r600_max_gprs = 256;
1432                dev_priv->r600_max_threads = 192;
1433                dev_priv->r600_max_stack_entries = 256;
1434                dev_priv->r600_max_hw_contexts = 4;
1435                dev_priv->r600_max_gs_threads = 8 * 2;
1436                dev_priv->r600_sx_max_export_size = 128;
1437                dev_priv->r600_sx_max_export_pos_size = 16;
1438                dev_priv->r600_sx_max_export_smx_size = 112;
1439                dev_priv->r600_sq_num_cf_insts = 1;
1440
1441                dev_priv->r700_sx_num_of_sets = 7;
1442                dev_priv->r700_sc_prim_fifo_size = 0x40;
1443                dev_priv->r700_sc_hiz_tile_fifo_size = 0x30;
1444                dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130;
1445                break;
1446        case CHIP_RV740:
1447                dev_priv->r600_max_pipes = 4;
1448                dev_priv->r600_max_tile_pipes = 4;
1449                dev_priv->r600_max_simds = 8;
1450                dev_priv->r600_max_backends = 4;
1451                dev_priv->r600_max_gprs = 256;
1452                dev_priv->r600_max_threads = 248;
1453                dev_priv->r600_max_stack_entries = 512;
1454                dev_priv->r600_max_hw_contexts = 8;
1455                dev_priv->r600_max_gs_threads = 16 * 2;
1456                dev_priv->r600_sx_max_export_size = 256;
1457                dev_priv->r600_sx_max_export_pos_size = 32;
1458                dev_priv->r600_sx_max_export_smx_size = 224;
1459                dev_priv->r600_sq_num_cf_insts = 2;
1460
1461                dev_priv->r700_sx_num_of_sets = 7;
1462                dev_priv->r700_sc_prim_fifo_size = 0x100;
1463                dev_priv->r700_sc_hiz_tile_fifo_size = 0x30;
1464                dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130;
1465
1466                if (dev_priv->r600_sx_max_export_pos_size > 16) {
1467                        dev_priv->r600_sx_max_export_pos_size -= 16;
1468                        dev_priv->r600_sx_max_export_smx_size += 16;
1469                }
1470                break;
1471        default:
1472                break;
1473        }
1474
1475        /* Initialize HDP */
1476        j = 0;
1477        for (i = 0; i < 32; i++) {
1478                RADEON_WRITE((0x2c14 + j), 0x00000000);
1479                RADEON_WRITE((0x2c18 + j), 0x00000000);
1480                RADEON_WRITE((0x2c1c + j), 0x00000000);
1481                RADEON_WRITE((0x2c20 + j), 0x00000000);
1482                RADEON_WRITE((0x2c24 + j), 0x00000000);
1483                j += 0x18;
1484        }
1485
1486        RADEON_WRITE(R600_GRBM_CNTL, R600_GRBM_READ_TIMEOUT(0xff));
1487
1488        /* setup tiling, simd, pipe config */
1489        mc_arb_ramcfg = RADEON_READ(R700_MC_ARB_RAMCFG);
1490
1491        switch (dev_priv->r600_max_tile_pipes) {
1492        case 1:
1493                gb_tiling_config |= R600_PIPE_TILING(0);
1494                break;
1495        case 2:
1496                gb_tiling_config |= R600_PIPE_TILING(1);
1497                break;
1498        case 4:
1499                gb_tiling_config |= R600_PIPE_TILING(2);
1500                break;
1501        case 8:
1502                gb_tiling_config |= R600_PIPE_TILING(3);
1503                break;
1504        default:
1505                break;
1506        }
1507
1508        if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV770)
1509                gb_tiling_config |= R600_BANK_TILING(1);
1510        else
1511                gb_tiling_config |= R600_BANK_TILING((mc_arb_ramcfg >> R700_NOOFBANK_SHIFT) & R700_NOOFBANK_MASK);
1512
1513        gb_tiling_config |= R600_GROUP_SIZE(0);
1514
1515        if (((mc_arb_ramcfg >> R700_NOOFROWS_SHIFT) & R700_NOOFROWS_MASK) > 3) {
1516                gb_tiling_config |= R600_ROW_TILING(3);
1517                gb_tiling_config |= R600_SAMPLE_SPLIT(3);
1518        } else {
1519                gb_tiling_config |=
1520                        R600_ROW_TILING(((mc_arb_ramcfg >> R700_NOOFROWS_SHIFT) & R700_NOOFROWS_MASK));
1521                gb_tiling_config |=
1522                        R600_SAMPLE_SPLIT(((mc_arb_ramcfg >> R700_NOOFROWS_SHIFT) & R700_NOOFROWS_MASK));
1523        }
1524
1525        gb_tiling_config |= R600_BANK_SWAPS(1);
1526
1527        cc_rb_backend_disable = RADEON_READ(R600_CC_RB_BACKEND_DISABLE) & 0x00ff0000;
1528        cc_rb_backend_disable |=
1529                R600_BACKEND_DISABLE((R7XX_MAX_BACKENDS_MASK << dev_priv->r600_max_backends) & R7XX_MAX_BACKENDS_MASK);
1530
1531        cc_gc_shader_pipe_config = RADEON_READ(R600_CC_GC_SHADER_PIPE_CONFIG) & 0xffffff00;
1532        cc_gc_shader_pipe_config |=
1533                R600_INACTIVE_QD_PIPES((R7XX_MAX_PIPES_MASK << dev_priv->r600_max_pipes) & R7XX_MAX_PIPES_MASK);
1534        cc_gc_shader_pipe_config |=
1535                R600_INACTIVE_SIMDS((R7XX_MAX_SIMDS_MASK << dev_priv->r600_max_simds) & R7XX_MAX_SIMDS_MASK);
1536
1537        if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV740)
1538                backend_map = 0x28;
1539        else
1540                backend_map = r700_get_tile_pipe_to_backend_map(dev_priv,
1541                                                                dev_priv->r600_max_tile_pipes,
1542                                                                (R7XX_MAX_BACKENDS -
1543                                                                 r600_count_pipe_bits((cc_rb_backend_disable &
1544                                                                                       R7XX_MAX_BACKENDS_MASK) >> 16)),
1545                                                                (cc_rb_backend_disable >> 16));
1546        gb_tiling_config |= R600_BACKEND_MAP(backend_map);
1547
1548        RADEON_WRITE(R600_GB_TILING_CONFIG,      gb_tiling_config);
1549        RADEON_WRITE(R600_DCP_TILING_CONFIG,    (gb_tiling_config & 0xffff));
1550        RADEON_WRITE(R600_HDP_TILING_CONFIG,    (gb_tiling_config & 0xffff));
1551        if (gb_tiling_config & 0xc0) {
1552                dev_priv->r600_group_size = 512;
1553        } else {
1554                dev_priv->r600_group_size = 256;
1555        }
1556        dev_priv->r600_npipes = 1 << ((gb_tiling_config >> 1) & 0x7);
1557        if (gb_tiling_config & 0x30) {
1558                dev_priv->r600_nbanks = 8;
1559        } else {
1560                dev_priv->r600_nbanks = 4;
1561        }
1562
1563        RADEON_WRITE(R600_CC_RB_BACKEND_DISABLE,      cc_rb_backend_disable);
1564        RADEON_WRITE(R600_CC_GC_SHADER_PIPE_CONFIG,   cc_gc_shader_pipe_config);
1565        RADEON_WRITE(R600_GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);
1566
1567        RADEON_WRITE(R700_CC_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable);
1568        RADEON_WRITE(R700_CGTS_SYS_TCC_DISABLE, 0);
1569        RADEON_WRITE(R700_CGTS_TCC_DISABLE, 0);
1570        RADEON_WRITE(R700_CGTS_USER_SYS_TCC_DISABLE, 0);
1571        RADEON_WRITE(R700_CGTS_USER_TCC_DISABLE, 0);
1572
1573        num_qd_pipes =
1574                R7XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & R600_INACTIVE_QD_PIPES_MASK) >> 8);
1575        RADEON_WRITE(R600_VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & R600_DEALLOC_DIST_MASK);
1576        RADEON_WRITE(R600_VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & R600_VTX_REUSE_DEPTH_MASK);
1577
1578        /* set HW defaults for 3D engine */
1579        RADEON_WRITE(R600_CP_QUEUE_THRESHOLDS, (R600_ROQ_IB1_START(0x16) |
1580                                                R600_ROQ_IB2_START(0x2b)));
1581
1582        RADEON_WRITE(R600_CP_MEQ_THRESHOLDS, R700_STQ_SPLIT(0x30));
1583
1584        ta_aux_cntl = RADEON_READ(R600_TA_CNTL_AUX);
1585        RADEON_WRITE(R600_TA_CNTL_AUX, ta_aux_cntl | R600_DISABLE_CUBE_ANISO);
1586
1587        sx_debug_1 = RADEON_READ(R700_SX_DEBUG_1);
1588        sx_debug_1 |= R700_ENABLE_NEW_SMX_ADDRESS;
1589        RADEON_WRITE(R700_SX_DEBUG_1, sx_debug_1);
1590
1591        smx_dc_ctl0 = RADEON_READ(R600_SMX_DC_CTL0);
1592        smx_dc_ctl0 &= ~R700_CACHE_DEPTH(0x1ff);
1593        smx_dc_ctl0 |= R700_CACHE_DEPTH((dev_priv->r700_sx_num_of_sets * 64) - 1);
1594        RADEON_WRITE(R600_SMX_DC_CTL0, smx_dc_ctl0);
1595
1596        if ((dev_priv->flags & RADEON_FAMILY_MASK) != CHIP_RV740)
1597                RADEON_WRITE(R700_SMX_EVENT_CTL, (R700_ES_FLUSH_CTL(4) |
1598                                                  R700_GS_FLUSH_CTL(4) |
1599                                                  R700_ACK_FLUSH_CTL(3) |
1600                                                  R700_SYNC_FLUSH_CTL));
1601
1602        db_debug3 = RADEON_READ(R700_DB_DEBUG3);
1603        db_debug3 &= ~R700_DB_CLK_OFF_DELAY(0x1f);
1604        switch (dev_priv->flags & RADEON_FAMILY_MASK) {
1605        case CHIP_RV770:
1606        case CHIP_RV740:
1607                db_debug3 |= R700_DB_CLK_OFF_DELAY(0x1f);
1608                break;
1609        case CHIP_RV710:
1610        case CHIP_RV730:
1611        default:
1612                db_debug3 |= R700_DB_CLK_OFF_DELAY(2);
1613                break;
1614        }
1615        RADEON_WRITE(R700_DB_DEBUG3, db_debug3);
1616
1617        if ((dev_priv->flags & RADEON_FAMILY_MASK) != CHIP_RV770) {
1618                db_debug4 = RADEON_READ(RV700_DB_DEBUG4);
1619                db_debug4 |= RV700_DISABLE_TILE_COVERED_FOR_PS_ITER;
1620                RADEON_WRITE(RV700_DB_DEBUG4, db_debug4);
1621        }
1622
1623        RADEON_WRITE(R600_SX_EXPORT_BUFFER_SIZES, (R600_COLOR_BUFFER_SIZE((dev_priv->r600_sx_max_export_size / 4) - 1) |
1624                                                   R600_POSITION_BUFFER_SIZE((dev_priv->r600_sx_max_export_pos_size / 4) - 1) |
1625                                                   R600_SMX_BUFFER_SIZE((dev_priv->r600_sx_max_export_smx_size / 4) - 1)));
1626
1627        RADEON_WRITE(R700_PA_SC_FIFO_SIZE_R7XX, (R700_SC_PRIM_FIFO_SIZE(dev_priv->r700_sc_prim_fifo_size) |
1628                                                 R700_SC_HIZ_TILE_FIFO_SIZE(dev_priv->r700_sc_hiz_tile_fifo_size) |
1629                                                 R700_SC_EARLYZ_TILE_FIFO_SIZE(dev_priv->r700_sc_earlyz_tile_fifo_fize)));
1630
1631        RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0);
1632
1633        RADEON_WRITE(R600_VGT_NUM_INSTANCES, 1);
1634
1635        RADEON_WRITE(R600_SPI_CONFIG_CNTL, R600_GPR_WRITE_PRIORITY(0));
1636
1637        RADEON_WRITE(R600_SPI_CONFIG_CNTL_1, R600_VTX_DONE_DELAY(4));
1638
1639        RADEON_WRITE(R600_CP_PERFMON_CNTL, 0);
1640
1641        sq_ms_fifo_sizes = (R600_CACHE_FIFO_SIZE(16 * dev_priv->r600_sq_num_cf_insts) |
1642                            R600_DONE_FIFO_HIWATER(0xe0) |
1643                            R600_ALU_UPDATE_FIFO_HIWATER(0x8));
1644        switch (dev_priv->flags & RADEON_FAMILY_MASK) {
1645        case CHIP_RV770:
1646        case CHIP_RV730:
1647        case CHIP_RV710:
1648                sq_ms_fifo_sizes |= R600_FETCH_FIFO_HIWATER(0x1);
1649                break;
1650        case CHIP_RV740:
1651        default:
1652                sq_ms_fifo_sizes |= R600_FETCH_FIFO_HIWATER(0x4);
1653                break;
1654        }
1655        RADEON_WRITE(R600_SQ_MS_FIFO_SIZES, sq_ms_fifo_sizes);
1656
1657        /* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT
1658         * should be adjusted as needed by the 2D/3D drivers.  This just sets default values
1659         */
1660        sq_config = RADEON_READ(R600_SQ_CONFIG);
1661        sq_config &= ~(R600_PS_PRIO(3) |
1662                       R600_VS_PRIO(3) |
1663                       R600_GS_PRIO(3) |
1664                       R600_ES_PRIO(3));
1665        sq_config |= (R600_DX9_CONSTS |
1666                      R600_VC_ENABLE |
1667                      R600_EXPORT_SRC_C |
1668                      R600_PS_PRIO(0) |
1669                      R600_VS_PRIO(1) |
1670                      R600_GS_PRIO(2) |
1671                      R600_ES_PRIO(3));
1672        if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710)
1673                /* no vertex cache */
1674                sq_config &= ~R600_VC_ENABLE;
1675
1676        RADEON_WRITE(R600_SQ_CONFIG, sq_config);
1677
1678        RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_1,  (R600_NUM_PS_GPRS((dev_priv->r600_max_gprs * 24)/64) |
1679                                                    R600_NUM_VS_GPRS((dev_priv->r600_max_gprs * 24)/64) |
1680                                                    R600_NUM_CLAUSE_TEMP_GPRS(((dev_priv->r600_max_gprs * 24)/64)/2)));
1681
1682        RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_2,  (R600_NUM_GS_GPRS((dev_priv->r600_max_gprs * 7)/64) |
1683                                                    R600_NUM_ES_GPRS((dev_priv->r600_max_gprs * 7)/64)));
1684
1685        sq_thread_resource_mgmt = (R600_NUM_PS_THREADS((dev_priv->r600_max_threads * 4)/8) |
1686                                   R600_NUM_VS_THREADS((dev_priv->r600_max_threads * 2)/8) |
1687                                   R600_NUM_ES_THREADS((dev_priv->r600_max_threads * 1)/8));
1688        if (((dev_priv->r600_max_threads * 1) / 8) > dev_priv->r600_max_gs_threads)
1689                sq_thread_resource_mgmt |= R600_NUM_GS_THREADS(dev_priv->r600_max_gs_threads);
1690        else
1691                sq_thread_resource_mgmt |= R600_NUM_GS_THREADS((dev_priv->r600_max_gs_threads * 1)/8);
1692        RADEON_WRITE(R600_SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt);
1693
1694        RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_1, (R600_NUM_PS_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4) |
1695                                                     R600_NUM_VS_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4)));
1696
1697        RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_2, (R600_NUM_GS_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4) |
1698                                                     R600_NUM_ES_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4)));
1699
1700        sq_dyn_gpr_size_simd_ab_0 = (R700_SIMDA_RING0((dev_priv->r600_max_gprs * 38)/64) |
1701                                     R700_SIMDA_RING1((dev_priv->r600_max_gprs * 38)/64) |
1702                                     R700_SIMDB_RING0((dev_priv->r600_max_gprs * 38)/64) |
1703                                     R700_SIMDB_RING1((dev_priv->r600_max_gprs * 38)/64));
1704
1705        RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_0, sq_dyn_gpr_size_simd_ab_0);
1706        RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_1, sq_dyn_gpr_size_simd_ab_0);
1707        RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_2, sq_dyn_gpr_size_simd_ab_0);
1708        RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_3, sq_dyn_gpr_size_simd_ab_0);
1709        RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_4, sq_dyn_gpr_size_simd_ab_0);
1710        RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_5, sq_dyn_gpr_size_simd_ab_0);
1711        RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_6, sq_dyn_gpr_size_simd_ab_0);
1712        RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_7, sq_dyn_gpr_size_simd_ab_0);
1713
1714        RADEON_WRITE(R700_PA_SC_FORCE_EOV_MAX_CNTS, (R700_FORCE_EOV_MAX_CLK_CNT(4095) |
1715                                                     R700_FORCE_EOV_MAX_REZ_CNT(255)));
1716
1717        if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710)
1718                RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, (R600_CACHE_INVALIDATION(R600_TC_ONLY) |
1719                                                           R700_AUTO_INVLD_EN(R700_ES_AND_GS_AUTO)));
1720        else
1721                RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, (R600_CACHE_INVALIDATION(R600_VC_AND_TC) |
1722                                                           R700_AUTO_INVLD_EN(R700_ES_AND_GS_AUTO)));
1723
1724        switch (dev_priv->flags & RADEON_FAMILY_MASK) {
1725        case CHIP_RV770:
1726        case CHIP_RV730:
1727        case CHIP_RV740:
1728                gs_prim_buffer_depth = 384;
1729                break;
1730        case CHIP_RV710:
1731                gs_prim_buffer_depth = 128;
1732                break;
1733        default:
1734                break;
1735        }
1736
1737        num_gs_verts_per_thread = dev_priv->r600_max_pipes * 16;
1738        vgt_gs_per_es = gs_prim_buffer_depth + num_gs_verts_per_thread;
1739        /* Max value for this is 256 */
1740        if (vgt_gs_per_es > 256)
1741                vgt_gs_per_es = 256;
1742
1743        RADEON_WRITE(R600_VGT_ES_PER_GS, 128);
1744        RADEON_WRITE(R600_VGT_GS_PER_ES, vgt_gs_per_es);
1745        RADEON_WRITE(R600_VGT_GS_PER_VS, 2);
1746
1747        /* more default values. 2D/3D driver should adjust as needed */
1748        RADEON_WRITE(R600_VGT_GS_VERTEX_REUSE, 16);
1749        RADEON_WRITE(R600_PA_SC_LINE_STIPPLE_STATE, 0);
1750        RADEON_WRITE(R600_VGT_STRMOUT_EN, 0);
1751        RADEON_WRITE(R600_SX_MISC, 0);
1752        RADEON_WRITE(R600_PA_SC_MODE_CNTL, 0);
1753        RADEON_WRITE(R700_PA_SC_EDGERULE, 0xaaaaaaaa);
1754        RADEON_WRITE(R600_PA_SC_AA_CONFIG, 0);
1755        RADEON_WRITE(R600_PA_SC_CLIPRECT_RULE, 0xffff);
1756        RADEON_WRITE(R600_PA_SC_LINE_STIPPLE, 0);
1757        RADEON_WRITE(R600_SPI_INPUT_Z, 0);
1758        RADEON_WRITE(R600_SPI_PS_IN_CONTROL_0, R600_NUM_INTERP(2));
1759        RADEON_WRITE(R600_CB_COLOR7_FRAG, 0);
1760
1761        /* clear render buffer base addresses */
1762        RADEON_WRITE(R600_CB_COLOR0_BASE, 0);
1763        RADEON_WRITE(R600_CB_COLOR1_BASE, 0);
1764        RADEON_WRITE(R600_CB_COLOR2_BASE, 0);
1765        RADEON_WRITE(R600_CB_COLOR3_BASE, 0);
1766        RADEON_WRITE(R600_CB_COLOR4_BASE, 0);
1767        RADEON_WRITE(R600_CB_COLOR5_BASE, 0);
1768        RADEON_WRITE(R600_CB_COLOR6_BASE, 0);
1769        RADEON_WRITE(R600_CB_COLOR7_BASE, 0);
1770
1771        RADEON_WRITE(R700_TCP_CNTL, 0);
1772
1773        hdp_host_path_cntl = RADEON_READ(R600_HDP_HOST_PATH_CNTL);
1774        RADEON_WRITE(R600_HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
1775
1776        RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0);
1777
1778        RADEON_WRITE(R600_PA_CL_ENHANCE, (R600_CLIP_VTX_REORDER_ENA |
1779                                          R600_NUM_CLIP_SEQ(3)));
1780
1781}
1782
/*
 * Bring up the CP (command processor) ring buffer.
 *
 * Runs the per-family gfx block init, pulses a CP soft reset, programs
 * the ring size / read-pointer registers, points the ring base at its
 * backing store (AGP aperture or PCI SG area), and arms the scratch
 * register writeback area behind the ring read pointer.  The register
 * write ordering here follows the hardware bring-up sequence and must
 * not be rearranged.
 *
 * @dev:       DRM device
 * @dev_priv:  radeon private state (ring geometry, map handles, flags)
 * @file_priv: used to reach the master's sarea so its last_frame/
 *             last_dispatch/last_clear copies can be reset
 */
static void r600_cp_init_ring_buffer(struct drm_device *dev,
				       drm_radeon_private_t *dev_priv,
				       struct drm_file *file_priv)
{
	struct drm_radeon_master_private *master_priv;
	u32 ring_start;
	u64 rptr_addr;

	/* RV770 and newer take the r700 gfx init path */
	if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770))
		r700_gfx_init(dev, dev_priv);
	else
		r600_gfx_init(dev, dev_priv);

	/* Pulse a CP soft reset; the read back flushes the posted write
	 * so the delay actually covers the reset assertion window.
	 */
	RADEON_WRITE(R600_GRBM_SOFT_RESET, R600_SOFT_RESET_CP);
	RADEON_READ(R600_GRBM_SOFT_RESET);
	DRM_UDELAY(15000);
	RADEON_WRITE(R600_GRBM_SOFT_RESET, 0);


	/* Set ring buffer size (log2 qwords); writeback updates disabled
	 * for now, re-enabled implicitly by the final RB_CNTL write below.
	 */
#ifdef __BIG_ENDIAN
	RADEON_WRITE(R600_CP_RB_CNTL,
		     RADEON_BUF_SWAP_32BIT |
		     RADEON_RB_NO_UPDATE |
		     (dev_priv->ring.rptr_update_l2qw << 8) |
		     dev_priv->ring.size_l2qw);
#else
	RADEON_WRITE(R600_CP_RB_CNTL,
		     RADEON_RB_NO_UPDATE |
		     (dev_priv->ring.rptr_update_l2qw << 8) |
		     dev_priv->ring.size_l2qw);
#endif

	RADEON_WRITE(R600_CP_SEM_WAIT_TIMER, 0x4);

	/* Set the write pointer delay */
	RADEON_WRITE(R600_CP_RB_WPTR_DELAY, 0);

	/* Temporarily allow the driver to write RPTR directly
	 * (RB_RPTR_WR_ENA) so the pointers can be zeroed below.
	 */
#ifdef __BIG_ENDIAN
	RADEON_WRITE(R600_CP_RB_CNTL,
		     RADEON_BUF_SWAP_32BIT |
		     RADEON_RB_NO_UPDATE |
		     RADEON_RB_RPTR_WR_ENA |
		     (dev_priv->ring.rptr_update_l2qw << 8) |
		     dev_priv->ring.size_l2qw);
#else
	RADEON_WRITE(R600_CP_RB_CNTL,
		     RADEON_RB_NO_UPDATE |
		     RADEON_RB_RPTR_WR_ENA |
		     (dev_priv->ring.rptr_update_l2qw << 8) |
		     dev_priv->ring.size_l2qw);
#endif

	/* Initialize the ring buffer's read and write pointers */
	RADEON_WRITE(R600_CP_RB_RPTR_WR, 0);
	RADEON_WRITE(R600_CP_RB_WPTR, 0);
	SET_RING_HEAD(dev_priv, 0);
	dev_priv->ring.tail = 0;

	/* Translate the ring_rptr map offset into a GPU (GART) address:
	 * relative to the AGP aperture base, or to the SG area's kernel
	 * virtual base for PCI(E) GART.
	 * NOTE(review): the PCI path assumes dev->sg is non-NULL here —
	 * not checked in this function; presumably guaranteed by init.
	 */
#if __OS_HAS_AGP
	if (dev_priv->flags & RADEON_IS_AGP) {
		rptr_addr = dev_priv->ring_rptr->offset
			- dev->agp->base +
			dev_priv->gart_vm_start;
	} else
#endif
	{
		rptr_addr = dev_priv->ring_rptr->offset
			- ((unsigned long) dev->sg->virtual)
			+ dev_priv->gart_vm_start;
	}
	/* Low bits of the 40-bit rptr writeback address; bits [1:0] select
	 * the swap mode on big-endian hosts.
	 */
	RADEON_WRITE(R600_CP_RB_RPTR_ADDR,
#ifdef __BIG_ENDIAN
		     (2 << 0) |
#endif
		     (rptr_addr & 0xfffffffc));
	RADEON_WRITE(R600_CP_RB_RPTR_ADDR_HI,
		     upper_32_bits(rptr_addr));

	/* Final RB_CNTL write: drop NO_UPDATE and RPTR_WR_ENA so normal
	 * CP-driven read-pointer writeback takes over.
	 */
#ifdef __BIG_ENDIAN
	RADEON_WRITE(R600_CP_RB_CNTL,
		     RADEON_BUF_SWAP_32BIT |
		     (dev_priv->ring.rptr_update_l2qw << 8) |
		     dev_priv->ring.size_l2qw);
#else
	RADEON_WRITE(R600_CP_RB_CNTL,
		     (dev_priv->ring.rptr_update_l2qw << 8) |
		     dev_priv->ring.size_l2qw);
#endif

	/* Program the GART aperture location and compute the ring base
	 * as a GPU address in that aperture.
	 */
#if __OS_HAS_AGP
	if (dev_priv->flags & RADEON_IS_AGP) {
		/* XXX */
		radeon_write_agp_base(dev_priv, dev->agp->base);

		/* XXX */
		radeon_write_agp_location(dev_priv,
			     (((dev_priv->gart_vm_start - 1 +
				dev_priv->gart_size) & 0xffff0000) |
			      (dev_priv->gart_vm_start >> 16)));

		ring_start = (dev_priv->cp_ring->offset
			      - dev->agp->base
			      + dev_priv->gart_vm_start);
	} else
#endif
		ring_start = (dev_priv->cp_ring->offset
			      - (unsigned long)dev->sg->virtual
			      + dev_priv->gart_vm_start);

	/* Ring base is programmed in 256-byte units */
	RADEON_WRITE(R600_CP_RB_BASE, ring_start >> 8);

	RADEON_WRITE(R600_CP_ME_CNTL, 0xff);

	RADEON_WRITE(R600_CP_DEBUG, (1 << 27) | (1 << 28));

	/* Initialize the scratch register pointer.  This will cause
	 * the scratch register values to be written out to memory
	 * whenever they are updated.
	 *
	 * We simply put this behind the ring read pointer, this works
	 * with PCI GART as well as (whatever kind of) AGP GART
	 */
	{
		u64 scratch_addr;

		/* Recover the 64-bit rptr writeback address just
		 * programmed, then place the scratch area
		 * R600_SCRATCH_REG_OFFSET bytes past it (in 256-byte
		 * units, truncated to 32 bits for the register).
		 */
		scratch_addr = RADEON_READ(R600_CP_RB_RPTR_ADDR) & 0xFFFFFFFC;
		scratch_addr |= ((u64)RADEON_READ(R600_CP_RB_RPTR_ADDR_HI)) << 32;
		scratch_addr += R600_SCRATCH_REG_OFFSET;
		scratch_addr >>= 8;
		scratch_addr &= 0xffffffff;

		RADEON_WRITE(R600_SCRATCH_ADDR, (uint32_t)scratch_addr);
	}

	/* Enable writeback for scratch registers 0-2 */
	RADEON_WRITE(R600_SCRATCH_UMSK, 0x7);

	/* Turn on bus mastering */
	radeon_enable_bm(dev_priv);

	/* Zero both the writeback copies and the registers for the
	 * frame/dispatch/clear bookkeeping scratch slots.
	 */
	radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(0), 0);
	RADEON_WRITE(R600_LAST_FRAME_REG, 0);

	radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(1), 0);
	RADEON_WRITE(R600_LAST_DISPATCH_REG, 0);

	radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(2), 0);
	RADEON_WRITE(R600_LAST_CLEAR_REG, 0);

	/* reset sarea copies of these */
	master_priv = file_priv->master->driver_priv;
	if (master_priv->sarea_priv) {
		master_priv->sarea_priv->last_frame = 0;
		master_priv->sarea_priv->last_dispatch = 0;
		master_priv->sarea_priv->last_clear = 0;
	}

	r600_do_wait_for_idle(dev_priv);

}
1943
/*
 * Tear down the CP state established by r600_do_init_cp.
 *
 * Disables interrupts first (the uninstall ioctl may never arrive from
 * userspace, and after dev_private is wiped it would be too late), then
 * releases either the AGP ioremaps or the PCI GART page table and its
 * framebuffer mapping, and finally clears the private struct up to —
 * but not including — the flags word, which must survive for re-init.
 *
 * @dev: DRM device whose dev_private is being torn down
 *
 * Returns 0 (always succeeds).
 */
int r600_do_cleanup_cp(struct drm_device *dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	DRM_DEBUG("\n");

	/* Make sure interrupts are disabled here because the uninstall ioctl
	 * may not have been called from userspace and after dev_private
	 * is freed, it's too late.
	 */
	if (dev->irq_enabled)
		drm_irq_uninstall(dev);

#if __OS_HAS_AGP
	if (dev_priv->flags & RADEON_IS_AGP) {
		/* AGP mode: unmap the three ioremapped regions and NULL the
		 * pointers so a repeated cleanup is harmless.
		 */
		if (dev_priv->cp_ring != NULL) {
			drm_core_ioremapfree(dev_priv->cp_ring, dev);
			dev_priv->cp_ring = NULL;
		}
		if (dev_priv->ring_rptr != NULL) {
			drm_core_ioremapfree(dev_priv->ring_rptr, dev);
			dev_priv->ring_rptr = NULL;
		}
		if (dev->agp_buffer_map != NULL) {
			drm_core_ioremapfree(dev->agp_buffer_map, dev);
			dev->agp_buffer_map = NULL;
		}
	} else
#endif
	{

		/* PCI(E) GART: free the page table if it was set up */
		if (dev_priv->gart_info.bus_addr)
			r600_page_table_cleanup(dev, &dev_priv->gart_info);

		/* If the GART table lived in VRAM, drop its mapping too */
		if (dev_priv->gart_info.gart_table_location == DRM_ATI_GART_FB) {
			drm_core_ioremapfree(&dev_priv->gart_info.mapping, dev);
			dev_priv->gart_info.addr = NULL;
		}
	}
	/* only clear to the start of flags */
	memset(dev_priv, 0, offsetof(drm_radeon_private_t, flags));

	return 0;
}
1987
1988int r600_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init,
1989                    struct drm_file *file_priv)
1990{
1991        drm_radeon_private_t *dev_priv = dev->dev_private;
1992        struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
1993
1994        DRM_DEBUG("\n");
1995
1996        mutex_init(&dev_priv->cs_mutex);
1997        r600_cs_legacy_init();
1998        /* if we require new memory map but we don't have it fail */
1999        if ((dev_priv->flags & RADEON_NEW_MEMMAP) && !dev_priv->new_memmap) {
2000                DRM_ERROR("Cannot initialise DRM on this card\nThis card requires a new X.org DDX for 3D\n");
2001                r600_do_cleanup_cp(dev);
2002                return -EINVAL;
2003        }
2004
2005        if (init->is_pci && (dev_priv->flags & RADEON_IS_AGP)) {
2006                DRM_DEBUG("Forcing AGP card to PCI mode\n");
2007                dev_priv->flags &= ~RADEON_IS_AGP;
2008                /* The writeback test succeeds, but when writeback is enabled,
2009                 * the ring buffer read ptr update fails after first 128 bytes.
2010                 */
2011                radeon_no_wb = 1;
2012        } else if (!(dev_priv->flags & (RADEON_IS_AGP | RADEON_IS_PCI | RADEON_IS_PCIE))
2013                 && !init->is_pci) {
2014                DRM_DEBUG("Restoring AGP flag\n");
2015                dev_priv->flags |= RADEON_IS_AGP;
2016        }
2017
2018        dev_priv->usec_timeout = init->usec_timeout;
2019        if (dev_priv->usec_timeout < 1 ||
2020            dev_priv->usec_timeout > RADEON_MAX_USEC_TIMEOUT) {
2021                DRM_DEBUG("TIMEOUT problem!\n");
2022                r600_do_cleanup_cp(dev);
2023                return -EINVAL;
2024        }
2025
2026        /* Enable vblank on CRTC1 for older X servers
2027         */
2028        dev_priv->vblank_crtc = DRM_RADEON_VBLANK_CRTC1;
2029        dev_priv->do_boxes = 0;
2030        dev_priv->cp_mode = init->cp_mode;
2031
2032        /* We don't support anything other than bus-mastering ring mode,
2033         * but the ring can be in either AGP or PCI space for the ring
2034         * read pointer.
2035         */
2036        if ((init->cp_mode != RADEON_CSQ_PRIBM_INDDIS) &&
2037            (init->cp_mode != RADEON_CSQ_PRIBM_INDBM)) {
2038                DRM_DEBUG("BAD cp_mode (%x)!\n", init->cp_mode);
2039                r600_do_cleanup_cp(dev);
2040                return -EINVAL;
2041        }
2042
2043        switch (init->fb_bpp) {
2044        case 16:
2045                dev_priv->color_fmt = RADEON_COLOR_FORMAT_RGB565;
2046                break;
2047        case 32:
2048        default:
2049                dev_priv->color_fmt = RADEON_COLOR_FORMAT_ARGB8888;
2050                break;
2051        }
2052        dev_priv->front_offset = init->front_offset;
2053        dev_priv->front_pitch = init->front_pitch;
2054        dev_priv->back_offset = init->back_offset;
2055        dev_priv->back_pitch = init->back_pitch;
2056
2057        dev_priv->ring_offset = init->ring_offset;
2058        dev_priv->ring_rptr_offset = init->ring_rptr_offset;
2059        dev_priv->buffers_offset = init->buffers_offset;
2060        dev_priv->gart_textures_offset = init->gart_textures_offset;
2061
2062        master_priv->sarea = drm_getsarea(dev);
2063        if (!master_priv->sarea) {
2064                DRM_ERROR("could not find sarea!\n");
2065                r600_do_cleanup_cp(dev);
2066                return -EINVAL;
2067        }
2068
2069        dev_priv->cp_ring = drm_core_findmap(dev, init->ring_offset);
2070        if (!dev_priv->cp_ring) {
2071                DRM_ERROR("could not find cp ring region!\n");
2072                r600_do_cleanup_cp(dev);
2073                return -EINVAL;
2074        }
2075        dev_priv->ring_rptr = drm_core_findmap(dev, init->ring_rptr_offset);
2076        if (!dev_priv->ring_rptr) {
2077                DRM_ERROR("could not find ring read pointer!\n");
2078                r600_do_cleanup_cp(dev);
2079                return -EINVAL;
2080        }
2081        dev->agp_buffer_token = init->buffers_offset;
2082        dev->agp_buffer_map = drm_core_findmap(dev, init->buffers_offset);
2083        if (!dev->agp_buffer_map) {
2084                DRM_ERROR("could not find dma buffer region!\n");
2085                r600_do_cleanup_cp(dev);
2086                return -EINVAL;
2087        }
2088
2089        if (init->gart_textures_offset) {
2090                dev_priv->gart_textures =
2091                    drm_core_findmap(dev, init->gart_textures_offset);
2092                if (!dev_priv->gart_textures) {
2093                        DRM_ERROR("could not find GART texture region!\n");
2094                        r600_do_cleanup_cp(dev);
2095                        return -EINVAL;
2096                }
2097        }
2098
2099#if __OS_HAS_AGP
2100        /* XXX */
2101        if (dev_priv->flags & RADEON_IS_AGP) {
2102                drm_core_ioremap_wc(dev_priv->cp_ring, dev);
2103                drm_core_ioremap_wc(dev_priv->ring_rptr, dev);
2104                drm_core_ioremap_wc(dev->agp_buffer_map, dev);
2105                if (!dev_priv->cp_ring->handle ||
2106                    !dev_priv->ring_rptr->handle ||
2107                    !dev->agp_buffer_map->handle) {
2108                        DRM_ERROR("could not find ioremap agp regions!\n");
2109                        r600_do_cleanup_cp(dev);
2110                        return -EINVAL;
2111                }
2112        } else
2113#endif
2114        {
2115                dev_priv->cp_ring->handle = (void *)(unsigned long)dev_priv->cp_ring->offset;
2116                dev_priv->ring_rptr->handle =
2117                        (void *)(unsigned long)dev_priv->ring_rptr->offset;
2118                dev->agp_buffer_map->handle =
2119                        (void *)(unsigned long)dev->agp_buffer_map->offset;
2120
2121                DRM_DEBUG("dev_priv->cp_ring->handle %p\n",
2122                          dev_priv->cp_ring->handle);
2123                DRM_DEBUG("dev_priv->ring_rptr->handle %p\n",
2124                          dev_priv->ring_rptr->handle);
2125                DRM_DEBUG("dev->agp_buffer_map->handle %p\n",
2126                          dev->agp_buffer_map->handle);
2127        }
2128
2129        dev_priv->fb_location = (radeon_read_fb_location(dev_priv) & 0xffff) << 24;
2130        dev_priv->fb_size =
2131                (((radeon_read_fb_location(dev_priv) & 0xffff0000u) << 8) + 0x1000000)
2132                - dev_priv->fb_location;
2133
2134        dev_priv->front_pitch_offset = (((dev_priv->front_pitch / 64) << 22) |
2135                                        ((dev_priv->front_offset
2136                                          + dev_priv->fb_location) >> 10));
2137
2138        dev_priv->back_pitch_offset = (((dev_priv->back_pitch / 64) << 22) |
2139                                       ((dev_priv->back_offset
2140                                         + dev_priv->fb_location) >> 10));
2141
2142        dev_priv->depth_pitch_offset = (((dev_priv->depth_pitch / 64) << 22) |
2143                                        ((dev_priv->depth_offset
2144                                          + dev_priv->fb_location) >> 10));
2145
2146        dev_priv->gart_size = init->gart_size;
2147
2148        /* New let's set the memory map ... */
2149        if (dev_priv->new_memmap) {
2150                u32 base = 0;
2151
2152                DRM_INFO("Setting GART location based on new memory map\n");
2153
2154                /* If using AGP, try to locate the AGP aperture at the same
2155                 * location in the card and on the bus, though we have to
2156                 * align it down.
2157                 */
2158#if __OS_HAS_AGP
2159                /* XXX */
2160                if (dev_priv->flags & RADEON_IS_AGP) {
2161                        base = dev->agp->base;
2162                        /* Check if valid */
2163                        if ((base + dev_priv->gart_size - 1) >= dev_priv->fb_location &&
2164                            base < (dev_priv->fb_location + dev_priv->fb_size - 1)) {
2165                                DRM_INFO("Can't use AGP base @0x%08lx, won't fit\n",
2166                                         dev->agp->base);
2167                                base = 0;
2168                        }
2169                }
2170#endif
2171                /* If not or if AGP is at 0 (Macs), try to put it elsewhere */
2172                if (base == 0) {
2173                        base = dev_priv->fb_location + dev_priv->fb_size;
2174                        if (base < dev_priv->fb_location ||
2175                            ((base + dev_priv->gart_size) & 0xfffffffful) < base)
2176                                base = dev_priv->fb_location
2177                                        - dev_priv->gart_size;
2178                }
2179                dev_priv->gart_vm_start = base & 0xffc00000u;
2180                if (dev_priv->gart_vm_start != base)
2181                        DRM_INFO("GART aligned down from 0x%08x to 0x%08x\n",
2182                                 base, dev_priv->gart_vm_start);
2183        }
2184
2185#if __OS_HAS_AGP
2186        /* XXX */
2187        if (dev_priv->flags & RADEON_IS_AGP)
2188                dev_priv->gart_buffers_offset = (dev->agp_buffer_map->offset
2189                                                 - dev->agp->base
2190                                                 + dev_priv->gart_vm_start);
2191        else
2192#endif
2193                dev_priv->gart_buffers_offset = (dev->agp_buffer_map->offset
2194                                                 - (unsigned long)dev->sg->virtual
2195                                                 + dev_priv->gart_vm_start);
2196
2197        DRM_DEBUG("fb 0x%08x size %d\n",
2198                  (unsigned int) dev_priv->fb_location,
2199                  (unsigned int) dev_priv->fb_size);
2200        DRM_DEBUG("dev_priv->gart_size %d\n", dev_priv->gart_size);
2201        DRM_DEBUG("dev_priv->gart_vm_start 0x%08x\n",
2202                  (unsigned int) dev_priv->gart_vm_start);
2203        DRM_DEBUG("dev_priv->gart_buffers_offset 0x%08lx\n",
2204                  dev_priv->gart_buffers_offset);
2205
2206        dev_priv->ring.start = (u32 *) dev_priv->cp_ring->handle;
2207        dev_priv->ring.end = ((u32 *) dev_priv->cp_ring->handle
2208                              + init->ring_size / sizeof(u32));
2209        dev_priv->ring.size = init->ring_size;
2210        dev_priv->ring.size_l2qw = drm_order(init->ring_size / 8);
2211
2212        dev_priv->ring.rptr_update = /* init->rptr_update */ 4096;
2213        dev_priv->ring.rptr_update_l2qw = drm_order(/* init->rptr_update */ 4096 / 8);
2214
2215        dev_priv->ring.fetch_size = /* init->fetch_size */ 32;
2216        dev_priv->ring.fetch_size_l2ow = drm_order(/* init->fetch_size */ 32 / 16);
2217
2218        dev_priv->ring.tail_mask = (dev_priv->ring.size / sizeof(u32)) - 1;
2219
2220        dev_priv->ring.high_mark = RADEON_RING_HIGH_MARK;
2221
2222#if __OS_HAS_AGP
2223        if (dev_priv->flags & RADEON_IS_AGP) {
2224                /* XXX turn off pcie gart */
2225        } else
2226#endif
2227        {
2228                dev_priv->gart_info.table_mask = DMA_BIT_MASK(32);
2229                /* if we have an offset set from userspace */
2230                if (!dev_priv->pcigart_offset_set) {
2231                        DRM_ERROR("Need gart offset from userspace\n");
2232                        r600_do_cleanup_cp(dev);
2233                        return -EINVAL;
2234                }
2235
2236                DRM_DEBUG("Using gart offset 0x%08lx\n", dev_priv->pcigart_offset);
2237
2238                dev_priv->gart_info.bus_addr =
2239                        dev_priv->pcigart_offset + dev_priv->fb_location;
2240                dev_priv->gart_info.mapping.offset =
2241                        dev_priv->pcigart_offset + dev_priv->fb_aper_offset;
2242                dev_priv->gart_info.mapping.size =
2243                        dev_priv->gart_info.table_size;
2244
2245                drm_core_ioremap_wc(&dev_priv->gart_info.mapping, dev);
2246                if (!dev_priv->gart_info.mapping.handle) {
2247                        DRM_ERROR("ioremap failed.\n");
2248                        r600_do_cleanup_cp(dev);
2249                        return -EINVAL;
2250                }
2251
2252                dev_priv->gart_info.addr =
2253                        dev_priv->gart_info.mapping.handle;
2254
2255                DRM_DEBUG("Setting phys_pci_gart to %p %08lX\n",
2256                          dev_priv->gart_info.addr,
2257                          dev_priv->pcigart_offset);
2258
2259                if (!r600_page_table_init(dev)) {
2260                        DRM_ERROR("Failed to init GART table\n");
2261                        r600_do_cleanup_cp(dev);
2262                        return -EINVAL;
2263                }
2264
2265                if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770))
2266                        r700_vm_init(dev);
2267                else
2268                        r600_vm_init(dev);
2269        }
2270
2271        if (!dev_priv->me_fw || !dev_priv->pfp_fw) {
2272                int err = r600_cp_init_microcode(dev_priv);
2273                if (err) {
2274                        DRM_ERROR("Failed to load firmware!\n");
2275                        r600_do_cleanup_cp(dev);
2276                        return err;
2277                }
2278        }
2279        if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770))
2280                r700_cp_load_microcode(dev_priv);
2281        else
2282                r600_cp_load_microcode(dev_priv);
2283
2284        r600_cp_init_ring_buffer(dev, dev_priv, file_priv);
2285
2286        dev_priv->last_buf = 0;
2287
2288        r600_do_engine_reset(dev);
2289        r600_test_writeback(dev_priv);
2290
2291        return 0;
2292}
2293
2294int r600_do_resume_cp(struct drm_device *dev, struct drm_file *file_priv)
2295{
2296        drm_radeon_private_t *dev_priv = dev->dev_private;
2297
2298        DRM_DEBUG("\n");
2299        if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770)) {
2300                r700_vm_init(dev);
2301                r700_cp_load_microcode(dev_priv);
2302        } else {
2303                r600_vm_init(dev);
2304                r600_cp_load_microcode(dev_priv);
2305        }
2306        r600_cp_init_ring_buffer(dev, dev_priv, file_priv);
2307        r600_do_engine_reset(dev);
2308
2309        return 0;
2310}
2311
/* Wait for the CP to go idle.
 *
 * Emits a cache flush/invalidate event followed by a WAIT_UNTIL for
 * 3D-idle-clean, then polls the hardware until the CP reports idle.
 * Returns the result of r600_do_wait_for_idle() (0 on success).
 */
int r600_do_cp_idle(drm_radeon_private_t *dev_priv)
{
	RING_LOCALS;
	DRM_DEBUG("\n");

	BEGIN_RING(5);
	/* flush and invalidate caches before waiting for idle */
	OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
	OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
	/* wait for 3D idle clean */
	OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
	OUT_RING((R600_WAIT_UNTIL - R600_SET_CONFIG_REG_OFFSET) >> 2);
	OUT_RING(RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN);

	ADVANCE_RING();
	COMMIT_RING();

	return r600_do_wait_for_idle(dev_priv);
}
2332
/* Start the Command Processor.
 *
 * Emits the ME_INITIALIZE packet that brings up the micro engine, then
 * clears the halt bit in CP_ME_CNTL and marks the CP as running.
 */
void r600_do_cp_start(drm_radeon_private_t *dev_priv)
{
	u32 cp_me;
	RING_LOCALS;
	DRM_DEBUG("\n");

	BEGIN_RING(7);
	OUT_RING(CP_PACKET3(R600_IT_ME_INITIALIZE, 5));
	OUT_RING(0x00000001);
	/* the second initialization dword differs between r6xx and r7xx */
	if (((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV770))
		OUT_RING(0x00000003);
	else
		OUT_RING(0x00000000);
	OUT_RING((dev_priv->r600_max_hw_contexts - 1));
	OUT_RING(R600_ME_INITIALIZE_DEVICE_ID(1));
	OUT_RING(0x00000000);
	OUT_RING(0x00000000);
	ADVANCE_RING();
	COMMIT_RING();

	/* set the mux and reset the halt bit */
	cp_me = 0xff;
	RADEON_WRITE(R600_CP_ME_CNTL, cp_me);

	dev_priv->cp_running = 1;

}
2362
/* Reset the CP ring state: resynchronize the hardware write pointer and
 * the driver's cached head/tail with the current hardware read pointer,
 * so no stale commands are fetched after the reset.
 */
void r600_do_cp_reset(drm_radeon_private_t *dev_priv)
{
	u32 cur_read_ptr;
	DRM_DEBUG("\n");

	cur_read_ptr = RADEON_READ(R600_CP_RB_RPTR);
	RADEON_WRITE(R600_CP_RB_WPTR, cur_read_ptr);
	SET_RING_HEAD(dev_priv, cur_read_ptr);
	dev_priv->ring.tail = cur_read_ptr;
}
2373
2374void r600_do_cp_stop(drm_radeon_private_t *dev_priv)
2375{
2376        uint32_t cp_me;
2377
2378        DRM_DEBUG("\n");
2379
2380        cp_me = 0xff | R600_CP_ME_HALT;
2381
2382        RADEON_WRITE(R600_CP_ME_CNTL, cp_me);
2383
2384        dev_priv->cp_running = 0;
2385}
2386
2387int r600_cp_dispatch_indirect(struct drm_device *dev,
2388                              struct drm_buf *buf, int start, int end)
2389{
2390        drm_radeon_private_t *dev_priv = dev->dev_private;
2391        RING_LOCALS;
2392
2393        if (start != end) {
2394                unsigned long offset = (dev_priv->gart_buffers_offset
2395                                        + buf->offset + start);
2396                int dwords = (end - start + 3) / sizeof(u32);
2397
2398                DRM_DEBUG("dwords:%d\n", dwords);
2399                DRM_DEBUG("offset 0x%lx\n", offset);
2400
2401
2402                /* Indirect buffer data must be a multiple of 16 dwords.
2403                 * pad the data with a Type-2 CP packet.
2404                 */
2405                while (dwords & 0xf) {
2406                        u32 *data = (u32 *)
2407                            ((char *)dev->agp_buffer_map->handle
2408                             + buf->offset + start);
2409                        data[dwords++] = RADEON_CP_PACKET2;
2410                }
2411
2412                /* Fire off the indirect buffer */
2413                BEGIN_RING(4);
2414                OUT_RING(CP_PACKET3(R600_IT_INDIRECT_BUFFER, 2));
2415                OUT_RING((offset & 0xfffffffc));
2416                OUT_RING((upper_32_bits(offset) & 0xff));
2417                OUT_RING(dwords);
2418                ADVANCE_RING();
2419        }
2420
2421        return 0;
2422}
2423
2424void r600_cp_dispatch_swap(struct drm_device *dev, struct drm_file *file_priv)
2425{
2426        drm_radeon_private_t *dev_priv = dev->dev_private;
2427        struct drm_master *master = file_priv->master;
2428        struct drm_radeon_master_private *master_priv = master->driver_priv;
2429        drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
2430        int nbox = sarea_priv->nbox;
2431        struct drm_clip_rect *pbox = sarea_priv->boxes;
2432        int i, cpp, src_pitch, dst_pitch;
2433        uint64_t src, dst;
2434        RING_LOCALS;
2435        DRM_DEBUG("\n");
2436
2437        if (dev_priv->color_fmt == RADEON_COLOR_FORMAT_ARGB8888)
2438                cpp = 4;
2439        else
2440                cpp = 2;
2441
2442        if (sarea_priv->pfCurrentPage == 0) {
2443                src_pitch = dev_priv->back_pitch;
2444                dst_pitch = dev_priv->front_pitch;
2445                src = dev_priv->back_offset + dev_priv->fb_location;
2446                dst = dev_priv->front_offset + dev_priv->fb_location;
2447        } else {
2448                src_pitch = dev_priv->front_pitch;
2449                dst_pitch = dev_priv->back_pitch;
2450                src = dev_priv->front_offset + dev_priv->fb_location;
2451                dst = dev_priv->back_offset + dev_priv->fb_location;
2452        }
2453
2454        if (r600_prepare_blit_copy(dev, file_priv)) {
2455                DRM_ERROR("unable to allocate vertex buffer for swap buffer\n");
2456                return;
2457        }
2458        for (i = 0; i < nbox; i++) {
2459                int x = pbox[i].x1;
2460                int y = pbox[i].y1;
2461                int w = pbox[i].x2 - x;
2462                int h = pbox[i].y2 - y;
2463
2464                DRM_DEBUG("%d,%d-%d,%d\n", x, y, w, h);
2465
2466                r600_blit_swap(dev,
2467                               src, dst,
2468                               x, y, x, y, w, h,
2469                               src_pitch, dst_pitch, cpp);
2470        }
2471        r600_done_blit_copy(dev);
2472
2473        /* Increment the frame counter.  The client-side 3D driver must
2474         * throttle the framerate by waiting for this value before
2475         * performing the swapbuffer ioctl.
2476         */
2477        sarea_priv->last_frame++;
2478
2479        BEGIN_RING(3);
2480        R600_FRAME_AGE(sarea_priv->last_frame);
2481        ADVANCE_RING();
2482}
2483
2484int r600_cp_dispatch_texture(struct drm_device *dev,
2485                             struct drm_file *file_priv,
2486                             drm_radeon_texture_t *tex,
2487                             drm_radeon_tex_image_t *image)
2488{
2489        drm_radeon_private_t *dev_priv = dev->dev_private;
2490        struct drm_buf *buf;
2491        u32 *buffer;
2492        const u8 __user *data;
2493        int size, pass_size;
2494        u64 src_offset, dst_offset;
2495
2496        if (!radeon_check_offset(dev_priv, tex->offset)) {
2497                DRM_ERROR("Invalid destination offset\n");
2498                return -EINVAL;
2499        }
2500
2501        /* this might fail for zero-sized uploads - are those illegal? */
2502        if (!radeon_check_offset(dev_priv, tex->offset + tex->height * tex->pitch - 1)) {
2503                DRM_ERROR("Invalid final destination offset\n");
2504                return -EINVAL;
2505        }
2506
2507        size = tex->height * tex->pitch;
2508
2509        if (size == 0)
2510                return 0;
2511
2512        dst_offset = tex->offset;
2513
2514        if (r600_prepare_blit_copy(dev, file_priv)) {
2515                DRM_ERROR("unable to allocate vertex buffer for swap buffer\n");
2516                return -EAGAIN;
2517        }
2518        do {
2519                data = (const u8 __user *)image->data;
2520                pass_size = size;
2521
2522                buf = radeon_freelist_get(dev);
2523                if (!buf) {
2524                        DRM_DEBUG("EAGAIN\n");
2525                        if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
2526                                return -EFAULT;
2527                        return -EAGAIN;
2528                }
2529
2530                if (pass_size > buf->total)
2531                        pass_size = buf->total;
2532
2533                /* Dispatch the indirect buffer.
2534                 */
2535                buffer =
2536                    (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
2537
2538                if (DRM_COPY_FROM_USER(buffer, data, pass_size)) {
2539                        DRM_ERROR("EFAULT on pad, %d bytes\n", pass_size);
2540                        return -EFAULT;
2541                }
2542
2543                buf->file_priv = file_priv;
2544                buf->used = pass_size;
2545                src_offset = dev_priv->gart_buffers_offset + buf->offset;
2546
2547                r600_blit_copy(dev, src_offset, dst_offset, pass_size);
2548
2549                radeon_cp_discard_buffer(dev, file_priv->master, buf);
2550
2551                /* Update the input parameters for next time */
2552                image->data = (const u8 __user *)image->data + pass_size;
2553                dst_offset += pass_size;
2554                size -= pass_size;
2555        } while (size > 0);
2556        r600_done_blit_copy(dev);
2557
2558        return 0;
2559}
2560
2561/*
2562 * Legacy cs ioctl
2563 */
2564static u32 radeon_cs_id_get(struct drm_radeon_private *radeon)
2565{
2566        /* FIXME: check if wrap affect last reported wrap & sequence */
2567        radeon->cs_id_scnt = (radeon->cs_id_scnt + 1) & 0x00FFFFFF;
2568        if (!radeon->cs_id_scnt) {
2569                /* increment wrap counter */
2570                radeon->cs_id_wcnt += 0x01000000;
2571                /* valid sequence counter start at 1 */
2572                radeon->cs_id_scnt = 1;
2573        }
2574        return (radeon->cs_id_scnt | radeon->cs_id_wcnt);
2575}
2576
/* Allocate the next CS sequence id, store it in *id, and emit it to the
 * SCRATCH2 age register through the ring so completion can be tracked.
 */
static void r600_cs_id_emit(drm_radeon_private_t *dev_priv, u32 *id)
{
	RING_LOCALS;

	*id = radeon_cs_id_get(dev_priv);

	/* SCRATCH 2 */
	BEGIN_RING(3);
	R600_CLEAR_AGE(*id);
	ADVANCE_RING();
	COMMIT_RING();
}
2589
2590static int r600_ib_get(struct drm_device *dev,
2591                        struct drm_file *fpriv,
2592                        struct drm_buf **buffer)
2593{
2594        struct drm_buf *buf;
2595
2596        *buffer = NULL;
2597        buf = radeon_freelist_get(dev);
2598        if (!buf) {
2599                return -EBUSY;
2600        }
2601        buf->file_priv = fpriv;
2602        *buffer = buf;
2603        return 0;
2604}
2605
2606static void r600_ib_free(struct drm_device *dev, struct drm_buf *buf,
2607                        struct drm_file *fpriv, int l, int r)
2608{
2609        drm_radeon_private_t *dev_priv = dev->dev_private;
2610
2611        if (buf) {
2612                if (!r)
2613                        r600_cp_dispatch_indirect(dev, buf, 0, l * 4);
2614                radeon_cp_discard_buffer(dev, fpriv->master, buf);
2615                COMMIT_RING();
2616        }
2617}
2618
2619int r600_cs_legacy_ioctl(struct drm_device *dev, void *data, struct drm_file *fpriv)
2620{
2621        struct drm_radeon_private *dev_priv = dev->dev_private;
2622        struct drm_radeon_cs *cs = data;
2623        struct drm_buf *buf;
2624        unsigned family;
2625        int l, r = 0;
2626        u32 *ib, cs_id = 0;
2627
2628        if (dev_priv == NULL) {
2629                DRM_ERROR("called with no initialization\n");
2630                return -EINVAL;
2631        }
2632        family = dev_priv->flags & RADEON_FAMILY_MASK;
2633        if (family < CHIP_R600) {
2634                DRM_ERROR("cs ioctl valid only for R6XX & R7XX in legacy mode\n");
2635                return -EINVAL;
2636        }
2637        mutex_lock(&dev_priv->cs_mutex);
2638        /* get ib */
2639        r = r600_ib_get(dev, fpriv, &buf);
2640        if (r) {
2641                DRM_ERROR("ib_get failed\n");
2642                goto out;
2643        }
2644        ib = dev->agp_buffer_map->handle + buf->offset;
2645        /* now parse command stream */
2646        r = r600_cs_legacy(dev, data,  fpriv, family, ib, &l);
2647        if (r) {
2648                goto out;
2649        }
2650
2651out:
2652        r600_ib_free(dev, buf, fpriv, l, r);
2653        /* emit cs id sequence */
2654        r600_cs_id_emit(dev_priv, &cs_id);
2655        cs->cs_id = cs_id;
2656        mutex_unlock(&dev_priv->cs_mutex);
2657        return r;
2658}
2659
2660void r600_cs_legacy_get_tiling_conf(struct drm_device *dev, u32 *npipes, u32 *nbanks, u32 *group_size)
2661{
2662        struct drm_radeon_private *dev_priv = dev->dev_private;
2663
2664        *npipes = dev_priv->r600_npipes;
2665        *nbanks = dev_priv->r600_nbanks;
2666        *group_size = dev_priv->r600_group_size;
2667}
2668