linux/drivers/gpu/drm/radeon/r600_blit_kms.c
<<
>>
Prefs
   1/*
   2 * Copyright 2009 Advanced Micro Devices, Inc.
   3 * Copyright 2009 Red Hat Inc.
   4 *
   5 * Permission is hereby granted, free of charge, to any person obtaining a
   6 * copy of this software and associated documentation files (the "Software"),
   7 * to deal in the Software without restriction, including without limitation
   8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   9 * and/or sell copies of the Software, and to permit persons to whom the
  10 * Software is furnished to do so, subject to the following conditions:
  11 *
  12 * The above copyright notice and this permission notice (including the next
  13 * paragraph) shall be included in all copies or substantial portions of the
  14 * Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19 * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  22 * DEALINGS IN THE SOFTWARE.
  23 *
  24 */
  25
  26#include "drmP.h"
  27#include "drm.h"
  28#include "radeon_drm.h"
  29#include "radeon.h"
  30
  31#include "r600d.h"
  32#include "r600_blit_shaders.h"
  33
/* draw-initiator values emitted by draw_auto() */
#define DI_PT_RECTLIST        0x11
#define DI_INDEX_SIZE_16_BIT  0x0
#define DI_SRC_SEL_AUTO_INDEX 0x2

/*
 * Surface format codes: FMT_* are passed to set_tex_resource() (source),
 * COLOR_* to set_render_target() (destination).  The 5_6_5 variants are
 * not referenced in this file.
 */
#define FMT_8                 0x1
#define FMT_5_6_5             0x8
#define FMT_8_8_8_8           0x1a
#define COLOR_8               0x1
#define COLOR_5_6_5           0x8
#define COLOR_8_8_8_8         0x1a
  44
  45/* emits 21 on rv770+, 23 on r600 */
  46static void
  47set_render_target(struct radeon_device *rdev, int format,
  48                  int w, int h, u64 gpu_addr)
  49{
  50        u32 cb_color_info;
  51        int pitch, slice;
  52
  53        h = ALIGN(h, 8);
  54        if (h < 8)
  55                h = 8;
  56
  57        cb_color_info = ((format << 2) | (1 << 27) | (1 << 8));
  58        pitch = (w / 8) - 1;
  59        slice = ((w * h) / 64) - 1;
  60
  61        radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
  62        radeon_ring_write(rdev, (CB_COLOR0_BASE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
  63        radeon_ring_write(rdev, gpu_addr >> 8);
  64
  65        if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770) {
  66                radeon_ring_write(rdev, PACKET3(PACKET3_SURFACE_BASE_UPDATE, 0));
  67                radeon_ring_write(rdev, 2 << 0);
  68        }
  69
  70        radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
  71        radeon_ring_write(rdev, (CB_COLOR0_SIZE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
  72        radeon_ring_write(rdev, (pitch << 0) | (slice << 10));
  73
  74        radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
  75        radeon_ring_write(rdev, (CB_COLOR0_VIEW - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
  76        radeon_ring_write(rdev, 0);
  77
  78        radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
  79        radeon_ring_write(rdev, (CB_COLOR0_INFO - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
  80        radeon_ring_write(rdev, cb_color_info);
  81
  82        radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
  83        radeon_ring_write(rdev, (CB_COLOR0_TILE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
  84        radeon_ring_write(rdev, 0);
  85
  86        radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
  87        radeon_ring_write(rdev, (CB_COLOR0_FRAG - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
  88        radeon_ring_write(rdev, 0);
  89
  90        radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
  91        radeon_ring_write(rdev, (CB_COLOR0_MASK - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
  92        radeon_ring_write(rdev, 0);
  93}
  94
  95/* emits 5dw */
  96static void
  97cp_set_surface_sync(struct radeon_device *rdev,
  98                    u32 sync_type, u32 size,
  99                    u64 mc_addr)
 100{
 101        u32 cp_coher_size;
 102
 103        if (size == 0xffffffff)
 104                cp_coher_size = 0xffffffff;
 105        else
 106                cp_coher_size = ((size + 255) >> 8);
 107
 108        radeon_ring_write(rdev, PACKET3(PACKET3_SURFACE_SYNC, 3));
 109        radeon_ring_write(rdev, sync_type);
 110        radeon_ring_write(rdev, cp_coher_size);
 111        radeon_ring_write(rdev, mc_addr >> 8);
 112        radeon_ring_write(rdev, 10); /* poll interval */
 113}
 114
 115/* emits 21dw + 1 surface sync = 26dw */
 116static void
 117set_shaders(struct radeon_device *rdev)
 118{
 119        u64 gpu_addr;
 120        u32 sq_pgm_resources;
 121
 122        /* setup shader regs */
 123        sq_pgm_resources = (1 << 0);
 124
 125        /* VS */
 126        gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset;
 127        radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
 128        radeon_ring_write(rdev, (SQ_PGM_START_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
 129        radeon_ring_write(rdev, gpu_addr >> 8);
 130
 131        radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
 132        radeon_ring_write(rdev, (SQ_PGM_RESOURCES_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
 133        radeon_ring_write(rdev, sq_pgm_resources);
 134
 135        radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
 136        radeon_ring_write(rdev, (SQ_PGM_CF_OFFSET_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
 137        radeon_ring_write(rdev, 0);
 138
 139        /* PS */
 140        gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.ps_offset;
 141        radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
 142        radeon_ring_write(rdev, (SQ_PGM_START_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
 143        radeon_ring_write(rdev, gpu_addr >> 8);
 144
 145        radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
 146        radeon_ring_write(rdev, (SQ_PGM_RESOURCES_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
 147        radeon_ring_write(rdev, sq_pgm_resources | (1 << 28));
 148
 149        radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
 150        radeon_ring_write(rdev, (SQ_PGM_EXPORTS_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
 151        radeon_ring_write(rdev, 2);
 152
 153        radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
 154        radeon_ring_write(rdev, (SQ_PGM_CF_OFFSET_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
 155        radeon_ring_write(rdev, 0);
 156
 157        gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset;
 158        cp_set_surface_sync(rdev, PACKET3_SH_ACTION_ENA, 512, gpu_addr);
 159}
 160
 161/* emits 9 + 1 sync (5) = 14*/
 162static void
 163set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr)
 164{
 165        u32 sq_vtx_constant_word2;
 166
 167        sq_vtx_constant_word2 = ((upper_32_bits(gpu_addr) & 0xff) | (16 << 8));
 168#ifdef __BIG_ENDIAN
 169        sq_vtx_constant_word2 |= (2 << 30);
 170#endif
 171
 172        radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 7));
 173        radeon_ring_write(rdev, 0x460);
 174        radeon_ring_write(rdev, gpu_addr & 0xffffffff);
 175        radeon_ring_write(rdev, 48 - 1);
 176        radeon_ring_write(rdev, sq_vtx_constant_word2);
 177        radeon_ring_write(rdev, 1 << 0);
 178        radeon_ring_write(rdev, 0);
 179        radeon_ring_write(rdev, 0);
 180        radeon_ring_write(rdev, SQ_TEX_VTX_VALID_BUFFER << 30);
 181
 182        if ((rdev->family == CHIP_RV610) ||
 183            (rdev->family == CHIP_RV620) ||
 184            (rdev->family == CHIP_RS780) ||
 185            (rdev->family == CHIP_RS880) ||
 186            (rdev->family == CHIP_RV710))
 187                cp_set_surface_sync(rdev,
 188                                    PACKET3_TC_ACTION_ENA, 48, gpu_addr);
 189        else
 190                cp_set_surface_sync(rdev,
 191                                    PACKET3_VC_ACTION_ENA, 48, gpu_addr);
 192}
 193
 194/* emits 9 */
 195static void
 196set_tex_resource(struct radeon_device *rdev,
 197                 int format, int w, int h, int pitch,
 198                 u64 gpu_addr)
 199{
 200        uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
 201
 202        if (h < 1)
 203                h = 1;
 204
 205        sq_tex_resource_word0 = (1 << 0) | (1 << 3);
 206        sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 8) |
 207                                  ((w - 1) << 19));
 208
 209        sq_tex_resource_word1 = (format << 26);
 210        sq_tex_resource_word1 |= ((h - 1) << 0);
 211
 212        sq_tex_resource_word4 = ((1 << 14) |
 213                                 (0 << 16) |
 214                                 (1 << 19) |
 215                                 (2 << 22) |
 216                                 (3 << 25));
 217
 218        radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 7));
 219        radeon_ring_write(rdev, 0);
 220        radeon_ring_write(rdev, sq_tex_resource_word0);
 221        radeon_ring_write(rdev, sq_tex_resource_word1);
 222        radeon_ring_write(rdev, gpu_addr >> 8);
 223        radeon_ring_write(rdev, gpu_addr >> 8);
 224        radeon_ring_write(rdev, sq_tex_resource_word4);
 225        radeon_ring_write(rdev, 0);
 226        radeon_ring_write(rdev, SQ_TEX_VTX_VALID_TEXTURE << 30);
 227}
 228
 229/* emits 12 */
 230static void
 231set_scissors(struct radeon_device *rdev, int x1, int y1,
 232             int x2, int y2)
 233{
 234        radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
 235        radeon_ring_write(rdev, (PA_SC_SCREEN_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
 236        radeon_ring_write(rdev, (x1 << 0) | (y1 << 16));
 237        radeon_ring_write(rdev, (x2 << 0) | (y2 << 16));
 238
 239        radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
 240        radeon_ring_write(rdev, (PA_SC_GENERIC_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
 241        radeon_ring_write(rdev, (x1 << 0) | (y1 << 16) | (1 << 31));
 242        radeon_ring_write(rdev, (x2 << 0) | (y2 << 16));
 243
 244        radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
 245        radeon_ring_write(rdev, (PA_SC_WINDOW_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
 246        radeon_ring_write(rdev, (x1 << 0) | (y1 << 16) | (1 << 31));
 247        radeon_ring_write(rdev, (x2 << 0) | (y2 << 16));
 248}
 249
 250/* emits 10 */
 251static void
 252draw_auto(struct radeon_device *rdev)
 253{
 254        radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
 255        radeon_ring_write(rdev, (VGT_PRIMITIVE_TYPE - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
 256        radeon_ring_write(rdev, DI_PT_RECTLIST);
 257
 258        radeon_ring_write(rdev, PACKET3(PACKET3_INDEX_TYPE, 0));
 259        radeon_ring_write(rdev,
 260#ifdef __BIG_ENDIAN
 261                          (2 << 2) |
 262#endif
 263                          DI_INDEX_SIZE_16_BIT);
 264
 265        radeon_ring_write(rdev, PACKET3(PACKET3_NUM_INSTANCES, 0));
 266        radeon_ring_write(rdev, 1);
 267
 268        radeon_ring_write(rdev, PACKET3(PACKET3_DRAW_INDEX_AUTO, 1));
 269        radeon_ring_write(rdev, 3);
 270        radeon_ring_write(rdev, DI_SRC_SEL_AUTO_INDEX);
 271
 272}
 273
 274/* emits 14 */
 275static void
 276set_default_state(struct radeon_device *rdev)
 277{
 278        u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
 279        u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
 280        int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs;
 281        int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads;
 282        int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries;
 283        u64 gpu_addr;
 284        int dwords;
 285
 286        switch (rdev->family) {
 287        case CHIP_R600:
 288                num_ps_gprs = 192;
 289                num_vs_gprs = 56;
 290                num_temp_gprs = 4;
 291                num_gs_gprs = 0;
 292                num_es_gprs = 0;
 293                num_ps_threads = 136;
 294                num_vs_threads = 48;
 295                num_gs_threads = 4;
 296                num_es_threads = 4;
 297                num_ps_stack_entries = 128;
 298                num_vs_stack_entries = 128;
 299                num_gs_stack_entries = 0;
 300                num_es_stack_entries = 0;
 301                break;
 302        case CHIP_RV630:
 303        case CHIP_RV635:
 304                num_ps_gprs = 84;
 305                num_vs_gprs = 36;
 306                num_temp_gprs = 4;
 307                num_gs_gprs = 0;
 308                num_es_gprs = 0;
 309                num_ps_threads = 144;
 310                num_vs_threads = 40;
 311                num_gs_threads = 4;
 312                num_es_threads = 4;
 313                num_ps_stack_entries = 40;
 314                num_vs_stack_entries = 40;
 315                num_gs_stack_entries = 32;
 316                num_es_stack_entries = 16;
 317                break;
 318        case CHIP_RV610:
 319        case CHIP_RV620:
 320        case CHIP_RS780:
 321        case CHIP_RS880:
 322        default:
 323                num_ps_gprs = 84;
 324                num_vs_gprs = 36;
 325                num_temp_gprs = 4;
 326                num_gs_gprs = 0;
 327                num_es_gprs = 0;
 328                num_ps_threads = 136;
 329                num_vs_threads = 48;
 330                num_gs_threads = 4;
 331                num_es_threads = 4;
 332                num_ps_stack_entries = 40;
 333                num_vs_stack_entries = 40;
 334                num_gs_stack_entries = 32;
 335                num_es_stack_entries = 16;
 336                break;
 337        case CHIP_RV670:
 338                num_ps_gprs = 144;
 339                num_vs_gprs = 40;
 340                num_temp_gprs = 4;
 341                num_gs_gprs = 0;
 342                num_es_gprs = 0;
 343                num_ps_threads = 136;
 344                num_vs_threads = 48;
 345                num_gs_threads = 4;
 346                num_es_threads = 4;
 347                num_ps_stack_entries = 40;
 348                num_vs_stack_entries = 40;
 349                num_gs_stack_entries = 32;
 350                num_es_stack_entries = 16;
 351                break;
 352        case CHIP_RV770:
 353                num_ps_gprs = 192;
 354                num_vs_gprs = 56;
 355                num_temp_gprs = 4;
 356                num_gs_gprs = 0;
 357                num_es_gprs = 0;
 358                num_ps_threads = 188;
 359                num_vs_threads = 60;
 360                num_gs_threads = 0;
 361                num_es_threads = 0;
 362                num_ps_stack_entries = 256;
 363                num_vs_stack_entries = 256;
 364                num_gs_stack_entries = 0;
 365                num_es_stack_entries = 0;
 366                break;
 367        case CHIP_RV730:
 368        case CHIP_RV740:
 369                num_ps_gprs = 84;
 370                num_vs_gprs = 36;
 371                num_temp_gprs = 4;
 372                num_gs_gprs = 0;
 373                num_es_gprs = 0;
 374                num_ps_threads = 188;
 375                num_vs_threads = 60;
 376                num_gs_threads = 0;
 377                num_es_threads = 0;
 378                num_ps_stack_entries = 128;
 379                num_vs_stack_entries = 128;
 380                num_gs_stack_entries = 0;
 381                num_es_stack_entries = 0;
 382                break;
 383        case CHIP_RV710:
 384                num_ps_gprs = 192;
 385                num_vs_gprs = 56;
 386                num_temp_gprs = 4;
 387                num_gs_gprs = 0;
 388                num_es_gprs = 0;
 389                num_ps_threads = 144;
 390                num_vs_threads = 48;
 391                num_gs_threads = 0;
 392                num_es_threads = 0;
 393                num_ps_stack_entries = 128;
 394                num_vs_stack_entries = 128;
 395                num_gs_stack_entries = 0;
 396                num_es_stack_entries = 0;
 397                break;
 398        }
 399
 400        if ((rdev->family == CHIP_RV610) ||
 401            (rdev->family == CHIP_RV620) ||
 402            (rdev->family == CHIP_RS780) ||
 403            (rdev->family == CHIP_RS880) ||
 404            (rdev->family == CHIP_RV710))
 405                sq_config = 0;
 406        else
 407                sq_config = VC_ENABLE;
 408
 409        sq_config |= (DX9_CONSTS |
 410                      ALU_INST_PREFER_VECTOR |
 411                      PS_PRIO(0) |
 412                      VS_PRIO(1) |
 413                      GS_PRIO(2) |
 414                      ES_PRIO(3));
 415
 416        sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) |
 417                                  NUM_VS_GPRS(num_vs_gprs) |
 418                                  NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
 419        sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) |
 420                                  NUM_ES_GPRS(num_es_gprs));
 421        sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) |
 422                                   NUM_VS_THREADS(num_vs_threads) |
 423                                   NUM_GS_THREADS(num_gs_threads) |
 424                                   NUM_ES_THREADS(num_es_threads));
 425        sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
 426                                    NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
 427        sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
 428                                    NUM_ES_STACK_ENTRIES(num_es_stack_entries));
 429
 430        /* emit an IB pointing at default state */
 431        dwords = ALIGN(rdev->r600_blit.state_len, 0x10);
 432        gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.state_offset;
 433        radeon_ring_write(rdev, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
 434        radeon_ring_write(rdev,
 435#ifdef __BIG_ENDIAN
 436                          (2 << 0) |
 437#endif
 438                          (gpu_addr & 0xFFFFFFFC));
 439        radeon_ring_write(rdev, upper_32_bits(gpu_addr) & 0xFF);
 440        radeon_ring_write(rdev, dwords);
 441
 442        /* SQ config */
 443        radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 6));
 444        radeon_ring_write(rdev, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
 445        radeon_ring_write(rdev, sq_config);
 446        radeon_ring_write(rdev, sq_gpr_resource_mgmt_1);
 447        radeon_ring_write(rdev, sq_gpr_resource_mgmt_2);
 448        radeon_ring_write(rdev, sq_thread_resource_mgmt);
 449        radeon_ring_write(rdev, sq_stack_resource_mgmt_1);
 450        radeon_ring_write(rdev, sq_stack_resource_mgmt_2);
 451}
 452
 453static inline uint32_t i2f(uint32_t input)
 454{
 455        u32 result, i, exponent, fraction;
 456
 457        if ((input & 0x3fff) == 0)
 458                result = 0; /* 0 is a special case */
 459        else {
 460                exponent = 140; /* exponent biased by 127; */
 461                fraction = (input & 0x3fff) << 10; /* cheat and only
 462                                                      handle numbers below 2^^15 */
 463                for (i = 0; i < 14; i++) {
 464                        if (fraction & 0x800000)
 465                                break;
 466                        else {
 467                                fraction = fraction << 1; /* keep
 468                                                             shifting left until top bit = 1 */
 469                                exponent = exponent - 1;
 470                        }
 471                }
 472                result = exponent << 23 | (fraction & 0x7fffff); /* mask
 473                                                                    off top bit; assumed 1 */
 474        }
 475        return result;
 476}
 477
 478int r600_blit_init(struct radeon_device *rdev)
 479{
 480        u32 obj_size;
 481        int i, r, dwords;
 482        void *ptr;
 483        u32 packet2s[16];
 484        int num_packet2s = 0;
 485
 486        /* pin copy shader into vram if already initialized */
 487        if (rdev->r600_blit.shader_obj)
 488                goto done;
 489
 490        mutex_init(&rdev->r600_blit.mutex);
 491        rdev->r600_blit.state_offset = 0;
 492
 493        if (rdev->family >= CHIP_RV770)
 494                rdev->r600_blit.state_len = r7xx_default_size;
 495        else
 496                rdev->r600_blit.state_len = r6xx_default_size;
 497
 498        dwords = rdev->r600_blit.state_len;
 499        while (dwords & 0xf) {
 500                packet2s[num_packet2s++] = cpu_to_le32(PACKET2(0));
 501                dwords++;
 502        }
 503
 504        obj_size = dwords * 4;
 505        obj_size = ALIGN(obj_size, 256);
 506
 507        rdev->r600_blit.vs_offset = obj_size;
 508        obj_size += r6xx_vs_size * 4;
 509        obj_size = ALIGN(obj_size, 256);
 510
 511        rdev->r600_blit.ps_offset = obj_size;
 512        obj_size += r6xx_ps_size * 4;
 513        obj_size = ALIGN(obj_size, 256);
 514
 515        r = radeon_bo_create(rdev, NULL, obj_size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM,
 516                                &rdev->r600_blit.shader_obj);
 517        if (r) {
 518                DRM_ERROR("r600 failed to allocate shader\n");
 519                return r;
 520        }
 521
 522        DRM_DEBUG("r6xx blit allocated bo %08x vs %08x ps %08x\n",
 523                  obj_size,
 524                  rdev->r600_blit.vs_offset, rdev->r600_blit.ps_offset);
 525
 526        r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
 527        if (unlikely(r != 0))
 528                return r;
 529        r = radeon_bo_kmap(rdev->r600_blit.shader_obj, &ptr);
 530        if (r) {
 531                DRM_ERROR("failed to map blit object %d\n", r);
 532                return r;
 533        }
 534        if (rdev->family >= CHIP_RV770)
 535                memcpy_toio(ptr + rdev->r600_blit.state_offset,
 536                            r7xx_default_state, rdev->r600_blit.state_len * 4);
 537        else
 538                memcpy_toio(ptr + rdev->r600_blit.state_offset,
 539                            r6xx_default_state, rdev->r600_blit.state_len * 4);
 540        if (num_packet2s)
 541                memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4),
 542                            packet2s, num_packet2s * 4);
 543        for (i = 0; i < r6xx_vs_size; i++)
 544                *(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(r6xx_vs[i]);
 545        for (i = 0; i < r6xx_ps_size; i++)
 546                *(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(r6xx_ps[i]);
 547        radeon_bo_kunmap(rdev->r600_blit.shader_obj);
 548        radeon_bo_unreserve(rdev->r600_blit.shader_obj);
 549
 550done:
 551        r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
 552        if (unlikely(r != 0))
 553                return r;
 554        r = radeon_bo_pin(rdev->r600_blit.shader_obj, RADEON_GEM_DOMAIN_VRAM,
 555                          &rdev->r600_blit.shader_gpu_addr);
 556        radeon_bo_unreserve(rdev->r600_blit.shader_obj);
 557        if (r) {
 558                dev_err(rdev->dev, "(%d) pin blit object failed\n", r);
 559                return r;
 560        }
 561        radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
 562        return 0;
 563}
 564
 565void r600_blit_fini(struct radeon_device *rdev)
 566{
 567        int r;
 568
 569        radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
 570        if (rdev->r600_blit.shader_obj == NULL)
 571                return;
 572        /* If we can't reserve the bo, unref should be enough to destroy
 573         * it when it becomes idle.
 574         */
 575        r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
 576        if (!r) {
 577                radeon_bo_unpin(rdev->r600_blit.shader_obj);
 578                radeon_bo_unreserve(rdev->r600_blit.shader_obj);
 579        }
 580        radeon_bo_unref(&rdev->r600_blit.shader_obj);
 581}
 582
 583static int r600_vb_ib_get(struct radeon_device *rdev)
 584{
 585        int r;
 586        r = radeon_ib_get(rdev, &rdev->r600_blit.vb_ib);
 587        if (r) {
 588                DRM_ERROR("failed to get IB for vertex buffer\n");
 589                return r;
 590        }
 591
 592        rdev->r600_blit.vb_total = 64*1024;
 593        rdev->r600_blit.vb_used = 0;
 594        return 0;
 595}
 596
 597static void r600_vb_ib_put(struct radeon_device *rdev)
 598{
 599        radeon_fence_emit(rdev, rdev->r600_blit.vb_ib->fence);
 600        radeon_ib_free(rdev, &rdev->r600_blit.vb_ib);
 601}
 602
 603int r600_blit_prepare_copy(struct radeon_device *rdev, int size_bytes)
 604{
 605        int r;
 606        int ring_size, line_size;
 607        int max_size;
 608        /* loops of emits 64 + fence emit possible */
 609        int dwords_per_loop = 76, num_loops;
 610
 611        r = r600_vb_ib_get(rdev);
 612        if (r)
 613                return r;
 614
 615        /* set_render_target emits 2 extra dwords on rv6xx */
 616        if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770)
 617                dwords_per_loop += 2;
 618
 619        /* 8 bpp vs 32 bpp for xfer unit */
 620        if (size_bytes & 3)
 621                line_size = 8192;
 622        else
 623                line_size = 8192*4;
 624
 625        max_size = 8192 * line_size;
 626
 627        /* major loops cover the max size transfer */
 628        num_loops = ((size_bytes + max_size) / max_size);
 629        /* minor loops cover the extra non aligned bits */
 630        num_loops += ((size_bytes % line_size) ? 1 : 0);
 631        /* calculate number of loops correctly */
 632        ring_size = num_loops * dwords_per_loop;
 633        /* set default  + shaders */
 634        ring_size += 40; /* shaders + def state */
 635        ring_size += 10; /* fence emit for VB IB */
 636        ring_size += 5; /* done copy */
 637        ring_size += 10; /* fence emit for done copy */
 638        r = radeon_ring_lock(rdev, ring_size);
 639        if (r)
 640                return r;
 641
 642        set_default_state(rdev); /* 14 */
 643        set_shaders(rdev); /* 26 */
 644        return 0;
 645}
 646
 647void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence)
 648{
 649        int r;
 650
 651        if (rdev->r600_blit.vb_ib)
 652                r600_vb_ib_put(rdev);
 653
 654        if (fence)
 655                r = radeon_fence_emit(rdev, fence);
 656
 657        radeon_ring_unlock_commit(rdev);
 658}
 659
 660void r600_kms_blit_copy(struct radeon_device *rdev,
 661                        u64 src_gpu_addr, u64 dst_gpu_addr,
 662                        int size_bytes)
 663{
 664        int max_bytes;
 665        u64 vb_gpu_addr;
 666        u32 *vb;
 667
 668        DRM_DEBUG("emitting copy %16llx %16llx %d %d\n", src_gpu_addr, dst_gpu_addr,
 669                  size_bytes, rdev->r600_blit.vb_used);
 670        vb = (u32 *)(rdev->r600_blit.vb_ib->ptr + rdev->r600_blit.vb_used);
 671        if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) {
 672                max_bytes = 8192;
 673
 674                while (size_bytes) {
 675                        int cur_size = size_bytes;
 676                        int src_x = src_gpu_addr & 255;
 677                        int dst_x = dst_gpu_addr & 255;
 678                        int h = 1;
 679                        src_gpu_addr = src_gpu_addr & ~255ULL;
 680                        dst_gpu_addr = dst_gpu_addr & ~255ULL;
 681
 682                        if (!src_x && !dst_x) {
 683                                h = (cur_size / max_bytes);
 684                                if (h > 8192)
 685                                        h = 8192;
 686                                if (h == 0)
 687                                        h = 1;
 688                                else
 689                                        cur_size = max_bytes;
 690                        } else {
 691                                if (cur_size > max_bytes)
 692                                        cur_size = max_bytes;
 693                                if (cur_size > (max_bytes - dst_x))
 694                                        cur_size = (max_bytes - dst_x);
 695                                if (cur_size > (max_bytes - src_x))
 696                                        cur_size = (max_bytes - src_x);
 697                        }
 698
 699                        if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {
 700                                WARN_ON(1);
 701                        }
 702
 703                        vb[0] = i2f(dst_x);
 704                        vb[1] = 0;
 705                        vb[2] = i2f(src_x);
 706                        vb[3] = 0;
 707
 708                        vb[4] = i2f(dst_x);
 709                        vb[5] = i2f(h);
 710                        vb[6] = i2f(src_x);
 711                        vb[7] = i2f(h);
 712
 713                        vb[8] = i2f(dst_x + cur_size);
 714                        vb[9] = i2f(h);
 715                        vb[10] = i2f(src_x + cur_size);
 716                        vb[11] = i2f(h);
 717
 718                        /* src 9 */
 719                        set_tex_resource(rdev, FMT_8,
 720                                         src_x + cur_size, h, src_x + cur_size,
 721                                         src_gpu_addr);
 722
 723                        /* 5 */
 724                        cp_set_surface_sync(rdev,
 725                                            PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
 726
 727                        /* dst 23 */
 728                        set_render_target(rdev, COLOR_8,
 729                                          dst_x + cur_size, h,
 730                                          dst_gpu_addr);
 731
 732                        /* scissors 12 */
 733                        set_scissors(rdev, dst_x, 0, dst_x + cur_size, h);
 734
 735                        /* 14 */
 736                        vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
 737                        set_vtx_resource(rdev, vb_gpu_addr);
 738
 739                        /* draw 10 */
 740                        draw_auto(rdev);
 741
 742                        /* 5 */
 743                        cp_set_surface_sync(rdev,
 744                                            PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
 745                                            cur_size * h, dst_gpu_addr);
 746
 747                        vb += 12;
 748                        rdev->r600_blit.vb_used += 12 * 4;
 749
 750                        src_gpu_addr += cur_size * h;
 751                        dst_gpu_addr += cur_size * h;
 752                        size_bytes -= cur_size * h;
 753                }
 754        } else {
 755                max_bytes = 8192 * 4;
 756
 757                while (size_bytes) {
 758                        int cur_size = size_bytes;
 759                        int src_x = (src_gpu_addr & 255);
 760                        int dst_x = (dst_gpu_addr & 255);
 761                        int h = 1;
 762                        src_gpu_addr = src_gpu_addr & ~255ULL;
 763                        dst_gpu_addr = dst_gpu_addr & ~255ULL;
 764
 765                        if (!src_x && !dst_x) {
 766                                h = (cur_size / max_bytes);
 767                                if (h > 8192)
 768                                        h = 8192;
 769                                if (h == 0)
 770                                        h = 1;
 771                                else
 772                                        cur_size = max_bytes;
 773                        } else {
 774                                if (cur_size > max_bytes)
 775                                        cur_size = max_bytes;
 776                                if (cur_size > (max_bytes - dst_x))
 777                                        cur_size = (max_bytes - dst_x);
 778                                if (cur_size > (max_bytes - src_x))
 779                                        cur_size = (max_bytes - src_x);
 780                        }
 781
 782                        if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {
 783                                WARN_ON(1);
 784                        }
 785
 786                        vb[0] = i2f(dst_x / 4);
 787                        vb[1] = 0;
 788                        vb[2] = i2f(src_x / 4);
 789                        vb[3] = 0;
 790
 791                        vb[4] = i2f(dst_x / 4);
 792                        vb[5] = i2f(h);
 793                        vb[6] = i2f(src_x / 4);
 794                        vb[7] = i2f(h);
 795
 796                        vb[8] = i2f((dst_x + cur_size) / 4);
 797                        vb[9] = i2f(h);
 798                        vb[10] = i2f((src_x + cur_size) / 4);
 799                        vb[11] = i2f(h);
 800
 801                        /* src 9 */
 802                        set_tex_resource(rdev, FMT_8_8_8_8,
 803                                         (src_x + cur_size) / 4,
 804                                         h, (src_x + cur_size) / 4,
 805                                         src_gpu_addr);
 806                        /* 5 */
 807                        cp_set_surface_sync(rdev,
 808                                            PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
 809
 810                        /* dst 23 */
 811                        set_render_target(rdev, COLOR_8_8_8_8,
 812                                          (dst_x + cur_size) / 4, h,
 813                                          dst_gpu_addr);
 814
 815                        /* scissors 12  */
 816                        set_scissors(rdev, (dst_x / 4), 0, (dst_x + cur_size / 4), h);
 817
 818                        /* Vertex buffer setup 14 */
 819                        vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
 820                        set_vtx_resource(rdev, vb_gpu_addr);
 821
 822                        /* draw 10 */
 823                        draw_auto(rdev);
 824
 825                        /* 5 */
 826                        cp_set_surface_sync(rdev,
 827                                            PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
 828                                            cur_size * h, dst_gpu_addr);
 829
 830                        /* 78 ring dwords per loop */
 831                        vb += 12;
 832                        rdev->r600_blit.vb_used += 12 * 4;
 833
 834                        src_gpu_addr += cur_size * h;
 835                        dst_gpu_addr += cur_size * h;
 836                        size_bytes -= cur_size * h;
 837                }
 838        }
 839}
 840
 841