linux/drivers/gpu/drm/radeon/r600_blit_kms.c
/*
 * Copyright 2009 Advanced Micro Devices, Inc.
 * Copyright 2009 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 */

#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon.h"

#include "r600d.h"
#include "r600_blit_shaders.h"
#include "radeon_blit_common.h"

/* 23 bits of float fractional data */
#define I2F_FRAC_BITS  23
#define I2F_MASK ((1 << I2F_FRAC_BITS) - 1)

/*
 * Converts an unsigned integer into its 32-bit IEEE floating point
 * representation.  Will be exact from 0 to 2^24.  Above that, we round
 * towards zero as the fractional bits will not fit in a float.  (It would
 * be better to round towards even as the fpu does, but that is slower.)
 */
__pure uint32_t int2float(uint32_t x)
{
        uint32_t msb, exponent, fraction;

        /* Zero is special */
        if (!x)
                return 0;

        /* Get location of the most significant bit */
        msb = __fls(x);

        /*
         * Use a rotate instead of a shift because that works both leftwards
         * and rightwards due to the mod(32) behaviour.  This means we don't
         * need to check to see if we are above 2^24 or not.
         */
        fraction = ror32(x, (msb - I2F_FRAC_BITS) & 0x1f) & I2F_MASK;
        exponent = (127 + msb) << I2F_FRAC_BITS;

        return fraction + exponent;
}
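
/*
 * Worked example (illustrative): int2float(16) finds msb = 4, so the rotate
 * shifts the value left by 23 - 4 = 19 bits, leaving a zero fraction, and
 * the exponent field becomes (127 + 4) << 23, i.e. 0x41800000, the IEEE-754
 * encoding of 16.0f.
 */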

/* emits 21 on rv770+, 23 on r600 */
static void
set_render_target(struct radeon_device *rdev, int format,
                  int w, int h, u64 gpu_addr)
{
        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        u32 cb_color_info;
        int pitch, slice;

        h = ALIGN(h, 8);
        if (h < 8)
                h = 8;

        cb_color_info = CB_FORMAT(format) |
                CB_SOURCE_FORMAT(CB_SF_EXPORT_NORM) |
                CB_ARRAY_MODE(ARRAY_1D_TILED_THIN1);
        pitch = (w / 8) - 1;
        slice = ((w * h) / 64) - 1;

        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
        radeon_ring_write(ring, (CB_COLOR0_BASE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
        radeon_ring_write(ring, gpu_addr >> 8);

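        /*
         * The rv6xx parts other than R600 need a SURFACE_BASE_UPDATE event
         * after CB_COLOR0_BASE is reprogrammed.
         */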
        if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770) {
                radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_BASE_UPDATE, 0));
                radeon_ring_write(ring, 2 << 0);
        }

        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
        radeon_ring_write(ring, (CB_COLOR0_SIZE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
        radeon_ring_write(ring, (pitch << 0) | (slice << 10));

        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
        radeon_ring_write(ring, (CB_COLOR0_VIEW - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
        radeon_ring_write(ring, 0);

        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
        radeon_ring_write(ring, (CB_COLOR0_INFO - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
        radeon_ring_write(ring, cb_color_info);

        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
        radeon_ring_write(ring, (CB_COLOR0_TILE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
        radeon_ring_write(ring, 0);

        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
        radeon_ring_write(ring, (CB_COLOR0_FRAG - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
        radeon_ring_write(ring, 0);

        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
        radeon_ring_write(ring, (CB_COLOR0_MASK - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
        radeon_ring_write(ring, 0);
}

/* emits 5dw */
static void
cp_set_surface_sync(struct radeon_device *rdev,
                    u32 sync_type, u32 size,
                    u64 mc_addr)
{
        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        u32 cp_coher_size;

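        /*
         * CP_COHER_SIZE is programmed in 256-byte units; 0xffffffff is
         * passed through unchanged to request a full-range sync.
         */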
        if (size == 0xffffffff)
                cp_coher_size = 0xffffffff;
        else
                cp_coher_size = ((size + 255) >> 8);

        radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
        radeon_ring_write(ring, sync_type);
        radeon_ring_write(ring, cp_coher_size);
        radeon_ring_write(ring, mc_addr >> 8);
        radeon_ring_write(ring, 10); /* poll interval */
}

/* emits 21dw + 1 surface sync = 26dw */
static void
set_shaders(struct radeon_device *rdev)
{
        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        u64 gpu_addr;
        u32 sq_pgm_resources;

        /* setup shader regs */
        sq_pgm_resources = (1 << 0);

        /* VS */
        gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset;
        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
        radeon_ring_write(ring, (SQ_PGM_START_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
        radeon_ring_write(ring, gpu_addr >> 8);

        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
        radeon_ring_write(ring, (SQ_PGM_RESOURCES_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
        radeon_ring_write(ring, sq_pgm_resources);

        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
        radeon_ring_write(ring, (SQ_PGM_CF_OFFSET_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
        radeon_ring_write(ring, 0);

        /* PS */
        gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.ps_offset;
        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
        radeon_ring_write(ring, (SQ_PGM_START_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
        radeon_ring_write(ring, gpu_addr >> 8);

        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
        radeon_ring_write(ring, (SQ_PGM_RESOURCES_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
        radeon_ring_write(ring, sq_pgm_resources | (1 << 28));

        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
        radeon_ring_write(ring, (SQ_PGM_EXPORTS_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
        radeon_ring_write(ring, 2);

        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
        radeon_ring_write(ring, (SQ_PGM_CF_OFFSET_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
        radeon_ring_write(ring, 0);

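        /*
         * Sync the shader area so the CP picks up the freshly written VS and
         * PS (the two shaders sit in adjacent 256-byte-aligned slots of the
         * blit BO, which the 512-byte range is sized to cover).
         */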
        gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset;
        cp_set_surface_sync(rdev, PACKET3_SH_ACTION_ENA, 512, gpu_addr);
}

/* emits 9 + 1 sync (5) = 14 */
static void
set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr)
{
        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        u32 sq_vtx_constant_word2;

        sq_vtx_constant_word2 = SQ_VTXC_BASE_ADDR_HI(upper_32_bits(gpu_addr) & 0xff) |
                SQ_VTXC_STRIDE(16);
#ifdef __BIG_ENDIAN
        sq_vtx_constant_word2 |= SQ_VTXC_ENDIAN_SWAP(SQ_ENDIAN_8IN32);
#endif

        radeon_ring_write(ring, PACKET3(PACKET3_SET_RESOURCE, 7));
        radeon_ring_write(ring, 0x460);
        radeon_ring_write(ring, gpu_addr & 0xffffffff);
        radeon_ring_write(ring, 48 - 1);
        radeon_ring_write(ring, sq_vtx_constant_word2);
        radeon_ring_write(ring, 1 << 0);
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, SQ_TEX_VTX_VALID_BUFFER << 30);

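        /*
         * Chips without a vertex cache (RV610/RV620/RS780/RS880/RV710) fetch
         * vertex data through the texture cache, so flush TC instead of VC.
         */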
        if ((rdev->family == CHIP_RV610) ||
            (rdev->family == CHIP_RV620) ||
            (rdev->family == CHIP_RS780) ||
            (rdev->family == CHIP_RS880) ||
            (rdev->family == CHIP_RV710))
                cp_set_surface_sync(rdev,
                                    PACKET3_TC_ACTION_ENA, 48, gpu_addr);
        else
                cp_set_surface_sync(rdev,
                                    PACKET3_VC_ACTION_ENA, 48, gpu_addr);
}

/* emits 9 */
static void
set_tex_resource(struct radeon_device *rdev,
                 int format, int w, int h, int pitch,
                 u64 gpu_addr, u32 size)
{
        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;

        if (h < 1)
                h = 1;

        sq_tex_resource_word0 = S_038000_DIM(V_038000_SQ_TEX_DIM_2D) |
                S_038000_TILE_MODE(V_038000_ARRAY_1D_TILED_THIN1);
        sq_tex_resource_word0 |= S_038000_PITCH((pitch >> 3) - 1) |
                S_038000_TEX_WIDTH(w - 1);

        sq_tex_resource_word1 = S_038004_DATA_FORMAT(format);
        sq_tex_resource_word1 |= S_038004_TEX_HEIGHT(h - 1);

        sq_tex_resource_word4 = S_038010_REQUEST_SIZE(1) |
                S_038010_DST_SEL_X(SQ_SEL_X) |
                S_038010_DST_SEL_Y(SQ_SEL_Y) |
                S_038010_DST_SEL_Z(SQ_SEL_Z) |
                S_038010_DST_SEL_W(SQ_SEL_W);

        cp_set_surface_sync(rdev,
                            PACKET3_TC_ACTION_ENA, size, gpu_addr);

        radeon_ring_write(ring, PACKET3(PACKET3_SET_RESOURCE, 7));
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, sq_tex_resource_word0);
        radeon_ring_write(ring, sq_tex_resource_word1);
        radeon_ring_write(ring, gpu_addr >> 8);
        radeon_ring_write(ring, gpu_addr >> 8);
        radeon_ring_write(ring, sq_tex_resource_word4);
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, SQ_TEX_VTX_VALID_TEXTURE << 30);
}

/* emits 12 */
static void
set_scissors(struct radeon_device *rdev, int x1, int y1,
             int x2, int y2)
{
        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
        radeon_ring_write(ring, (PA_SC_SCREEN_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
        radeon_ring_write(ring, (x1 << 0) | (y1 << 16));
        radeon_ring_write(ring, (x2 << 0) | (y2 << 16));

        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
        radeon_ring_write(ring, (PA_SC_GENERIC_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
        radeon_ring_write(ring, (x1 << 0) | (y1 << 16) | (1 << 31));
        radeon_ring_write(ring, (x2 << 0) | (y2 << 16));

        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
        radeon_ring_write(ring, (PA_SC_WINDOW_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
        radeon_ring_write(ring, (x1 << 0) | (y1 << 16) | (1 << 31));
        radeon_ring_write(ring, (x2 << 0) | (y2 << 16));
}

/* emits 10 */
static void
draw_auto(struct radeon_device *rdev)
{
        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
        radeon_ring_write(ring, (VGT_PRIMITIVE_TYPE - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
        radeon_ring_write(ring, DI_PT_RECTLIST);

        radeon_ring_write(ring, PACKET3(PACKET3_INDEX_TYPE, 0));
        radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
                          (2 << 2) |
#endif
                          DI_INDEX_SIZE_16_BIT);

        radeon_ring_write(ring, PACKET3(PACKET3_NUM_INSTANCES, 0));
        radeon_ring_write(ring, 1);

        radeon_ring_write(ring, PACKET3(PACKET3_DRAW_INDEX_AUTO, 1));
        radeon_ring_write(ring, 3);
        radeon_ring_write(ring, DI_SRC_SEL_AUTO_INDEX);
}

/* emits 14 */
static void
set_default_state(struct radeon_device *rdev)
{
        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
        u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
        int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs;
        int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads;
        int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries;
        u64 gpu_addr;
        int dwords;

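        /*
         * Per-family split of the SQ GPRs, threads and stack entries between
         * the shader stages used by the blit.
         */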
        switch (rdev->family) {
        case CHIP_R600:
                num_ps_gprs = 192;
                num_vs_gprs = 56;
                num_temp_gprs = 4;
                num_gs_gprs = 0;
                num_es_gprs = 0;
                num_ps_threads = 136;
                num_vs_threads = 48;
                num_gs_threads = 4;
                num_es_threads = 4;
                num_ps_stack_entries = 128;
                num_vs_stack_entries = 128;
                num_gs_stack_entries = 0;
                num_es_stack_entries = 0;
                break;
        case CHIP_RV630:
        case CHIP_RV635:
                num_ps_gprs = 84;
                num_vs_gprs = 36;
                num_temp_gprs = 4;
                num_gs_gprs = 0;
                num_es_gprs = 0;
                num_ps_threads = 144;
                num_vs_threads = 40;
                num_gs_threads = 4;
                num_es_threads = 4;
                num_ps_stack_entries = 40;
                num_vs_stack_entries = 40;
                num_gs_stack_entries = 32;
                num_es_stack_entries = 16;
                break;
        case CHIP_RV610:
        case CHIP_RV620:
        case CHIP_RS780:
        case CHIP_RS880:
        default:
                num_ps_gprs = 84;
                num_vs_gprs = 36;
                num_temp_gprs = 4;
                num_gs_gprs = 0;
                num_es_gprs = 0;
                num_ps_threads = 136;
                num_vs_threads = 48;
                num_gs_threads = 4;
                num_es_threads = 4;
                num_ps_stack_entries = 40;
                num_vs_stack_entries = 40;
                num_gs_stack_entries = 32;
                num_es_stack_entries = 16;
                break;
        case CHIP_RV670:
                num_ps_gprs = 144;
                num_vs_gprs = 40;
                num_temp_gprs = 4;
                num_gs_gprs = 0;
                num_es_gprs = 0;
                num_ps_threads = 136;
                num_vs_threads = 48;
                num_gs_threads = 4;
                num_es_threads = 4;
                num_ps_stack_entries = 40;
                num_vs_stack_entries = 40;
                num_gs_stack_entries = 32;
                num_es_stack_entries = 16;
                break;
        case CHIP_RV770:
                num_ps_gprs = 192;
                num_vs_gprs = 56;
                num_temp_gprs = 4;
                num_gs_gprs = 0;
                num_es_gprs = 0;
                num_ps_threads = 188;
                num_vs_threads = 60;
                num_gs_threads = 0;
                num_es_threads = 0;
                num_ps_stack_entries = 256;
                num_vs_stack_entries = 256;
                num_gs_stack_entries = 0;
                num_es_stack_entries = 0;
                break;
        case CHIP_RV730:
        case CHIP_RV740:
                num_ps_gprs = 84;
                num_vs_gprs = 36;
                num_temp_gprs = 4;
                num_gs_gprs = 0;
                num_es_gprs = 0;
                num_ps_threads = 188;
                num_vs_threads = 60;
                num_gs_threads = 0;
                num_es_threads = 0;
                num_ps_stack_entries = 128;
                num_vs_stack_entries = 128;
                num_gs_stack_entries = 0;
                num_es_stack_entries = 0;
                break;
        case CHIP_RV710:
                num_ps_gprs = 192;
                num_vs_gprs = 56;
                num_temp_gprs = 4;
                num_gs_gprs = 0;
                num_es_gprs = 0;
                num_ps_threads = 144;
                num_vs_threads = 48;
                num_gs_threads = 0;
                num_es_threads = 0;
                num_ps_stack_entries = 128;
                num_vs_stack_entries = 128;
                num_gs_stack_entries = 0;
                num_es_stack_entries = 0;
                break;
        }

        if ((rdev->family == CHIP_RV610) ||
            (rdev->family == CHIP_RV620) ||
            (rdev->family == CHIP_RS780) ||
            (rdev->family == CHIP_RS880) ||
            (rdev->family == CHIP_RV710))
                sq_config = 0;
        else
                sq_config = VC_ENABLE;

        sq_config |= (DX9_CONSTS |
                      ALU_INST_PREFER_VECTOR |
                      PS_PRIO(0) |
                      VS_PRIO(1) |
                      GS_PRIO(2) |
                      ES_PRIO(3));

        sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) |
                                  NUM_VS_GPRS(num_vs_gprs) |
                                  NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
        sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) |
                                  NUM_ES_GPRS(num_es_gprs));
        sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) |
                                   NUM_VS_THREADS(num_vs_threads) |
                                   NUM_GS_THREADS(num_gs_threads) |
                                   NUM_ES_THREADS(num_es_threads));
        sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
                                    NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
        sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
                                    NUM_ES_STACK_ENTRIES(num_es_stack_entries));

        /* emit an IB pointing at default state */
        dwords = ALIGN(rdev->r600_blit.state_len, 0x10);
        gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.state_offset;
        radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
        radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
                          (2 << 0) |
#endif
                          (gpu_addr & 0xFFFFFFFC));
        radeon_ring_write(ring, upper_32_bits(gpu_addr) & 0xFF);
        radeon_ring_write(ring, dwords);

        /* SQ config */
        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 6));
        radeon_ring_write(ring, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
        radeon_ring_write(ring, sq_config);
        radeon_ring_write(ring, sq_gpr_resource_mgmt_1);
        radeon_ring_write(ring, sq_gpr_resource_mgmt_2);
        radeon_ring_write(ring, sq_thread_resource_mgmt);
        radeon_ring_write(ring, sq_stack_resource_mgmt_1);
        radeon_ring_write(ring, sq_stack_resource_mgmt_2);
}

int r600_blit_init(struct radeon_device *rdev)
{
        u32 obj_size;
        int i, r, dwords;
        void *ptr;
        u32 packet2s[16];
        int num_packet2s = 0;

        rdev->r600_blit.primitives.set_render_target = set_render_target;
        rdev->r600_blit.primitives.cp_set_surface_sync = cp_set_surface_sync;
        rdev->r600_blit.primitives.set_shaders = set_shaders;
        rdev->r600_blit.primitives.set_vtx_resource = set_vtx_resource;
        rdev->r600_blit.primitives.set_tex_resource = set_tex_resource;
        rdev->r600_blit.primitives.set_scissors = set_scissors;
        rdev->r600_blit.primitives.draw_auto = draw_auto;
        rdev->r600_blit.primitives.set_default_state = set_default_state;

        rdev->r600_blit.ring_size_common = 8; /* sync semaphore */
        rdev->r600_blit.ring_size_common += 40; /* shaders + def state */
        rdev->r600_blit.ring_size_common += 5; /* done copy */
        rdev->r600_blit.ring_size_common += 16; /* fence emit for done copy */

        rdev->r600_blit.ring_size_per_loop = 76;
        /* set_render_target emits 2 extra dwords on rv6xx */
        if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770)
                rdev->r600_blit.ring_size_per_loop += 2;

        rdev->r600_blit.max_dim = 8192;

        rdev->r600_blit.state_offset = 0;

        if (rdev->family >= CHIP_RV770)
                rdev->r600_blit.state_len = r7xx_default_size;
        else
                rdev->r600_blit.state_len = r6xx_default_size;

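        /*
         * Pad the default state out to a 16-dword boundary with type-2 NOP
         * packets; set_default_state() programs the indirect buffer with the
         * aligned length.
         */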
        dwords = rdev->r600_blit.state_len;
        while (dwords & 0xf) {
                packet2s[num_packet2s++] = cpu_to_le32(PACKET2(0));
                dwords++;
        }

        obj_size = dwords * 4;
        obj_size = ALIGN(obj_size, 256);

        rdev->r600_blit.vs_offset = obj_size;
        obj_size += r6xx_vs_size * 4;
        obj_size = ALIGN(obj_size, 256);

        rdev->r600_blit.ps_offset = obj_size;
        obj_size += r6xx_ps_size * 4;
        obj_size = ALIGN(obj_size, 256);

        /* pin copy shader into vram if not already initialized */
        if (rdev->r600_blit.shader_obj == NULL) {
                r = radeon_bo_create(rdev, obj_size, PAGE_SIZE, true,
                                     RADEON_GEM_DOMAIN_VRAM,
                                     NULL, &rdev->r600_blit.shader_obj);
                if (r) {
                        DRM_ERROR("r600 failed to allocate shader\n");
                        return r;
                }

                r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
                if (unlikely(r != 0))
                        return r;
                r = radeon_bo_pin(rdev->r600_blit.shader_obj, RADEON_GEM_DOMAIN_VRAM,
                                  &rdev->r600_blit.shader_gpu_addr);
                radeon_bo_unreserve(rdev->r600_blit.shader_obj);
                if (r) {
                        dev_err(rdev->dev, "(%d) pin blit object failed\n", r);
                        return r;
                }
        }

        DRM_DEBUG("r6xx blit allocated bo %08x vs %08x ps %08x\n",
                  obj_size,
                  rdev->r600_blit.vs_offset, rdev->r600_blit.ps_offset);

        r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
        if (unlikely(r != 0))
                return r;
        r = radeon_bo_kmap(rdev->r600_blit.shader_obj, &ptr);
        if (r) {
                DRM_ERROR("failed to map blit object %d\n", r);
                return r;
        }
        if (rdev->family >= CHIP_RV770)
                memcpy_toio(ptr + rdev->r600_blit.state_offset,
                            r7xx_default_state, rdev->r600_blit.state_len * 4);
        else
                memcpy_toio(ptr + rdev->r600_blit.state_offset,
                            r6xx_default_state, rdev->r600_blit.state_len * 4);
        if (num_packet2s)
                memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4),
                            packet2s, num_packet2s * 4);
        for (i = 0; i < r6xx_vs_size; i++)
                *(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(r6xx_vs[i]);
        for (i = 0; i < r6xx_ps_size; i++)
                *(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(r6xx_ps[i]);
        radeon_bo_kunmap(rdev->r600_blit.shader_obj);
        radeon_bo_unreserve(rdev->r600_blit.shader_obj);

        radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
        return 0;
}

void r600_blit_fini(struct radeon_device *rdev)
{
        int r;

        radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
        if (rdev->r600_blit.shader_obj == NULL)
                return;
        /* If we can't reserve the bo, unref should be enough to destroy
         * it when it becomes idle.
         */
        r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
        if (!r) {
                radeon_bo_unpin(rdev->r600_blit.shader_obj);
                radeon_bo_unreserve(rdev->r600_blit.shader_obj);
        }
        radeon_bo_unref(&rdev->r600_blit.shader_obj);
}

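/*
 * Pick the largest w x h rectangle (in RECT_UNIT_W x RECT_UNIT_H units) that
 * can be carved out of num_gpu_pages without exceeding max_dim in either
 * dimension.  The height doubles roughly every time the page count grows by
 * a factor of four, keeping the rectangle close to square.  Returns the
 * number of pages actually covered and, if requested, the chosen width and
 * height.
 */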
static unsigned r600_blit_create_rect(unsigned num_gpu_pages,
                                      int *width, int *height, int max_dim)
{
        unsigned max_pages;
        unsigned pages = num_gpu_pages;
        int w, h;

        if (num_gpu_pages == 0) {
                /* not supposed to be called with no pages, but just in case */
                h = 0;
                w = 0;
                pages = 0;
                WARN_ON(1);
        } else {
                int rect_order = 2;
                h = RECT_UNIT_H;
                while (num_gpu_pages / rect_order) {
                        h *= 2;
                        rect_order *= 4;
                        if (h >= max_dim) {
                                h = max_dim;
                                break;
                        }
                }
                max_pages = (max_dim * h) / (RECT_UNIT_W * RECT_UNIT_H);
                if (pages > max_pages)
                        pages = max_pages;
                w = (pages * RECT_UNIT_W * RECT_UNIT_H) / h;
                w = (w / RECT_UNIT_W) * RECT_UNIT_W;
                pages = (w * h) / (RECT_UNIT_W * RECT_UNIT_H);
                BUG_ON(pages == 0);
        }

        DRM_DEBUG("blit_rectangle: h=%d, w=%d, pages=%u\n", h, w, pages);

        /* return width and height only if the caller wants it */
        if (height)
                *height = h;
        if (width)
                *width = w;

        return pages;
}

int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages,
                           struct radeon_fence **fence, struct radeon_sa_bo **vb,
                           struct radeon_semaphore **sem)
{
        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        int r;
        int ring_size;
        int num_loops = 0;
        int dwords_per_loop = rdev->r600_blit.ring_size_per_loop;

        /* work out how many rectangle passes are needed */
        while (num_gpu_pages) {
                num_gpu_pages -=
                        r600_blit_create_rect(num_gpu_pages, NULL, NULL,
                                              rdev->r600_blit.max_dim);
                num_loops++;
        }

        /* 48 bytes of vertex data per loop */
        r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, vb,
                             (num_loops*48)+256, 256, true);
        if (r)
                return r;

        r = radeon_semaphore_create(rdev, sem);
        if (r) {
                radeon_sa_bo_free(rdev, vb, NULL);
                return r;
        }

        /* size the ring: per-loop dwords plus the common overhead */
        ring_size = num_loops * dwords_per_loop;
        ring_size += rdev->r600_blit.ring_size_common;
        r = radeon_ring_lock(rdev, ring, ring_size);
        if (r) {
                radeon_sa_bo_free(rdev, vb, NULL);
                radeon_semaphore_free(rdev, sem, NULL);
                return r;
        }

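        /*
         * If the previous fence lives on another ring, make the GFX ring
         * wait on it through the semaphore; otherwise the semaphore is not
         * needed and can be freed right away.
         */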
        if (radeon_fence_need_sync(*fence, RADEON_RING_TYPE_GFX_INDEX)) {
                radeon_semaphore_sync_rings(rdev, *sem, (*fence)->ring,
                                            RADEON_RING_TYPE_GFX_INDEX);
                radeon_fence_note_sync(*fence, RADEON_RING_TYPE_GFX_INDEX);
        } else {
                radeon_semaphore_free(rdev, sem, NULL);
        }

        rdev->r600_blit.primitives.set_default_state(rdev);
        rdev->r600_blit.primitives.set_shaders(rdev);
        return 0;
}

void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence **fence,
                         struct radeon_sa_bo *vb, struct radeon_semaphore *sem)
{
        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        int r;

        r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX);
        if (r) {
                radeon_ring_unlock_undo(rdev, ring);
                return;
        }

        radeon_ring_unlock_commit(rdev, ring);
        radeon_sa_bo_free(rdev, &vb, *fence);
        radeon_semaphore_free(rdev, &sem, *fence);
}

void r600_kms_blit_copy(struct radeon_device *rdev,
                        u64 src_gpu_addr, u64 dst_gpu_addr,
                        unsigned num_gpu_pages,
                        struct radeon_sa_bo *vb)
{
        u64 vb_gpu_addr;
        u32 *vb_cpu_addr;

        DRM_DEBUG("emitting copy %16llx %16llx %u\n",
                  src_gpu_addr, dst_gpu_addr, num_gpu_pages);
        vb_cpu_addr = (u32 *)radeon_sa_bo_cpu_addr(vb);
        vb_gpu_addr = radeon_sa_bo_gpu_addr(vb);

        while (num_gpu_pages) {
                int w, h;
                unsigned size_in_bytes;
                unsigned pages_per_loop =
                        r600_blit_create_rect(num_gpu_pages, &w, &h,
                                              rdev->r600_blit.max_dim);

                size_in_bytes = pages_per_loop * RADEON_GPU_PAGE_SIZE;
                DRM_DEBUG("rectangle w=%d h=%d\n", w, h);

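                /*
                 * Three vertices per RECTLIST pass, each 16 bytes of floats
                 * (x, y, u, v) to match the stride and 48-byte size set in
                 * set_vtx_resource(): (0,0), (0,h) and (w,h).
                 */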
                vb_cpu_addr[0] = 0;
                vb_cpu_addr[1] = 0;
                vb_cpu_addr[2] = 0;
                vb_cpu_addr[3] = 0;

                vb_cpu_addr[4] = 0;
                vb_cpu_addr[5] = int2float(h);
                vb_cpu_addr[6] = 0;
                vb_cpu_addr[7] = int2float(h);

                vb_cpu_addr[8] = int2float(w);
                vb_cpu_addr[9] = int2float(h);
                vb_cpu_addr[10] = int2float(w);
                vb_cpu_addr[11] = int2float(h);

                rdev->r600_blit.primitives.set_tex_resource(rdev, FMT_8_8_8_8,
                                                            w, h, w, src_gpu_addr, size_in_bytes);
                rdev->r600_blit.primitives.set_render_target(rdev, COLOR_8_8_8_8,
                                                             w, h, dst_gpu_addr);
                rdev->r600_blit.primitives.set_scissors(rdev, 0, 0, w, h);
                rdev->r600_blit.primitives.set_vtx_resource(rdev, vb_gpu_addr);
                rdev->r600_blit.primitives.draw_auto(rdev);
                rdev->r600_blit.primitives.cp_set_surface_sync(rdev,
                                    PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
                                    size_in_bytes, dst_gpu_addr);

                vb_cpu_addr += 12;
                vb_gpu_addr += 4*12;
                src_gpu_addr += size_in_bytes;
                dst_gpu_addr += size_in_bytes;
                num_gpu_pages -= pages_per_loop;
        }
}