linux/drivers/gpu/drm/radeon/r600_blit_kms.c
/*
 * Copyright 2009 Advanced Micro Devices, Inc.
 * Copyright 2009 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 */

#include "drmP.h"
#include "drm.h"
#include "radeon_drm.h"
#include "radeon.h"

#include "r600d.h"
#include "r600_blit_shaders.h"
#include "radeon_blit_common.h"

/* emits 21 on r600 and rv770+, 23 on rv610-rv670 */
static void
set_render_target(struct radeon_device *rdev, int format,
                  int w, int h, u64 gpu_addr)
{
        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        u32 cb_color_info;
        int pitch, slice;

        h = ALIGN(h, 8);
        if (h < 8)
                h = 8;

        cb_color_info = CB_FORMAT(format) |
                CB_SOURCE_FORMAT(CB_SF_EXPORT_NORM) |
                CB_ARRAY_MODE(ARRAY_1D_TILED_THIN1);
        /* pitch is in units of 8 pixels, slice in units of 64 pixels, both minus 1 */
        pitch = (w / 8) - 1;
        slice = ((w * h) / 64) - 1;

        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
        radeon_ring_write(ring, (CB_COLOR0_BASE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
        radeon_ring_write(ring, gpu_addr >> 8);

        if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770) {
                radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_BASE_UPDATE, 0));
                radeon_ring_write(ring, 2 << 0);
        }

        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
        radeon_ring_write(ring, (CB_COLOR0_SIZE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
        radeon_ring_write(ring, (pitch << 0) | (slice << 10));

        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
        radeon_ring_write(ring, (CB_COLOR0_VIEW - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
        radeon_ring_write(ring, 0);

        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
        radeon_ring_write(ring, (CB_COLOR0_INFO - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
        radeon_ring_write(ring, cb_color_info);

        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
        radeon_ring_write(ring, (CB_COLOR0_TILE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
        radeon_ring_write(ring, 0);

        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
        radeon_ring_write(ring, (CB_COLOR0_FRAG - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
        radeon_ring_write(ring, 0);

        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
        radeon_ring_write(ring, (CB_COLOR0_MASK - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
        radeon_ring_write(ring, 0);
}

/* emits 5dw */
static void
cp_set_surface_sync(struct radeon_device *rdev,
                    u32 sync_type, u32 size,
                    u64 mc_addr)
{
        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        u32 cp_coher_size;

        /* the coherency size is programmed in 256-byte units, rounded up;
         * 0xffffffff is passed through as-is to cover everything
         */
        if (size == 0xffffffff)
                cp_coher_size = 0xffffffff;
        else
                cp_coher_size = ((size + 255) >> 8);

        radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
        radeon_ring_write(ring, sync_type);
        radeon_ring_write(ring, cp_coher_size);
        radeon_ring_write(ring, mc_addr >> 8);
        radeon_ring_write(ring, 10); /* poll interval */
}

/* emits 21dw + 1 surface sync = 26dw */
static void
set_shaders(struct radeon_device *rdev)
{
        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        u64 gpu_addr;
        u32 sq_pgm_resources;

        /* setup shader regs */
        sq_pgm_resources = (1 << 0);

        /* VS */
        gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset;
        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
        radeon_ring_write(ring, (SQ_PGM_START_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
        radeon_ring_write(ring, gpu_addr >> 8);

        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
        radeon_ring_write(ring, (SQ_PGM_RESOURCES_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
        radeon_ring_write(ring, sq_pgm_resources);

        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
        radeon_ring_write(ring, (SQ_PGM_CF_OFFSET_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
        radeon_ring_write(ring, 0);

        /* PS */
        gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.ps_offset;
        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
        radeon_ring_write(ring, (SQ_PGM_START_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
        radeon_ring_write(ring, gpu_addr >> 8);

        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
        radeon_ring_write(ring, (SQ_PGM_RESOURCES_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
        radeon_ring_write(ring, sq_pgm_resources | (1 << 28));

        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
        radeon_ring_write(ring, (SQ_PGM_EXPORTS_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
        radeon_ring_write(ring, 2);

        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
        radeon_ring_write(ring, (SQ_PGM_CF_OFFSET_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
        radeon_ring_write(ring, 0);

        gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset;
        cp_set_surface_sync(rdev, PACKET3_SH_ACTION_ENA, 512, gpu_addr);
}

/* emits 9 + 1 sync (5) = 14 */
static void
set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr)
{
        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        u32 sq_vtx_constant_word2;

        sq_vtx_constant_word2 = SQ_VTXC_BASE_ADDR_HI(upper_32_bits(gpu_addr) & 0xff) |
                SQ_VTXC_STRIDE(16);
#ifdef __BIG_ENDIAN
        sq_vtx_constant_word2 |= SQ_VTXC_ENDIAN_SWAP(SQ_ENDIAN_8IN32);
#endif

        radeon_ring_write(ring, PACKET3(PACKET3_SET_RESOURCE, 7));
        radeon_ring_write(ring, 0x460);
        radeon_ring_write(ring, gpu_addr & 0xffffffff);
        radeon_ring_write(ring, 48 - 1);
        radeon_ring_write(ring, sq_vtx_constant_word2);
        radeon_ring_write(ring, 1 << 0);
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, SQ_TEX_VTX_VALID_BUFFER << 30);

        if ((rdev->family == CHIP_RV610) ||
            (rdev->family == CHIP_RV620) ||
            (rdev->family == CHIP_RS780) ||
            (rdev->family == CHIP_RS880) ||
            (rdev->family == CHIP_RV710))
                cp_set_surface_sync(rdev,
                                    PACKET3_TC_ACTION_ENA, 48, gpu_addr);
        else
                cp_set_surface_sync(rdev,
                                    PACKET3_VC_ACTION_ENA, 48, gpu_addr);
}
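
/*
 * The fetch constant programmed above matches the vertex data that
 * r600_kms_blit_copy() writes below: three RECTLIST vertices of four
 * floats each (x, y, s, t), i.e. a 16-byte stride and 48 bytes total,
 * which is what SQ_VTXC_STRIDE(16) and the "48 - 1" size field encode.
 */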

/* emits 9 + 1 sync (5) = 14 */
static void
set_tex_resource(struct radeon_device *rdev,
                 int format, int w, int h, int pitch,
                 u64 gpu_addr, u32 size)
{
        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;

        if (h < 1)
                h = 1;

        sq_tex_resource_word0 = S_038000_DIM(V_038000_SQ_TEX_DIM_2D) |
                S_038000_TILE_MODE(V_038000_ARRAY_1D_TILED_THIN1);
        sq_tex_resource_word0 |= S_038000_PITCH((pitch >> 3) - 1) |
                S_038000_TEX_WIDTH(w - 1);

        sq_tex_resource_word1 = S_038004_DATA_FORMAT(format);
        sq_tex_resource_word1 |= S_038004_TEX_HEIGHT(h - 1);

        sq_tex_resource_word4 = S_038010_REQUEST_SIZE(1) |
                S_038010_DST_SEL_X(SQ_SEL_X) |
                S_038010_DST_SEL_Y(SQ_SEL_Y) |
                S_038010_DST_SEL_Z(SQ_SEL_Z) |
                S_038010_DST_SEL_W(SQ_SEL_W);

        cp_set_surface_sync(rdev,
                            PACKET3_TC_ACTION_ENA, size, gpu_addr);

        radeon_ring_write(ring, PACKET3(PACKET3_SET_RESOURCE, 7));
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, sq_tex_resource_word0);
        radeon_ring_write(ring, sq_tex_resource_word1);
        radeon_ring_write(ring, gpu_addr >> 8);
        radeon_ring_write(ring, gpu_addr >> 8);
        radeon_ring_write(ring, sq_tex_resource_word4);
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, SQ_TEX_VTX_VALID_TEXTURE << 30);
}

/* emits 12 */
static void
set_scissors(struct radeon_device *rdev, int x1, int y1,
             int x2, int y2)
{
        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
        radeon_ring_write(ring, (PA_SC_SCREEN_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
        radeon_ring_write(ring, (x1 << 0) | (y1 << 16));
        radeon_ring_write(ring, (x2 << 0) | (y2 << 16));

        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
        radeon_ring_write(ring, (PA_SC_GENERIC_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
        radeon_ring_write(ring, (x1 << 0) | (y1 << 16) | (1 << 31));
        radeon_ring_write(ring, (x2 << 0) | (y2 << 16));

        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
        radeon_ring_write(ring, (PA_SC_WINDOW_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
        radeon_ring_write(ring, (x1 << 0) | (y1 << 16) | (1 << 31));
        radeon_ring_write(ring, (x2 << 0) | (y2 << 16));
}

/* emits 10 */
static void
draw_auto(struct radeon_device *rdev)
{
        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
        radeon_ring_write(ring, (VGT_PRIMITIVE_TYPE - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
        radeon_ring_write(ring, DI_PT_RECTLIST);

        radeon_ring_write(ring, PACKET3(PACKET3_INDEX_TYPE, 0));
        radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
                          (2 << 2) |
#endif
                          DI_INDEX_SIZE_16_BIT);

        radeon_ring_write(ring, PACKET3(PACKET3_NUM_INSTANCES, 0));
        radeon_ring_write(ring, 1);

        radeon_ring_write(ring, PACKET3(PACKET3_DRAW_INDEX_AUTO, 1));
        radeon_ring_write(ring, 3);
        radeon_ring_write(ring, DI_SRC_SEL_AUTO_INDEX);
}
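
/*
 * Note: the draw above emits three auto-generated indices; with
 * VGT_PRIMITIVE_TYPE set to DI_PT_RECTLIST the hardware expands those
 * three vertices into the full blit rectangle.
 */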

/* emits 14 */
static void
set_default_state(struct radeon_device *rdev)
{
        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
        u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
        int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs;
        int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads;
        int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries;
        u64 gpu_addr;
        int dwords;

        switch (rdev->family) {
        case CHIP_R600:
                num_ps_gprs = 192;
                num_vs_gprs = 56;
                num_temp_gprs = 4;
                num_gs_gprs = 0;
                num_es_gprs = 0;
                num_ps_threads = 136;
                num_vs_threads = 48;
                num_gs_threads = 4;
                num_es_threads = 4;
                num_ps_stack_entries = 128;
                num_vs_stack_entries = 128;
                num_gs_stack_entries = 0;
                num_es_stack_entries = 0;
                break;
        case CHIP_RV630:
        case CHIP_RV635:
                num_ps_gprs = 84;
                num_vs_gprs = 36;
                num_temp_gprs = 4;
                num_gs_gprs = 0;
                num_es_gprs = 0;
                num_ps_threads = 144;
                num_vs_threads = 40;
                num_gs_threads = 4;
                num_es_threads = 4;
                num_ps_stack_entries = 40;
                num_vs_stack_entries = 40;
                num_gs_stack_entries = 32;
                num_es_stack_entries = 16;
                break;
        case CHIP_RV610:
        case CHIP_RV620:
        case CHIP_RS780:
        case CHIP_RS880:
        default:
                num_ps_gprs = 84;
                num_vs_gprs = 36;
                num_temp_gprs = 4;
                num_gs_gprs = 0;
                num_es_gprs = 0;
                num_ps_threads = 136;
                num_vs_threads = 48;
                num_gs_threads = 4;
                num_es_threads = 4;
                num_ps_stack_entries = 40;
                num_vs_stack_entries = 40;
                num_gs_stack_entries = 32;
                num_es_stack_entries = 16;
                break;
        case CHIP_RV670:
                num_ps_gprs = 144;
                num_vs_gprs = 40;
                num_temp_gprs = 4;
                num_gs_gprs = 0;
                num_es_gprs = 0;
                num_ps_threads = 136;
                num_vs_threads = 48;
                num_gs_threads = 4;
                num_es_threads = 4;
                num_ps_stack_entries = 40;
                num_vs_stack_entries = 40;
                num_gs_stack_entries = 32;
                num_es_stack_entries = 16;
                break;
        case CHIP_RV770:
                num_ps_gprs = 192;
                num_vs_gprs = 56;
                num_temp_gprs = 4;
                num_gs_gprs = 0;
                num_es_gprs = 0;
                num_ps_threads = 188;
                num_vs_threads = 60;
                num_gs_threads = 0;
                num_es_threads = 0;
                num_ps_stack_entries = 256;
                num_vs_stack_entries = 256;
                num_gs_stack_entries = 0;
                num_es_stack_entries = 0;
                break;
        case CHIP_RV730:
        case CHIP_RV740:
                num_ps_gprs = 84;
                num_vs_gprs = 36;
                num_temp_gprs = 4;
                num_gs_gprs = 0;
                num_es_gprs = 0;
                num_ps_threads = 188;
                num_vs_threads = 60;
                num_gs_threads = 0;
                num_es_threads = 0;
                num_ps_stack_entries = 128;
                num_vs_stack_entries = 128;
                num_gs_stack_entries = 0;
                num_es_stack_entries = 0;
                break;
        case CHIP_RV710:
                num_ps_gprs = 192;
                num_vs_gprs = 56;
                num_temp_gprs = 4;
                num_gs_gprs = 0;
                num_es_gprs = 0;
                num_ps_threads = 144;
                num_vs_threads = 48;
                num_gs_threads = 0;
                num_es_threads = 0;
                num_ps_stack_entries = 128;
                num_vs_stack_entries = 128;
                num_gs_stack_entries = 0;
                num_es_stack_entries = 0;
                break;
        }

        if ((rdev->family == CHIP_RV610) ||
            (rdev->family == CHIP_RV620) ||
            (rdev->family == CHIP_RS780) ||
            (rdev->family == CHIP_RS880) ||
            (rdev->family == CHIP_RV710))
                sq_config = 0;
        else
                sq_config = VC_ENABLE;

        sq_config |= (DX9_CONSTS |
                      ALU_INST_PREFER_VECTOR |
                      PS_PRIO(0) |
                      VS_PRIO(1) |
                      GS_PRIO(2) |
                      ES_PRIO(3));

        sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) |
                                  NUM_VS_GPRS(num_vs_gprs) |
                                  NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
        sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) |
                                  NUM_ES_GPRS(num_es_gprs));
        sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) |
                                   NUM_VS_THREADS(num_vs_threads) |
                                   NUM_GS_THREADS(num_gs_threads) |
                                   NUM_ES_THREADS(num_es_threads));
        sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
                                    NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
        sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
                                    NUM_ES_STACK_ENTRIES(num_es_stack_entries));

        /* emit an IB pointing at default state */
        dwords = ALIGN(rdev->r600_blit.state_len, 0x10);
        gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.state_offset;
        radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
        radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
                          (2 << 0) |
#endif
                          (gpu_addr & 0xFFFFFFFC));
        radeon_ring_write(ring, upper_32_bits(gpu_addr) & 0xFF);
        radeon_ring_write(ring, dwords);

        /* SQ config */
        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 6));
        radeon_ring_write(ring, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
        radeon_ring_write(ring, sq_config);
        radeon_ring_write(ring, sq_gpr_resource_mgmt_1);
        radeon_ring_write(ring, sq_gpr_resource_mgmt_2);
        radeon_ring_write(ring, sq_thread_resource_mgmt);
        radeon_ring_write(ring, sq_stack_resource_mgmt_1);
        radeon_ring_write(ring, sq_stack_resource_mgmt_2);
}

#define I2F_MAX_BITS 15
#define I2F_MAX_INPUT  ((1 << I2F_MAX_BITS) - 1)
#define I2F_SHIFT (24 - I2F_MAX_BITS)

/*
 * Converts an unsigned integer into its 32-bit IEEE floating point
 * representation. The conversion is not universal and only works for
 * the range from 0 to 2^I2F_MAX_BITS-1. Currently we only use it with
 * inputs between 0 and 16384 (inclusive), so I2F_MAX_BITS=15 is enough.
 * If necessary, I2F_MAX_BITS can be increased, but that will add to the
 * loop iterations and slow us down. Conversion is done by shifting the
 * input and counting down until the first 1 reaches bit position 23.
 * The resulting counter and the shifted input are, respectively, the
 * exponent and the fraction. The sign is always zero.
 */
static uint32_t i2f(uint32_t input)
{
        u32 result, i, exponent, fraction;

        WARN_ON_ONCE(input > I2F_MAX_INPUT);

        if ((input & I2F_MAX_INPUT) == 0)
                result = 0;
        else {
                exponent = 126 + I2F_MAX_BITS;
                fraction = (input & I2F_MAX_INPUT) << I2F_SHIFT;

                for (i = 0; i < I2F_MAX_BITS; i++) {
                        if (fraction & 0x800000)
                                break;
                        else {
                                fraction = fraction << 1;
                                exponent = exponent - 1;
                        }
                }
                result = exponent << 23 | (fraction & 0x7fffff);
        }
        return result;
}
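
/*
 * Worked example: i2f(3).  fraction starts as 3 << I2F_SHIFT = 0x600 and
 * exponent as 126 + I2F_MAX_BITS = 141; thirteen shifts move the leading
 * one into bit 23, leaving fraction = 0xc00000 and exponent = 128, so the
 * result is (128 << 23) | 0x400000 = 0x40400000, i.e. 3.0f.
 */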

int r600_blit_init(struct radeon_device *rdev)
{
        u32 obj_size;
        int i, r, dwords;
        void *ptr;
        u32 packet2s[16];
        int num_packet2s = 0;

        rdev->r600_blit.primitives.set_render_target = set_render_target;
        rdev->r600_blit.primitives.cp_set_surface_sync = cp_set_surface_sync;
        rdev->r600_blit.primitives.set_shaders = set_shaders;
        rdev->r600_blit.primitives.set_vtx_resource = set_vtx_resource;
        rdev->r600_blit.primitives.set_tex_resource = set_tex_resource;
        rdev->r600_blit.primitives.set_scissors = set_scissors;
        rdev->r600_blit.primitives.draw_auto = draw_auto;
        rdev->r600_blit.primitives.set_default_state = set_default_state;

        rdev->r600_blit.ring_size_common = 8; /* sync semaphore */
        rdev->r600_blit.ring_size_common += 40; /* shaders + def state */
        rdev->r600_blit.ring_size_common += 5; /* done copy */
        rdev->r600_blit.ring_size_common += 16; /* fence emit for done copy */

        rdev->r600_blit.ring_size_per_loop = 76;
        /* set_render_target emits 2 extra dwords on rv6xx */
        if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770)
                rdev->r600_blit.ring_size_per_loop += 2;

        rdev->r600_blit.max_dim = 8192;

        rdev->r600_blit.state_offset = 0;

        if (rdev->family >= CHIP_RV770)
                rdev->r600_blit.state_len = r7xx_default_size;
        else
                rdev->r600_blit.state_len = r6xx_default_size;

        /*
         * Shader BO layout: default state (padded to a multiple of 16
         * dwords with type-2 NOPs), then the vertex shader, then the
         * pixel shader, each section aligned to 256 bytes.
         */
        dwords = rdev->r600_blit.state_len;
        while (dwords & 0xf) {
                packet2s[num_packet2s++] = cpu_to_le32(PACKET2(0));
                dwords++;
        }

        obj_size = dwords * 4;
        obj_size = ALIGN(obj_size, 256);

        rdev->r600_blit.vs_offset = obj_size;
        obj_size += r6xx_vs_size * 4;
        obj_size = ALIGN(obj_size, 256);

        rdev->r600_blit.ps_offset = obj_size;
        obj_size += r6xx_ps_size * 4;
        obj_size = ALIGN(obj_size, 256);

        /* pin copy shader into vram if not already initialized */
        if (rdev->r600_blit.shader_obj == NULL) {
                r = radeon_bo_create(rdev, obj_size, PAGE_SIZE, true,
                                     RADEON_GEM_DOMAIN_VRAM,
                                     NULL, &rdev->r600_blit.shader_obj);
                if (r) {
                        DRM_ERROR("r600 failed to allocate shader\n");
                        return r;
                }

                r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
                if (unlikely(r != 0))
                        return r;
                r = radeon_bo_pin(rdev->r600_blit.shader_obj, RADEON_GEM_DOMAIN_VRAM,
                                  &rdev->r600_blit.shader_gpu_addr);
                radeon_bo_unreserve(rdev->r600_blit.shader_obj);
                if (r) {
                        dev_err(rdev->dev, "(%d) pin blit object failed\n", r);
                        return r;
                }
        }

        DRM_DEBUG("r6xx blit allocated bo %08x vs %08x ps %08x\n",
                  obj_size,
                  rdev->r600_blit.vs_offset, rdev->r600_blit.ps_offset);

        r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
        if (unlikely(r != 0))
                return r;
        r = radeon_bo_kmap(rdev->r600_blit.shader_obj, &ptr);
        if (r) {
                DRM_ERROR("failed to map blit object %d\n", r);
                return r;
        }
        if (rdev->family >= CHIP_RV770)
                memcpy_toio(ptr + rdev->r600_blit.state_offset,
                            r7xx_default_state, rdev->r600_blit.state_len * 4);
        else
                memcpy_toio(ptr + rdev->r600_blit.state_offset,
                            r6xx_default_state, rdev->r600_blit.state_len * 4);
        if (num_packet2s)
                memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4),
                            packet2s, num_packet2s * 4);
        for (i = 0; i < r6xx_vs_size; i++)
                *(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(r6xx_vs[i]);
        for (i = 0; i < r6xx_ps_size; i++)
                *(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(r6xx_ps[i]);
        radeon_bo_kunmap(rdev->r600_blit.shader_obj);
        radeon_bo_unreserve(rdev->r600_blit.shader_obj);

        radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
        return 0;
}

void r600_blit_fini(struct radeon_device *rdev)
{
        int r;

        radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
        if (rdev->r600_blit.shader_obj == NULL)
                return;
        /* If we can't reserve the bo, unref should be enough to destroy
         * it when it becomes idle.
         */
        r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
        if (!r) {
                radeon_bo_unpin(rdev->r600_blit.shader_obj);
                radeon_bo_unreserve(rdev->r600_blit.shader_obj);
        }
        radeon_bo_unref(&rdev->r600_blit.shader_obj);
}

static unsigned r600_blit_create_rect(unsigned num_gpu_pages,
                                      int *width, int *height, int max_dim)
{
        unsigned max_pages;
        unsigned pages = num_gpu_pages;
        int w, h;

        if (num_gpu_pages == 0) {
                /* not supposed to be called with no pages, but just in case */
                h = 0;
                w = 0;
                pages = 0;
                WARN_ON(1);
        } else {
                int rect_order = 2;
                h = RECT_UNIT_H;
                while (num_gpu_pages / rect_order) {
                        h *= 2;
                        rect_order *= 4;
                        if (h >= max_dim) {
                                h = max_dim;
                                break;
                        }
                }
                max_pages = (max_dim * h) / (RECT_UNIT_W * RECT_UNIT_H);
                if (pages > max_pages)
                        pages = max_pages;
                w = (pages * RECT_UNIT_W * RECT_UNIT_H) / h;
                w = (w / RECT_UNIT_W) * RECT_UNIT_W;
                pages = (w * h) / (RECT_UNIT_W * RECT_UNIT_H);
                BUG_ON(pages == 0);
        }

        DRM_DEBUG("blit_rectangle: h=%d, w=%d, pages=%d\n", h, w, pages);

        /* return width and height only if the caller wants them */
        if (height)
                *height = h;
        if (width)
                *width = w;

        return pages;
}

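/*
 * For reference, the exported entry points below are meant to be chained
 * by the GPU copy path, roughly as in this sketch (the actual caller is
 * r600_copy_blit() in r600.c and may differ in detail):
 *
 *      struct radeon_semaphore *sem = NULL;
 *      struct radeon_sa_bo *vb = NULL;
 *      int r;
 *
 *      r = r600_blit_prepare_copy(rdev, num_gpu_pages, fence, &vb, &sem);
 *      if (r)
 *              return r;
 *      r600_kms_blit_copy(rdev, src_offset, dst_offset, num_gpu_pages, vb);
 *      r600_blit_done_copy(rdev, fence, vb, sem);
 */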
int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages,
                           struct radeon_fence **fence, struct radeon_sa_bo **vb,
                           struct radeon_semaphore **sem)
{
        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        int r;
        int ring_size;
        int num_loops = 0;
        int dwords_per_loop = rdev->r600_blit.ring_size_per_loop;

        /* num loops */
        while (num_gpu_pages) {
                num_gpu_pages -=
                        r600_blit_create_rect(num_gpu_pages, NULL, NULL,
                                              rdev->r600_blit.max_dim);
                num_loops++;
        }

        /* 48 bytes of vertex data per loop */
        r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, vb,
                             (num_loops*48)+256, 256, true);
        if (r) {
                return r;
        }

        r = radeon_semaphore_create(rdev, sem);
        if (r) {
                radeon_sa_bo_free(rdev, vb, NULL);
                return r;
        }

        /* size the ring: per-loop dwords plus the common overhead */
        ring_size = num_loops * dwords_per_loop;
        ring_size += rdev->r600_blit.ring_size_common;
        r = radeon_ring_lock(rdev, ring, ring_size);
        if (r) {
                radeon_sa_bo_free(rdev, vb, NULL);
                radeon_semaphore_free(rdev, sem, NULL);
                return r;
        }

        if (radeon_fence_need_sync(*fence, RADEON_RING_TYPE_GFX_INDEX)) {
                radeon_semaphore_sync_rings(rdev, *sem, (*fence)->ring,
                                            RADEON_RING_TYPE_GFX_INDEX);
                radeon_fence_note_sync(*fence, RADEON_RING_TYPE_GFX_INDEX);
        } else {
                radeon_semaphore_free(rdev, sem, NULL);
        }

        rdev->r600_blit.primitives.set_default_state(rdev);
        rdev->r600_blit.primitives.set_shaders(rdev);
        return 0;
}

void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence **fence,
                         struct radeon_sa_bo *vb, struct radeon_semaphore *sem)
{
        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        int r;

        r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX);
        if (r) {
                radeon_ring_unlock_undo(rdev, ring);
                return;
        }

        radeon_ring_unlock_commit(rdev, ring);
        radeon_sa_bo_free(rdev, &vb, *fence);
        radeon_semaphore_free(rdev, &sem, *fence);
}

void r600_kms_blit_copy(struct radeon_device *rdev,
                        u64 src_gpu_addr, u64 dst_gpu_addr,
                        unsigned num_gpu_pages,
                        struct radeon_sa_bo *vb)
{
        u64 vb_gpu_addr;
        u32 *vb_cpu_addr;

        DRM_DEBUG("emitting copy %16llx %16llx %d\n",
                  src_gpu_addr, dst_gpu_addr, num_gpu_pages);
        vb_cpu_addr = (u32 *)radeon_sa_bo_cpu_addr(vb);
        vb_gpu_addr = radeon_sa_bo_gpu_addr(vb);

        while (num_gpu_pages) {
                int w, h;
                unsigned size_in_bytes;
                unsigned pages_per_loop =
                        r600_blit_create_rect(num_gpu_pages, &w, &h,
                                              rdev->r600_blit.max_dim);

                size_in_bytes = pages_per_loop * RADEON_GPU_PAGE_SIZE;
                DRM_DEBUG("rectangle w=%d h=%d\n", w, h);

                /* three RECTLIST vertices, four floats each: x, y, s, t */
                vb_cpu_addr[0] = 0;
                vb_cpu_addr[1] = 0;
                vb_cpu_addr[2] = 0;
                vb_cpu_addr[3] = 0;

                vb_cpu_addr[4] = 0;
                vb_cpu_addr[5] = i2f(h);
                vb_cpu_addr[6] = 0;
                vb_cpu_addr[7] = i2f(h);

                vb_cpu_addr[8] = i2f(w);
                vb_cpu_addr[9] = i2f(h);
                vb_cpu_addr[10] = i2f(w);
                vb_cpu_addr[11] = i2f(h);

                rdev->r600_blit.primitives.set_tex_resource(rdev, FMT_8_8_8_8,
                                                            w, h, w, src_gpu_addr, size_in_bytes);
                rdev->r600_blit.primitives.set_render_target(rdev, COLOR_8_8_8_8,
                                                             w, h, dst_gpu_addr);
                rdev->r600_blit.primitives.set_scissors(rdev, 0, 0, w, h);
                rdev->r600_blit.primitives.set_vtx_resource(rdev, vb_gpu_addr);
                rdev->r600_blit.primitives.draw_auto(rdev);
                rdev->r600_blit.primitives.cp_set_surface_sync(rdev,
                                    PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
                                    size_in_bytes, dst_gpu_addr);

                vb_cpu_addr += 12;
                vb_gpu_addr += 4*12;
                src_gpu_addr += size_in_bytes;
                dst_gpu_addr += size_in_bytes;
                num_gpu_pages -= pages_per_loop;
        }
}