linux/drivers/gpu/drm/radeon/r600_blit.c
<<
>>
Prefs
   1/*
   2 * Copyright 2009 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice (including the next
  12 * paragraph) shall be included in all copies or substantial portions of the
  13 * Software.
  14 *
  15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18 * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  21 * DEALINGS IN THE SOFTWARE.
  22 *
  23 * Authors:
  24 *     Alex Deucher <alexander.deucher@amd.com>
  25 */
  26#include "drmP.h"
  27#include "drm.h"
  28#include "radeon_drm.h"
  29#include "radeon_drv.h"
  30
  31#include "r600_blit_shaders.h"
  32
  33#define DI_PT_RECTLIST        0x11
  34#define DI_INDEX_SIZE_16_BIT  0x0
  35#define DI_SRC_SEL_AUTO_INDEX 0x2
  36
  37#define FMT_8                 0x1
  38#define FMT_5_6_5             0x8
  39#define FMT_8_8_8_8           0x1a
  40#define COLOR_8               0x1
  41#define COLOR_5_6_5           0x8
  42#define COLOR_8_8_8_8         0x1a
  43
  44static inline void
  45set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64 gpu_addr)
  46{
  47        u32 cb_color_info;
  48        int pitch, slice;
  49        RING_LOCALS;
  50        DRM_DEBUG("\n");
  51
  52        h = (h + 7) & ~7;
  53        if (h < 8)
  54                h = 8;
  55
  56        cb_color_info = ((format << 2) | (1 << 27));
  57        pitch = (w / 8) - 1;
  58        slice = ((w * h) / 64) - 1;
  59
  60        if (((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_R600) &&
  61            ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV770)) {
  62                BEGIN_RING(21 + 2);
  63                OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
  64                OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
  65                OUT_RING(gpu_addr >> 8);
  66                OUT_RING(CP_PACKET3(R600_IT_SURFACE_BASE_UPDATE, 0));
  67                OUT_RING(2 << 0);
  68        } else {
  69                BEGIN_RING(21);
  70                OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
  71                OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
  72                OUT_RING(gpu_addr >> 8);
  73        }
  74
  75        OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
  76        OUT_RING((R600_CB_COLOR0_SIZE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
  77        OUT_RING((pitch << 0) | (slice << 10));
  78
  79        OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
  80        OUT_RING((R600_CB_COLOR0_VIEW - R600_SET_CONTEXT_REG_OFFSET) >> 2);
  81        OUT_RING(0);
  82
  83        OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
  84        OUT_RING((R600_CB_COLOR0_INFO - R600_SET_CONTEXT_REG_OFFSET) >> 2);
  85        OUT_RING(cb_color_info);
  86
  87        OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
  88        OUT_RING((R600_CB_COLOR0_TILE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
  89        OUT_RING(0);
  90
  91        OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
  92        OUT_RING((R600_CB_COLOR0_FRAG - R600_SET_CONTEXT_REG_OFFSET) >> 2);
  93        OUT_RING(0);
  94
  95        OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
  96        OUT_RING((R600_CB_COLOR0_MASK - R600_SET_CONTEXT_REG_OFFSET) >> 2);
  97        OUT_RING(0);
  98
  99        ADVANCE_RING();
 100}
 101
 102static inline void
 103cp_set_surface_sync(drm_radeon_private_t *dev_priv,
 104                    u32 sync_type, u32 size, u64 mc_addr)
 105{
 106        u32 cp_coher_size;
 107        RING_LOCALS;
 108        DRM_DEBUG("\n");
 109
 110        if (size == 0xffffffff)
 111                cp_coher_size = 0xffffffff;
 112        else
 113                cp_coher_size = ((size + 255) >> 8);
 114
 115        BEGIN_RING(5);
 116        OUT_RING(CP_PACKET3(R600_IT_SURFACE_SYNC, 3));
 117        OUT_RING(sync_type);
 118        OUT_RING(cp_coher_size);
 119        OUT_RING((mc_addr >> 8));
 120        OUT_RING(10); /* poll interval */
 121        ADVANCE_RING();
 122}
 123
 124static inline void
 125set_shaders(struct drm_device *dev)
 126{
 127        drm_radeon_private_t *dev_priv = dev->dev_private;
 128        u64 gpu_addr;
 129        int i;
 130        u32 *vs, *ps;
 131        uint32_t sq_pgm_resources;
 132        RING_LOCALS;
 133        DRM_DEBUG("\n");
 134
 135        /* load shaders */
 136        vs = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset);
 137        ps = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset + 256);
 138
 139        for (i = 0; i < r6xx_vs_size; i++)
 140                vs[i] = r6xx_vs[i];
 141        for (i = 0; i < r6xx_ps_size; i++)
 142                ps[i] = r6xx_ps[i];
 143
 144        dev_priv->blit_vb->used = 512;
 145
 146        gpu_addr = dev_priv->gart_buffers_offset + dev_priv->blit_vb->offset;
 147
 148        /* setup shader regs */
 149        sq_pgm_resources = (1 << 0);
 150
 151        BEGIN_RING(9 + 12);
 152        /* VS */
 153        OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
 154        OUT_RING((R600_SQ_PGM_START_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
 155        OUT_RING(gpu_addr >> 8);
 156
 157        OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
 158        OUT_RING((R600_SQ_PGM_RESOURCES_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
 159        OUT_RING(sq_pgm_resources);
 160
 161        OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
 162        OUT_RING((R600_SQ_PGM_CF_OFFSET_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
 163        OUT_RING(0);
 164
 165        /* PS */
 166        OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
 167        OUT_RING((R600_SQ_PGM_START_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
 168        OUT_RING((gpu_addr + 256) >> 8);
 169
 170        OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
 171        OUT_RING((R600_SQ_PGM_RESOURCES_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
 172        OUT_RING(sq_pgm_resources | (1 << 28));
 173
 174        OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
 175        OUT_RING((R600_SQ_PGM_EXPORTS_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
 176        OUT_RING(2);
 177
 178        OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
 179        OUT_RING((R600_SQ_PGM_CF_OFFSET_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
 180        OUT_RING(0);
 181        ADVANCE_RING();
 182
 183        cp_set_surface_sync(dev_priv,
 184                            R600_SH_ACTION_ENA, 512, gpu_addr);
 185}
 186
 187static inline void
 188set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr)
 189{
 190        uint32_t sq_vtx_constant_word2;
 191        RING_LOCALS;
 192        DRM_DEBUG("\n");
 193
 194        sq_vtx_constant_word2 = (((gpu_addr >> 32) & 0xff) | (16 << 8));
 195
 196        BEGIN_RING(9);
 197        OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
 198        OUT_RING(0x460);
 199        OUT_RING(gpu_addr & 0xffffffff);
 200        OUT_RING(48 - 1);
 201        OUT_RING(sq_vtx_constant_word2);
 202        OUT_RING(1 << 0);
 203        OUT_RING(0);
 204        OUT_RING(0);
 205        OUT_RING(R600_SQ_TEX_VTX_VALID_BUFFER << 30);
 206        ADVANCE_RING();
 207
 208        if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
 209            ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
 210            ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
 211            ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) ||
 212            ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710))
 213                cp_set_surface_sync(dev_priv,
 214                                    R600_TC_ACTION_ENA, 48, gpu_addr);
 215        else
 216                cp_set_surface_sync(dev_priv,
 217                                    R600_VC_ACTION_ENA, 48, gpu_addr);
 218}
 219
 220static inline void
 221set_tex_resource(drm_radeon_private_t *dev_priv,
 222                 int format, int w, int h, int pitch, u64 gpu_addr)
 223{
 224        uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
 225        RING_LOCALS;
 226        DRM_DEBUG("\n");
 227
 228        if (h < 1)
 229                h = 1;
 230
 231        sq_tex_resource_word0 = (1 << 0);
 232        sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 8) |
 233                                  ((w - 1) << 19));
 234
 235        sq_tex_resource_word1 = (format << 26);
 236        sq_tex_resource_word1 |= ((h - 1) << 0);
 237
 238        sq_tex_resource_word4 = ((1 << 14) |
 239                                 (0 << 16) |
 240                                 (1 << 19) |
 241                                 (2 << 22) |
 242                                 (3 << 25));
 243
 244        BEGIN_RING(9);
 245        OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
 246        OUT_RING(0);
 247        OUT_RING(sq_tex_resource_word0);
 248        OUT_RING(sq_tex_resource_word1);
 249        OUT_RING(gpu_addr >> 8);
 250        OUT_RING(gpu_addr >> 8);
 251        OUT_RING(sq_tex_resource_word4);
 252        OUT_RING(0);
 253        OUT_RING(R600_SQ_TEX_VTX_VALID_TEXTURE << 30);
 254        ADVANCE_RING();
 255
 256}
 257
 258static inline void
 259set_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2)
 260{
 261        RING_LOCALS;
 262        DRM_DEBUG("\n");
 263
 264        BEGIN_RING(12);
 265        OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
 266        OUT_RING((R600_PA_SC_SCREEN_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
 267        OUT_RING((x1 << 0) | (y1 << 16));
 268        OUT_RING((x2 << 0) | (y2 << 16));
 269
 270        OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
 271        OUT_RING((R600_PA_SC_GENERIC_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
 272        OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31));
 273        OUT_RING((x2 << 0) | (y2 << 16));
 274
 275        OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
 276        OUT_RING((R600_PA_SC_WINDOW_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
 277        OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31));
 278        OUT_RING((x2 << 0) | (y2 << 16));
 279        ADVANCE_RING();
 280}
 281
 282static inline void
 283draw_auto(drm_radeon_private_t *dev_priv)
 284{
 285        RING_LOCALS;
 286        DRM_DEBUG("\n");
 287
 288        BEGIN_RING(10);
 289        OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
 290        OUT_RING((R600_VGT_PRIMITIVE_TYPE - R600_SET_CONFIG_REG_OFFSET) >> 2);
 291        OUT_RING(DI_PT_RECTLIST);
 292
 293        OUT_RING(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
 294        OUT_RING(DI_INDEX_SIZE_16_BIT);
 295
 296        OUT_RING(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
 297        OUT_RING(1);
 298
 299        OUT_RING(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1));
 300        OUT_RING(3);
 301        OUT_RING(DI_SRC_SEL_AUTO_INDEX);
 302
 303        ADVANCE_RING();
 304        COMMIT_RING();
 305}
 306
 307static inline void
 308set_default_state(drm_radeon_private_t *dev_priv)
 309{
 310        int i;
 311        u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
 312        u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
 313        int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs;
 314        int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads;
 315        int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries;
 316        RING_LOCALS;
 317
 318        switch ((dev_priv->flags & RADEON_FAMILY_MASK)) {
 319        case CHIP_R600:
 320                num_ps_gprs = 192;
 321                num_vs_gprs = 56;
 322                num_temp_gprs = 4;
 323                num_gs_gprs = 0;
 324                num_es_gprs = 0;
 325                num_ps_threads = 136;
 326                num_vs_threads = 48;
 327                num_gs_threads = 4;
 328                num_es_threads = 4;
 329                num_ps_stack_entries = 128;
 330                num_vs_stack_entries = 128;
 331                num_gs_stack_entries = 0;
 332                num_es_stack_entries = 0;
 333                break;
 334        case CHIP_RV630:
 335        case CHIP_RV635:
 336                num_ps_gprs = 84;
 337                num_vs_gprs = 36;
 338                num_temp_gprs = 4;
 339                num_gs_gprs = 0;
 340                num_es_gprs = 0;
 341                num_ps_threads = 144;
 342                num_vs_threads = 40;
 343                num_gs_threads = 4;
 344                num_es_threads = 4;
 345                num_ps_stack_entries = 40;
 346                num_vs_stack_entries = 40;
 347                num_gs_stack_entries = 32;
 348                num_es_stack_entries = 16;
 349                break;
 350        case CHIP_RV610:
 351        case CHIP_RV620:
 352        case CHIP_RS780:
 353        case CHIP_RS880:
 354        default:
 355                num_ps_gprs = 84;
 356                num_vs_gprs = 36;
 357                num_temp_gprs = 4;
 358                num_gs_gprs = 0;
 359                num_es_gprs = 0;
 360                num_ps_threads = 136;
 361                num_vs_threads = 48;
 362                num_gs_threads = 4;
 363                num_es_threads = 4;
 364                num_ps_stack_entries = 40;
 365                num_vs_stack_entries = 40;
 366                num_gs_stack_entries = 32;
 367                num_es_stack_entries = 16;
 368                break;
 369        case CHIP_RV670:
 370                num_ps_gprs = 144;
 371                num_vs_gprs = 40;
 372                num_temp_gprs = 4;
 373                num_gs_gprs = 0;
 374                num_es_gprs = 0;
 375                num_ps_threads = 136;
 376                num_vs_threads = 48;
 377                num_gs_threads = 4;
 378                num_es_threads = 4;
 379                num_ps_stack_entries = 40;
 380                num_vs_stack_entries = 40;
 381                num_gs_stack_entries = 32;
 382                num_es_stack_entries = 16;
 383                break;
 384        case CHIP_RV770:
 385                num_ps_gprs = 192;
 386                num_vs_gprs = 56;
 387                num_temp_gprs = 4;
 388                num_gs_gprs = 0;
 389                num_es_gprs = 0;
 390                num_ps_threads = 188;
 391                num_vs_threads = 60;
 392                num_gs_threads = 0;
 393                num_es_threads = 0;
 394                num_ps_stack_entries = 256;
 395                num_vs_stack_entries = 256;
 396                num_gs_stack_entries = 0;
 397                num_es_stack_entries = 0;
 398                break;
 399        case CHIP_RV730:
 400        case CHIP_RV740:
 401                num_ps_gprs = 84;
 402                num_vs_gprs = 36;
 403                num_temp_gprs = 4;
 404                num_gs_gprs = 0;
 405                num_es_gprs = 0;
 406                num_ps_threads = 188;
 407                num_vs_threads = 60;
 408                num_gs_threads = 0;
 409                num_es_threads = 0;
 410                num_ps_stack_entries = 128;
 411                num_vs_stack_entries = 128;
 412                num_gs_stack_entries = 0;
 413                num_es_stack_entries = 0;
 414                break;
 415        case CHIP_RV710:
 416                num_ps_gprs = 192;
 417                num_vs_gprs = 56;
 418                num_temp_gprs = 4;
 419                num_gs_gprs = 0;
 420                num_es_gprs = 0;
 421                num_ps_threads = 144;
 422                num_vs_threads = 48;
 423                num_gs_threads = 0;
 424                num_es_threads = 0;
 425                num_ps_stack_entries = 128;
 426                num_vs_stack_entries = 128;
 427                num_gs_stack_entries = 0;
 428                num_es_stack_entries = 0;
 429                break;
 430        }
 431
 432        if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
 433            ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
 434            ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
 435            ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) ||
 436            ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710))
 437                sq_config = 0;
 438        else
 439                sq_config = R600_VC_ENABLE;
 440
 441        sq_config |= (R600_DX9_CONSTS |
 442                      R600_ALU_INST_PREFER_VECTOR |
 443                      R600_PS_PRIO(0) |
 444                      R600_VS_PRIO(1) |
 445                      R600_GS_PRIO(2) |
 446                      R600_ES_PRIO(3));
 447
 448        sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(num_ps_gprs) |
 449                                  R600_NUM_VS_GPRS(num_vs_gprs) |
 450                                  R600_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
 451        sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(num_gs_gprs) |
 452                                  R600_NUM_ES_GPRS(num_es_gprs));
 453        sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(num_ps_threads) |
 454                                   R600_NUM_VS_THREADS(num_vs_threads) |
 455                                   R600_NUM_GS_THREADS(num_gs_threads) |
 456                                   R600_NUM_ES_THREADS(num_es_threads));
 457        sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
 458                                    R600_NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
 459        sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
 460                                    R600_NUM_ES_STACK_ENTRIES(num_es_stack_entries));
 461
 462        if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770) {
 463                BEGIN_RING(r7xx_default_size + 10);
 464                for (i = 0; i < r7xx_default_size; i++)
 465                        OUT_RING(r7xx_default_state[i]);
 466        } else {
 467                BEGIN_RING(r6xx_default_size + 10);
 468                for (i = 0; i < r6xx_default_size; i++)
 469                        OUT_RING(r6xx_default_state[i]);
 470        }
 471        OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
 472        OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
 473        /* SQ config */
 474        OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 6));
 475        OUT_RING((R600_SQ_CONFIG - R600_SET_CONFIG_REG_OFFSET) >> 2);
 476        OUT_RING(sq_config);
 477        OUT_RING(sq_gpr_resource_mgmt_1);
 478        OUT_RING(sq_gpr_resource_mgmt_2);
 479        OUT_RING(sq_thread_resource_mgmt);
 480        OUT_RING(sq_stack_resource_mgmt_1);
 481        OUT_RING(sq_stack_resource_mgmt_2);
 482        ADVANCE_RING();
 483}
 484
 485static inline uint32_t i2f(uint32_t input)
 486{
 487        u32 result, i, exponent, fraction;
 488
 489        if ((input & 0x3fff) == 0)
 490                result = 0; /* 0 is a special case */
 491        else {
 492                exponent = 140; /* exponent biased by 127; */
 493                fraction = (input & 0x3fff) << 10; /* cheat and only
 494                                                      handle numbers below 2^^15 */
 495                for (i = 0; i < 14; i++) {
 496                        if (fraction & 0x800000)
 497                                break;
 498                        else {
 499                                fraction = fraction << 1; /* keep
 500                                                             shifting left until top bit = 1 */
 501                                exponent = exponent - 1;
 502                        }
 503                }
 504                result = exponent << 23 | (fraction & 0x7fffff); /* mask
 505                                                                    off top bit; assumed 1 */
 506        }
 507        return result;
 508}
 509
 510
 511static inline int r600_nomm_get_vb(struct drm_device *dev)
 512{
 513        drm_radeon_private_t *dev_priv = dev->dev_private;
 514        dev_priv->blit_vb = radeon_freelist_get(dev);
 515        if (!dev_priv->blit_vb) {
 516                DRM_ERROR("Unable to allocate vertex buffer for blit\n");
 517                return -EAGAIN;
 518        }
 519        return 0;
 520}
 521
 522static inline void r600_nomm_put_vb(struct drm_device *dev)
 523{
 524        drm_radeon_private_t *dev_priv = dev->dev_private;
 525
 526        dev_priv->blit_vb->used = 0;
 527        radeon_cp_discard_buffer(dev, dev_priv->blit_vb->file_priv->master, dev_priv->blit_vb);
 528}
 529
 530static inline void *r600_nomm_get_vb_ptr(struct drm_device *dev)
 531{
 532        drm_radeon_private_t *dev_priv = dev->dev_private;
 533        return (((char *)dev->agp_buffer_map->handle +
 534                 dev_priv->blit_vb->offset + dev_priv->blit_vb->used));
 535}
 536
 537int
 538r600_prepare_blit_copy(struct drm_device *dev, struct drm_file *file_priv)
 539{
 540        drm_radeon_private_t *dev_priv = dev->dev_private;
 541        DRM_DEBUG("\n");
 542
 543        r600_nomm_get_vb(dev);
 544
 545        dev_priv->blit_vb->file_priv = file_priv;
 546
 547        set_default_state(dev_priv);
 548        set_shaders(dev);
 549
 550        return 0;
 551}
 552
 553
 554void
 555r600_done_blit_copy(struct drm_device *dev)
 556{
 557        drm_radeon_private_t *dev_priv = dev->dev_private;
 558        RING_LOCALS;
 559        DRM_DEBUG("\n");
 560
 561        BEGIN_RING(5);
 562        OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
 563        OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
 564        /* wait for 3D idle clean */
 565        OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
 566        OUT_RING((R600_WAIT_UNTIL - R600_SET_CONFIG_REG_OFFSET) >> 2);
 567        OUT_RING(RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN);
 568
 569        ADVANCE_RING();
 570        COMMIT_RING();
 571
 572        r600_nomm_put_vb(dev);
 573}
 574
 575void
 576r600_blit_copy(struct drm_device *dev,
 577               uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
 578               int size_bytes)
 579{
 580        drm_radeon_private_t *dev_priv = dev->dev_private;
 581        int max_bytes;
 582        u64 vb_addr;
 583        u32 *vb;
 584
 585        vb = r600_nomm_get_vb_ptr(dev);
 586
 587        if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) {
 588                max_bytes = 8192;
 589
 590                while (size_bytes) {
 591                        int cur_size = size_bytes;
 592                        int src_x = src_gpu_addr & 255;
 593                        int dst_x = dst_gpu_addr & 255;
 594                        int h = 1;
 595                        src_gpu_addr = src_gpu_addr & ~255;
 596                        dst_gpu_addr = dst_gpu_addr & ~255;
 597
 598                        if (!src_x && !dst_x) {
 599                                h = (cur_size / max_bytes);
 600                                if (h > 8192)
 601                                        h = 8192;
 602                                if (h == 0)
 603                                        h = 1;
 604                                else
 605                                        cur_size = max_bytes;
 606                        } else {
 607                                if (cur_size > max_bytes)
 608                                        cur_size = max_bytes;
 609                                if (cur_size > (max_bytes - dst_x))
 610                                        cur_size = (max_bytes - dst_x);
 611                                if (cur_size > (max_bytes - src_x))
 612                                        cur_size = (max_bytes - src_x);
 613                        }
 614
 615                        if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
 616
 617                                r600_nomm_put_vb(dev);
 618                                r600_nomm_get_vb(dev);
 619                                if (!dev_priv->blit_vb)
 620                                        return;
 621                                set_shaders(dev);
 622                                vb = r600_nomm_get_vb_ptr(dev);
 623                        }
 624
 625                        vb[0] = i2f(dst_x);
 626                        vb[1] = 0;
 627                        vb[2] = i2f(src_x);
 628                        vb[3] = 0;
 629
 630                        vb[4] = i2f(dst_x);
 631                        vb[5] = i2f(h);
 632                        vb[6] = i2f(src_x);
 633                        vb[7] = i2f(h);
 634
 635                        vb[8] = i2f(dst_x + cur_size);
 636                        vb[9] = i2f(h);
 637                        vb[10] = i2f(src_x + cur_size);
 638                        vb[11] = i2f(h);
 639
 640                        /* src */
 641                        set_tex_resource(dev_priv, FMT_8,
 642                                         src_x + cur_size, h, src_x + cur_size,
 643                                         src_gpu_addr);
 644
 645                        cp_set_surface_sync(dev_priv,
 646                                            R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
 647
 648                        /* dst */
 649                        set_render_target(dev_priv, COLOR_8,
 650                                          dst_x + cur_size, h,
 651                                          dst_gpu_addr);
 652
 653                        /* scissors */
 654                        set_scissors(dev_priv, dst_x, 0, dst_x + cur_size, h);
 655
 656                        /* Vertex buffer setup */
 657                        vb_addr = dev_priv->gart_buffers_offset +
 658                                dev_priv->blit_vb->offset +
 659                                dev_priv->blit_vb->used;
 660                        set_vtx_resource(dev_priv, vb_addr);
 661
 662                        /* draw */
 663                        draw_auto(dev_priv);
 664
 665                        cp_set_surface_sync(dev_priv,
 666                                            R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
 667                                            cur_size * h, dst_gpu_addr);
 668
 669                        vb += 12;
 670                        dev_priv->blit_vb->used += 12 * 4;
 671
 672                        src_gpu_addr += cur_size * h;
 673                        dst_gpu_addr += cur_size * h;
 674                        size_bytes -= cur_size * h;
 675                }
 676        } else {
 677                max_bytes = 8192 * 4;
 678
 679                while (size_bytes) {
 680                        int cur_size = size_bytes;
 681                        int src_x = (src_gpu_addr & 255);
 682                        int dst_x = (dst_gpu_addr & 255);
 683                        int h = 1;
 684                        src_gpu_addr = src_gpu_addr & ~255;
 685                        dst_gpu_addr = dst_gpu_addr & ~255;
 686
 687                        if (!src_x && !dst_x) {
 688                                h = (cur_size / max_bytes);
 689                                if (h > 8192)
 690                                        h = 8192;
 691                                if (h == 0)
 692                                        h = 1;
 693                                else
 694                                        cur_size = max_bytes;
 695                        } else {
 696                                if (cur_size > max_bytes)
 697                                        cur_size = max_bytes;
 698                                if (cur_size > (max_bytes - dst_x))
 699                                        cur_size = (max_bytes - dst_x);
 700                                if (cur_size > (max_bytes - src_x))
 701                                        cur_size = (max_bytes - src_x);
 702                        }
 703
 704                        if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
 705                                r600_nomm_put_vb(dev);
 706                                r600_nomm_get_vb(dev);
 707                                if (!dev_priv->blit_vb)
 708                                        return;
 709
 710                                set_shaders(dev);
 711                                vb = r600_nomm_get_vb_ptr(dev);
 712                        }
 713
 714                        vb[0] = i2f(dst_x / 4);
 715                        vb[1] = 0;
 716                        vb[2] = i2f(src_x / 4);
 717                        vb[3] = 0;
 718
 719                        vb[4] = i2f(dst_x / 4);
 720                        vb[5] = i2f(h);
 721                        vb[6] = i2f(src_x / 4);
 722                        vb[7] = i2f(h);
 723
 724                        vb[8] = i2f((dst_x + cur_size) / 4);
 725                        vb[9] = i2f(h);
 726                        vb[10] = i2f((src_x + cur_size) / 4);
 727                        vb[11] = i2f(h);
 728
 729                        /* src */
 730                        set_tex_resource(dev_priv, FMT_8_8_8_8,
 731                                         (src_x + cur_size) / 4,
 732                                         h, (src_x + cur_size) / 4,
 733                                         src_gpu_addr);
 734
 735                        cp_set_surface_sync(dev_priv,
 736                                            R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
 737
 738                        /* dst */
 739                        set_render_target(dev_priv, COLOR_8_8_8_8,
 740                                          (dst_x + cur_size) / 4, h,
 741                                          dst_gpu_addr);
 742
 743                        /* scissors */
 744                        set_scissors(dev_priv, (dst_x / 4), 0, (dst_x + cur_size / 4), h);
 745
 746                        /* Vertex buffer setup */
 747                        vb_addr = dev_priv->gart_buffers_offset +
 748                                dev_priv->blit_vb->offset +
 749                                dev_priv->blit_vb->used;
 750                        set_vtx_resource(dev_priv, vb_addr);
 751
 752                        /* draw */
 753                        draw_auto(dev_priv);
 754
 755                        cp_set_surface_sync(dev_priv,
 756                                            R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
 757                                            cur_size * h, dst_gpu_addr);
 758
 759                        vb += 12;
 760                        dev_priv->blit_vb->used += 12 * 4;
 761
 762                        src_gpu_addr += cur_size * h;
 763                        dst_gpu_addr += cur_size * h;
 764                        size_bytes -= cur_size * h;
 765                }
 766        }
 767}
 768
 769void
 770r600_blit_swap(struct drm_device *dev,
 771               uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
 772               int sx, int sy, int dx, int dy,
 773               int w, int h, int src_pitch, int dst_pitch, int cpp)
 774{
 775        drm_radeon_private_t *dev_priv = dev->dev_private;
 776        int cb_format, tex_format;
 777        int sx2, sy2, dx2, dy2;
 778        u64 vb_addr;
 779        u32 *vb;
 780
 781        if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
 782
 783                r600_nomm_put_vb(dev);
 784                r600_nomm_get_vb(dev);
 785                if (!dev_priv->blit_vb)
 786                        return;
 787
 788                set_shaders(dev);
 789        }
 790        vb = r600_nomm_get_vb_ptr(dev);
 791
 792        sx2 = sx + w;
 793        sy2 = sy + h;
 794        dx2 = dx + w;
 795        dy2 = dy + h;
 796
 797        vb[0] = i2f(dx);
 798        vb[1] = i2f(dy);
 799        vb[2] = i2f(sx);
 800        vb[3] = i2f(sy);
 801
 802        vb[4] = i2f(dx);
 803        vb[5] = i2f(dy2);
 804        vb[6] = i2f(sx);
 805        vb[7] = i2f(sy2);
 806
 807        vb[8] = i2f(dx2);
 808        vb[9] = i2f(dy2);
 809        vb[10] = i2f(sx2);
 810        vb[11] = i2f(sy2);
 811
 812        switch(cpp) {
 813        case 4:
 814                cb_format = COLOR_8_8_8_8;
 815                tex_format = FMT_8_8_8_8;
 816                break;
 817        case 2:
 818                cb_format = COLOR_5_6_5;
 819                tex_format = FMT_5_6_5;
 820                break;
 821        default:
 822                cb_format = COLOR_8;
 823                tex_format = FMT_8;
 824                break;
 825        }
 826
 827        /* src */
 828        set_tex_resource(dev_priv, tex_format,
 829                         src_pitch / cpp,
 830                         sy2, src_pitch / cpp,
 831                         src_gpu_addr);
 832
 833        cp_set_surface_sync(dev_priv,
 834                            R600_TC_ACTION_ENA, src_pitch * sy2, src_gpu_addr);
 835
 836        /* dst */
 837        set_render_target(dev_priv, cb_format,
 838                          dst_pitch / cpp, dy2,
 839                          dst_gpu_addr);
 840
 841        /* scissors */
 842        set_scissors(dev_priv, dx, dy, dx2, dy2);
 843
 844        /* Vertex buffer setup */
 845        vb_addr = dev_priv->gart_buffers_offset +
 846                dev_priv->blit_vb->offset +
 847                dev_priv->blit_vb->used;
 848        set_vtx_resource(dev_priv, vb_addr);
 849
 850        /* draw */
 851        draw_auto(dev_priv);
 852
 853        cp_set_surface_sync(dev_priv,
 854                            R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
 855                            dst_pitch * dy2, dst_gpu_addr);
 856
 857        dev_priv->blit_vb->used += 12 * 4;
 858}
 859