linux/drivers/gpu/drm/radeon/r600_blit.c
<<
>>
Prefs
   1/*
   2 * Copyright 2009 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice (including the next
  12 * paragraph) shall be included in all copies or substantial portions of the
  13 * Software.
  14 *
  15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18 * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  21 * DEALINGS IN THE SOFTWARE.
  22 *
  23 * Authors:
  24 *     Alex Deucher <alexander.deucher@amd.com>
  25 */
  26#include "drmP.h"
  27#include "drm.h"
  28#include "radeon_drm.h"
  29#include "radeon_drv.h"
  30
  31#include "r600_blit_shaders.h"
  32
  33#define DI_PT_RECTLIST        0x11
  34#define DI_INDEX_SIZE_16_BIT  0x0
  35#define DI_SRC_SEL_AUTO_INDEX 0x2
  36
  37#define FMT_8                 0x1
  38#define FMT_5_6_5             0x8
  39#define FMT_8_8_8_8           0x1a
  40#define COLOR_8               0x1
  41#define COLOR_5_6_5           0x8
  42#define COLOR_8_8_8_8         0x1a
  43
  44static inline void
  45set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64 gpu_addr)
  46{
  47        u32 cb_color_info;
  48        int pitch, slice;
  49        RING_LOCALS;
  50        DRM_DEBUG("\n");
  51
  52        h = ALIGN(h, 8);
  53        if (h < 8)
  54                h = 8;
  55
  56        cb_color_info = ((format << 2) | (1 << 27));
  57        pitch = (w / 8) - 1;
  58        slice = ((w * h) / 64) - 1;
  59
  60        if (((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_R600) &&
  61            ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV770)) {
  62                BEGIN_RING(21 + 2);
  63                OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
  64                OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
  65                OUT_RING(gpu_addr >> 8);
  66                OUT_RING(CP_PACKET3(R600_IT_SURFACE_BASE_UPDATE, 0));
  67                OUT_RING(2 << 0);
  68        } else {
  69                BEGIN_RING(21);
  70                OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
  71                OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
  72                OUT_RING(gpu_addr >> 8);
  73        }
  74
  75        OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
  76        OUT_RING((R600_CB_COLOR0_SIZE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
  77        OUT_RING((pitch << 0) | (slice << 10));
  78
  79        OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
  80        OUT_RING((R600_CB_COLOR0_VIEW - R600_SET_CONTEXT_REG_OFFSET) >> 2);
  81        OUT_RING(0);
  82
  83        OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
  84        OUT_RING((R600_CB_COLOR0_INFO - R600_SET_CONTEXT_REG_OFFSET) >> 2);
  85        OUT_RING(cb_color_info);
  86
  87        OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
  88        OUT_RING((R600_CB_COLOR0_TILE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
  89        OUT_RING(0);
  90
  91        OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
  92        OUT_RING((R600_CB_COLOR0_FRAG - R600_SET_CONTEXT_REG_OFFSET) >> 2);
  93        OUT_RING(0);
  94
  95        OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
  96        OUT_RING((R600_CB_COLOR0_MASK - R600_SET_CONTEXT_REG_OFFSET) >> 2);
  97        OUT_RING(0);
  98
  99        ADVANCE_RING();
 100}
 101
 102static inline void
 103cp_set_surface_sync(drm_radeon_private_t *dev_priv,
 104                    u32 sync_type, u32 size, u64 mc_addr)
 105{
 106        u32 cp_coher_size;
 107        RING_LOCALS;
 108        DRM_DEBUG("\n");
 109
 110        if (size == 0xffffffff)
 111                cp_coher_size = 0xffffffff;
 112        else
 113                cp_coher_size = ((size + 255) >> 8);
 114
 115        BEGIN_RING(5);
 116        OUT_RING(CP_PACKET3(R600_IT_SURFACE_SYNC, 3));
 117        OUT_RING(sync_type);
 118        OUT_RING(cp_coher_size);
 119        OUT_RING((mc_addr >> 8));
 120        OUT_RING(10); /* poll interval */
 121        ADVANCE_RING();
 122}
 123
 124static inline void
 125set_shaders(struct drm_device *dev)
 126{
 127        drm_radeon_private_t *dev_priv = dev->dev_private;
 128        u64 gpu_addr;
 129        int i;
 130        u32 *vs, *ps;
 131        uint32_t sq_pgm_resources;
 132        RING_LOCALS;
 133        DRM_DEBUG("\n");
 134
 135        /* load shaders */
 136        vs = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset);
 137        ps = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset + 256);
 138
 139        for (i = 0; i < r6xx_vs_size; i++)
 140                vs[i] = cpu_to_le32(r6xx_vs[i]);
 141        for (i = 0; i < r6xx_ps_size; i++)
 142                ps[i] = cpu_to_le32(r6xx_ps[i]);
 143
 144        dev_priv->blit_vb->used = 512;
 145
 146        gpu_addr = dev_priv->gart_buffers_offset + dev_priv->blit_vb->offset;
 147
 148        /* setup shader regs */
 149        sq_pgm_resources = (1 << 0);
 150
 151        BEGIN_RING(9 + 12);
 152        /* VS */
 153        OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
 154        OUT_RING((R600_SQ_PGM_START_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
 155        OUT_RING(gpu_addr >> 8);
 156
 157        OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
 158        OUT_RING((R600_SQ_PGM_RESOURCES_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
 159        OUT_RING(sq_pgm_resources);
 160
 161        OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
 162        OUT_RING((R600_SQ_PGM_CF_OFFSET_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
 163        OUT_RING(0);
 164
 165        /* PS */
 166        OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
 167        OUT_RING((R600_SQ_PGM_START_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
 168        OUT_RING((gpu_addr + 256) >> 8);
 169
 170        OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
 171        OUT_RING((R600_SQ_PGM_RESOURCES_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
 172        OUT_RING(sq_pgm_resources | (1 << 28));
 173
 174        OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
 175        OUT_RING((R600_SQ_PGM_EXPORTS_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
 176        OUT_RING(2);
 177
 178        OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
 179        OUT_RING((R600_SQ_PGM_CF_OFFSET_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
 180        OUT_RING(0);
 181        ADVANCE_RING();
 182
 183        cp_set_surface_sync(dev_priv,
 184                            R600_SH_ACTION_ENA, 512, gpu_addr);
 185}
 186
 187static inline void
 188set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr)
 189{
 190        uint32_t sq_vtx_constant_word2;
 191        RING_LOCALS;
 192        DRM_DEBUG("\n");
 193
 194        sq_vtx_constant_word2 = (((gpu_addr >> 32) & 0xff) | (16 << 8));
 195#ifdef __BIG_ENDIAN
 196        sq_vtx_constant_word2 |= (2 << 30);
 197#endif
 198
 199        BEGIN_RING(9);
 200        OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
 201        OUT_RING(0x460);
 202        OUT_RING(gpu_addr & 0xffffffff);
 203        OUT_RING(48 - 1);
 204        OUT_RING(sq_vtx_constant_word2);
 205        OUT_RING(1 << 0);
 206        OUT_RING(0);
 207        OUT_RING(0);
 208        OUT_RING(R600_SQ_TEX_VTX_VALID_BUFFER << 30);
 209        ADVANCE_RING();
 210
 211        if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
 212            ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
 213            ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
 214            ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) ||
 215            ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710))
 216                cp_set_surface_sync(dev_priv,
 217                                    R600_TC_ACTION_ENA, 48, gpu_addr);
 218        else
 219                cp_set_surface_sync(dev_priv,
 220                                    R600_VC_ACTION_ENA, 48, gpu_addr);
 221}
 222
 223static inline void
 224set_tex_resource(drm_radeon_private_t *dev_priv,
 225                 int format, int w, int h, int pitch, u64 gpu_addr)
 226{
 227        uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
 228        RING_LOCALS;
 229        DRM_DEBUG("\n");
 230
 231        if (h < 1)
 232                h = 1;
 233
 234        sq_tex_resource_word0 = (1 << 0);
 235        sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 8) |
 236                                  ((w - 1) << 19));
 237
 238        sq_tex_resource_word1 = (format << 26);
 239        sq_tex_resource_word1 |= ((h - 1) << 0);
 240
 241        sq_tex_resource_word4 = ((1 << 14) |
 242                                 (0 << 16) |
 243                                 (1 << 19) |
 244                                 (2 << 22) |
 245                                 (3 << 25));
 246
 247        BEGIN_RING(9);
 248        OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
 249        OUT_RING(0);
 250        OUT_RING(sq_tex_resource_word0);
 251        OUT_RING(sq_tex_resource_word1);
 252        OUT_RING(gpu_addr >> 8);
 253        OUT_RING(gpu_addr >> 8);
 254        OUT_RING(sq_tex_resource_word4);
 255        OUT_RING(0);
 256        OUT_RING(R600_SQ_TEX_VTX_VALID_TEXTURE << 30);
 257        ADVANCE_RING();
 258
 259}
 260
 261static inline void
 262set_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2)
 263{
 264        RING_LOCALS;
 265        DRM_DEBUG("\n");
 266
 267        BEGIN_RING(12);
 268        OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
 269        OUT_RING((R600_PA_SC_SCREEN_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
 270        OUT_RING((x1 << 0) | (y1 << 16));
 271        OUT_RING((x2 << 0) | (y2 << 16));
 272
 273        OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
 274        OUT_RING((R600_PA_SC_GENERIC_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
 275        OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31));
 276        OUT_RING((x2 << 0) | (y2 << 16));
 277
 278        OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
 279        OUT_RING((R600_PA_SC_WINDOW_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
 280        OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31));
 281        OUT_RING((x2 << 0) | (y2 << 16));
 282        ADVANCE_RING();
 283}
 284
 285static inline void
 286draw_auto(drm_radeon_private_t *dev_priv)
 287{
 288        RING_LOCALS;
 289        DRM_DEBUG("\n");
 290
 291        BEGIN_RING(10);
 292        OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
 293        OUT_RING((R600_VGT_PRIMITIVE_TYPE - R600_SET_CONFIG_REG_OFFSET) >> 2);
 294        OUT_RING(DI_PT_RECTLIST);
 295
 296        OUT_RING(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
 297#ifdef __BIG_ENDIAN
 298        OUT_RING((2 << 2) | DI_INDEX_SIZE_16_BIT);
 299#else
 300        OUT_RING(DI_INDEX_SIZE_16_BIT);
 301#endif
 302
 303        OUT_RING(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
 304        OUT_RING(1);
 305
 306        OUT_RING(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1));
 307        OUT_RING(3);
 308        OUT_RING(DI_SRC_SEL_AUTO_INDEX);
 309
 310        ADVANCE_RING();
 311        COMMIT_RING();
 312}
 313
 314static inline void
 315set_default_state(drm_radeon_private_t *dev_priv)
 316{
 317        int i;
 318        u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
 319        u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
 320        int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs;
 321        int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads;
 322        int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries;
 323        RING_LOCALS;
 324
 325        switch ((dev_priv->flags & RADEON_FAMILY_MASK)) {
 326        case CHIP_R600:
 327                num_ps_gprs = 192;
 328                num_vs_gprs = 56;
 329                num_temp_gprs = 4;
 330                num_gs_gprs = 0;
 331                num_es_gprs = 0;
 332                num_ps_threads = 136;
 333                num_vs_threads = 48;
 334                num_gs_threads = 4;
 335                num_es_threads = 4;
 336                num_ps_stack_entries = 128;
 337                num_vs_stack_entries = 128;
 338                num_gs_stack_entries = 0;
 339                num_es_stack_entries = 0;
 340                break;
 341        case CHIP_RV630:
 342        case CHIP_RV635:
 343                num_ps_gprs = 84;
 344                num_vs_gprs = 36;
 345                num_temp_gprs = 4;
 346                num_gs_gprs = 0;
 347                num_es_gprs = 0;
 348                num_ps_threads = 144;
 349                num_vs_threads = 40;
 350                num_gs_threads = 4;
 351                num_es_threads = 4;
 352                num_ps_stack_entries = 40;
 353                num_vs_stack_entries = 40;
 354                num_gs_stack_entries = 32;
 355                num_es_stack_entries = 16;
 356                break;
 357        case CHIP_RV610:
 358        case CHIP_RV620:
 359        case CHIP_RS780:
 360        case CHIP_RS880:
 361        default:
 362                num_ps_gprs = 84;
 363                num_vs_gprs = 36;
 364                num_temp_gprs = 4;
 365                num_gs_gprs = 0;
 366                num_es_gprs = 0;
 367                num_ps_threads = 136;
 368                num_vs_threads = 48;
 369                num_gs_threads = 4;
 370                num_es_threads = 4;
 371                num_ps_stack_entries = 40;
 372                num_vs_stack_entries = 40;
 373                num_gs_stack_entries = 32;
 374                num_es_stack_entries = 16;
 375                break;
 376        case CHIP_RV670:
 377                num_ps_gprs = 144;
 378                num_vs_gprs = 40;
 379                num_temp_gprs = 4;
 380                num_gs_gprs = 0;
 381                num_es_gprs = 0;
 382                num_ps_threads = 136;
 383                num_vs_threads = 48;
 384                num_gs_threads = 4;
 385                num_es_threads = 4;
 386                num_ps_stack_entries = 40;
 387                num_vs_stack_entries = 40;
 388                num_gs_stack_entries = 32;
 389                num_es_stack_entries = 16;
 390                break;
 391        case CHIP_RV770:
 392                num_ps_gprs = 192;
 393                num_vs_gprs = 56;
 394                num_temp_gprs = 4;
 395                num_gs_gprs = 0;
 396                num_es_gprs = 0;
 397                num_ps_threads = 188;
 398                num_vs_threads = 60;
 399                num_gs_threads = 0;
 400                num_es_threads = 0;
 401                num_ps_stack_entries = 256;
 402                num_vs_stack_entries = 256;
 403                num_gs_stack_entries = 0;
 404                num_es_stack_entries = 0;
 405                break;
 406        case CHIP_RV730:
 407        case CHIP_RV740:
 408                num_ps_gprs = 84;
 409                num_vs_gprs = 36;
 410                num_temp_gprs = 4;
 411                num_gs_gprs = 0;
 412                num_es_gprs = 0;
 413                num_ps_threads = 188;
 414                num_vs_threads = 60;
 415                num_gs_threads = 0;
 416                num_es_threads = 0;
 417                num_ps_stack_entries = 128;
 418                num_vs_stack_entries = 128;
 419                num_gs_stack_entries = 0;
 420                num_es_stack_entries = 0;
 421                break;
 422        case CHIP_RV710:
 423                num_ps_gprs = 192;
 424                num_vs_gprs = 56;
 425                num_temp_gprs = 4;
 426                num_gs_gprs = 0;
 427                num_es_gprs = 0;
 428                num_ps_threads = 144;
 429                num_vs_threads = 48;
 430                num_gs_threads = 0;
 431                num_es_threads = 0;
 432                num_ps_stack_entries = 128;
 433                num_vs_stack_entries = 128;
 434                num_gs_stack_entries = 0;
 435                num_es_stack_entries = 0;
 436                break;
 437        }
 438
 439        if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
 440            ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
 441            ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
 442            ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) ||
 443            ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710))
 444                sq_config = 0;
 445        else
 446                sq_config = R600_VC_ENABLE;
 447
 448        sq_config |= (R600_DX9_CONSTS |
 449                      R600_ALU_INST_PREFER_VECTOR |
 450                      R600_PS_PRIO(0) |
 451                      R600_VS_PRIO(1) |
 452                      R600_GS_PRIO(2) |
 453                      R600_ES_PRIO(3));
 454
 455        sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(num_ps_gprs) |
 456                                  R600_NUM_VS_GPRS(num_vs_gprs) |
 457                                  R600_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
 458        sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(num_gs_gprs) |
 459                                  R600_NUM_ES_GPRS(num_es_gprs));
 460        sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(num_ps_threads) |
 461                                   R600_NUM_VS_THREADS(num_vs_threads) |
 462                                   R600_NUM_GS_THREADS(num_gs_threads) |
 463                                   R600_NUM_ES_THREADS(num_es_threads));
 464        sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
 465                                    R600_NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
 466        sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
 467                                    R600_NUM_ES_STACK_ENTRIES(num_es_stack_entries));
 468
 469        if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770) {
 470                BEGIN_RING(r7xx_default_size + 10);
 471                for (i = 0; i < r7xx_default_size; i++)
 472                        OUT_RING(r7xx_default_state[i]);
 473        } else {
 474                BEGIN_RING(r6xx_default_size + 10);
 475                for (i = 0; i < r6xx_default_size; i++)
 476                        OUT_RING(r6xx_default_state[i]);
 477        }
 478        OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
 479        OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
 480        /* SQ config */
 481        OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 6));
 482        OUT_RING((R600_SQ_CONFIG - R600_SET_CONFIG_REG_OFFSET) >> 2);
 483        OUT_RING(sq_config);
 484        OUT_RING(sq_gpr_resource_mgmt_1);
 485        OUT_RING(sq_gpr_resource_mgmt_2);
 486        OUT_RING(sq_thread_resource_mgmt);
 487        OUT_RING(sq_stack_resource_mgmt_1);
 488        OUT_RING(sq_stack_resource_mgmt_2);
 489        ADVANCE_RING();
 490}
 491
 492static inline uint32_t i2f(uint32_t input)
 493{
 494        u32 result, i, exponent, fraction;
 495
 496        if ((input & 0x3fff) == 0)
 497                result = 0; /* 0 is a special case */
 498        else {
 499                exponent = 140; /* exponent biased by 127; */
 500                fraction = (input & 0x3fff) << 10; /* cheat and only
 501                                                      handle numbers below 2^^15 */
 502                for (i = 0; i < 14; i++) {
 503                        if (fraction & 0x800000)
 504                                break;
 505                        else {
 506                                fraction = fraction << 1; /* keep
 507                                                             shifting left until top bit = 1 */
 508                                exponent = exponent - 1;
 509                        }
 510                }
 511                result = exponent << 23 | (fraction & 0x7fffff); /* mask
 512                                                                    off top bit; assumed 1 */
 513        }
 514        return result;
 515}
 516
 517
 518static inline int r600_nomm_get_vb(struct drm_device *dev)
 519{
 520        drm_radeon_private_t *dev_priv = dev->dev_private;
 521        dev_priv->blit_vb = radeon_freelist_get(dev);
 522        if (!dev_priv->blit_vb) {
 523                DRM_ERROR("Unable to allocate vertex buffer for blit\n");
 524                return -EAGAIN;
 525        }
 526        return 0;
 527}
 528
 529static inline void r600_nomm_put_vb(struct drm_device *dev)
 530{
 531        drm_radeon_private_t *dev_priv = dev->dev_private;
 532
 533        dev_priv->blit_vb->used = 0;
 534        radeon_cp_discard_buffer(dev, dev_priv->blit_vb->file_priv->master, dev_priv->blit_vb);
 535}
 536
 537static inline void *r600_nomm_get_vb_ptr(struct drm_device *dev)
 538{
 539        drm_radeon_private_t *dev_priv = dev->dev_private;
 540        return (((char *)dev->agp_buffer_map->handle +
 541                 dev_priv->blit_vb->offset + dev_priv->blit_vb->used));
 542}
 543
 544int
 545r600_prepare_blit_copy(struct drm_device *dev, struct drm_file *file_priv)
 546{
 547        drm_radeon_private_t *dev_priv = dev->dev_private;
 548        int ret;
 549        DRM_DEBUG("\n");
 550
 551        ret = r600_nomm_get_vb(dev);
 552        if (ret)
 553                return ret;
 554
 555        dev_priv->blit_vb->file_priv = file_priv;
 556
 557        set_default_state(dev_priv);
 558        set_shaders(dev);
 559
 560        return 0;
 561}
 562
 563
 564void
 565r600_done_blit_copy(struct drm_device *dev)
 566{
 567        drm_radeon_private_t *dev_priv = dev->dev_private;
 568        RING_LOCALS;
 569        DRM_DEBUG("\n");
 570
 571        BEGIN_RING(5);
 572        OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
 573        OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
 574        /* wait for 3D idle clean */
 575        OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
 576        OUT_RING((R600_WAIT_UNTIL - R600_SET_CONFIG_REG_OFFSET) >> 2);
 577        OUT_RING(RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN);
 578
 579        ADVANCE_RING();
 580        COMMIT_RING();
 581
 582        r600_nomm_put_vb(dev);
 583}
 584
 585void
 586r600_blit_copy(struct drm_device *dev,
 587               uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
 588               int size_bytes)
 589{
 590        drm_radeon_private_t *dev_priv = dev->dev_private;
 591        int max_bytes;
 592        u64 vb_addr;
 593        u32 *vb;
 594
 595        vb = r600_nomm_get_vb_ptr(dev);
 596
 597        if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) {
 598                max_bytes = 8192;
 599
 600                while (size_bytes) {
 601                        int cur_size = size_bytes;
 602                        int src_x = src_gpu_addr & 255;
 603                        int dst_x = dst_gpu_addr & 255;
 604                        int h = 1;
 605                        src_gpu_addr = src_gpu_addr & ~255;
 606                        dst_gpu_addr = dst_gpu_addr & ~255;
 607
 608                        if (!src_x && !dst_x) {
 609                                h = (cur_size / max_bytes);
 610                                if (h > 8192)
 611                                        h = 8192;
 612                                if (h == 0)
 613                                        h = 1;
 614                                else
 615                                        cur_size = max_bytes;
 616                        } else {
 617                                if (cur_size > max_bytes)
 618                                        cur_size = max_bytes;
 619                                if (cur_size > (max_bytes - dst_x))
 620                                        cur_size = (max_bytes - dst_x);
 621                                if (cur_size > (max_bytes - src_x))
 622                                        cur_size = (max_bytes - src_x);
 623                        }
 624
 625                        if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
 626
 627                                r600_nomm_put_vb(dev);
 628                                r600_nomm_get_vb(dev);
 629                                if (!dev_priv->blit_vb)
 630                                        return;
 631                                set_shaders(dev);
 632                                vb = r600_nomm_get_vb_ptr(dev);
 633                        }
 634
 635                        vb[0] = i2f(dst_x);
 636                        vb[1] = 0;
 637                        vb[2] = i2f(src_x);
 638                        vb[3] = 0;
 639
 640                        vb[4] = i2f(dst_x);
 641                        vb[5] = i2f(h);
 642                        vb[6] = i2f(src_x);
 643                        vb[7] = i2f(h);
 644
 645                        vb[8] = i2f(dst_x + cur_size);
 646                        vb[9] = i2f(h);
 647                        vb[10] = i2f(src_x + cur_size);
 648                        vb[11] = i2f(h);
 649
 650                        /* src */
 651                        set_tex_resource(dev_priv, FMT_8,
 652                                         src_x + cur_size, h, src_x + cur_size,
 653                                         src_gpu_addr);
 654
 655                        cp_set_surface_sync(dev_priv,
 656                                            R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
 657
 658                        /* dst */
 659                        set_render_target(dev_priv, COLOR_8,
 660                                          dst_x + cur_size, h,
 661                                          dst_gpu_addr);
 662
 663                        /* scissors */
 664                        set_scissors(dev_priv, dst_x, 0, dst_x + cur_size, h);
 665
 666                        /* Vertex buffer setup */
 667                        vb_addr = dev_priv->gart_buffers_offset +
 668                                dev_priv->blit_vb->offset +
 669                                dev_priv->blit_vb->used;
 670                        set_vtx_resource(dev_priv, vb_addr);
 671
 672                        /* draw */
 673                        draw_auto(dev_priv);
 674
 675                        cp_set_surface_sync(dev_priv,
 676                                            R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
 677                                            cur_size * h, dst_gpu_addr);
 678
 679                        vb += 12;
 680                        dev_priv->blit_vb->used += 12 * 4;
 681
 682                        src_gpu_addr += cur_size * h;
 683                        dst_gpu_addr += cur_size * h;
 684                        size_bytes -= cur_size * h;
 685                }
 686        } else {
 687                max_bytes = 8192 * 4;
 688
 689                while (size_bytes) {
 690                        int cur_size = size_bytes;
 691                        int src_x = (src_gpu_addr & 255);
 692                        int dst_x = (dst_gpu_addr & 255);
 693                        int h = 1;
 694                        src_gpu_addr = src_gpu_addr & ~255;
 695                        dst_gpu_addr = dst_gpu_addr & ~255;
 696
 697                        if (!src_x && !dst_x) {
 698                                h = (cur_size / max_bytes);
 699                                if (h > 8192)
 700                                        h = 8192;
 701                                if (h == 0)
 702                                        h = 1;
 703                                else
 704                                        cur_size = max_bytes;
 705                        } else {
 706                                if (cur_size > max_bytes)
 707                                        cur_size = max_bytes;
 708                                if (cur_size > (max_bytes - dst_x))
 709                                        cur_size = (max_bytes - dst_x);
 710                                if (cur_size > (max_bytes - src_x))
 711                                        cur_size = (max_bytes - src_x);
 712                        }
 713
 714                        if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
 715                                r600_nomm_put_vb(dev);
 716                                r600_nomm_get_vb(dev);
 717                                if (!dev_priv->blit_vb)
 718                                        return;
 719
 720                                set_shaders(dev);
 721                                vb = r600_nomm_get_vb_ptr(dev);
 722                        }
 723
 724                        vb[0] = i2f(dst_x / 4);
 725                        vb[1] = 0;
 726                        vb[2] = i2f(src_x / 4);
 727                        vb[3] = 0;
 728
 729                        vb[4] = i2f(dst_x / 4);
 730                        vb[5] = i2f(h);
 731                        vb[6] = i2f(src_x / 4);
 732                        vb[7] = i2f(h);
 733
 734                        vb[8] = i2f((dst_x + cur_size) / 4);
 735                        vb[9] = i2f(h);
 736                        vb[10] = i2f((src_x + cur_size) / 4);
 737                        vb[11] = i2f(h);
 738
 739                        /* src */
 740                        set_tex_resource(dev_priv, FMT_8_8_8_8,
 741                                         (src_x + cur_size) / 4,
 742                                         h, (src_x + cur_size) / 4,
 743                                         src_gpu_addr);
 744
 745                        cp_set_surface_sync(dev_priv,
 746                                            R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
 747
 748                        /* dst */
 749                        set_render_target(dev_priv, COLOR_8_8_8_8,
 750                                          (dst_x + cur_size) / 4, h,
 751                                          dst_gpu_addr);
 752
 753                        /* scissors */
 754                        set_scissors(dev_priv, (dst_x / 4), 0, (dst_x + cur_size / 4), h);
 755
 756                        /* Vertex buffer setup */
 757                        vb_addr = dev_priv->gart_buffers_offset +
 758                                dev_priv->blit_vb->offset +
 759                                dev_priv->blit_vb->used;
 760                        set_vtx_resource(dev_priv, vb_addr);
 761
 762                        /* draw */
 763                        draw_auto(dev_priv);
 764
 765                        cp_set_surface_sync(dev_priv,
 766                                            R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
 767                                            cur_size * h, dst_gpu_addr);
 768
 769                        vb += 12;
 770                        dev_priv->blit_vb->used += 12 * 4;
 771
 772                        src_gpu_addr += cur_size * h;
 773                        dst_gpu_addr += cur_size * h;
 774                        size_bytes -= cur_size * h;
 775                }
 776        }
 777}
 778
 779void
 780r600_blit_swap(struct drm_device *dev,
 781               uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
 782               int sx, int sy, int dx, int dy,
 783               int w, int h, int src_pitch, int dst_pitch, int cpp)
 784{
 785        drm_radeon_private_t *dev_priv = dev->dev_private;
 786        int cb_format, tex_format;
 787        int sx2, sy2, dx2, dy2;
 788        u64 vb_addr;
 789        u32 *vb;
 790
 791        if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
 792
 793                r600_nomm_put_vb(dev);
 794                r600_nomm_get_vb(dev);
 795                if (!dev_priv->blit_vb)
 796                        return;
 797
 798                set_shaders(dev);
 799        }
 800        vb = r600_nomm_get_vb_ptr(dev);
 801
 802        sx2 = sx + w;
 803        sy2 = sy + h;
 804        dx2 = dx + w;
 805        dy2 = dy + h;
 806
 807        vb[0] = i2f(dx);
 808        vb[1] = i2f(dy);
 809        vb[2] = i2f(sx);
 810        vb[3] = i2f(sy);
 811
 812        vb[4] = i2f(dx);
 813        vb[5] = i2f(dy2);
 814        vb[6] = i2f(sx);
 815        vb[7] = i2f(sy2);
 816
 817        vb[8] = i2f(dx2);
 818        vb[9] = i2f(dy2);
 819        vb[10] = i2f(sx2);
 820        vb[11] = i2f(sy2);
 821
 822        switch(cpp) {
 823        case 4:
 824                cb_format = COLOR_8_8_8_8;
 825                tex_format = FMT_8_8_8_8;
 826                break;
 827        case 2:
 828                cb_format = COLOR_5_6_5;
 829                tex_format = FMT_5_6_5;
 830                break;
 831        default:
 832                cb_format = COLOR_8;
 833                tex_format = FMT_8;
 834                break;
 835        }
 836
 837        /* src */
 838        set_tex_resource(dev_priv, tex_format,
 839                         src_pitch / cpp,
 840                         sy2, src_pitch / cpp,
 841                         src_gpu_addr);
 842
 843        cp_set_surface_sync(dev_priv,
 844                            R600_TC_ACTION_ENA, src_pitch * sy2, src_gpu_addr);
 845
 846        /* dst */
 847        set_render_target(dev_priv, cb_format,
 848                          dst_pitch / cpp, dy2,
 849                          dst_gpu_addr);
 850
 851        /* scissors */
 852        set_scissors(dev_priv, dx, dy, dx2, dy2);
 853
 854        /* Vertex buffer setup */
 855        vb_addr = dev_priv->gart_buffers_offset +
 856                dev_priv->blit_vb->offset +
 857                dev_priv->blit_vb->used;
 858        set_vtx_resource(dev_priv, vb_addr);
 859
 860        /* draw */
 861        draw_auto(dev_priv);
 862
 863        cp_set_surface_sync(dev_priv,
 864                            R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
 865                            dst_pitch * dy2, dst_gpu_addr);
 866
 867        dev_priv->blit_vb->used += 12 * 4;
 868}
 869