linux/drivers/gpu/drm/radeon/evergreen_cs.c
/*
 * Copyright 2010 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */

#include "radeon.h"
#include "radeon_asic.h"
#include "evergreend.h"
#include "evergreen_reg_safe.h"
#include "cayman_reg_safe.h"

#define MAX(a,b)                   (((a)>(b))?(a):(b))
#define MIN(a,b)                   (((a)<(b))?(a):(b))

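/*
 * evergreen_reg_safe.h/cayman_reg_safe.h provide generated per-ASIC
 * bitmaps with one bit per register dword; the checker consults them to
 * decide which registers a userspace command stream may write without
 * special handling.
 */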
#define REG_SAFE_BM_SIZE ARRAY_SIZE(evergreen_reg_safe_bm)

int r600_dma_cs_next_reloc(struct radeon_cs_parser *p,
                           struct radeon_bo_list **cs_reloc);
struct evergreen_cs_track {
        u32                     group_size;
        u32                     nbanks;
        u32                     npipes;
        u32                     row_size;
        /* values we track */
        u32                     nsamples;               /* unused */
        struct radeon_bo        *cb_color_bo[12];
        u32                     cb_color_bo_offset[12];
        struct radeon_bo        *cb_color_fmask_bo[8];  /* unused */
        struct radeon_bo        *cb_color_cmask_bo[8];  /* unused */
        u32                     cb_color_info[12];
        u32                     cb_color_view[12];
        u32                     cb_color_pitch[12];
        u32                     cb_color_slice[12];
        u32                     cb_color_slice_idx[12];
        u32                     cb_color_attrib[12];
        u32                     cb_color_cmask_slice[8];/* unused */
        u32                     cb_color_fmask_slice[8];/* unused */
        u32                     cb_target_mask;
        u32                     cb_shader_mask; /* unused */
        u32                     vgt_strmout_config;
        u32                     vgt_strmout_buffer_config;
        struct radeon_bo        *vgt_strmout_bo[4];
        u32                     vgt_strmout_bo_offset[4];
        u32                     vgt_strmout_size[4];
        u32                     db_depth_control;
        u32                     db_depth_view;
        u32                     db_depth_slice;
        u32                     db_depth_size;
        u32                     db_z_info;
        u32                     db_z_read_offset;
        u32                     db_z_write_offset;
        struct radeon_bo        *db_z_read_bo;
        struct radeon_bo        *db_z_write_bo;
        u32                     db_s_info;
        u32                     db_s_read_offset;
        u32                     db_s_write_offset;
        struct radeon_bo        *db_s_read_bo;
        struct radeon_bo        *db_s_write_bo;
        bool                    sx_misc_kill_all_prims;
        bool                    cb_dirty;
        bool                    db_dirty;
        bool                    streamout_dirty;
        u32                     htile_offset;
        u32                     htile_surface;
        struct radeon_bo        *htile_bo;
        unsigned long           indirect_draw_buffer_size;
        const unsigned          *reg_safe_bm;
};

static u32 evergreen_cs_get_aray_mode(u32 tiling_flags)
{
        if (tiling_flags & RADEON_TILING_MACRO)
                return ARRAY_2D_TILED_THIN1;
        else if (tiling_flags & RADEON_TILING_MICRO)
                return ARRAY_1D_TILED_THIN1;
        else
                return ARRAY_LINEAR_GENERAL;
}

static u32 evergreen_cs_get_num_banks(u32 nbanks)
{
        switch (nbanks) {
        case 2:
                return ADDR_SURF_2_BANK;
        case 4:
                return ADDR_SURF_4_BANK;
        case 8:
        default:
                return ADDR_SURF_8_BANK;
        case 16:
                return ADDR_SURF_16_BANK;
        }
}

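/*
 * Reset the tracked state to conservative defaults; offsets and masks
 * start out as all-ones poison values rather than zero so that state the
 * command stream never programs stands out during validation.
 */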
static void evergreen_cs_track_init(struct evergreen_cs_track *track)
{
        int i;

        for (i = 0; i < 8; i++) {
                track->cb_color_fmask_bo[i] = NULL;
                track->cb_color_cmask_bo[i] = NULL;
                track->cb_color_cmask_slice[i] = 0;
                track->cb_color_fmask_slice[i] = 0;
        }

        for (i = 0; i < 12; i++) {
                track->cb_color_bo[i] = NULL;
                track->cb_color_bo_offset[i] = 0xFFFFFFFF;
                track->cb_color_info[i] = 0;
                track->cb_color_view[i] = 0xFFFFFFFF;
                track->cb_color_pitch[i] = 0;
                track->cb_color_slice[i] = 0xfffffff;
                track->cb_color_slice_idx[i] = 0;
        }
        track->cb_target_mask = 0xFFFFFFFF;
        track->cb_shader_mask = 0xFFFFFFFF;
        track->cb_dirty = true;

        track->db_depth_slice = 0xffffffff;
        track->db_depth_view = 0xFFFFC000;
        track->db_depth_size = 0xFFFFFFFF;
        track->db_depth_control = 0xFFFFFFFF;
        track->db_z_info = 0xFFFFFFFF;
        track->db_z_read_offset = 0xFFFFFFFF;
        track->db_z_write_offset = 0xFFFFFFFF;
        track->db_z_read_bo = NULL;
        track->db_z_write_bo = NULL;
        track->db_s_info = 0xFFFFFFFF;
        track->db_s_read_offset = 0xFFFFFFFF;
        track->db_s_write_offset = 0xFFFFFFFF;
        track->db_s_read_bo = NULL;
        track->db_s_write_bo = NULL;
        track->db_dirty = true;
        track->htile_bo = NULL;
        track->htile_offset = 0xFFFFFFFF;
        track->htile_surface = 0;

        for (i = 0; i < 4; i++) {
                track->vgt_strmout_size[i] = 0;
                track->vgt_strmout_bo[i] = NULL;
                track->vgt_strmout_bo_offset[i] = 0xFFFFFFFF;
        }
        track->streamout_dirty = true;
        track->sx_misc_kill_all_prims = false;
}

struct eg_surface {
        /* values gathered from the cs */
        unsigned        nbx;
        unsigned        nby;
        unsigned        format;
        unsigned        mode;
        unsigned        nbanks;
        unsigned        bankw;
        unsigned        bankh;
        unsigned        tsplit;
        unsigned        mtilea;
        unsigned        nsamples;
        /* output values */
        unsigned        bpe;
        unsigned        layer_size;
        unsigned        palign;
        unsigned        halign;
        unsigned long   base_align;
};

static int evergreen_surface_check_linear(struct radeon_cs_parser *p,
                                          struct eg_surface *surf,
                                          const char *prefix)
{
        surf->layer_size = surf->nbx * surf->nby * surf->bpe * surf->nsamples;
        surf->base_align = surf->bpe;
        surf->palign = 1;
        surf->halign = 1;
        return 0;
}

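/*
 * For ARRAY_LINEAR_ALIGNED the pitch must be a multiple of the group size
 * in elements, and never less than 64.  Example: group_size = 256 and
 * bpe = 4 gives palign = MAX(64, 256 / 4) = 64 elements.
 */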
static int evergreen_surface_check_linear_aligned(struct radeon_cs_parser *p,
                                                  struct eg_surface *surf,
                                                  const char *prefix)
{
        struct evergreen_cs_track *track = p->track;
        unsigned palign;

        palign = MAX(64, track->group_size / surf->bpe);
        surf->layer_size = surf->nbx * surf->nby * surf->bpe * surf->nsamples;
        surf->base_align = track->group_size;
        surf->palign = palign;
        surf->halign = 1;
        if (surf->nbx & (palign - 1)) {
                if (prefix) {
                        dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d\n",
                                 __func__, __LINE__, prefix, surf->nbx, palign);
                }
                return -EINVAL;
        }
        return 0;
}

static int evergreen_surface_check_1d(struct radeon_cs_parser *p,
                                      struct eg_surface *surf,
                                      const char *prefix)
{
        struct evergreen_cs_track *track = p->track;
        unsigned palign;

        palign = track->group_size / (8 * surf->bpe * surf->nsamples);
        palign = MAX(8, palign);
        surf->layer_size = surf->nbx * surf->nby * surf->bpe;
        surf->base_align = track->group_size;
        surf->palign = palign;
        surf->halign = 8;
        if ((surf->nbx & (palign - 1))) {
                if (prefix) {
                        dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d (%d %d %d)\n",
                                 __func__, __LINE__, prefix, surf->nbx, palign,
                                 track->group_size, surf->bpe, surf->nsamples);
                }
                return -EINVAL;
        }
        if ((surf->nby & (8 - 1))) {
                if (prefix) {
                        dev_warn(p->dev, "%s:%d %s height %d invalid must be aligned with 8\n",
                                 __func__, __LINE__, prefix, surf->nby);
                }
                return -EINVAL;
        }
        return 0;
}

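/*
 * 2D tiling: a micro tile is 8x8 elements (tileb bytes, clamped by the
 * tile split) and a macro tile spans (8 * bankw * npipes * mtilea) pixels
 * horizontally by (8 * bankh * nbanks / mtilea) pixels vertically, which
 * is where the pitch and height alignment requirements below come from.
 */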
static int evergreen_surface_check_2d(struct radeon_cs_parser *p,
                                      struct eg_surface *surf,
                                      const char *prefix)
{
        struct evergreen_cs_track *track = p->track;
        unsigned palign, halign, tileb, slice_pt;
        unsigned mtile_pr, mtile_ps, mtileb;

        tileb = 64 * surf->bpe * surf->nsamples;
        slice_pt = 1;
        if (tileb > surf->tsplit) {
                slice_pt = tileb / surf->tsplit;
        }
        tileb = tileb / slice_pt;
        /* macro tile width & height */
        palign = (8 * surf->bankw * track->npipes) * surf->mtilea;
        halign = (8 * surf->bankh * surf->nbanks) / surf->mtilea;
        mtileb = (palign / 8) * (halign / 8) * tileb;
        mtile_pr = surf->nbx / palign;
        mtile_ps = (mtile_pr * surf->nby) / halign;
        surf->layer_size = mtile_ps * mtileb * slice_pt;
        surf->base_align = (palign / 8) * (halign / 8) * tileb;
        surf->palign = palign;
        surf->halign = halign;

        if ((surf->nbx & (palign - 1))) {
                if (prefix) {
                        dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d\n",
                                 __func__, __LINE__, prefix, surf->nbx, palign);
                }
                return -EINVAL;
        }
        if ((surf->nby & (halign - 1))) {
                if (prefix) {
                        dev_warn(p->dev, "%s:%d %s height %d invalid must be aligned with %d\n",
                                 __func__, __LINE__, prefix, surf->nby, halign);
                }
                return -EINVAL;
        }

        return 0;
}

static int evergreen_surface_check(struct radeon_cs_parser *p,
                                   struct eg_surface *surf,
                                   const char *prefix)
{
        /* some common values are computed here */
        surf->bpe = r600_fmt_get_blocksize(surf->format);

        switch (surf->mode) {
        case ARRAY_LINEAR_GENERAL:
                return evergreen_surface_check_linear(p, surf, prefix);
        case ARRAY_LINEAR_ALIGNED:
                return evergreen_surface_check_linear_aligned(p, surf, prefix);
        case ARRAY_1D_TILED_THIN1:
                return evergreen_surface_check_1d(p, surf, prefix);
        case ARRAY_2D_TILED_THIN1:
                return evergreen_surface_check_2d(p, surf, prefix);
        default:
                dev_warn(p->dev, "%s:%d %s invalid array mode %d\n",
                                __func__, __LINE__, prefix, surf->mode);
                return -EINVAL;
        }
        return -EINVAL;
}

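/*
 * The CS hands us log2-style register encodings; convert them to real
 * values before doing any math (e.g. a NUM_BANKS encoding of 2 means
 * 8 banks, a TILE_SPLIT encoding of 4 means a 1024-byte split).  Only
 * 2D tiled surfaces need the full conversion.
 */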
static int evergreen_surface_value_conv_check(struct radeon_cs_parser *p,
                                              struct eg_surface *surf,
                                              const char *prefix)
{
        switch (surf->mode) {
        case ARRAY_2D_TILED_THIN1:
                break;
        case ARRAY_LINEAR_GENERAL:
        case ARRAY_LINEAR_ALIGNED:
        case ARRAY_1D_TILED_THIN1:
                return 0;
        default:
                dev_warn(p->dev, "%s:%d %s invalid array mode %d\n",
                                __func__, __LINE__, prefix, surf->mode);
                return -EINVAL;
        }

        switch (surf->nbanks) {
        case 0: surf->nbanks = 2; break;
        case 1: surf->nbanks = 4; break;
        case 2: surf->nbanks = 8; break;
        case 3: surf->nbanks = 16; break;
        default:
                dev_warn(p->dev, "%s:%d %s invalid number of banks %d\n",
                         __func__, __LINE__, prefix, surf->nbanks);
                return -EINVAL;
        }
        switch (surf->bankw) {
        case 0: surf->bankw = 1; break;
        case 1: surf->bankw = 2; break;
        case 2: surf->bankw = 4; break;
        case 3: surf->bankw = 8; break;
        default:
                dev_warn(p->dev, "%s:%d %s invalid bankw %d\n",
                         __func__, __LINE__, prefix, surf->bankw);
                return -EINVAL;
        }
        switch (surf->bankh) {
        case 0: surf->bankh = 1; break;
        case 1: surf->bankh = 2; break;
        case 2: surf->bankh = 4; break;
        case 3: surf->bankh = 8; break;
        default:
                dev_warn(p->dev, "%s:%d %s invalid bankh %d\n",
                         __func__, __LINE__, prefix, surf->bankh);
                return -EINVAL;
        }
        switch (surf->mtilea) {
        case 0: surf->mtilea = 1; break;
        case 1: surf->mtilea = 2; break;
        case 2: surf->mtilea = 4; break;
        case 3: surf->mtilea = 8; break;
        default:
                dev_warn(p->dev, "%s:%d %s invalid macro tile aspect %d\n",
                         __func__, __LINE__, prefix, surf->mtilea);
                return -EINVAL;
        }
        switch (surf->tsplit) {
        case 0: surf->tsplit = 64; break;
        case 1: surf->tsplit = 128; break;
        case 2: surf->tsplit = 256; break;
        case 3: surf->tsplit = 512; break;
        case 4: surf->tsplit = 1024; break;
        case 5: surf->tsplit = 2048; break;
        case 6: surf->tsplit = 4096; break;
        default:
                dev_warn(p->dev, "%s:%d %s invalid tile split %d\n",
                         __func__, __LINE__, prefix, surf->tsplit);
                return -EINVAL;
        }
        return 0;
}

static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned id)
{
        struct evergreen_cs_track *track = p->track;
        struct eg_surface surf;
        unsigned pitch, slice, mslice;
        unsigned long offset;
        int r;

        mslice = G_028C6C_SLICE_MAX(track->cb_color_view[id]) + 1;
        pitch = track->cb_color_pitch[id];
        slice = track->cb_color_slice[id];
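        /*
         * The pitch register holds (pitch_in_pixels / 8) - 1 and the slice
         * register holds (pitch * height / 64) - 1, hence the decoding
         * below.
         */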
        surf.nbx = (pitch + 1) * 8;
        surf.nby = ((slice + 1) * 64) / surf.nbx;
        surf.mode = G_028C70_ARRAY_MODE(track->cb_color_info[id]);
        surf.format = G_028C70_FORMAT(track->cb_color_info[id]);
        surf.tsplit = G_028C74_TILE_SPLIT(track->cb_color_attrib[id]);
        surf.nbanks = G_028C74_NUM_BANKS(track->cb_color_attrib[id]);
        surf.bankw = G_028C74_BANK_WIDTH(track->cb_color_attrib[id]);
        surf.bankh = G_028C74_BANK_HEIGHT(track->cb_color_attrib[id]);
        surf.mtilea = G_028C74_MACRO_TILE_ASPECT(track->cb_color_attrib[id]);
        surf.nsamples = 1;

        if (!r600_fmt_is_valid_color(surf.format)) {
                dev_warn(p->dev, "%s:%d cb invalid format %d for %d (0x%08x)\n",
                         __func__, __LINE__, surf.format,
                        id, track->cb_color_info[id]);
                return -EINVAL;
        }

        r = evergreen_surface_value_conv_check(p, &surf, "cb");
        if (r) {
                return r;
        }

        r = evergreen_surface_check(p, &surf, "cb");
        if (r) {
                dev_warn(p->dev, "%s:%d cb[%d] invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
                         __func__, __LINE__, id, track->cb_color_pitch[id],
                         track->cb_color_slice[id], track->cb_color_attrib[id],
                         track->cb_color_info[id]);
                return r;
        }

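        /* CB_COLOR*_BASE addresses are in 256-byte units, hence the << 8 */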
        offset = track->cb_color_bo_offset[id] << 8;
        if (offset & (surf.base_align - 1)) {
                dev_warn(p->dev, "%s:%d cb[%d] bo base %ld not aligned with %ld\n",
                         __func__, __LINE__, id, offset, surf.base_align);
                return -EINVAL;
        }

        offset += surf.layer_size * mslice;
        if (offset > radeon_bo_size(track->cb_color_bo[id])) {
                /* old ddx is broken: it allocates the bo with w*h*bpp but
                 * programs the slice with ALIGN(h, 8); catch this and patch
                 * the command stream.
                 */
                if (!surf.mode) {
                        uint32_t *ib = p->ib.ptr;
                        unsigned long tmp, nby, bsize, size, min = 0;

                        /* find the height the ddx wants */
                        if (surf.nby > 8) {
                                min = surf.nby - 8;
                        }
                        bsize = radeon_bo_size(track->cb_color_bo[id]);
                        tmp = track->cb_color_bo_offset[id] << 8;
                        for (nby = surf.nby; nby > min; nby--) {
                                size = nby * surf.nbx * surf.bpe * surf.nsamples;
                                if ((tmp + size * mslice) <= bsize) {
                                        break;
                                }
                        }
                        if (nby > min) {
                                surf.nby = nby;
                                slice = ((nby * surf.nbx) / 64) - 1;
                                if (!evergreen_surface_check(p, &surf, "cb")) {
                                        /* check if this one works */
                                        tmp += surf.layer_size * mslice;
                                        if (tmp <= bsize) {
                                                ib[track->cb_color_slice_idx[id]] = slice;
                                                goto old_ddx_ok;
                                        }
                                }
                        }
                }
                dev_warn(p->dev, "%s:%d cb[%d] bo too small (layer size %d, "
                         "offset %d, max layer %d, bo size %ld, slice %d)\n",
                         __func__, __LINE__, id, surf.layer_size,
                        track->cb_color_bo_offset[id] << 8, mslice,
                        radeon_bo_size(track->cb_color_bo[id]), slice);
                dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n",
                         __func__, __LINE__, surf.nbx, surf.nby,
                        surf.mode, surf.bpe, surf.nsamples,
                        surf.bankw, surf.bankh,
                        surf.tsplit, surf.mtilea);
                return -EINVAL;
        }
old_ddx_ok:

        return 0;
}

static int evergreen_cs_track_validate_htile(struct radeon_cs_parser *p,
                                                unsigned nbx, unsigned nby)
{
        struct evergreen_cs_track *track = p->track;
        unsigned long size;

        if (track->htile_bo == NULL) {
                dev_warn(p->dev, "%s:%d htile enabled without htile surface 0x%08x\n",
                                __func__, __LINE__, track->db_z_info);
                return -EINVAL;
        }

        if (G_028ABC_LINEAR(track->htile_surface)) {
                /* pitch must be 16 htiles aligned == 16 * 8 pixels aligned */
                nbx = round_up(nbx, 16 * 8);
                /* height must be npipes htiles aligned == npipes * 8 pixels aligned */
                nby = round_up(nby, track->npipes * 8);
        } else {
                /* always assume 8x8 htiles */
                /* the alignment is the htile alignment * 8; the htile
                 * alignment varies with the number of pipes, the tile width
                 * and nby
                 */
                switch (track->npipes) {
                case 8:
                        /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8 */
                        nbx = round_up(nbx, 64 * 8);
                        nby = round_up(nby, 64 * 8);
                        break;
                case 4:
                        /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8 */
                        nbx = round_up(nbx, 64 * 8);
                        nby = round_up(nby, 32 * 8);
                        break;
                case 2:
                        /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8 */
                        nbx = round_up(nbx, 32 * 8);
                        nby = round_up(nby, 32 * 8);
                        break;
                case 1:
                        /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8 */
                        nbx = round_up(nbx, 32 * 8);
                        nby = round_up(nby, 16 * 8);
                        break;
                default:
                        dev_warn(p->dev, "%s:%d invalid num pipes %d\n",
                                        __func__, __LINE__, track->npipes);
                        return -EINVAL;
                }
        }
        /* compute the number of htiles */
        nbx = nbx >> 3;
        nby = nby >> 3;
        /* size must be aligned to an npipes * 2K boundary */
        size = roundup(nbx * nby * 4, track->npipes * (2 << 10));
        size += track->htile_offset;

        if (size > radeon_bo_size(track->htile_bo)) {
                dev_warn(p->dev, "%s:%d htile surface too small %ld for %ld (%d %d)\n",
                                __func__, __LINE__, radeon_bo_size(track->htile_bo),
                                size, nbx, nby);
                return -EINVAL;
        }
        return 0;
}

static int evergreen_cs_track_validate_stencil(struct radeon_cs_parser *p)
{
        struct evergreen_cs_track *track = p->track;
        struct eg_surface surf;
        unsigned pitch, slice, mslice;
        unsigned long offset;
        int r;

        mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1;
        pitch = G_028058_PITCH_TILE_MAX(track->db_depth_size);
        slice = track->db_depth_slice;
        surf.nbx = (pitch + 1) * 8;
        surf.nby = ((slice + 1) * 64) / surf.nbx;
        surf.mode = G_028040_ARRAY_MODE(track->db_z_info);
        surf.format = G_028044_FORMAT(track->db_s_info);
        surf.tsplit = G_028044_TILE_SPLIT(track->db_s_info);
        surf.nbanks = G_028040_NUM_BANKS(track->db_z_info);
        surf.bankw = G_028040_BANK_WIDTH(track->db_z_info);
        surf.bankh = G_028040_BANK_HEIGHT(track->db_z_info);
        surf.mtilea = G_028040_MACRO_TILE_ASPECT(track->db_z_info);
        surf.nsamples = 1;

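        /* format 1 in DB_STENCIL_INFO is 8-bit stencil, the only layout
         * accepted here
         */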
        if (surf.format != 1) {
                dev_warn(p->dev, "%s:%d stencil invalid format %d\n",
                         __func__, __LINE__, surf.format);
                return -EINVAL;
        }
        /* replace with a color format so we can reuse the same code */
        surf.format = V_028C70_COLOR_8;

        r = evergreen_surface_value_conv_check(p, &surf, "stencil");
        if (r) {
                return r;
        }

        r = evergreen_surface_check(p, &surf, NULL);
        if (r) {
                /* old userspace doesn't compute proper depth/stencil
                 * alignment; check the alignment against a bigger bytes per
                 * element and only report an error if that one is wrong too.
                 */
                surf.format = V_028C70_COLOR_8_8_8_8;
                r = evergreen_surface_check(p, &surf, "stencil");
                if (r) {
                        dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
                                 __func__, __LINE__, track->db_depth_size,
                                 track->db_depth_slice, track->db_s_info, track->db_z_info);
                }
                return r;
        }

        offset = track->db_s_read_offset << 8;
        if (offset & (surf.base_align - 1)) {
                dev_warn(p->dev, "%s:%d stencil read bo base %ld not aligned with %ld\n",
                         __func__, __LINE__, offset, surf.base_align);
                return -EINVAL;
        }
        offset += surf.layer_size * mslice;
        if (offset > radeon_bo_size(track->db_s_read_bo)) {
                dev_warn(p->dev, "%s:%d stencil read bo too small (layer size %d, "
                         "offset %ld, max layer %d, bo size %ld)\n",
                         __func__, __LINE__, surf.layer_size,
                        (unsigned long)track->db_s_read_offset << 8, mslice,
                        radeon_bo_size(track->db_s_read_bo));
                dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
                         __func__, __LINE__, track->db_depth_size,
                         track->db_depth_slice, track->db_s_info, track->db_z_info);
                return -EINVAL;
        }

        offset = track->db_s_write_offset << 8;
        if (offset & (surf.base_align - 1)) {
                dev_warn(p->dev, "%s:%d stencil write bo base %ld not aligned with %ld\n",
                         __func__, __LINE__, offset, surf.base_align);
                return -EINVAL;
        }
        offset += surf.layer_size * mslice;
        if (offset > radeon_bo_size(track->db_s_write_bo)) {
                dev_warn(p->dev, "%s:%d stencil write bo too small (layer size %d, "
                         "offset %ld, max layer %d, bo size %ld)\n",
                         __func__, __LINE__, surf.layer_size,
                        (unsigned long)track->db_s_write_offset << 8, mslice,
                        radeon_bo_size(track->db_s_write_bo));
                return -EINVAL;
        }

        /* hyperz */
        if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) {
                r = evergreen_cs_track_validate_htile(p, surf.nbx, surf.nby);
                if (r) {
                        return r;
                }
        }

        return 0;
}

static int evergreen_cs_track_validate_depth(struct radeon_cs_parser *p)
{
        struct evergreen_cs_track *track = p->track;
        struct eg_surface surf;
        unsigned pitch, slice, mslice;
        unsigned long offset;
        int r;

        mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1;
        pitch = G_028058_PITCH_TILE_MAX(track->db_depth_size);
        slice = track->db_depth_slice;
        surf.nbx = (pitch + 1) * 8;
        surf.nby = ((slice + 1) * 64) / surf.nbx;
        surf.mode = G_028040_ARRAY_MODE(track->db_z_info);
        surf.format = G_028040_FORMAT(track->db_z_info);
        surf.tsplit = G_028040_TILE_SPLIT(track->db_z_info);
        surf.nbanks = G_028040_NUM_BANKS(track->db_z_info);
        surf.bankw = G_028040_BANK_WIDTH(track->db_z_info);
        surf.bankh = G_028040_BANK_HEIGHT(track->db_z_info);
        surf.mtilea = G_028040_MACRO_TILE_ASPECT(track->db_z_info);
        surf.nsamples = 1;

        switch (surf.format) {
        case V_028040_Z_16:
                surf.format = V_028C70_COLOR_16;
                break;
        case V_028040_Z_24:
        case V_028040_Z_32_FLOAT:
                surf.format = V_028C70_COLOR_8_8_8_8;
                break;
        default:
                dev_warn(p->dev, "%s:%d depth invalid format %d\n",
                         __func__, __LINE__, surf.format);
                return -EINVAL;
        }

        r = evergreen_surface_value_conv_check(p, &surf, "depth");
        if (r) {
                dev_warn(p->dev, "%s:%d depth invalid (0x%08x 0x%08x 0x%08x)\n",
                         __func__, __LINE__, track->db_depth_size,
                         track->db_depth_slice, track->db_z_info);
                return r;
        }

        r = evergreen_surface_check(p, &surf, "depth");
        if (r) {
                dev_warn(p->dev, "%s:%d depth invalid (0x%08x 0x%08x 0x%08x)\n",
                         __func__, __LINE__, track->db_depth_size,
                         track->db_depth_slice, track->db_z_info);
                return r;
        }

        offset = track->db_z_read_offset << 8;
        if (offset & (surf.base_align - 1)) {
                dev_warn(p->dev, "%s:%d depth read bo base %ld not aligned with %ld\n",
                         __func__, __LINE__, offset, surf.base_align);
                return -EINVAL;
        }
        offset += surf.layer_size * mslice;
        if (offset > radeon_bo_size(track->db_z_read_bo)) {
                dev_warn(p->dev, "%s:%d depth read bo too small (layer size %d, "
                         "offset %ld, max layer %d, bo size %ld)\n",
                         __func__, __LINE__, surf.layer_size,
                        (unsigned long)track->db_z_read_offset << 8, mslice,
                        radeon_bo_size(track->db_z_read_bo));
                return -EINVAL;
        }

        offset = track->db_z_write_offset << 8;
        if (offset & (surf.base_align - 1)) {
                dev_warn(p->dev, "%s:%d depth write bo base %ld not aligned with %ld\n",
                         __func__, __LINE__, offset, surf.base_align);
                return -EINVAL;
        }
        offset += surf.layer_size * mslice;
        if (offset > radeon_bo_size(track->db_z_write_bo)) {
                dev_warn(p->dev, "%s:%d depth write bo too small (layer size %d, "
                         "offset %ld, max layer %d, bo size %ld)\n",
                         __func__, __LINE__, surf.layer_size,
                        (unsigned long)track->db_z_write_offset << 8, mslice,
                        radeon_bo_size(track->db_z_write_bo));
                return -EINVAL;
        }

        /* hyperz */
        if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) {
                r = evergreen_cs_track_validate_htile(p, surf.nbx, surf.nby);
                if (r) {
                        return r;
                }
        }

        return 0;
}

static int evergreen_cs_track_validate_texture(struct radeon_cs_parser *p,
                                               struct radeon_bo *texture,
                                               struct radeon_bo *mipmap,
                                               unsigned idx)
{
        struct eg_surface surf;
        unsigned long toffset, moffset;
        unsigned dim, llevel, mslice, width, height, depth, i;
        u32 texdw[8];
        int r;

        texdw[0] = radeon_get_ib_value(p, idx + 0);
        texdw[1] = radeon_get_ib_value(p, idx + 1);
        texdw[2] = radeon_get_ib_value(p, idx + 2);
        texdw[3] = radeon_get_ib_value(p, idx + 3);
        texdw[4] = radeon_get_ib_value(p, idx + 4);
        texdw[5] = radeon_get_ib_value(p, idx + 5);
        texdw[6] = radeon_get_ib_value(p, idx + 6);
        texdw[7] = radeon_get_ib_value(p, idx + 7);
        dim = G_030000_DIM(texdw[0]);
        llevel = G_030014_LAST_LEVEL(texdw[5]);
        mslice = G_030014_LAST_ARRAY(texdw[5]) + 1;
        width = G_030000_TEX_WIDTH(texdw[0]) + 1;
        height = G_030004_TEX_HEIGHT(texdw[1]) + 1;
        depth = G_030004_TEX_DEPTH(texdw[1]) + 1;
        surf.format = G_03001C_DATA_FORMAT(texdw[7]);
        surf.nbx = (G_030000_PITCH(texdw[0]) + 1) * 8;
        surf.nbx = r600_fmt_get_nblocksx(surf.format, surf.nbx);
        surf.nby = r600_fmt_get_nblocksy(surf.format, height);
        surf.mode = G_030004_ARRAY_MODE(texdw[1]);
        surf.tsplit = G_030018_TILE_SPLIT(texdw[6]);
        surf.nbanks = G_03001C_NUM_BANKS(texdw[7]);
        surf.bankw = G_03001C_BANK_WIDTH(texdw[7]);
        surf.bankh = G_03001C_BANK_HEIGHT(texdw[7]);
        surf.mtilea = G_03001C_MACRO_TILE_ASPECT(texdw[7]);
        surf.nsamples = 1;
        toffset = texdw[2] << 8;
        moffset = texdw[3] << 8;

        if (!r600_fmt_is_valid_texture(surf.format, p->family)) {
                dev_warn(p->dev, "%s:%d texture invalid format %d\n",
                         __func__, __LINE__, surf.format);
                return -EINVAL;
        }
        switch (dim) {
        case V_030000_SQ_TEX_DIM_1D:
        case V_030000_SQ_TEX_DIM_2D:
        case V_030000_SQ_TEX_DIM_CUBEMAP:
        case V_030000_SQ_TEX_DIM_1D_ARRAY:
        case V_030000_SQ_TEX_DIM_2D_ARRAY:
                depth = 1;
                break;
        case V_030000_SQ_TEX_DIM_2D_MSAA:
        case V_030000_SQ_TEX_DIM_2D_ARRAY_MSAA:
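                /* for MSAA resources LAST_LEVEL encodes log2(nsamples)
                 * rather than a mip count, so decode it and clear llevel
                 */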
                surf.nsamples = 1 << llevel;
                llevel = 0;
                depth = 1;
                break;
        case V_030000_SQ_TEX_DIM_3D:
                break;
        default:
                dev_warn(p->dev, "%s:%d texture invalid dimension %d\n",
                         __func__, __LINE__, dim);
                return -EINVAL;
        }

        r = evergreen_surface_value_conv_check(p, &surf, "texture");
        if (r) {
                return r;
        }

        /* align height */
        evergreen_surface_check(p, &surf, NULL);
        surf.nby = ALIGN(surf.nby, surf.halign);

        r = evergreen_surface_check(p, &surf, "texture");
        if (r) {
                dev_warn(p->dev, "%s:%d texture invalid 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n",
                         __func__, __LINE__, texdw[0], texdw[1], texdw[4],
                         texdw[5], texdw[6], texdw[7]);
                return r;
        }

        /* check texture size */
        if (toffset & (surf.base_align - 1)) {
                dev_warn(p->dev, "%s:%d texture bo base %ld not aligned with %ld\n",
                         __func__, __LINE__, toffset, surf.base_align);
                return -EINVAL;
        }
        if (surf.nsamples <= 1 && moffset & (surf.base_align - 1)) {
                dev_warn(p->dev, "%s:%d mipmap bo base %ld not aligned with %ld\n",
                         __func__, __LINE__, moffset, surf.base_align);
                return -EINVAL;
        }
        if (dim == SQ_TEX_DIM_3D) {
                toffset += surf.layer_size * depth;
        } else {
                toffset += surf.layer_size * mslice;
        }
        if (toffset > radeon_bo_size(texture)) {
                dev_warn(p->dev, "%s:%d texture bo too small (layer size %d, "
                         "offset %ld, max layer %d, depth %d, bo size %ld) (%d %d)\n",
                         __func__, __LINE__, surf.layer_size,
                        (unsigned long)texdw[2] << 8, mslice,
                        depth, radeon_bo_size(texture),
                        surf.nbx, surf.nby);
                return -EINVAL;
        }

        if (!mipmap) {
                if (llevel) {
                        dev_warn(p->dev, "%s:%i got NULL MIP_ADDRESS relocation\n",
                                 __func__, __LINE__);
                        return -EINVAL;
                } else {
                        return 0; /* everything's ok */
                }
        }

        /* check mipmap size */
        for (i = 1; i <= llevel; i++) {
                unsigned w, h, d;

                w = r600_mip_minify(width, i);
                h = r600_mip_minify(height, i);
                d = r600_mip_minify(depth, i);
                surf.nbx = r600_fmt_get_nblocksx(surf.format, w);
                surf.nby = r600_fmt_get_nblocksy(surf.format, h);

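                /*
                 * A level too small to fill one macro tile cannot stay 2D
                 * tiled; demote it to 1D tiling and recompute the alignment
                 * before checking it.
                 */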
                switch (surf.mode) {
                case ARRAY_2D_TILED_THIN1:
                        if (surf.nbx < surf.palign || surf.nby < surf.halign) {
                                surf.mode = ARRAY_1D_TILED_THIN1;
                        }
                        /* recompute alignment */
                        evergreen_surface_check(p, &surf, NULL);
                        break;
                case ARRAY_LINEAR_GENERAL:
                case ARRAY_LINEAR_ALIGNED:
                case ARRAY_1D_TILED_THIN1:
                        break;
                default:
                        dev_warn(p->dev, "%s:%d invalid array mode %d\n",
                                 __func__, __LINE__, surf.mode);
                        return -EINVAL;
                }
                surf.nbx = ALIGN(surf.nbx, surf.palign);
                surf.nby = ALIGN(surf.nby, surf.halign);

                r = evergreen_surface_check(p, &surf, "mipmap");
                if (r) {
                        return r;
                }

                if (dim == SQ_TEX_DIM_3D) {
                        moffset += surf.layer_size * d;
                } else {
                        moffset += surf.layer_size * mslice;
                }
                if (moffset > radeon_bo_size(mipmap)) {
                        dev_warn(p->dev, "%s:%d mipmap [%d] bo too small (layer size %d, "
                                        "offset %ld, coffset %ld, max layer %d, depth %d, "
                                        "bo size %ld) level0 (%d %d %d)\n",
                                        __func__, __LINE__, i, surf.layer_size,
                                        (unsigned long)texdw[3] << 8, moffset, mslice,
                                        d, radeon_bo_size(mipmap),
                                        width, height, depth);
                        dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n",
                                 __func__, __LINE__, surf.nbx, surf.nby,
                                surf.mode, surf.bpe, surf.nsamples,
                                surf.bankw, surf.bankh,
                                surf.tsplit, surf.mtilea);
                        return -EINVAL;
                }
        }

        return 0;
}

static int evergreen_cs_track_check(struct radeon_cs_parser *p)
{
        struct evergreen_cs_track *track = p->track;
        unsigned tmp, i;
        int r;
        unsigned buffer_mask = 0;

        /* check streamout */
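        /*
         * Each of the four streams owns a nibble of
         * VGT_STRMOUT_BUFFER_CONFIG naming the buffers it writes; OR the
         * nibbles of all enabled streams to get the set of buffers that
         * must be backed by a bo.
         */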
        if (track->streamout_dirty && track->vgt_strmout_config) {
                for (i = 0; i < 4; i++) {
                        if (track->vgt_strmout_config & (1 << i)) {
                                buffer_mask |= (track->vgt_strmout_buffer_config >> (i * 4)) & 0xf;
                        }
                }

                for (i = 0; i < 4; i++) {
                        if (buffer_mask & (1 << i)) {
                                if (track->vgt_strmout_bo[i]) {
                                        u64 offset = (u64)track->vgt_strmout_bo_offset[i] +
                                                        (u64)track->vgt_strmout_size[i];
                                        if (offset > radeon_bo_size(track->vgt_strmout_bo[i])) {
                                                DRM_ERROR("streamout %d bo too small: 0x%llx, 0x%lx\n",
                                                          i, offset,
                                                          radeon_bo_size(track->vgt_strmout_bo[i]));
                                                return -EINVAL;
                                        }
                                } else {
                                        dev_warn(p->dev, "No buffer for streamout %d\n", i);
                                        return -EINVAL;
                                }
                        }
                }
                track->streamout_dirty = false;
        }

        if (track->sx_misc_kill_all_prims)
                return 0;

        /* check that we have a cb for each enabled target */
        if (track->cb_dirty) {
                tmp = track->cb_target_mask;
                for (i = 0; i < 8; i++) {
                        u32 format = G_028C70_FORMAT(track->cb_color_info[i]);

                        if (format != V_028C70_COLOR_INVALID &&
                            (tmp >> (i * 4)) & 0xF) {
                                /* at least one component is enabled */
                                if (track->cb_color_bo[i] == NULL) {
                                        dev_warn(p->dev, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n",
                                                __func__, __LINE__, track->cb_target_mask, track->cb_shader_mask, i);
                                        return -EINVAL;
                                }
                                /* check cb */
                                r = evergreen_cs_track_validate_cb(p, i);
                                if (r) {
                                        return r;
                                }
                        }
                }
                track->cb_dirty = false;
        }

        if (track->db_dirty) {
                /* Check stencil buffer */
                if (G_028044_FORMAT(track->db_s_info) != V_028044_STENCIL_INVALID &&
                    G_028800_STENCIL_ENABLE(track->db_depth_control)) {
                        r = evergreen_cs_track_validate_stencil(p);
                        if (r)
                                return r;
                }
                /* Check depth buffer */
                if (G_028040_FORMAT(track->db_z_info) != V_028040_Z_INVALID &&
                    G_028800_Z_ENABLE(track->db_depth_control)) {
                        r = evergreen_cs_track_validate_depth(p);
                        if (r)
                                return r;
                }
                track->db_dirty = false;
        }

        return 0;
}

/**
 * evergreen_cs_packet_parse_vline() - parse userspace VLINE packet
 * @p: parser structure holding parsing context
 *
 * This is an Evergreen(+)-specific function for parsing VLINE packets.
 * The real work is done by the r600_cs_common_vline_parse() function.
 * Here we just set up the ASIC-specific register tables and call
 * the common implementation.
 */
static int evergreen_cs_packet_parse_vline(struct radeon_cs_parser *p)
{
        static uint32_t vline_start_end[6] = {
                EVERGREEN_VLINE_START_END + EVERGREEN_CRTC0_REGISTER_OFFSET,
                EVERGREEN_VLINE_START_END + EVERGREEN_CRTC1_REGISTER_OFFSET,
                EVERGREEN_VLINE_START_END + EVERGREEN_CRTC2_REGISTER_OFFSET,
                EVERGREEN_VLINE_START_END + EVERGREEN_CRTC3_REGISTER_OFFSET,
                EVERGREEN_VLINE_START_END + EVERGREEN_CRTC4_REGISTER_OFFSET,
                EVERGREEN_VLINE_START_END + EVERGREEN_CRTC5_REGISTER_OFFSET
        };
        static uint32_t vline_status[6] = {
                EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
                EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
                EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
                EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
                EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
                EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET
        };

        return r600_cs_common_vline_parse(p, vline_start_end, vline_status);
}

static int evergreen_packet0_check(struct radeon_cs_parser *p,
                                   struct radeon_cs_packet *pkt,
                                   unsigned idx, unsigned reg)
{
        int r;

        switch (reg) {
        case EVERGREEN_VLINE_START_END:
                r = evergreen_cs_packet_parse_vline(p);
                if (r) {
                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
                                        idx, reg);
                        return r;
                }
                break;
        default:
                pr_err("Forbidden register 0x%04X in cs at %d\n", reg, idx);
                return -EINVAL;
        }
        return 0;
}

static int evergreen_cs_parse_packet0(struct radeon_cs_parser *p,
                                      struct radeon_cs_packet *pkt)
{
        unsigned reg, i;
        unsigned idx;
        int r;

        idx = pkt->idx + 1;
        reg = pkt->reg;
        for (i = 0; i <= pkt->count; i++, idx++, reg += 4) {
                r = evergreen_packet0_check(p, pkt, idx, reg);
                if (r) {
                        return r;
                }
        }
        return 0;
}

/**
 * evergreen_cs_handle_reg() - process registers that need special handling.
 * @p: parser structure holding parsing context
 * @reg: register we are testing
 * @idx: index into the cs buffer
 */
static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
{
        struct evergreen_cs_track *track = (struct evergreen_cs_track *)p->track;
        struct radeon_bo_list *reloc;
        u32 tmp, *ib;
        int r;

        ib = p->ib.ptr;
        switch (reg) {
        /* force the following regs to 0 in an attempt to disable the out
         * buffer; we would need to understand better how it works to do a
         * proper security check on it (Jerome)
         */
        case SQ_ESGS_RING_SIZE:
        case SQ_GSVS_RING_SIZE:
        case SQ_ESTMP_RING_SIZE:
        case SQ_GSTMP_RING_SIZE:
        case SQ_HSTMP_RING_SIZE:
        case SQ_LSTMP_RING_SIZE:
        case SQ_PSTMP_RING_SIZE:
        case SQ_VSTMP_RING_SIZE:
        case SQ_ESGS_RING_ITEMSIZE:
        case SQ_ESTMP_RING_ITEMSIZE:
        case SQ_GSTMP_RING_ITEMSIZE:
        case SQ_GSVS_RING_ITEMSIZE:
        case SQ_GS_VERT_ITEMSIZE:
        case SQ_GS_VERT_ITEMSIZE_1:
        case SQ_GS_VERT_ITEMSIZE_2:
        case SQ_GS_VERT_ITEMSIZE_3:
        case SQ_GSVS_RING_OFFSET_1:
        case SQ_GSVS_RING_OFFSET_2:
        case SQ_GSVS_RING_OFFSET_3:
        case SQ_HSTMP_RING_ITEMSIZE:
        case SQ_LSTMP_RING_ITEMSIZE:
        case SQ_PSTMP_RING_ITEMSIZE:
        case SQ_VSTMP_RING_ITEMSIZE:
        case VGT_TF_RING_SIZE:
                /* get the value to populate the IB, don't remove */
                /*tmp = radeon_get_ib_value(p, idx);
                  ib[idx] = 0;*/
                break;
        case SQ_ESGS_RING_BASE:
        case SQ_GSVS_RING_BASE:
        case SQ_ESTMP_RING_BASE:
        case SQ_GSTMP_RING_BASE:
        case SQ_HSTMP_RING_BASE:
        case SQ_LSTMP_RING_BASE:
        case SQ_PSTMP_RING_BASE:
        case SQ_VSTMP_RING_BASE:
                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
                if (r) {
                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
                                        "0x%04X\n", reg);
                        return -EINVAL;
                }
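                /* ring base registers hold 256-byte-aligned addresses, so
                 * patch in the relocated gpu_offset shifted down by 8
                 */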
1149                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1150                break;
1151        case DB_DEPTH_CONTROL:
1152                track->db_depth_control = radeon_get_ib_value(p, idx);
1153                track->db_dirty = true;
1154                break;
1155        case CAYMAN_DB_EQAA:
1156                if (p->rdev->family < CHIP_CAYMAN) {
1157                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1158                                 "0x%04X\n", reg);
1159                        return -EINVAL;
1160                }
1161                break;
1162        case CAYMAN_DB_DEPTH_INFO:
1163                if (p->rdev->family < CHIP_CAYMAN) {
1164                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1165                                 "0x%04X\n", reg);
1166                        return -EINVAL;
1167                }
1168                break;
1169        case DB_Z_INFO:
1170                track->db_z_info = radeon_get_ib_value(p, idx);
1171                if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1172                        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1173                        if (r) {
1174                                dev_warn(p->dev, "bad SET_CONTEXT_REG "
1175                                                "0x%04X\n", reg);
1176                                return -EINVAL;
1177                        }
1178                        ib[idx] &= ~Z_ARRAY_MODE(0xf);
1179                        track->db_z_info &= ~Z_ARRAY_MODE(0xf);
1180                        ib[idx] |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1181                        track->db_z_info |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1182                        if (reloc->tiling_flags & RADEON_TILING_MACRO) {
1183                                unsigned bankw, bankh, mtaspect, tile_split;
1184
1185                                evergreen_tiling_fields(reloc->tiling_flags,
1186                                                        &bankw, &bankh, &mtaspect,
1187                                                        &tile_split);
1188                                ib[idx] |= DB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
1189                                ib[idx] |= DB_TILE_SPLIT(tile_split) |
1190                                                DB_BANK_WIDTH(bankw) |
1191                                                DB_BANK_HEIGHT(bankh) |
1192                                                DB_MACRO_TILE_ASPECT(mtaspect);
1193                        }
1194                }
1195                track->db_dirty = true;
1196                break;
1197        case DB_STENCIL_INFO:
1198                track->db_s_info = radeon_get_ib_value(p, idx);
1199                track->db_dirty = true;
1200                break;
1201        case DB_DEPTH_VIEW:
1202                track->db_depth_view = radeon_get_ib_value(p, idx);
1203                track->db_dirty = true;
1204                break;
1205        case DB_DEPTH_SIZE:
1206                track->db_depth_size = radeon_get_ib_value(p, idx);
1207                track->db_dirty = true;
1208                break;
1209        case R_02805C_DB_DEPTH_SLICE:
1210                track->db_depth_slice = radeon_get_ib_value(p, idx);
1211                track->db_dirty = true;
1212                break;
1213        case DB_Z_READ_BASE:
1214                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1215                if (r) {
1216                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1217                                        "0x%04X\n", reg);
1218                        return -EINVAL;
1219                }
1220                track->db_z_read_offset = radeon_get_ib_value(p, idx);
1221                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1222                track->db_z_read_bo = reloc->robj;
1223                track->db_dirty = true;
1224                break;
1225        case DB_Z_WRITE_BASE:
1226                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1227                if (r) {
1228                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1229                                        "0x%04X\n", reg);
1230                        return -EINVAL;
1231                }
1232                track->db_z_write_offset = radeon_get_ib_value(p, idx);
1233                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1234                track->db_z_write_bo = reloc->robj;
1235                track->db_dirty = true;
1236                break;
1237        case DB_STENCIL_READ_BASE:
1238                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1239                if (r) {
1240                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1241                                        "0x%04X\n", reg);
1242                        return -EINVAL;
1243                }
1244                track->db_s_read_offset = radeon_get_ib_value(p, idx);
1245                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1246                track->db_s_read_bo = reloc->robj;
1247                track->db_dirty = true;
1248                break;
1249        case DB_STENCIL_WRITE_BASE:
1250                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1251                if (r) {
1252                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1253                                        "0x%04X\n", reg);
1254                        return -EINVAL;
1255                }
1256                track->db_s_write_offset = radeon_get_ib_value(p, idx);
1257                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1258                track->db_s_write_bo = reloc->robj;
1259                track->db_dirty = true;
1260                break;
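        /*
         * Illustrative aside (not from the original file): the four
         * DB_*_READ/WRITE_BASE cases above all patch in the relocated
         * GPU address shifted right by 8, because these base registers
         * hold a 256-byte-aligned address.  E.g. a depth BO bound at a
         * hypothetical GPU VA of 0x00400000 would be programmed as
         * 0x00400000 >> 8 = 0x4000.
         */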
1261        case VGT_STRMOUT_CONFIG:
1262                track->vgt_strmout_config = radeon_get_ib_value(p, idx);
1263                track->streamout_dirty = true;
1264                break;
1265        case VGT_STRMOUT_BUFFER_CONFIG:
1266                track->vgt_strmout_buffer_config = radeon_get_ib_value(p, idx);
1267                track->streamout_dirty = true;
1268                break;
1269        case VGT_STRMOUT_BUFFER_BASE_0:
1270        case VGT_STRMOUT_BUFFER_BASE_1:
1271        case VGT_STRMOUT_BUFFER_BASE_2:
1272        case VGT_STRMOUT_BUFFER_BASE_3:
1273                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1274                if (r) {
1275                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1276                                        "0x%04X\n", reg);
1277                        return -EINVAL;
1278                }
1279                tmp = (reg - VGT_STRMOUT_BUFFER_BASE_0) / 16;
1280                track->vgt_strmout_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8;
1281                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1282                track->vgt_strmout_bo[tmp] = reloc->robj;
1283                track->streamout_dirty = true;
1284                break;
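        /*
         * Illustrative note: the stream-out buffer register sets are
         * spaced 16 bytes (four dwords) apart, so (reg - BASE_0) / 16
         * recovers the buffer index.  E.g. for VGT_STRMOUT_BUFFER_BASE_2
         * the difference is 0x20 and tmp = 2.
         */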
1285        case VGT_STRMOUT_BUFFER_SIZE_0:
1286        case VGT_STRMOUT_BUFFER_SIZE_1:
1287        case VGT_STRMOUT_BUFFER_SIZE_2:
1288        case VGT_STRMOUT_BUFFER_SIZE_3:
1289                tmp = (reg - VGT_STRMOUT_BUFFER_SIZE_0) / 16;
1290                /* size in register is DWs, convert to bytes */
1291                track->vgt_strmout_size[tmp] = radeon_get_ib_value(p, idx) * 4;
1292                track->streamout_dirty = true;
1293                break;
1294        case CP_COHER_BASE:
1295                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1296                if (r) {
1297                        dev_warn(p->dev, "missing reloc for CP_COHER_BASE "
1298                                        "0x%04X\n", reg);
1299                        return -EINVAL;
1300                }
1301                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1302                break;
1303        case CB_TARGET_MASK:
1304                track->cb_target_mask = radeon_get_ib_value(p, idx);
1305                track->cb_dirty = true;
1306                break;
1307        case CB_SHADER_MASK:
1308                track->cb_shader_mask = radeon_get_ib_value(p, idx);
1309                track->cb_dirty = true;
1310                break;
1311        case PA_SC_AA_CONFIG:
1312                if (p->rdev->family >= CHIP_CAYMAN) {
1313                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1314                                 "0x%04X\n", reg);
1315                        return -EINVAL;
1316                }
1317                tmp = radeon_get_ib_value(p, idx) & MSAA_NUM_SAMPLES_MASK;
1318                track->nsamples = 1 << tmp;
1319                break;
1320        case CAYMAN_PA_SC_AA_CONFIG:
1321                if (p->rdev->family < CHIP_CAYMAN) {
1322                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1323                                 "0x%04X\n", reg);
1324                        return -EINVAL;
1325                }
1326                tmp = radeon_get_ib_value(p, idx) & CAYMAN_MSAA_NUM_SAMPLES_MASK;
1327                track->nsamples = 1 << tmp;
1328                break;
1329        case CB_COLOR0_VIEW:
1330        case CB_COLOR1_VIEW:
1331        case CB_COLOR2_VIEW:
1332        case CB_COLOR3_VIEW:
1333        case CB_COLOR4_VIEW:
1334        case CB_COLOR5_VIEW:
1335        case CB_COLOR6_VIEW:
1336        case CB_COLOR7_VIEW:
1337                tmp = (reg - CB_COLOR0_VIEW) / 0x3c;
1338                track->cb_color_view[tmp] = radeon_get_ib_value(p, idx);
1339                track->cb_dirty = true;
1340                break;
1341        case CB_COLOR8_VIEW:
1342        case CB_COLOR9_VIEW:
1343        case CB_COLOR10_VIEW:
1344        case CB_COLOR11_VIEW:
1345                tmp = ((reg - CB_COLOR8_VIEW) / 0x1c) + 8;
1346                track->cb_color_view[tmp] = radeon_get_ib_value(p, idx);
1347                track->cb_dirty = true;
1348                break;
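        /*
         * Illustrative note on the two index formulas used by the CB
         * cases in this switch: render targets 0-7 carry a full block of
         * CB registers at a stride of 0x3c bytes, while targets 8-11 use
         * a smaller per-target block at a stride of 0x1c, hence
         * (reg - CB_COLOR8_*) / 0x1c + 8.  E.g. CB_COLOR10_VIEW gives
         * (0x38 / 0x1c) + 8 = 10.
         */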
1349        case CB_COLOR0_INFO:
1350        case CB_COLOR1_INFO:
1351        case CB_COLOR2_INFO:
1352        case CB_COLOR3_INFO:
1353        case CB_COLOR4_INFO:
1354        case CB_COLOR5_INFO:
1355        case CB_COLOR6_INFO:
1356        case CB_COLOR7_INFO:
1357                tmp = (reg - CB_COLOR0_INFO) / 0x3c;
1358                track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
1359                if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1360                        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1361                        if (r) {
1362                                dev_warn(p->dev, "bad SET_CONTEXT_REG "
1363                                                "0x%04X\n", reg);
1364                                return -EINVAL;
1365                        }
1366                        ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1367                        track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1368                }
1369                track->cb_dirty = true;
1370                break;
1371        case CB_COLOR8_INFO:
1372        case CB_COLOR9_INFO:
1373        case CB_COLOR10_INFO:
1374        case CB_COLOR11_INFO:
1375                tmp = ((reg - CB_COLOR8_INFO) / 0x1c) + 8;
1376                track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
1377                if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1378                        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1379                        if (r) {
1380                                dev_warn(p->dev, "bad SET_CONTEXT_REG "
1381                                                "0x%04X\n", reg);
1382                                return -EINVAL;
1383                        }
1384                        ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1385                        track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1386                }
1387                track->cb_dirty = true;
1388                break;
1389        case CB_COLOR0_PITCH:
1390        case CB_COLOR1_PITCH:
1391        case CB_COLOR2_PITCH:
1392        case CB_COLOR3_PITCH:
1393        case CB_COLOR4_PITCH:
1394        case CB_COLOR5_PITCH:
1395        case CB_COLOR6_PITCH:
1396        case CB_COLOR7_PITCH:
1397                tmp = (reg - CB_COLOR0_PITCH) / 0x3c;
1398                track->cb_color_pitch[tmp] = radeon_get_ib_value(p, idx);
1399                track->cb_dirty = true;
1400                break;
1401        case CB_COLOR8_PITCH:
1402        case CB_COLOR9_PITCH:
1403        case CB_COLOR10_PITCH:
1404        case CB_COLOR11_PITCH:
1405                tmp = ((reg - CB_COLOR8_PITCH) / 0x1c) + 8;
1406                track->cb_color_pitch[tmp] = radeon_get_ib_value(p, idx);
1407                track->cb_dirty = true;
1408                break;
1409        case CB_COLOR0_SLICE:
1410        case CB_COLOR1_SLICE:
1411        case CB_COLOR2_SLICE:
1412        case CB_COLOR3_SLICE:
1413        case CB_COLOR4_SLICE:
1414        case CB_COLOR5_SLICE:
1415        case CB_COLOR6_SLICE:
1416        case CB_COLOR7_SLICE:
1417                tmp = (reg - CB_COLOR0_SLICE) / 0x3c;
1418                track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx);
1419                track->cb_color_slice_idx[tmp] = idx;
1420                track->cb_dirty = true;
1421                break;
1422        case CB_COLOR8_SLICE:
1423        case CB_COLOR9_SLICE:
1424        case CB_COLOR10_SLICE:
1425        case CB_COLOR11_SLICE:
1426                tmp = ((reg - CB_COLOR8_SLICE) / 0x1c) + 8;
1427                track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx);
1428                track->cb_color_slice_idx[tmp] = idx;
1429                track->cb_dirty = true;
1430                break;
1431        case CB_COLOR0_ATTRIB:
1432        case CB_COLOR1_ATTRIB:
1433        case CB_COLOR2_ATTRIB:
1434        case CB_COLOR3_ATTRIB:
1435        case CB_COLOR4_ATTRIB:
1436        case CB_COLOR5_ATTRIB:
1437        case CB_COLOR6_ATTRIB:
1438        case CB_COLOR7_ATTRIB:
1439                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1440                if (r) {
1441                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1442                                        "0x%04X\n", reg);
1443                        return -EINVAL;
1444                }
1445                if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1446                        if (reloc->tiling_flags & RADEON_TILING_MACRO) {
1447                                unsigned bankw, bankh, mtaspect, tile_split;
1448
1449                                evergreen_tiling_fields(reloc->tiling_flags,
1450                                                        &bankw, &bankh, &mtaspect,
1451                                                        &tile_split);
1452                                ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
1453                                ib[idx] |= CB_TILE_SPLIT(tile_split) |
1454                                           CB_BANK_WIDTH(bankw) |
1455                                           CB_BANK_HEIGHT(bankh) |
1456                                           CB_MACRO_TILE_ASPECT(mtaspect);
1457                        }
1458                }
1459                tmp = ((reg - CB_COLOR0_ATTRIB) / 0x3c);
1460                track->cb_color_attrib[tmp] = ib[idx];
1461                track->cb_dirty = true;
1462                break;
1463        case CB_COLOR8_ATTRIB:
1464        case CB_COLOR9_ATTRIB:
1465        case CB_COLOR10_ATTRIB:
1466        case CB_COLOR11_ATTRIB:
1467                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1468                if (r) {
1469                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1470                                        "0x%04X\n", reg);
1471                        return -EINVAL;
1472                }
1473                if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1474                        if (reloc->tiling_flags & RADEON_TILING_MACRO) {
1475                                unsigned bankw, bankh, mtaspect, tile_split;
1476
1477                                evergreen_tiling_fields(reloc->tiling_flags,
1478                                                        &bankw, &bankh, &mtaspect,
1479                                                        &tile_split);
1480                                ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
1481                                ib[idx] |= CB_TILE_SPLIT(tile_split) |
1482                                           CB_BANK_WIDTH(bankw) |
1483                                           CB_BANK_HEIGHT(bankh) |
1484                                           CB_MACRO_TILE_ASPECT(mtaspect);
1485                        }
1486                }
1487                tmp = ((reg - CB_COLOR8_ATTRIB) / 0x1c) + 8;
1488                track->cb_color_attrib[tmp] = ib[idx];
1489                track->cb_dirty = true;
1490                break;
1491        case CB_COLOR0_FMASK:
1492        case CB_COLOR1_FMASK:
1493        case CB_COLOR2_FMASK:
1494        case CB_COLOR3_FMASK:
1495        case CB_COLOR4_FMASK:
1496        case CB_COLOR5_FMASK:
1497        case CB_COLOR6_FMASK:
1498        case CB_COLOR7_FMASK:
1499                tmp = (reg - CB_COLOR0_FMASK) / 0x3c;
1500                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1501                if (r) {
1502                        dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
1503                        return -EINVAL;
1504                }
1505                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1506                track->cb_color_fmask_bo[tmp] = reloc->robj;
1507                break;
1508        case CB_COLOR0_CMASK:
1509        case CB_COLOR1_CMASK:
1510        case CB_COLOR2_CMASK:
1511        case CB_COLOR3_CMASK:
1512        case CB_COLOR4_CMASK:
1513        case CB_COLOR5_CMASK:
1514        case CB_COLOR6_CMASK:
1515        case CB_COLOR7_CMASK:
1516                tmp = (reg - CB_COLOR0_CMASK) / 0x3c;
1517                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1518                if (r) {
1519                        dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
1520                        return -EINVAL;
1521                }
1522                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1523                track->cb_color_cmask_bo[tmp] = reloc->robj;
1524                break;
1525        case CB_COLOR0_FMASK_SLICE:
1526        case CB_COLOR1_FMASK_SLICE:
1527        case CB_COLOR2_FMASK_SLICE:
1528        case CB_COLOR3_FMASK_SLICE:
1529        case CB_COLOR4_FMASK_SLICE:
1530        case CB_COLOR5_FMASK_SLICE:
1531        case CB_COLOR6_FMASK_SLICE:
1532        case CB_COLOR7_FMASK_SLICE:
1533                tmp = (reg - CB_COLOR0_FMASK_SLICE) / 0x3c;
1534                track->cb_color_fmask_slice[tmp] = radeon_get_ib_value(p, idx);
1535                break;
1536        case CB_COLOR0_CMASK_SLICE:
1537        case CB_COLOR1_CMASK_SLICE:
1538        case CB_COLOR2_CMASK_SLICE:
1539        case CB_COLOR3_CMASK_SLICE:
1540        case CB_COLOR4_CMASK_SLICE:
1541        case CB_COLOR5_CMASK_SLICE:
1542        case CB_COLOR6_CMASK_SLICE:
1543        case CB_COLOR7_CMASK_SLICE:
1544                tmp = (reg - CB_COLOR0_CMASK_SLICE) / 0x3c;
1545                track->cb_color_cmask_slice[tmp] = radeon_get_ib_value(p, idx);
1546                break;
1547        case CB_COLOR0_BASE:
1548        case CB_COLOR1_BASE:
1549        case CB_COLOR2_BASE:
1550        case CB_COLOR3_BASE:
1551        case CB_COLOR4_BASE:
1552        case CB_COLOR5_BASE:
1553        case CB_COLOR6_BASE:
1554        case CB_COLOR7_BASE:
1555                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1556                if (r) {
1557                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1558                                        "0x%04X\n", reg);
1559                        return -EINVAL;
1560                }
1561                tmp = (reg - CB_COLOR0_BASE) / 0x3c;
1562                track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx);
1563                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1564                track->cb_color_bo[tmp] = reloc->robj;
1565                track->cb_dirty = true;
1566                break;
1567        case CB_COLOR8_BASE:
1568        case CB_COLOR9_BASE:
1569        case CB_COLOR10_BASE:
1570        case CB_COLOR11_BASE:
1571                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1572                if (r) {
1573                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1574                                        "0x%04X\n", reg);
1575                        return -EINVAL;
1576                }
1577                tmp = ((reg - CB_COLOR8_BASE) / 0x1c) + 8;
1578                track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx);
1579                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1580                track->cb_color_bo[tmp] = reloc->robj;
1581                track->cb_dirty = true;
1582                break;
1583        case DB_HTILE_DATA_BASE:
1584                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1585                if (r) {
1586                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1587                                        "0x%04X\n", reg);
1588                        return -EINVAL;
1589                }
1590                track->htile_offset = radeon_get_ib_value(p, idx);
1591                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1592                track->htile_bo = reloc->robj;
1593                track->db_dirty = true;
1594                break;
1595        case DB_HTILE_SURFACE:
1596                /* 8x8 only */
1597                track->htile_surface = radeon_get_ib_value(p, idx);
1598                /* force 8x8 htile width and height */
1599                ib[idx] |= 3;
1600                track->db_dirty = true;
1601                break;
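        /*
         * Sketch of the forced bits above (a reading of the code, not
         * authoritative): bits 0 and 1 of DB_HTILE_SURFACE select the
         * HTILE width and height modes, so OR-ing in 3 pins both to the
         * 8x8 layout, the only one this checker validates.
         */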
1602        case CB_IMMED0_BASE:
1603        case CB_IMMED1_BASE:
1604        case CB_IMMED2_BASE:
1605        case CB_IMMED3_BASE:
1606        case CB_IMMED4_BASE:
1607        case CB_IMMED5_BASE:
1608        case CB_IMMED6_BASE:
1609        case CB_IMMED7_BASE:
1610        case CB_IMMED8_BASE:
1611        case CB_IMMED9_BASE:
1612        case CB_IMMED10_BASE:
1613        case CB_IMMED11_BASE:
1614        case SQ_PGM_START_FS:
1615        case SQ_PGM_START_ES:
1616        case SQ_PGM_START_VS:
1617        case SQ_PGM_START_GS:
1618        case SQ_PGM_START_PS:
1619        case SQ_PGM_START_HS:
1620        case SQ_PGM_START_LS:
1621        case SQ_CONST_MEM_BASE:
1622        case SQ_ALU_CONST_CACHE_GS_0:
1623        case SQ_ALU_CONST_CACHE_GS_1:
1624        case SQ_ALU_CONST_CACHE_GS_2:
1625        case SQ_ALU_CONST_CACHE_GS_3:
1626        case SQ_ALU_CONST_CACHE_GS_4:
1627        case SQ_ALU_CONST_CACHE_GS_5:
1628        case SQ_ALU_CONST_CACHE_GS_6:
1629        case SQ_ALU_CONST_CACHE_GS_7:
1630        case SQ_ALU_CONST_CACHE_GS_8:
1631        case SQ_ALU_CONST_CACHE_GS_9:
1632        case SQ_ALU_CONST_CACHE_GS_10:
1633        case SQ_ALU_CONST_CACHE_GS_11:
1634        case SQ_ALU_CONST_CACHE_GS_12:
1635        case SQ_ALU_CONST_CACHE_GS_13:
1636        case SQ_ALU_CONST_CACHE_GS_14:
1637        case SQ_ALU_CONST_CACHE_GS_15:
1638        case SQ_ALU_CONST_CACHE_PS_0:
1639        case SQ_ALU_CONST_CACHE_PS_1:
1640        case SQ_ALU_CONST_CACHE_PS_2:
1641        case SQ_ALU_CONST_CACHE_PS_3:
1642        case SQ_ALU_CONST_CACHE_PS_4:
1643        case SQ_ALU_CONST_CACHE_PS_5:
1644        case SQ_ALU_CONST_CACHE_PS_6:
1645        case SQ_ALU_CONST_CACHE_PS_7:
1646        case SQ_ALU_CONST_CACHE_PS_8:
1647        case SQ_ALU_CONST_CACHE_PS_9:
1648        case SQ_ALU_CONST_CACHE_PS_10:
1649        case SQ_ALU_CONST_CACHE_PS_11:
1650        case SQ_ALU_CONST_CACHE_PS_12:
1651        case SQ_ALU_CONST_CACHE_PS_13:
1652        case SQ_ALU_CONST_CACHE_PS_14:
1653        case SQ_ALU_CONST_CACHE_PS_15:
1654        case SQ_ALU_CONST_CACHE_VS_0:
1655        case SQ_ALU_CONST_CACHE_VS_1:
1656        case SQ_ALU_CONST_CACHE_VS_2:
1657        case SQ_ALU_CONST_CACHE_VS_3:
1658        case SQ_ALU_CONST_CACHE_VS_4:
1659        case SQ_ALU_CONST_CACHE_VS_5:
1660        case SQ_ALU_CONST_CACHE_VS_6:
1661        case SQ_ALU_CONST_CACHE_VS_7:
1662        case SQ_ALU_CONST_CACHE_VS_8:
1663        case SQ_ALU_CONST_CACHE_VS_9:
1664        case SQ_ALU_CONST_CACHE_VS_10:
1665        case SQ_ALU_CONST_CACHE_VS_11:
1666        case SQ_ALU_CONST_CACHE_VS_12:
1667        case SQ_ALU_CONST_CACHE_VS_13:
1668        case SQ_ALU_CONST_CACHE_VS_14:
1669        case SQ_ALU_CONST_CACHE_VS_15:
1670        case SQ_ALU_CONST_CACHE_HS_0:
1671        case SQ_ALU_CONST_CACHE_HS_1:
1672        case SQ_ALU_CONST_CACHE_HS_2:
1673        case SQ_ALU_CONST_CACHE_HS_3:
1674        case SQ_ALU_CONST_CACHE_HS_4:
1675        case SQ_ALU_CONST_CACHE_HS_5:
1676        case SQ_ALU_CONST_CACHE_HS_6:
1677        case SQ_ALU_CONST_CACHE_HS_7:
1678        case SQ_ALU_CONST_CACHE_HS_8:
1679        case SQ_ALU_CONST_CACHE_HS_9:
1680        case SQ_ALU_CONST_CACHE_HS_10:
1681        case SQ_ALU_CONST_CACHE_HS_11:
1682        case SQ_ALU_CONST_CACHE_HS_12:
1683        case SQ_ALU_CONST_CACHE_HS_13:
1684        case SQ_ALU_CONST_CACHE_HS_14:
1685        case SQ_ALU_CONST_CACHE_HS_15:
1686        case SQ_ALU_CONST_CACHE_LS_0:
1687        case SQ_ALU_CONST_CACHE_LS_1:
1688        case SQ_ALU_CONST_CACHE_LS_2:
1689        case SQ_ALU_CONST_CACHE_LS_3:
1690        case SQ_ALU_CONST_CACHE_LS_4:
1691        case SQ_ALU_CONST_CACHE_LS_5:
1692        case SQ_ALU_CONST_CACHE_LS_6:
1693        case SQ_ALU_CONST_CACHE_LS_7:
1694        case SQ_ALU_CONST_CACHE_LS_8:
1695        case SQ_ALU_CONST_CACHE_LS_9:
1696        case SQ_ALU_CONST_CACHE_LS_10:
1697        case SQ_ALU_CONST_CACHE_LS_11:
1698        case SQ_ALU_CONST_CACHE_LS_12:
1699        case SQ_ALU_CONST_CACHE_LS_13:
1700        case SQ_ALU_CONST_CACHE_LS_14:
1701        case SQ_ALU_CONST_CACHE_LS_15:
1702                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1703                if (r) {
1704                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1705                                        "0x%04X\n", reg);
1706                        return -EINVAL;
1707                }
1708                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1709                break;
1710        case SX_MEMORY_EXPORT_BASE:
1711                if (p->rdev->family >= CHIP_CAYMAN) {
1712                        dev_warn(p->dev, "bad SET_CONFIG_REG "
1713                                 "0x%04X\n", reg);
1714                        return -EINVAL;
1715                }
1716                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1717                if (r) {
1718                        dev_warn(p->dev, "bad SET_CONFIG_REG "
1719                                        "0x%04X\n", reg);
1720                        return -EINVAL;
1721                }
1722                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1723                break;
1724        case CAYMAN_SX_SCATTER_EXPORT_BASE:
1725                if (p->rdev->family < CHIP_CAYMAN) {
1726                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1727                                 "0x%04X\n", reg);
1728                        return -EINVAL;
1729                }
1730                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1731                if (r) {
1732                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1733                                        "0x%04X\n", reg);
1734                        return -EINVAL;
1735                }
1736                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1737                break;
1738        case SX_MISC:
1739                track->sx_misc_kill_all_prims = (radeon_get_ib_value(p, idx) & 0x1) != 0;
1740                break;
1741        default:
1742                dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
1743                return -EINVAL;
1744        }
1745        return 0;
1746}
1747
1748/**
1749 * evergreen_is_safe_reg() - check if register is authorized or not
1750 * @p: parser structure holding parsing context
1751 * @reg: register we are testing
1752 *
1753 * This function tests the register against reg_safe_bm and returns
1754 * true if the register is safe, false otherwise.
1755 */
1756static inline bool evergreen_is_safe_reg(struct radeon_cs_parser *p, u32 reg)
1757{
1758        struct evergreen_cs_track *track = p->track;
1759        u32 m, i;
1760
1761        i = (reg >> 7);
1762        if (unlikely(i >= REG_SAFE_BM_SIZE)) {
1763                return false;
1764        }
1765        m = 1 << ((reg >> 2) & 31);
1766        if (!(track->reg_safe_bm[i] & m))
1767                return true;
1768
1769        return false;
1770}
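/*
 * Worked example for the bitmap lookup above (illustrative values only):
 * each u32 in reg_safe_bm covers 32 dword registers, i.e. 128 bytes of
 * register space, so reg >> 7 selects the word and (reg >> 2) & 31
 * selects the bit.  For a hypothetical reg = 0x28350:
 *
 *     i = 0x28350 >> 7                 = 0x506
 *     m = 1 << ((0x28350 >> 2) & 31)   = 1 << 20
 *
 * A clear bit means the register needs no special handling and the
 * packet parser may pass it straight through.
 */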
1771
1772static int evergreen_packet3_check(struct radeon_cs_parser *p,
1773                                   struct radeon_cs_packet *pkt)
1774{
1775        struct radeon_bo_list *reloc;
1776        struct evergreen_cs_track *track;
1777        uint32_t *ib;
1778        unsigned idx;
1779        unsigned i;
1780        unsigned start_reg, end_reg, reg;
1781        int r;
1782        u32 idx_value;
1783
1784        track = (struct evergreen_cs_track *)p->track;
1785        ib = p->ib.ptr;
1786        idx = pkt->idx + 1;
1787        idx_value = radeon_get_ib_value(p, idx);
1788
1789        switch (pkt->opcode) {
1790        case PACKET3_SET_PREDICATION:
1791        {
1792                int pred_op;
1793                int tmp;
1794                uint64_t offset;
1795
1796                if (pkt->count != 1) {
1797                        DRM_ERROR("bad SET PREDICATION\n");
1798                        return -EINVAL;
1799                }
1800
1801                tmp = radeon_get_ib_value(p, idx + 1);
1802                pred_op = (tmp >> 16) & 0x7;
1803
1804                /* for the clear predicate operation */
1805                if (pred_op == 0)
1806                        return 0;
1807
1808                if (pred_op > 2) {
1809                        DRM_ERROR("bad SET PREDICATION operation %d\n", pred_op);
1810                        return -EINVAL;
1811                }
1812
1813                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1814                if (r) {
1815                        DRM_ERROR("bad SET PREDICATION\n");
1816                        return -EINVAL;
1817                }
1818
1819                offset = reloc->gpu_offset +
1820                         (idx_value & 0xfffffff0) +
1821                         ((u64)(tmp & 0xff) << 32);
1822
1823                ib[idx + 0] = offset;
1824                ib[idx + 1] = (tmp & 0xffffff00) | (upper_32_bits(offset) & 0xff);
1825        }
1826        break;
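        /*
         * Illustrative sketch of the predication address patching above:
         * the packet carries a 40-bit GPU address split across two
         * dwords, bits [31:4] in the first and bits [39:32] in the low
         * byte of the second.  With a hypothetical reloc->gpu_offset of
         * 0x1_00000000, ib[idx] receives the low 32 bits and ib[idx+1]
         * keeps its control bits while taking 0x01 in its low byte.
         */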
1827        case PACKET3_CONTEXT_CONTROL:
1828                if (pkt->count != 1) {
1829                        DRM_ERROR("bad CONTEXT_CONTROL\n");
1830                        return -EINVAL;
1831                }
1832                break;
1833        case PACKET3_INDEX_TYPE:
1834        case PACKET3_NUM_INSTANCES:
1835        case PACKET3_CLEAR_STATE:
1836                if (pkt->count) {
1837                        DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n");
1838                        return -EINVAL;
1839                }
1840                break;
1841        case CAYMAN_PACKET3_DEALLOC_STATE:
1842                if (p->rdev->family < CHIP_CAYMAN) {
1843                        DRM_ERROR("bad PACKET3_DEALLOC_STATE\n");
1844                        return -EINVAL;
1845                }
1846                if (pkt->count) {
1847                        DRM_ERROR("bad DEALLOC_STATE\n");
1848                        return -EINVAL;
1849                }
1850                break;
1851        case PACKET3_INDEX_BASE:
1852        {
1853                uint64_t offset;
1854
1855                if (pkt->count != 1) {
1856                        DRM_ERROR("bad INDEX_BASE\n");
1857                        return -EINVAL;
1858                }
1859                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1860                if (r) {
1861                        DRM_ERROR("bad INDEX_BASE\n");
1862                        return -EINVAL;
1863                }
1864
1865                offset = reloc->gpu_offset +
1866                         idx_value +
1867                         ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
1868
1869                ib[idx+0] = offset;
1870                ib[idx+1] = upper_32_bits(offset) & 0xff;
1871
1872                r = evergreen_cs_track_check(p);
1873                if (r) {
1874                        dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1875                        return r;
1876                }
1877                break;
1878        }
1879        case PACKET3_INDEX_BUFFER_SIZE:
1880        {
1881                if (pkt->count != 0) {
1882                        DRM_ERROR("bad INDEX_BUFFER_SIZE\n");
1883                        return -EINVAL;
1884                }
1885                break;
1886        }
1887        case PACKET3_DRAW_INDEX:
1888        {
1889                uint64_t offset;
1890                if (pkt->count != 3) {
1891                        DRM_ERROR("bad DRAW_INDEX\n");
1892                        return -EINVAL;
1893                }
1894                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1895                if (r) {
1896                        DRM_ERROR("bad DRAW_INDEX\n");
1897                        return -EINVAL;
1898                }
1899
1900                offset = reloc->gpu_offset +
1901                         idx_value +
1902                         ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
1903
1904                ib[idx+0] = offset;
1905                ib[idx+1] = upper_32_bits(offset) & 0xff;
1906
1907                r = evergreen_cs_track_check(p);
1908                if (r) {
1909                        dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1910                        return r;
1911                }
1912                break;
1913        }
1914        case PACKET3_DRAW_INDEX_2:
1915        {
1916                uint64_t offset;
1917
1918                if (pkt->count != 4) {
1919                        DRM_ERROR("bad DRAW_INDEX_2\n");
1920                        return -EINVAL;
1921                }
1922                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1923                if (r) {
1924                        DRM_ERROR("bad DRAW_INDEX_2\n");
1925                        return -EINVAL;
1926                }
1927
1928                offset = reloc->gpu_offset +
1929                         radeon_get_ib_value(p, idx+1) +
1930                         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
1931
1932                ib[idx+1] = offset;
1933                ib[idx+2] = upper_32_bits(offset) & 0xff;
1934
1935                r = evergreen_cs_track_check(p);
1936                if (r) {
1937                        dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1938                        return r;
1939                }
1940                break;
1941        }
1942        case PACKET3_DRAW_INDEX_AUTO:
1943                if (pkt->count != 1) {
1944                        DRM_ERROR("bad DRAW_INDEX_AUTO\n");
1945                        return -EINVAL;
1946                }
1947                r = evergreen_cs_track_check(p);
1948                if (r) {
1949                        dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
1950                        return r;
1951                }
1952                break;
1953        case PACKET3_DRAW_INDEX_MULTI_AUTO:
1954                if (pkt->count != 2) {
1955                        DRM_ERROR("bad DRAW_INDEX_MULTI_AUTO\n");
1956                        return -EINVAL;
1957                }
1958                r = evergreen_cs_track_check(p);
1959                if (r) {
1960                        dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
1961                        return r;
1962                }
1963                break;
1964        case PACKET3_DRAW_INDEX_IMMD:
1965                if (pkt->count < 2) {
1966                        DRM_ERROR("bad DRAW_INDEX_IMMD\n");
1967                        return -EINVAL;
1968                }
1969                r = evergreen_cs_track_check(p);
1970                if (r) {
1971                        dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1972                        return r;
1973                }
1974                break;
1975        case PACKET3_DRAW_INDEX_OFFSET:
1976                if (pkt->count != 2) {
1977                        DRM_ERROR("bad DRAW_INDEX_OFFSET\n");
1978                        return -EINVAL;
1979                }
1980                r = evergreen_cs_track_check(p);
1981                if (r) {
1982                        dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1983                        return r;
1984                }
1985                break;
1986        case PACKET3_DRAW_INDEX_OFFSET_2:
1987                if (pkt->count != 3) {
1988                        DRM_ERROR("bad DRAW_INDEX_OFFSET_2\n");
1989                        return -EINVAL;
1990                }
1991                r = evergreen_cs_track_check(p);
1992                if (r) {
1993                        dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1994                        return r;
1995                }
1996                break;
1997        case PACKET3_SET_BASE:
1998        {
1999                /*
2000                DW 1 HEADER Header of the packet. Shader_Type in bit 1 of the Header will correspond to the shader type of the Load, see Type-3 Packet.
2001                   2 BASE_INDEX Bits [3:0] BASE_INDEX - Base Index specifies which base address is specified in the last two DWs.
2002                     0001: DX11 Draw_Index_Indirect Patch Table Base: Base address for Draw_Index_Indirect data.
2003                   3 ADDRESS_LO Bits [31:3] - Lower bits of QWORD-Aligned Address. Bits [2:0] - Reserved
2004                   4 ADDRESS_HI Bits [31:8] - Reserved. Bits [7:0] - Upper bits of Address [47:32]
2005                */
2006                if (pkt->count != 2) {
2007                        DRM_ERROR("bad SET_BASE\n");
2008                        return -EINVAL;
2009                }
2010
2011                /* currently only supporting setting indirect draw buffer base address */
2012                if (idx_value != 1) {
2013                        DRM_ERROR("bad SET_BASE\n");
2014                        return -EINVAL;
2015                }
2016
2017                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2018                if (r) {
2019                        DRM_ERROR("bad SET_BASE\n");
2020                        return -EINVAL;
2021                }
2022
2023                track->indirect_draw_buffer_size = radeon_bo_size(reloc->robj);
2024
2025                ib[idx+1] = reloc->gpu_offset;
2026                ib[idx+2] = upper_32_bits(reloc->gpu_offset) & 0xff;
2027
2028                break;
2029        }
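        /*
         * Hypothetical userspace encoding accepted by the check above
         * (a sketch following the packet description; values invented):
         *
         *     ib[n+0] = PACKET3(PACKET3_SET_BASE, 2);
         *     ib[n+1] = 1;         -- BASE_INDEX: indirect draw base
         *     ib[n+2] = addr_lo;   -- QWORD-aligned address bits [31:3]
         *     ib[n+3] = addr_hi;   -- address bits [47:32] in bits [7:0]
         */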
2030        case PACKET3_DRAW_INDIRECT:
2031        case PACKET3_DRAW_INDEX_INDIRECT:
2032        {
2033                u64 size = pkt->opcode == PACKET3_DRAW_INDIRECT ? 16 : 20;
2034
2035                /*
2036                DW 1 HEADER
2037                   2 DATA_OFFSET Bits [31:0] byte-aligned offset where the required data structure starts. Bits 1:0 are zero
2038                   3 DRAW_INITIATOR Draw Initiator Register. Written to the VGT_DRAW_INITIATOR register for the assigned context
2039                */
2040                if (pkt->count != 1) {
2041                        DRM_ERROR("bad DRAW_INDIRECT\n");
2042                        return -EINVAL;
2043                }
2044
2045                if (idx_value + size > track->indirect_draw_buffer_size) {
2046                        dev_warn(p->dev, "DRAW_INDIRECT buffer too small %u + %llu > %lu\n",
2047                                idx_value, size, track->indirect_draw_buffer_size);
2048                        return -EINVAL;
2049                }
2050
2051                r = evergreen_cs_track_check(p);
2052                if (r) {
2053                        dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2054                        return r;
2055                }
2056                break;
2057        }
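        /*
         * Note on the size bound above (an assumption from the indirect
         * argument formats, not stated in this file): the arguments read
         * from the SET_BASE buffer are four dwords (16 bytes) for
         * DRAW_INDIRECT and five dwords (20 bytes) for
         * DRAW_INDEX_INDIRECT, so the check rejects any DATA_OFFSET whose
         * structure would run past the bound buffer.
         */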
2058        case PACKET3_DISPATCH_DIRECT:
2059                if (pkt->count != 3) {
2060                        DRM_ERROR("bad DISPATCH_DIRECT\n");
2061                        return -EINVAL;
2062                }
2063                r = evergreen_cs_track_check(p);
2064                if (r) {
2065                        dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
2066                        return r;
2067                }
2068                break;
2069        case PACKET3_DISPATCH_INDIRECT:
2070                if (pkt->count != 1) {
2071                        DRM_ERROR("bad DISPATCH_INDIRECT\n");
2072                        return -EINVAL;
2073                }
2074                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2075                if (r) {
2076                        DRM_ERROR("bad DISPATCH_INDIRECT\n");
2077                        return -EINVAL;
2078                }
2079                ib[idx+0] = idx_value + (u32)(reloc->gpu_offset & 0xffffffff);
2080                r = evergreen_cs_track_check(p);
2081                if (r) {
2082                        dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2083                        return r;
2084                }
2085                break;
2086        case PACKET3_WAIT_REG_MEM:
2087                if (pkt->count != 5) {
2088                        DRM_ERROR("bad WAIT_REG_MEM\n");
2089                        return -EINVAL;
2090                }
2091                /* bit 4 is reg (0) or mem (1) */
2092                if (idx_value & 0x10) {
2093                        uint64_t offset;
2094
2095                        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2096                        if (r) {
2097                                DRM_ERROR("bad WAIT_REG_MEM\n");
2098                                return -EINVAL;
2099                        }
2100
2101                        offset = reloc->gpu_offset +
2102                                 (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2103                                 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2104
2105                        ib[idx+1] = (ib[idx+1] & 0x3) | (offset & 0xfffffffc);
2106                        ib[idx+2] = upper_32_bits(offset) & 0xff;
2107                } else if (idx_value & 0x100) {
2108                        DRM_ERROR("cannot use PFP on REG wait\n");
2109                        return -EINVAL;
2110                }
2111                break;
2112        case PACKET3_CP_DMA:
2113        {
2114                u32 command, size, info;
2115                u64 offset, tmp;
2116                if (pkt->count != 4) {
2117                        DRM_ERROR("bad CP DMA\n");
2118                        return -EINVAL;
2119                }
2120                command = radeon_get_ib_value(p, idx+4);
2121                size = command & 0x1fffff;
2122                info = radeon_get_ib_value(p, idx+1);
2123                if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
2124                    (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
2125                    ((((info & 0x00300000) >> 20) == 0) &&
2126                     (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
2127                    ((((info & 0x60000000) >> 29) == 0) &&
2128                     (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
2129                        /* non mem-to-mem copies require a dw-aligned count */
2130                        if (size % 4) {
2131                                DRM_ERROR("CP DMA command requires dw count alignment\n");
2132                                return -EINVAL;
2133                        }
2134                }
2135                if (command & PACKET3_CP_DMA_CMD_SAS) {
2136                        /* src address space is register */
2137                        /* GDS is ok */
2138                        if (((info & 0x60000000) >> 29) != 1) {
2139                                DRM_ERROR("CP DMA SAS not supported\n");
2140                                return -EINVAL;
2141                        }
2142                } else {
2143                        if (command & PACKET3_CP_DMA_CMD_SAIC) {
2144                                DRM_ERROR("CP DMA SAIC only supported for registers\n");
2145                                return -EINVAL;
2146                        }
2147                        /* src address space is memory */
2148                        if (((info & 0x60000000) >> 29) == 0) {
2149                                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2150                                if (r) {
2151                                        DRM_ERROR("bad CP DMA SRC\n");
2152                                        return -EINVAL;
2153                                }
2154
2155                                tmp = radeon_get_ib_value(p, idx) +
2156                                        ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
2157
2158                                offset = reloc->gpu_offset + tmp;
2159
2160                                if ((tmp + size) > radeon_bo_size(reloc->robj)) {
2161                                        dev_warn(p->dev, "CP DMA src buffer too small (%llu %lu)\n",
2162                                                 tmp + size, radeon_bo_size(reloc->robj));
2163                                        return -EINVAL;
2164                                }
2165
2166                                ib[idx] = offset;
2167                                ib[idx+1] = (ib[idx+1] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2168                        } else if (((info & 0x60000000) >> 29) != 2) {
2169                                DRM_ERROR("bad CP DMA SRC_SEL\n");
2170                                return -EINVAL;
2171                        }
2172                }
2173                if (command & PACKET3_CP_DMA_CMD_DAS) {
2174                        /* dst address space is register */
2175                        /* GDS is ok */
2176                        if (((info & 0x00300000) >> 20) != 1) {
2177                                DRM_ERROR("CP DMA DAS not supported\n");
2178                                return -EINVAL;
2179                        }
2180                } else {
2181                        /* dst address space is memory */
2182                        if (command & PACKET3_CP_DMA_CMD_DAIC) {
2183                                DRM_ERROR("CP DMA DAIC only supported for registers\n");
2184                                return -EINVAL;
2185                        }
2186                        if (((info & 0x00300000) >> 20) == 0) {
2187                                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2188                                if (r) {
2189                                        DRM_ERROR("bad CP DMA DST\n");
2190                                        return -EINVAL;
2191                                }
2192
2193                                tmp = radeon_get_ib_value(p, idx+2) +
2194                                        ((u64)(radeon_get_ib_value(p, idx+3) & 0xff) << 32);
2195
2196                                offset = reloc->gpu_offset + tmp;
2197
2198                                if ((tmp + size) > radeon_bo_size(reloc->robj)) {
2199                                        dev_warn(p->dev, "CP DMA dst buffer too small (%llu %lu)\n",
2200                                                 tmp + size, radeon_bo_size(reloc->robj));
2201                                        return -EINVAL;
2202                                }
2203
2204                                ib[idx+2] = offset;
2205                                ib[idx+3] = upper_32_bits(offset) & 0xff;
2206                        } else {
2207                                DRM_ERROR("bad CP DMA DST_SEL\n");
2208                                return -EINVAL;
2209                        }
2210                }
2211                break;
2212        }
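        /*
         * Field-decoding cheat sheet for the CP_DMA checks above (derived
         * from the masks used in the code, hedged):
         *
         *     info[30:29]   SRC_SEL   0 = memory, 1 = GDS, 2 = DATA
         *     info[21:20]   DST_SEL   0 = memory, 1 = GDS
         *     command[20:0]           byte count
         *
         * CMD_SAS/CMD_DAS switch the source/destination address space
         * away from memory, and CMD_SAIC/CMD_DAIC suppress address
         * increment, which the checker only allows for register-space
         * transfers.
         */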
2213        case PACKET3_PFP_SYNC_ME:
2214                if (pkt->count) {
2215                        DRM_ERROR("bad PFP_SYNC_ME\n");
2216                        return -EINVAL;
2217                }
2218                break;
2219        case PACKET3_SURFACE_SYNC:
2220                if (pkt->count != 3) {
2221                        DRM_ERROR("bad SURFACE_SYNC\n");
2222                        return -EINVAL;
2223                }
2224                /* 0xffffffff/0x0 means flush all caches */
2225                if (radeon_get_ib_value(p, idx + 1) != 0xffffffff ||
2226                    radeon_get_ib_value(p, idx + 2) != 0) {
2227                        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2228                        if (r) {
2229                                DRM_ERROR("bad SURFACE_SYNC\n");
2230                                return -EINVAL;
2231                        }
2232                        ib[idx+2] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
2233                }
2234                break;
2235        case PACKET3_EVENT_WRITE:
2236                if (pkt->count != 2 && pkt->count != 0) {
2237                        DRM_ERROR("bad EVENT_WRITE\n");
2238                        return -EINVAL;
2239                }
2240                if (pkt->count) {
2241                        uint64_t offset;
2242
2243                        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2244                        if (r) {
2245                                DRM_ERROR("bad EVENT_WRITE\n");
2246                                return -EINVAL;
2247                        }
2248                        offset = reloc->gpu_offset +
2249                                 (radeon_get_ib_value(p, idx+1) & 0xfffffff8) +
2250                                 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2251
2252                        ib[idx+1] = offset & 0xfffffff8;
2253                        ib[idx+2] = upper_32_bits(offset) & 0xff;
2254                }
2255                break;
2256        case PACKET3_EVENT_WRITE_EOP:
2257        {
2258                uint64_t offset;
2259
2260                if (pkt->count != 4) {
2261                        DRM_ERROR("bad EVENT_WRITE_EOP\n");
2262                        return -EINVAL;
2263                }
2264                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2265                if (r) {
2266                        DRM_ERROR("bad EVENT_WRITE_EOP\n");
2267                        return -EINVAL;
2268                }
2269
2270                offset = reloc->gpu_offset +
2271                         (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2272                         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2273
2274                ib[idx+1] = offset & 0xfffffffc;
2275                ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2276                break;
2277        }
2278        case PACKET3_EVENT_WRITE_EOS:
2279        {
2280                uint64_t offset;
2281
2282                if (pkt->count != 3) {
2283                        DRM_ERROR("bad EVENT_WRITE_EOS\n");
2284                        return -EINVAL;
2285                }
2286                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2287                if (r) {
2288                        DRM_ERROR("bad EVENT_WRITE_EOS\n");
2289                        return -EINVAL;
2290                }
2291
2292                offset = reloc->gpu_offset +
2293                         (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2294                         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2295
2296                ib[idx+1] = offset & 0xfffffffc;
2297                ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2298                break;
2299        }
2300        case PACKET3_SET_CONFIG_REG:
2301                start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
2302                end_reg = 4 * pkt->count + start_reg - 4;
2303                if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
2304                    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
2305                    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
2306                        DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
2307                        return -EINVAL;
2308                }
2309                for (reg = start_reg, idx++; reg <= end_reg; reg += 4, idx++) {
2310                        if (evergreen_is_safe_reg(p, reg))
2311                                continue;
2312                        r = evergreen_cs_handle_reg(p, reg, idx);
2313                        if (r)
2314                                return r;
2315                }
2316                break;
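        /*
         * Illustrative arithmetic for the range check above: the packet
         * body starts with a dword register offset, so a hypothetical
         * SET_CONFIG_REG with idx_value = 0x10 and pkt->count = 3 spans
         * byte offsets START+0x40 .. START+0x48, and every register in
         * that window is vetted individually by the loop.
         */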
2317        case PACKET3_SET_CONTEXT_REG:
2318                start_reg = (idx_value << 2) + PACKET3_SET_CONTEXT_REG_START;
2319                end_reg = 4 * pkt->count + start_reg - 4;
2320                if ((start_reg < PACKET3_SET_CONTEXT_REG_START) ||
2321                    (start_reg >= PACKET3_SET_CONTEXT_REG_END) ||
2322                    (end_reg >= PACKET3_SET_CONTEXT_REG_END)) {
2323                        DRM_ERROR("bad PACKET3_SET_CONTEXT_REG\n");
2324                        return -EINVAL;
2325                }
2326                for (reg = start_reg, idx++; reg <= end_reg; reg += 4, idx++) {
2327                        if (evergreen_is_safe_reg(p, reg))
2328                                continue;
2329                        r = evergreen_cs_handle_reg(p, reg, idx);
2330                        if (r)
2331                                return r;
2332                }
2333                break;
2334        case PACKET3_SET_RESOURCE:
2335                if (pkt->count % 8) {
2336                        DRM_ERROR("bad SET_RESOURCE\n");
2337                        return -EINVAL;
2338                }
2339                start_reg = (idx_value << 2) + PACKET3_SET_RESOURCE_START;
2340                end_reg = 4 * pkt->count + start_reg - 4;
2341                if ((start_reg < PACKET3_SET_RESOURCE_START) ||
2342                    (start_reg >= PACKET3_SET_RESOURCE_END) ||
2343                    (end_reg >= PACKET3_SET_RESOURCE_END)) {
2344                        DRM_ERROR("bad SET_RESOURCE\n");
2345                        return -EINVAL;
2346                }
2347                for (i = 0; i < (pkt->count / 8); i++) {
2348                        struct radeon_bo *texture, *mipmap;
2349                        u32 toffset, moffset;
2350                        u32 size, offset, mip_address, tex_dim;
2351
2352                        switch (G__SQ_CONSTANT_TYPE(radeon_get_ib_value(p, idx+1+(i*8)+7))) {
2353                        case SQ_TEX_VTX_VALID_TEXTURE:
2354                                /* tex base */
2355                                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2356                                if (r) {
2357                                        DRM_ERROR("bad SET_RESOURCE (tex)\n");
2358                                        return -EINVAL;
2359                                }
2360                                if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
2361                                        ib[idx+1+(i*8)+1] |=
2362                                                TEX_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
2363                                        if (reloc->tiling_flags & RADEON_TILING_MACRO) {
2364                                                unsigned bankw, bankh, mtaspect, tile_split;
2365
2366                                                evergreen_tiling_fields(reloc->tiling_flags,
2367                                                                        &bankw, &bankh, &mtaspect,
2368                                                                        &tile_split);
2369                                                ib[idx+1+(i*8)+6] |= TEX_TILE_SPLIT(tile_split);
2370                                                ib[idx+1+(i*8)+7] |=
2371                                                        TEX_BANK_WIDTH(bankw) |
2372                                                        TEX_BANK_HEIGHT(bankh) |
2373                                                        MACRO_TILE_ASPECT(mtaspect) |
2374                                                        TEX_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
2375                                        }
2376                                }
2377                                texture = reloc->robj;
2378                                toffset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
2379
2380                                /* tex mip base */
2381                                tex_dim = ib[idx+1+(i*8)+0] & 0x7;
2382                                mip_address = ib[idx+1+(i*8)+3];
2383
2384                                if ((tex_dim == SQ_TEX_DIM_2D_MSAA || tex_dim == SQ_TEX_DIM_2D_ARRAY_MSAA) &&
2385                                    !mip_address &&
2386                                    !radeon_cs_packet_next_is_pkt3_nop(p)) {
2387                                        /* MIP_ADDRESS should point to FMASK for an MSAA texture.
2388                                         * It should be 0 if FMASK is disabled. */
2389                                        moffset = 0;
2390                                        mipmap = NULL;
2391                                } else {
2392                                        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2393                                        if (r) {
2394                                                DRM_ERROR("bad SET_RESOURCE (tex)\n");
2395                                                return -EINVAL;
2396                                        }
2397                                        moffset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
2398                                        mipmap = reloc->robj;
2399                                }
2400
2401                                r = evergreen_cs_track_validate_texture(p, texture, mipmap, idx+1+(i*8));
2402                                if (r)
2403                                        return r;
2404                                ib[idx+1+(i*8)+2] += toffset;
2405                                ib[idx+1+(i*8)+3] += moffset;
2406                                break;
2407                        case SQ_TEX_VTX_VALID_BUFFER:
2408                        {
2409                                uint64_t offset64;
2410                                /* vtx base */
2411                                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2412                                if (r) {
2413                                        DRM_ERROR("bad SET_RESOURCE (vtx)\n");
2414                                        return -EINVAL;
2415                                }
2416                                offset = radeon_get_ib_value(p, idx+1+(i*8)+0);
2417                                size = radeon_get_ib_value(p, idx+1+(i*8)+1);
2418                                if (p->rdev && (size + offset) > radeon_bo_size(reloc->robj)) {
2419                                        /* force size to size of the buffer */
2420                                        dev_warn_ratelimited(p->dev, "vbo resource seems too big for the bo\n");
2421                                        ib[idx+1+(i*8)+1] = radeon_bo_size(reloc->robj) - offset;
2422                                }
2423
2424                                offset64 = reloc->gpu_offset + offset;
2425                                ib[idx+1+(i*8)+0] = offset64;
2426                                ib[idx+1+(i*8)+2] = (ib[idx+1+(i*8)+2] & 0xffffff00) |
2427                                                    (upper_32_bits(offset64) & 0xff);
2428                                break;
2429                        }
2430                        case SQ_TEX_VTX_INVALID_TEXTURE:
2431                        case SQ_TEX_VTX_INVALID_BUFFER:
2432                        default:
2433                                DRM_ERROR("bad SET_RESOURCE\n");
2434                                return -EINVAL;
2435                        }
2436                }
2437                break;
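        /*
         * Layout assumed by the loop above (a sketch inferred from the
         * offsets used in the code): each resource is an eight-dword
         * descriptor, hence the pkt->count % 8 check.  Dword 7 carries
         * the SQ_CONSTANT_TYPE that routes the slot to the texture or
         * vertex-buffer path, dwords 2/3 hold the texture and mip base
         * addresses, and dwords 0/1 hold the vertex buffer address and
         * size.
         */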
2438        case PACKET3_SET_ALU_CONST:
2439                /* XXX fix me ALU const buffers only */
2440                break;
2441        case PACKET3_SET_BOOL_CONST:
2442                start_reg = (idx_value << 2) + PACKET3_SET_BOOL_CONST_START;
2443                end_reg = 4 * pkt->count + start_reg - 4;
2444                if ((start_reg < PACKET3_SET_BOOL_CONST_START) ||
2445                    (start_reg >= PACKET3_SET_BOOL_CONST_END) ||
2446                    (end_reg >= PACKET3_SET_BOOL_CONST_END)) {
2447                        DRM_ERROR("bad SET_BOOL_CONST\n");
2448                        return -EINVAL;
2449                }
2450                break;
2451        case PACKET3_SET_LOOP_CONST:
2452                start_reg = (idx_value << 2) + PACKET3_SET_LOOP_CONST_START;
2453                end_reg = 4 * pkt->count + start_reg - 4;
2454                if ((start_reg < PACKET3_SET_LOOP_CONST_START) ||
2455                    (start_reg >= PACKET3_SET_LOOP_CONST_END) ||
2456                    (end_reg >= PACKET3_SET_LOOP_CONST_END)) {
2457                        DRM_ERROR("bad SET_LOOP_CONST\n");
2458                        return -EINVAL;
2459                }
2460                break;
2461        case PACKET3_SET_CTL_CONST:
2462                start_reg = (idx_value << 2) + PACKET3_SET_CTL_CONST_START;
2463                end_reg = 4 * pkt->count + start_reg - 4;
2464                if ((start_reg < PACKET3_SET_CTL_CONST_START) ||
2465                    (start_reg >= PACKET3_SET_CTL_CONST_END) ||
2466                    (end_reg >= PACKET3_SET_CTL_CONST_END)) {
2467                        DRM_ERROR("bad SET_CTL_CONST\n");
2468                        return -EINVAL;
2469                }
2470                break;
2471        case PACKET3_SET_SAMPLER:
2472                if (pkt->count % 3) {
2473                        DRM_ERROR("bad SET_SAMPLER\n");
2474                        return -EINVAL;
2475                }
2476                start_reg = (idx_value << 2) + PACKET3_SET_SAMPLER_START;
2477                end_reg = 4 * pkt->count + start_reg - 4;
2478                if ((start_reg < PACKET3_SET_SAMPLER_START) ||
2479                    (start_reg >= PACKET3_SET_SAMPLER_END) ||
2480                    (end_reg >= PACKET3_SET_SAMPLER_END)) {
2481                        DRM_ERROR("bad SET_SAMPLER\n");
2482                        return -EINVAL;
2483                }
2484                break;
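            /*
             * STRMOUT_BUFFER_UPDATE control word: bit 0 set means the
             * packet stores the update to memory at DST_ADDRESS, and a
             * source select of 2 in bits 2:1 means it reads from memory
             * at SRC_ADDRESS; only those memory operands need their
             * relocations applied.  As in most packets here, each address
             * is split into a low dword plus 8 high bits, i.e. a 40-bit
             * GPU address.
             */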
2485        case PACKET3_STRMOUT_BUFFER_UPDATE:
2486                if (pkt->count != 4) {
2487                        DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (invalid count)\n");
2488                        return -EINVAL;
2489                }
2490                /* Updating memory at DST_ADDRESS. */
2491                if (idx_value & 0x1) {
2492                        u64 offset;
2493                        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2494                        if (r) {
2495                                DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing dst reloc)\n");
2496                                return -EINVAL;
2497                        }
2498                        offset = radeon_get_ib_value(p, idx+1);
2499                        offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2500                        if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2501                                DRM_ERROR("bad STRMOUT_BUFFER_UPDATE dst bo too small: 0x%llx, 0x%lx\n",
2502                                          offset + 4, radeon_bo_size(reloc->robj));
2503                                return -EINVAL;
2504                        }
2505                        offset += reloc->gpu_offset;
2506                        ib[idx+1] = offset;
2507                        ib[idx+2] = upper_32_bits(offset) & 0xff;
2508                }
2509                /* Reading data from SRC_ADDRESS. */
2510                if (((idx_value >> 1) & 0x3) == 2) {
2511                        u64 offset;
2512                        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2513                        if (r) {
2514                                DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing src reloc)\n");
2515                                return -EINVAL;
2516                        }
2517                        offset = radeon_get_ib_value(p, idx+3);
2518                        offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2519                        if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2520                                DRM_ERROR("bad STRMOUT_BUFFER_UPDATE src bo too small: 0x%llx, 0x%lx\n",
2521                                          offset + 4, radeon_bo_size(reloc->robj));
2522                                return -EINVAL;
2523                        }
2524                        offset += reloc->gpu_offset;
2525                        ib[idx+3] = offset;
2526                        ib[idx+4] = upper_32_bits(offset) & 0xff;
2527                }
2528                break;
2529        case PACKET3_MEM_WRITE:
2530        {
2531                u64 offset;
2532
2533                if (pkt->count != 3) {
2534                        DRM_ERROR("bad MEM_WRITE (invalid count)\n");
2535                        return -EINVAL;
2536                }
2537                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2538                if (r) {
2539                        DRM_ERROR("bad MEM_WRITE (missing reloc)\n");
2540                        return -EINVAL;
2541                }
2542                offset = radeon_get_ib_value(p, idx+0);
2543                offset += ((u64)(radeon_get_ib_value(p, idx+1) & 0xff)) << 32UL;
2544                if (offset & 0x7) {
2545                        DRM_ERROR("bad MEM_WRITE (address not qwords aligned)\n");
2546                        return -EINVAL;
2547                }
2548                if ((offset + 8) > radeon_bo_size(reloc->robj)) {
2549                        DRM_ERROR("bad MEM_WRITE bo too small: 0x%llx, 0x%lx\n",
2550                                  offset + 8, radeon_bo_size(reloc->robj));
2551                        return -EINVAL;
2552                }
2553                offset += reloc->gpu_offset;
2554                ib[idx+0] = offset;
2555                ib[idx+1] = upper_32_bits(offset) & 0xff;
2556                break;
2557        }
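            /*
             * COPY_DW moves a single dword; bit 0 of the control word
             * selects a memory (1) or register (0) source and bit 1 does
             * the same for the destination.  Memory operands are
             * relocated, register operands must pass the reg-safe bitmap
             * check.
             */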
2558        case PACKET3_COPY_DW:
2559                if (pkt->count != 4) {
2560                        DRM_ERROR("bad COPY_DW (invalid count)\n");
2561                        return -EINVAL;
2562                }
2563                if (idx_value & 0x1) {
2564                        u64 offset;
2565                        /* SRC is memory. */
2566                        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2567                        if (r) {
2568                                DRM_ERROR("bad COPY_DW (missing src reloc)\n");
2569                                return -EINVAL;
2570                        }
2571                        offset = radeon_get_ib_value(p, idx+1);
2572                        offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2573                        if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2574                                DRM_ERROR("bad COPY_DW src bo too small: 0x%llx, 0x%lx\n",
2575                                          offset + 4, radeon_bo_size(reloc->robj));
2576                                return -EINVAL;
2577                        }
2578                        offset += reloc->gpu_offset;
2579                        ib[idx+1] = offset;
2580                        ib[idx+2] = upper_32_bits(offset) & 0xff;
2581                } else {
2582                        /* SRC is a reg. */
2583                        reg = radeon_get_ib_value(p, idx+1) << 2;
2584                        if (!evergreen_is_safe_reg(p, reg)) {
2585                                dev_warn(p->dev, "forbidden register 0x%08x at %d\n",
2586                                         reg, idx + 1);
2587                                return -EINVAL;
2588                        }
2589                }
2590                if (idx_value & 0x2) {
2591                        u64 offset;
2592                        /* DST is memory. */
2593                        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2594                        if (r) {
2595                                DRM_ERROR("bad COPY_DW (missing dst reloc)\n");
2596                                return -EINVAL;
2597                        }
2598                        offset = radeon_get_ib_value(p, idx+3);
2599                        offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2600                        if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2601                                DRM_ERROR("bad COPY_DW dst bo too small: 0x%llx, 0x%lx\n",
2602                                          offset + 4, radeon_bo_size(reloc->robj));
2603                                return -EINVAL;
2604                        }
2605                        offset += reloc->gpu_offset;
2606                        ib[idx+3] = offset;
2607                        ib[idx+4] = upper_32_bits(offset) & 0xff;
2608                } else {
2609                        /* DST is a reg. */
2610                        reg = radeon_get_ib_value(p, idx+3) << 2;
2611                        if (!evergreen_is_safe_reg(p, reg)) {
2612                                dev_warn(p->dev, "forbidden register 0x%08x at %d\n",
2613                                         reg, idx + 3);
2614                                return -EINVAL;
2615                        }
2616                }
2617                break;
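            /*
             * SET_APPEND_CNT names its target as a context-register word
             * index in the upper 16 bits of the first packet dword, so
             * the allowed base below is GDS_APPEND_COUNT_0 converted to
             * that index; only the twelve consecutive append counters
             * starting there are accepted.
             */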
2618        case PACKET3_SET_APPEND_CNT:
2619        {
2620                uint32_t areg;
2621                uint32_t allowed_reg_base;
2622                uint32_t source_sel;
2623                if (pkt->count != 2) {
2624                        DRM_ERROR("bad SET_APPEND_CNT (invalid count)\n");
2625                        return -EINVAL;
2626                }
2627
2628                allowed_reg_base = GDS_APPEND_COUNT_0;
2629                allowed_reg_base -= PACKET3_SET_CONTEXT_REG_START;
2630                allowed_reg_base >>= 2;
2631
2632                areg = idx_value >> 16;
2633                if (areg < allowed_reg_base || areg > (allowed_reg_base + 11)) {
2634                        dev_warn(p->dev, "forbidden register for append cnt 0x%08x at %d\n",
2635                                 areg, idx);
2636                        return -EINVAL;
2637                }
2638
2639                source_sel = G_PACKET3_SET_APPEND_CNT_SRC_SELECT(idx_value);
2640                if (source_sel == PACKET3_SAC_SRC_SEL_MEM) {
2641                        uint64_t offset;
2642                        uint32_t swap;
2643                        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2644                        if (r) {
2645                                DRM_ERROR("bad SET_APPEND_CNT (missing reloc)\n");
2646                                return -EINVAL;
2647                        }
2648                        offset = radeon_get_ib_value(p, idx + 1);
2649                        swap = offset & 0x3;
2650                        offset &= ~0x3;
2651
2652                        offset += ((u64)(radeon_get_ib_value(p, idx + 2) & 0xff)) << 32;
2653
2654                        offset += reloc->gpu_offset;
2655                        ib[idx+1] = (offset & 0xfffffffc) | swap;
2656                        ib[idx+2] = upper_32_bits(offset) & 0xff;
2657                } else {
2658                        DRM_ERROR("bad SET_APPEND_CNT (unsupported operation)\n");
2659                        return -EINVAL;
2660                }
2661                break;
2662        }
2663        case PACKET3_NOP:
2664                break;
2665        default:
2666                DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
2667                return -EINVAL;
2668        }
2669        return 0;
2670}
2671
2672int evergreen_cs_parse(struct radeon_cs_parser *p)
2673{
2674        struct radeon_cs_packet pkt;
2675        struct evergreen_cs_track *track;
2676        u32 tmp;
2677        int r;
2678
2679        if (p->track == NULL) {
2680                /* initialize the tracker; we are in KMS */
2681                track = kzalloc(sizeof(*track), GFP_KERNEL);
2682                if (track == NULL)
2683                        return -ENOMEM;
2684                evergreen_cs_track_init(track);
2685                if (p->rdev->family >= CHIP_CAYMAN) {
2686                        tmp = p->rdev->config.cayman.tile_config;
2687                        track->reg_safe_bm = cayman_reg_safe_bm;
2688                } else {
2689                        tmp = p->rdev->config.evergreen.tile_config;
2690                        track->reg_safe_bm = evergreen_reg_safe_bm;
2691                }
2692                BUILD_BUG_ON(ARRAY_SIZE(cayman_reg_safe_bm) != REG_SAFE_BM_SIZE);
2693                BUILD_BUG_ON(ARRAY_SIZE(evergreen_reg_safe_bm) != REG_SAFE_BM_SIZE);
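                    /*
                     * tile_config packs the surface layout in nibbles:
                     * bits 3:0 select the pipe count, bits 7:4 the bank
                     * count, bits 11:8 the group size and bits 15:12 the
                     * row size.  The decoded values feed the tracker used
                     * by the texture and colorbuffer validators.
                     */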
2694                switch (tmp & 0xf) {
2695                case 0:
2696                        track->npipes = 1;
2697                        break;
2698                case 1:
2699                default:
2700                        track->npipes = 2;
2701                        break;
2702                case 2:
2703                        track->npipes = 4;
2704                        break;
2705                case 3:
2706                        track->npipes = 8;
2707                        break;
2708                }
2709
2710                switch ((tmp & 0xf0) >> 4) {
2711                case 0:
2712                        track->nbanks = 4;
2713                        break;
2714                case 1:
2715                default:
2716                        track->nbanks = 8;
2717                        break;
2718                case 2:
2719                        track->nbanks = 16;
2720                        break;
2721                }
2722
2723                switch ((tmp & 0xf00) >> 8) {
2724                case 0:
2725                        track->group_size = 256;
2726                        break;
2727                case 1:
2728                default:
2729                        track->group_size = 512;
2730                        break;
2731                }
2732
2733                switch ((tmp & 0xf000) >> 12) {
2734                case 0:
2735                        track->row_size = 1;
2736                        break;
2737                case 1:
2738                default:
2739                        track->row_size = 2;
2740                        break;
2741                case 2:
2742                        track->row_size = 4;
2743                        break;
2744                }
2745
2746                p->track = track;
2747        }
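            /*
             * Main parse loop: each packet spans pkt.count + 2 dwords.
             * Type 0 packets program registers, type 2 packets are
             * padding that needs no checking, and type 3 packets carry
             * the opcodes validated above.  Every failure path frees the
             * tracker before returning.
             */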
2748        do {
2749                r = radeon_cs_packet_parse(p, &pkt, p->idx);
2750                if (r) {
2751                        kfree(p->track);
2752                        p->track = NULL;
2753                        return r;
2754                }
2755                p->idx += pkt.count + 2;
2756                switch (pkt.type) {
2757                case RADEON_PACKET_TYPE0:
2758                        r = evergreen_cs_parse_packet0(p, &pkt);
2759                        break;
2760                case RADEON_PACKET_TYPE2:
2761                        break;
2762                case RADEON_PACKET_TYPE3:
2763                        r = evergreen_packet3_check(p, &pkt);
2764                        break;
2765                default:
2766                        DRM_ERROR("Unknown packet type %d!\n", pkt.type);
2767                        kfree(p->track);
2768                        p->track = NULL;
2769                        return -EINVAL;
2770                }
2771                if (r) {
2772                        kfree(p->track);
2773                        p->track = NULL;
2774                        return r;
2775                }
2776        } while (p->idx < p->chunk_ib->length_dw);
2777#if 0
2778        for (r = 0; r < p->ib.length_dw; r++) {
2779                pr_info("%05d  0x%08X\n", r, p->ib.ptr[r]);
2780                mdelay(1);
2781        }
2782#endif
2783        kfree(p->track);
2784        p->track = NULL;
2785        return 0;
2786}
2787
2788/**
2789 * evergreen_dma_cs_parse() - parse the DMA IB
2790 * @p:          parser structure holding parsing context.
2791 *
2792 * Parses the DMA IB from the CS ioctl and updates
2793 * the GPU addresses based on the reloc information and
2794 * checks for errors. (Evergreen-Cayman)
2795 * Returns 0 for success and an error on failure.
2796 **/
2797int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
2798{
2799        struct radeon_cs_chunk *ib_chunk = p->chunk_ib;
2800        struct radeon_bo_list *src_reloc, *dst_reloc, *dst2_reloc;
2801        u32 header, cmd, count, sub_cmd;
2802        uint32_t *ib = p->ib.ptr;
2803        u32 idx;
2804        u64 src_offset, dst_offset, dst2_offset;
2805        int r;
2806
2807        do {
2808                if (p->idx >= ib_chunk->length_dw) {
2809                        DRM_ERROR("Cannot parse packet at %d after CS end %d!\n",
2810                                  p->idx, ib_chunk->length_dw);
2811                        return -EINVAL;
2812                }
2813                idx = p->idx;
2814                header = radeon_get_ib_value(p, idx);
2815                cmd = GET_DMA_CMD(header);
2816                count = GET_DMA_COUNT(header);
2817                sub_cmd = GET_DMA_SUB_CMD(header);
2818
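                    /*
                     * Each DMA packet begins with a single header dword
                     * from which the command, the transfer count and a
                     * sub-command are unpacked; the sub-command selects
                     * between the linear and tiled addressing variants
                     * handled below.
                     */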
2819                switch (cmd) {
2820                case DMA_PACKET_WRITE:
2821                        r = r600_dma_cs_next_reloc(p, &dst_reloc);
2822                        if (r) {
2823                                DRM_ERROR("bad DMA_PACKET_WRITE\n");
2824                                return -EINVAL;
2825                        }
2826                        switch (sub_cmd) {
2827                        /* tiled */
2828                        case 8:
2829                                dst_offset = radeon_get_ib_value(p, idx+1);
2830                                dst_offset <<= 8;
2831
2832                                ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
2833                                p->idx += count + 7;
2834                                break;
2835                        /* linear */
2836                        case 0:
2837                                dst_offset = radeon_get_ib_value(p, idx+1);
2838                                dst_offset |= ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2839
2840                                ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2841                                ib[idx+2] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2842                                p->idx += count + 3;
2843                                break;
2844                        default:
2845                                DRM_ERROR("bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", idx, header);
2846                                return -EINVAL;
2847                        }
2848                        if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2849                                dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n",
2850                                         dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2851                                return -EINVAL;
2852                        }
2853                        break;
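                    /*
                     * For the copy variants below, tiled addresses are
                     * kept shifted right by 8 (256-byte units) in a
                     * single dword, while linear addresses use the usual
                     * low-dword plus 8-high-bits split.  Most variants
                     * also bounds-check source and destination against
                     * their buffer objects; the Cayman-only partial
                     * copies are patched without a size check.
                     */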
2854                case DMA_PACKET_COPY:
2855                        r = r600_dma_cs_next_reloc(p, &src_reloc);
2856                        if (r) {
2857                                DRM_ERROR("bad DMA_PACKET_COPY\n");
2858                                return -EINVAL;
2859                        }
2860                        r = r600_dma_cs_next_reloc(p, &dst_reloc);
2861                        if (r) {
2862                                DRM_ERROR("bad DMA_PACKET_COPY\n");
2863                                return -EINVAL;
2864                        }
2865                        switch (sub_cmd) {
2866                        /* Copy L2L, DW aligned */
2867                        case 0x00:
2868                                /* L2L, dw */
2869                                src_offset = radeon_get_ib_value(p, idx+2);
2870                                src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2871                                dst_offset = radeon_get_ib_value(p, idx+1);
2872                                dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;
2873                                if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2874                                        dev_warn(p->dev, "DMA L2L, dw src buffer too small (%llu %lu)\n",
2875                                                        src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2876                                        return -EINVAL;
2877                                }
2878                                if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2879                                        dev_warn(p->dev, "DMA L2L, dw dst buffer too small (%llu %lu)\n",
2880                                                        dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2881                                        return -EINVAL;
2882                                }
2883                                ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2884                                ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
2885                                ib[idx+3] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2886                                ib[idx+4] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2887                                p->idx += 5;
2888                                break;
2889                        /* Copy L2T/T2L */
2890                        case 0x08:
2891                                /* detile bit */
2892                                if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
2893                                        /* tiled src, linear dst */
2894                                        src_offset = radeon_get_ib_value(p, idx+1);
2895                                        src_offset <<= 8;
2896                                        ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
2897
2898                                        dst_offset = radeon_get_ib_value(p, idx + 7);
2899                                        dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
2900                                        ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2901                                        ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2902                                } else {
2903                                        /* linear src, tiled dst */
2904                                        src_offset = radeon_get_ib_value(p, idx+7);
2905                                        src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
2906                                        ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
2907                                        ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2908
2909                                        dst_offset = radeon_get_ib_value(p, idx+1);
2910                                        dst_offset <<= 8;
2911                                        ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
2912                                }
2913                                if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2914                                        dev_warn(p->dev, "DMA L2T, src buffer too small (%llu %lu)\n",
2915                                                        src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2916                                        return -EINVAL;
2917                                }
2918                                if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2919                                        dev_warn(p->dev, "DMA L2T, dst buffer too small (%llu %lu)\n",
2920                                                        dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2921                                        return -EINVAL;
2922                                }
2923                                p->idx += 9;
2924                                break;
2925                        /* Copy L2L, byte aligned */
2926                        case 0x40:
2927                                /* L2L, byte */
2928                                src_offset = radeon_get_ib_value(p, idx+2);
2929                                src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2930                                dst_offset = radeon_get_ib_value(p, idx+1);
2931                                dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;
2932                                if ((src_offset + count) > radeon_bo_size(src_reloc->robj)) {
2933                                        dev_warn(p->dev, "DMA L2L, byte src buffer too small (%llu %lu)\n",
2934                                                        src_offset + count, radeon_bo_size(src_reloc->robj));
2935                                        return -EINVAL;
2936                                }
2937                                if ((dst_offset + count) > radeon_bo_size(dst_reloc->robj)) {
2938                                        dev_warn(p->dev, "DMA L2L, byte dst buffer too small (%llu %lu)\n",
2939                                                        dst_offset + count, radeon_bo_size(dst_reloc->robj));
2940                                        return -EINVAL;
2941                                }
2942                                ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xffffffff);
2943                                ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xffffffff);
2944                                ib[idx+3] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2945                                ib[idx+4] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2946                                p->idx += 5;
2947                                break;
2948                        /* Copy L2L, partial */
2949                        case 0x41:
2950                                /* L2L, partial */
2951                                if (p->family < CHIP_CAYMAN) {
2952                                        DRM_ERROR("L2L Partial is cayman only!\n");
2953                                        return -EINVAL;
2954                                }
2955                                ib[idx+1] += (u32)(src_reloc->gpu_offset & 0xffffffff);
2956                                ib[idx+2] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2957                                ib[idx+4] += (u32)(dst_reloc->gpu_offset & 0xffffffff);
2958                                ib[idx+5] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2959
2960                                p->idx += 9;
2961                                break;
2962                        /* Copy L2L, DW aligned, broadcast */
2963                        case 0x44:
2964                                /* L2L, dw, broadcast */
2965                                r = r600_dma_cs_next_reloc(p, &dst2_reloc);
2966                                if (r) {
2967                                        DRM_ERROR("bad L2L, dw, broadcast DMA_PACKET_COPY\n");
2968                                        return -EINVAL;
2969                                }
2970                                dst_offset = radeon_get_ib_value(p, idx+1);
2971                                dst_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2972                                dst2_offset = radeon_get_ib_value(p, idx+2);
2973                                dst2_offset |= ((u64)(radeon_get_ib_value(p, idx+5) & 0xff)) << 32;
2974                                src_offset = radeon_get_ib_value(p, idx+3);
2975                                src_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32;
2976                                if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2977                                        dev_warn(p->dev, "DMA L2L, dw, broadcast src buffer too small (%llu %lu)\n",
2978                                                        src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2979                                        return -EINVAL;
2980                                }
2981                                if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2982                                        dev_warn(p->dev, "DMA L2L, dw, broadcast dst buffer too small (%llu %lu)\n",
2983                                                        dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2984                                        return -EINVAL;
2985                                }
2986                                if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
2987                                        dev_warn(p->dev, "DMA L2L, dw, broadcast dst2 buffer too small (%llu %lu)\n",
2988                                                        dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
2989                                        return -EINVAL;
2990                                }
2991                                ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2992                                ib[idx+2] += (u32)(dst2_reloc->gpu_offset & 0xfffffffc);
2993                                ib[idx+3] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
2994                                ib[idx+4] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2995                                ib[idx+5] += upper_32_bits(dst2_reloc->gpu_offset) & 0xff;
2996                                ib[idx+6] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2997                                p->idx += 7;
2998                                break;
2999                        /* Copy L2T Frame to Field */
3000                        case 0x48:
3001                                if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
3002                                        DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
3003                                        return -EINVAL;
3004                                }
3005                                r = r600_dma_cs_next_reloc(p, &dst2_reloc);
3006                                if (r) {
3007                                        DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
3008                                        return -EINVAL;
3009                                }
3010                                dst_offset = radeon_get_ib_value(p, idx+1);
3011                                dst_offset <<= 8;
3012                                dst2_offset = radeon_get_ib_value(p, idx+2);
3013                                dst2_offset <<= 8;
3014                                src_offset = radeon_get_ib_value(p, idx+8);
3015                                src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
3016                                if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3017                                        dev_warn(p->dev, "DMA L2T, frame to fields src buffer too small (%llu %lu)\n",
3018                                                        src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3019                                        return -EINVAL;
3020                                }
3021                                if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3022                                        dev_warn(p->dev, "DMA L2T, frame to fields dst buffer too small (%llu %lu)\n",
3023                                                        dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3024                                        return -EINVAL;
3025                                }
3026                                if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3027                                        dev_warn(p->dev, "DMA L2T, frame to fields dst2 buffer too small (%llu %lu)\n",
3028                                                        dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3029                                        return -EINVAL;
3030                                }
3031                                ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3032                                ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8);
3033                                ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3034                                ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3035                                p->idx += 10;
3036                                break;
3037                        /* Copy L2T/T2L, partial */
3038                        case 0x49:
3039                                /* L2T, T2L partial */
3040                                if (p->family < CHIP_CAYMAN) {
3041                                        DRM_ERROR("L2T, T2L Partial is cayman only!\n");
3042                                        return -EINVAL;
3043                                }
3044                                /* detile bit */
3045                                if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
3046                                        /* tiled src, linear dst */
3047                                        ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
3048
3049                                        ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
3050                                        ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
3051                                } else {
3052                                        /* linear src, tiled dst */
3053                                        ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3054                                        ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3055
3056                                        ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3057                                }
3058                                p->idx += 12;
3059                                break;
3060                        /* Copy L2T broadcast */
3061                        case 0x4b:
3062                                /* L2T, broadcast */
3063                                if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
3064                                        DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3065                                        return -EINVAL;
3066                                }
3067                                r = r600_dma_cs_next_reloc(p, &dst2_reloc);
3068                                if (r) {
3069                                        DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3070                                        return -EINVAL;
3071                                }
3072                                dst_offset = radeon_get_ib_value(p, idx+1);
3073                                dst_offset <<= 8;
3074                                dst2_offset = radeon_get_ib_value(p, idx+2);
3075                                dst2_offset <<= 8;
3076                                src_offset = radeon_get_ib_value(p, idx+8);
3077                                src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
3078                                if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3079                                        dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
3080                                                        src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3081                                        return -EINVAL;
3082                                }
3083                                if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3084                                        dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
3085                                                        dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3086                                        return -EINVAL;
3087                                }
3088                                if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3089                                        dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
3090                                                        dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3091                                        return -EINVAL;
3092                                }
3093                                ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3094                                ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8);
3095                                ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3096                                ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3097                                p->idx += 10;
3098                                break;
3099                        /* Copy L2T/T2L (tile units) */
3100                        case 0x4c:
3101                                /* L2T, T2L */
3102                                /* detile bit */
3103                                if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
3104                                        /* tiled src, linear dst */
3105                                        src_offset = radeon_get_ib_value(p, idx+1);
3106                                        src_offset <<= 8;
3107                                        ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
3108
3109                                        dst_offset = radeon_get_ib_value(p, idx+7);
3110                                        dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
3111                                        ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
3112                                        ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
3113                                } else {
3114                                        /* linear src, tiled dst */
3115                                        src_offset = radeon_get_ib_value(p, idx+7);
3116                                        src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
3117                                        ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3118                                        ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3119
3120                                        dst_offset = radeon_get_ib_value(p, idx+1);
3121                                        dst_offset <<= 8;
3122                                        ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3123                                }
3124                                if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3125                                        dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%llu %lu)\n",
3126                                                        src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3127                                        return -EINVAL;
3128                                }
3129                                if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3130                                        dev_warn(p->dev, "DMA L2T, T2L dst buffer too small (%llu %lu)\n",
3131                                                        dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3132                                        return -EINVAL;
3133                                }
3134                                p->idx += 9;
3135                                break;
3136                        /* Copy T2T, partial (tile units) */
3137                        case 0x4d:
3138                                /* T2T partial */
3139                                if (p->family < CHIP_CAYMAN) {
3140                                        DRM_ERROR("T2T Partial is cayman only!\n");
3141                                        return -EINVAL;
3142                                }
3143                                ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
3144                                ib[idx+4] += (u32)(dst_reloc->gpu_offset >> 8);
3145                                p->idx += 13;
3146                                break;
3147                        /* Copy L2T broadcast (tile units) */
3148                        case 0x4f:
3149                                /* L2T, broadcast */
3150                                if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
3151                                        DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3152                                        return -EINVAL;
3153                                }
3154                                r = r600_dma_cs_next_reloc(p, &dst2_reloc);
3155                                if (r) {
3156                                        DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3157                                        return -EINVAL;
3158                                }
3159                                dst_offset = radeon_get_ib_value(p, idx+1);
3160                                dst_offset <<= 8;
3161                                dst2_offset = radeon_get_ib_value(p, idx+2);
3162                                dst2_offset <<= 8;
3163                                src_offset = radeon_get_ib_value(p, idx+8);
3164                                src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
3165                                if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3166                                        dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
3167                                                        src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3168                                        return -EINVAL;
3169                                }
3170                                if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3171                                        dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
3172                                                        dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3173                                        return -EINVAL;
3174                                }
3175                                if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3176                                        dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
3177                                                        dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3178                                        return -EINVAL;
3179                                }
3180                                ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3181                                ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8);
3182                                ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3183                                ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3184                                p->idx += 10;
3185                                break;
3186                        default:
3187                                DRM_ERROR("bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, header);
3188                                return -EINVAL;
3189                        }
3190                        break;
3191                case DMA_PACKET_CONSTANT_FILL:
3192                        r = r600_dma_cs_next_reloc(p, &dst_reloc);
3193                        if (r) {
3194                                DRM_ERROR("bad DMA_PACKET_CONSTANT_FILL\n");
3195                                return -EINVAL;
3196                        }
3197                        dst_offset = radeon_get_ib_value(p, idx+1);
3198                        dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0x00ff0000)) << 16;
3199                        if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3200                                dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n",
3201                                         dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3202                                return -EINVAL;
3203                        }
3204                        ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
3205                        ib[idx+3] += (upper_32_bits(dst_reloc->gpu_offset) << 16) & 0x00ff0000;
3206                        p->idx += 4;
3207                        break;
3208                case DMA_PACKET_NOP:
3209                        p->idx += 1;
3210                        break;
3211                default:
3212                        DRM_ERROR("Unknown packet type %d at %d!\n", cmd, idx);
3213                        return -EINVAL;
3214                }
3215        } while (p->idx < p->chunk_ib->length_dw);
3216#if 0
3217        for (r = 0; r < p->ib.length_dw; r++) {
3218                pr_info("%05d  0x%08X\n", r, p->ib.ptr[r]);
3219                mdelay(1);
3220        }
3221#endif
3222        return 0;
3223}
3224
3225/* vm parser */
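    /*
     * With virtual memory there are no relocations to patch, so the only
     * CP-side check left is that register writes stay off privileged
     * registers: context registers (0x28000 and up) are allowed
     * wholesale, config registers must be on the explicit list below.
     */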
3226static bool evergreen_vm_reg_valid(u32 reg)
3227{
3228        /* context regs are fine */
3229        if (reg >= 0x28000)
3230                return true;
3231
3232        /* check config regs */
3233        switch (reg) {
3234        case WAIT_UNTIL:
3235        case GRBM_GFX_INDEX:
3236        case CP_STRMOUT_CNTL:
3237        case CP_COHER_CNTL:
3238        case CP_COHER_SIZE:
3239        case VGT_VTX_VECT_EJECT_REG:
3240        case VGT_CACHE_INVALIDATION:
3241        case VGT_GS_VERTEX_REUSE:
3242        case VGT_PRIMITIVE_TYPE:
3243        case VGT_INDEX_TYPE:
3244        case VGT_NUM_INDICES:
3245        case VGT_NUM_INSTANCES:
3246        case VGT_COMPUTE_DIM_X:
3247        case VGT_COMPUTE_DIM_Y:
3248        case VGT_COMPUTE_DIM_Z:
3249        case VGT_COMPUTE_START_X:
3250        case VGT_COMPUTE_START_Y:
3251        case VGT_COMPUTE_START_Z:
3252        case VGT_COMPUTE_INDEX:
3253        case VGT_COMPUTE_THREAD_GROUP_SIZE:
3254        case VGT_HS_OFFCHIP_PARAM:
3255        case PA_CL_ENHANCE:
3256        case PA_SU_LINE_STIPPLE_VALUE:
3257        case PA_SC_LINE_STIPPLE_STATE:
3258        case PA_SC_ENHANCE:
3259        case SQ_DYN_GPR_CNTL_PS_FLUSH_REQ:
3260        case SQ_DYN_GPR_SIMD_LOCK_EN:
3261        case SQ_CONFIG:
3262        case SQ_GPR_RESOURCE_MGMT_1:
3263        case SQ_GLOBAL_GPR_RESOURCE_MGMT_1:
3264        case SQ_GLOBAL_GPR_RESOURCE_MGMT_2:
3265        case SQ_CONST_MEM_BASE:
3266        case SQ_STATIC_THREAD_MGMT_1:
3267        case SQ_STATIC_THREAD_MGMT_2:
3268        case SQ_STATIC_THREAD_MGMT_3:
3269        case SPI_CONFIG_CNTL:
3270        case SPI_CONFIG_CNTL_1:
3271        case TA_CNTL_AUX:
3272        case DB_DEBUG:
3273        case DB_DEBUG2:
3274        case DB_DEBUG3:
3275        case DB_DEBUG4:
3276        case DB_WATERMARKS:
3277        case TD_PS_BORDER_COLOR_INDEX:
3278        case TD_PS_BORDER_COLOR_RED:
3279        case TD_PS_BORDER_COLOR_GREEN:
3280        case TD_PS_BORDER_COLOR_BLUE:
3281        case TD_PS_BORDER_COLOR_ALPHA:
3282        case TD_VS_BORDER_COLOR_INDEX:
3283        case TD_VS_BORDER_COLOR_RED:
3284        case TD_VS_BORDER_COLOR_GREEN:
3285        case TD_VS_BORDER_COLOR_BLUE:
3286        case TD_VS_BORDER_COLOR_ALPHA:
3287        case TD_GS_BORDER_COLOR_INDEX:
3288        case TD_GS_BORDER_COLOR_RED:
3289        case TD_GS_BORDER_COLOR_GREEN:
3290        case TD_GS_BORDER_COLOR_BLUE:
3291        case TD_GS_BORDER_COLOR_ALPHA:
3292        case TD_HS_BORDER_COLOR_INDEX:
3293        case TD_HS_BORDER_COLOR_RED:
3294        case TD_HS_BORDER_COLOR_GREEN:
3295        case TD_HS_BORDER_COLOR_BLUE:
3296        case TD_HS_BORDER_COLOR_ALPHA:
3297        case TD_LS_BORDER_COLOR_INDEX:
3298        case TD_LS_BORDER_COLOR_RED:
3299        case TD_LS_BORDER_COLOR_GREEN:
3300        case TD_LS_BORDER_COLOR_BLUE:
3301        case TD_LS_BORDER_COLOR_ALPHA:
3302        case TD_CS_BORDER_COLOR_INDEX:
3303        case TD_CS_BORDER_COLOR_RED:
3304        case TD_CS_BORDER_COLOR_GREEN:
3305        case TD_CS_BORDER_COLOR_BLUE:
3306        case TD_CS_BORDER_COLOR_ALPHA:
3307        case SQ_ESGS_RING_SIZE:
3308        case SQ_GSVS_RING_SIZE:
3309        case SQ_ESTMP_RING_SIZE:
3310        case SQ_GSTMP_RING_SIZE:
3311        case SQ_HSTMP_RING_SIZE:
3312        case SQ_LSTMP_RING_SIZE:
3313        case SQ_PSTMP_RING_SIZE:
3314        case SQ_VSTMP_RING_SIZE:
3315        case SQ_ESGS_RING_ITEMSIZE:
3316        case SQ_ESTMP_RING_ITEMSIZE:
3317        case SQ_GSTMP_RING_ITEMSIZE:
3318        case SQ_GSVS_RING_ITEMSIZE:
3319        case SQ_GS_VERT_ITEMSIZE:
3320        case SQ_GS_VERT_ITEMSIZE_1:
3321        case SQ_GS_VERT_ITEMSIZE_2:
3322        case SQ_GS_VERT_ITEMSIZE_3:
3323        case SQ_GSVS_RING_OFFSET_1:
3324        case SQ_GSVS_RING_OFFSET_2:
3325        case SQ_GSVS_RING_OFFSET_3:
3326        case SQ_HSTMP_RING_ITEMSIZE:
3327        case SQ_LSTMP_RING_ITEMSIZE:
3328        case SQ_PSTMP_RING_ITEMSIZE:
3329        case SQ_VSTMP_RING_ITEMSIZE:
3330        case VGT_TF_RING_SIZE:
3331        case SQ_ESGS_RING_BASE:
3332        case SQ_GSVS_RING_BASE:
3333        case SQ_ESTMP_RING_BASE:
3334        case SQ_GSTMP_RING_BASE:
3335        case SQ_HSTMP_RING_BASE:
3336        case SQ_LSTMP_RING_BASE:
3337        case SQ_PSTMP_RING_BASE:
3338        case SQ_VSTMP_RING_BASE:
3339        case CAYMAN_VGT_OFFCHIP_LDS_BASE:
3340        case CAYMAN_SQ_EX_ALLOC_TABLE_SLOTS:
3341                return true;
3342        default:
3343                DRM_ERROR("Invalid register 0x%x in CS\n", reg);
3344                return false;
3345        }
3346}
3347
3348static int evergreen_vm_packet3_check(struct radeon_device *rdev,
3349                                      u32 *ib, struct radeon_cs_packet *pkt)
3350{
3351        u32 idx = pkt->idx + 1;
3352        u32 idx_value = ib[idx];
3353        u32 start_reg, end_reg, reg, i;
3354        u32 command, info;
3355
3356        switch (pkt->opcode) {
3357        case PACKET3_NOP:
3358                break;
3359        case PACKET3_SET_BASE:
3360                if (idx_value != 1) {
3361                        DRM_ERROR("bad SET_BASE\n");
3362                        return -EINVAL;
3363                }
3364                break;
3365        case PACKET3_CLEAR_STATE:
3366        case PACKET3_INDEX_BUFFER_SIZE:
3367        case PACKET3_DISPATCH_DIRECT:
3368        case PACKET3_DISPATCH_INDIRECT:
3369        case PACKET3_MODE_CONTROL:
3370        case PACKET3_SET_PREDICATION:
3371        case PACKET3_COND_EXEC:
3372        case PACKET3_PRED_EXEC:
3373        case PACKET3_DRAW_INDIRECT:
3374        case PACKET3_DRAW_INDEX_INDIRECT:
3375        case PACKET3_INDEX_BASE:
3376        case PACKET3_DRAW_INDEX_2:
3377        case PACKET3_CONTEXT_CONTROL:
3378        case PACKET3_DRAW_INDEX_OFFSET:
3379        case PACKET3_INDEX_TYPE:
3380        case PACKET3_DRAW_INDEX:
3381        case PACKET3_DRAW_INDEX_AUTO:
3382        case PACKET3_DRAW_INDEX_IMMD:
3383        case PACKET3_NUM_INSTANCES:
3384        case PACKET3_DRAW_INDEX_MULTI_AUTO:
3385        case PACKET3_STRMOUT_BUFFER_UPDATE:
3386        case PACKET3_DRAW_INDEX_OFFSET_2:
3387        case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
3388        case PACKET3_MPEG_INDEX:
3389        case PACKET3_WAIT_REG_MEM:
3390        case PACKET3_MEM_WRITE:
3391        case PACKET3_PFP_SYNC_ME:
3392        case PACKET3_SURFACE_SYNC:
3393        case PACKET3_EVENT_WRITE:
3394        case PACKET3_EVENT_WRITE_EOP:
3395        case PACKET3_EVENT_WRITE_EOS:
3396        case PACKET3_SET_CONTEXT_REG:
3397        case PACKET3_SET_BOOL_CONST:
3398        case PACKET3_SET_LOOP_CONST:
3399        case PACKET3_SET_RESOURCE:
3400        case PACKET3_SET_SAMPLER:
3401        case PACKET3_SET_CTL_CONST:
3402        case PACKET3_SET_RESOURCE_OFFSET:
3403        case PACKET3_SET_CONTEXT_REG_INDIRECT:
3404        case PACKET3_SET_RESOURCE_INDIRECT:
3405        case CAYMAN_PACKET3_DEALLOC_STATE:
3406                break;
3407        case PACKET3_COND_WRITE:
3408                if (idx_value & 0x100) {
3409                        reg = ib[idx + 5] * 4;
3410                        if (!evergreen_vm_reg_valid(reg))
3411                                return -EINVAL;
3412                }
3413                break;
3414        case PACKET3_COPY_DW:
3415                if (idx_value & 0x2) {
3416                        reg = ib[idx + 3] * 4;
3417                        if (!evergreen_vm_reg_valid(reg))
3418                                return -EINVAL;
3419                }
3420                break;
3421        case PACKET3_SET_CONFIG_REG:
3422                start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
3423                end_reg = 4 * pkt->count + start_reg - 4;
3424                if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
3425                    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
3426                    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
3427                        DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
3428                        return -EINVAL;
3429                }
3430                for (i = 0; i < pkt->count; i++) {
3431                        reg = start_reg + (4 * i);
3432                        if (!evergreen_vm_reg_valid(reg))
3433                                return -EINVAL;
3434                }
3435                break;
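            /*
             * CP_DMA: bits 30:29 of the info dword give the source
             * address space and bits 21:20 the destination.  Anything
             * that is not a plain memory-to-memory copy (GDS/DATA
             * spaces, or register endpoints selected by the SAS/DAS
             * command bits) must use a dword-aligned byte count, and
             * register endpoints are validated against the whitelist,
             * honouring the SAIC/DAIC no-increment bits.
             */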
3436        case PACKET3_CP_DMA:
3437                command = ib[idx + 4];
3438                info = ib[idx + 1];
3439                if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
3440                    (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
3441                    ((((info & 0x00300000) >> 20) == 0) &&
3442                     (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
3443                    ((((info & 0x60000000) >> 29) == 0) &&
3444                     (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
3445                        /* non mem-to-mem copies require a dword-aligned count */
3446                        if ((command & 0x1fffff) % 4) {
3447                                DRM_ERROR("CP DMA command requires dw count alignment\n");
3448                                return -EINVAL;
3449                        }
3450                }
3451                if (command & PACKET3_CP_DMA_CMD_SAS) {
3452                        /* src address space is register */
3453                        if (((info & 0x60000000) >> 29) == 0) {
3454                                start_reg = idx_value << 2;
3455                                if (command & PACKET3_CP_DMA_CMD_SAIC) {
3456                                        reg = start_reg;
3457                                        if (!evergreen_vm_reg_valid(reg)) {
3458                                                DRM_ERROR("CP DMA Bad SRC register\n");
3459                                                return -EINVAL;
3460                                        }
3461                                } else {
3462                                        for (i = 0; i < (command & 0x1fffff); i++) {
3463                                                reg = start_reg + (4 * i);
3464                                                if (!evergreen_vm_reg_valid(reg)) {
3465                                                        DRM_ERROR("CP DMA Bad SRC register\n");
3466                                                        return -EINVAL;
3467                                                }
3468                                        }
3469                                }
3470                        }
3471                }
3472                if (command & PACKET3_CP_DMA_CMD_DAS) {
3473                        /* dst address space is register */
3474                        if (((info & 0x00300000) >> 20) == 0) {
3475                                start_reg = ib[idx + 2];
3476                                if (command & PACKET3_CP_DMA_CMD_DAIC) {
3477                                        reg = start_reg;
3478                                        if (!evergreen_vm_reg_valid(reg)) {
3479                                                DRM_ERROR("CP DMA Bad DST register\n");
3480                                                return -EINVAL;
3481                                        }
3482                                } else {
3483                                        for (i = 0; i < (command & 0x1fffff); i++) {
3484                                                reg = start_reg + (4 * i);
3485                                                if (!evergreen_vm_reg_valid(reg)) {
3486                                                        DRM_ERROR("CP DMA Bad DST register\n");
3487                                                        return -EINVAL;
3488                                                }
3489                                        }
3490                                }
3491                        }
3492                }
3493                break;
3494        case PACKET3_SET_APPEND_CNT: {
3495                uint32_t areg;
3496                uint32_t allowed_reg_base;
3497
3498                if (pkt->count != 2) {
3499                        DRM_ERROR("bad SET_APPEND_CNT (invalid count)\n");
3500                        return -EINVAL;
3501                }
3502
3503                allowed_reg_base = GDS_APPEND_COUNT_0;
3504                allowed_reg_base -= PACKET3_SET_CONTEXT_REG_START;
3505                allowed_reg_base >>= 2;
3506
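                /*
                 * Worked example of the window check below: after the three
                 * statements above, allowed_reg_base equals
                 * (GDS_APPEND_COUNT_0 - PACKET3_SET_CONTEXT_REG_START) / 4,
                 * the context-register word offset of the first of the
                 * twelve GDS append counters, so the offset packed in bits
                 * 31:16 of the payload must fall in [base, base + 11].
                 */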
3507                areg = idx_value >> 16;
3508                if (areg < allowed_reg_base || areg > (allowed_reg_base + 11)) {
3509                        DRM_ERROR("forbidden register for append cnt 0x%08x at %d\n",
3510                                  areg, idx);
3511                        return -EINVAL;
3512                }
3513                break;
3514        }
3515        default:
3516                return -EINVAL;
3517        }
3518        return 0;
3519}
3520
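/*
 * Editor's note: the IB walk below relies on the standard PM4 header
 * layout encoded by the RADEON_CP_PACKET_GET_* macros. A minimal decode
 * equivalent to those macro calls (a sketch, not a replacement):
 *
 *   u32 type   = (header >> 30) & 0x3;    // packet type (0, 2 or 3)
 *   u32 count  = (header >> 16) & 0x3fff; // payload dwords minus one
 *   u32 opcode = (header >> 8)  & 0xff;   // type-3 opcode
 */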
3521int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3522{
3523        int ret = 0;
3524        u32 idx = 0;
3525        struct radeon_cs_packet pkt;
3526
3527        do {
3528                pkt.idx = idx;
3529                pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
3530                pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
3531                pkt.one_reg_wr = 0;
3532                switch (pkt.type) {
3533                case RADEON_PACKET_TYPE0:
3534                        dev_err(rdev->dev, "Packet0 not allowed!\n");
3535                        ret = -EINVAL;
3536                        break;
3537                case RADEON_PACKET_TYPE2:
3538                        idx += 1;
3539                        break;
3540                case RADEON_PACKET_TYPE3:
3541                        pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
3542                        ret = evergreen_vm_packet3_check(rdev, ib->ptr, &pkt);
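                        /* header dword + (count + 1) payload dwords */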
3543                        idx += pkt.count + 2;
3544                        break;
3545                default:
3546                        dev_err(rdev->dev, "Unknown packet type %d!\n", pkt.type);
3547                        ret = -EINVAL;
3548                        break;
3549                }
3550                if (ret)
3551                        break;
3552        } while (idx < ib->length_dw);
3553
3554        return ret;
3555}
3556
3557/**
3558 * evergreen_dma_ib_parse() - parse the DMA IB for VM
3559 * @rdev: radeon_device pointer
3560 * @ib: radeon_ib pointer
3561 *
3562 * Parses the DMA IB from the VM CS ioctl and
3563 * checks it for errors (Cayman-SI).
3564 * Returns 0 for success or a negative error code on failure.
3565 **/
3566int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3567{
3568        u32 idx = 0;
3569        u32 header, cmd, count, sub_cmd;
3570
3571        do {
3572                header = ib->ptr[idx];
3573                cmd = GET_DMA_CMD(header);
3574                count = GET_DMA_COUNT(header);
3575                sub_cmd = GET_DMA_SUB_CMD(header);
3576
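                /*
                 * Assumed DMA header layout behind the GET_DMA_* macros
                 * (per evergreend.h): cmd in bits 31:28, sub_cmd in bits
                 * 27:20, dword count in bits 19:0. Every case advances idx
                 * by the full packet size so the loop lands on the next
                 * header.
                 */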
3577                switch (cmd) {
3578                case DMA_PACKET_WRITE:
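                        /*
                         * Packet size = 1 header + address/tiling dwords +
                         * count data dwords, hence count + 3 for linear and
                         * count + 7 for tiled below.
                         */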
3579                        switch (sub_cmd) {
3580                        /* tiled */
3581                        case 8:
3582                                idx += count + 7;
3583                                break;
3584                        /* linear */
3585                        case 0:
3586                                idx += count + 3;
3587                                break;
3588                        default:
3589                                DRM_ERROR("bad DMA_PACKET_WRITE [%6d] 0x%08x: sub cmd is not 0 or 8\n", idx, ib->ptr[idx]);
3590                                return -EINVAL;
3591                        }
3592                        break;
3593                case DMA_PACKET_COPY:
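                        /*
                         * Copy packets have a fixed dword length regardless
                         * of transfer size, so idx advances by a constant
                         * per sub-command.
                         */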
3594                        switch (sub_cmd) {
3595                        /* Copy L2L, DW aligned */
3596                        case 0x00:
3597                                idx += 5;
3598                                break;
3599                        /* Copy L2T/T2L */
3600                        case 0x08:
3601                                idx += 9;
3602                                break;
3603                        /* Copy L2L, byte aligned */
3604                        case 0x40:
3605                                idx += 5;
3606                                break;
3607                        /* Copy L2L, partial */
3608                        case 0x41:
3609                                idx += 9;
3610                                break;
3611                        /* Copy L2L, DW aligned, broadcast */
3612                        case 0x44:
3613                                idx += 7;
3614                                break;
3615                        /* Copy L2T Frame to Field */
3616                        case 0x48:
3617                                idx += 10;
3618                                break;
3619                        /* Copy L2T/T2L, partial */
3620                        case 0x49:
3621                                idx += 12;
3622                                break;
3623                        /* Copy L2T broadcast */
3624                        case 0x4b:
3625                                idx += 10;
3626                                break;
3627                        /* Copy L2T/T2L (tile units) */
3628                        case 0x4c:
3629                                idx += 9;
3630                                break;
3631                        /* Copy T2T, partial (tile units) */
3632                        case 0x4d:
3633                                idx += 13;
3634                                break;
3635                        /* Copy L2T broadcast (tile units) */
3636                        case 0x4f:
3637                                idx += 10;
3638                                break;
3639                        default:
3640                                DRM_ERROR("bad DMA_PACKET_COPY [%6d] 0x%08x: invalid sub cmd\n", idx, ib->ptr[idx]);
3641                                return -EINVAL;
3642                        }
3643                        break;
3644                case DMA_PACKET_CONSTANT_FILL:
3645                        idx += 4;
3646                        break;
3647                case DMA_PACKET_NOP:
3648                        idx += 1;
3649                        break;
3650                default:
3651                        DRM_ERROR("Unknown packet type %d at %d!\n", cmd, idx);
3652                        return -EINVAL;
3653                }
3654        } while (idx < ib->length_dw);
3655
3656        return 0;
3657}
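/*
 * Usage sketch (hypothetical wiring, for illustration only; the real hook
 * assignment lives in radeon_asic.c): with VM enabled, an IB is validated
 * by the parser matching its ring before it is scheduled, e.g.
 *
 *   int r;
 *
 *   if (ib->ring == R600_RING_TYPE_DMA_INDEX)
 *           r = evergreen_dma_ib_parse(rdev, ib);
 *   else
 *           r = evergreen_ib_parse(rdev, ib);
 *   if (r)
 *           return r;  // reject the command stream before submission
 *
 * Both parsers return 0 on success or -EINVAL on a malformed packet.
 */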
3658