/* linux/drivers/gpu/drm/radeon/evergreen_cs.c */
   1/*
   2 * Copyright 2010 Advanced Micro Devices, Inc.
   3 * Copyright 2008 Red Hat Inc.
   4 * Copyright 2009 Jerome Glisse.
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a
   7 * copy of this software and associated documentation files (the "Software"),
   8 * to deal in the Software without restriction, including without limitation
   9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10 * and/or sell copies of the Software, and to permit persons to whom the
  11 * Software is furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  22 * OTHER DEALINGS IN THE SOFTWARE.
  23 *
  24 * Authors: Dave Airlie
  25 *          Alex Deucher
  26 *          Jerome Glisse
  27 */
  28#include <drm/drmP.h>
  29#include "radeon.h"
  30#include "evergreend.h"
  31#include "evergreen_reg_safe.h"
  32#include "cayman_reg_safe.h"
  33
/*
 * Local min/max helpers.  NOTE(review): classic function-like macros --
 * each argument may be evaluated more than once, so never pass
 * expressions with side effects (e.g. MAX(i++, j)).
 */
#define MAX(a,b)                   (((a)>(b))?(a):(b))
#define MIN(a,b)                   (((a)<(b))?(a):(b))
  36
  37int r600_dma_cs_next_reloc(struct radeon_cs_parser *p,
  38                           struct radeon_cs_reloc **cs_reloc);
  39static int evergreen_cs_packet_next_reloc(struct radeon_cs_parser *p,
  40                                          struct radeon_cs_reloc **cs_reloc);
  41
/*
 * State accumulated while checking an evergreen command stream: the last
 * values written to the registers of interest plus the buffer objects
 * (BOs) the stream bound to them.  The *_dirty flags mark register
 * groups that changed since the last validation pass.
 */
struct evergreen_cs_track {
        /* hw surface/tiling parameters; set up outside this chunk */
        u32                     group_size;
        u32                     nbanks;
        u32                     npipes;
        u32                     row_size;
        /* value we track */
        u32                     nsamples;               /* unused */
        /* color buffers: up to 12 render targets */
        struct radeon_bo        *cb_color_bo[12];
        u32                     cb_color_bo_offset[12];
        struct radeon_bo        *cb_color_fmask_bo[8];  /* unused */
        struct radeon_bo        *cb_color_cmask_bo[8];  /* unused */
        u32                     cb_color_info[12];
        u32                     cb_color_view[12];
        u32                     cb_color_pitch[12];
        u32                     cb_color_slice[12];
        /* index into the IB of the slice dword, so the checker can patch
         * it in place (old-ddx workaround in validate_cb) */
        u32                     cb_color_slice_idx[12];
        u32                     cb_color_attrib[12];
        u32                     cb_color_cmask_slice[8];/* unused */
        u32                     cb_color_fmask_slice[8];/* unused */
        u32                     cb_target_mask;
        u32                     cb_shader_mask; /* unused */
        /* streamout buffers */
        u32                     vgt_strmout_config;
        u32                     vgt_strmout_buffer_config;
        struct radeon_bo        *vgt_strmout_bo[4];
        u32                     vgt_strmout_bo_offset[4];
        u32                     vgt_strmout_size[4];
        /* depth/stencil buffer state */
        u32                     db_depth_control;
        u32                     db_depth_view;
        u32                     db_depth_slice;
        u32                     db_depth_size;
        u32                     db_z_info;
        u32                     db_z_read_offset;
        u32                     db_z_write_offset;
        struct radeon_bo        *db_z_read_bo;
        struct radeon_bo        *db_z_write_bo;
        u32                     db_s_info;
        u32                     db_s_read_offset;
        u32                     db_s_write_offset;
        struct radeon_bo        *db_s_read_bo;
        struct radeon_bo        *db_s_write_bo;
        bool                    sx_misc_kill_all_prims;
        bool                    cb_dirty;
        bool                    db_dirty;
        bool                    streamout_dirty;
        /* hyperz / htile state */
        u32                     htile_offset;
        u32                     htile_surface;
        struct radeon_bo        *htile_bo;
};
  90
  91static u32 evergreen_cs_get_aray_mode(u32 tiling_flags)
  92{
  93        if (tiling_flags & RADEON_TILING_MACRO)
  94                return ARRAY_2D_TILED_THIN1;
  95        else if (tiling_flags & RADEON_TILING_MICRO)
  96                return ARRAY_1D_TILED_THIN1;
  97        else
  98                return ARRAY_LINEAR_GENERAL;
  99}
 100
 101static u32 evergreen_cs_get_num_banks(u32 nbanks)
 102{
 103        switch (nbanks) {
 104        case 2:
 105                return ADDR_SURF_2_BANK;
 106        case 4:
 107                return ADDR_SURF_4_BANK;
 108        case 8:
 109        default:
 110                return ADDR_SURF_8_BANK;
 111        case 16:
 112                return ADDR_SURF_16_BANK;
 113        }
 114}
 115
 116static void evergreen_cs_track_init(struct evergreen_cs_track *track)
 117{
 118        int i;
 119
 120        for (i = 0; i < 8; i++) {
 121                track->cb_color_fmask_bo[i] = NULL;
 122                track->cb_color_cmask_bo[i] = NULL;
 123                track->cb_color_cmask_slice[i] = 0;
 124                track->cb_color_fmask_slice[i] = 0;
 125        }
 126
 127        for (i = 0; i < 12; i++) {
 128                track->cb_color_bo[i] = NULL;
 129                track->cb_color_bo_offset[i] = 0xFFFFFFFF;
 130                track->cb_color_info[i] = 0;
 131                track->cb_color_view[i] = 0xFFFFFFFF;
 132                track->cb_color_pitch[i] = 0;
 133                track->cb_color_slice[i] = 0xfffffff;
 134                track->cb_color_slice_idx[i] = 0;
 135        }
 136        track->cb_target_mask = 0xFFFFFFFF;
 137        track->cb_shader_mask = 0xFFFFFFFF;
 138        track->cb_dirty = true;
 139
 140        track->db_depth_slice = 0xffffffff;
 141        track->db_depth_view = 0xFFFFC000;
 142        track->db_depth_size = 0xFFFFFFFF;
 143        track->db_depth_control = 0xFFFFFFFF;
 144        track->db_z_info = 0xFFFFFFFF;
 145        track->db_z_read_offset = 0xFFFFFFFF;
 146        track->db_z_write_offset = 0xFFFFFFFF;
 147        track->db_z_read_bo = NULL;
 148        track->db_z_write_bo = NULL;
 149        track->db_s_info = 0xFFFFFFFF;
 150        track->db_s_read_offset = 0xFFFFFFFF;
 151        track->db_s_write_offset = 0xFFFFFFFF;
 152        track->db_s_read_bo = NULL;
 153        track->db_s_write_bo = NULL;
 154        track->db_dirty = true;
 155        track->htile_bo = NULL;
 156        track->htile_offset = 0xFFFFFFFF;
 157        track->htile_surface = 0;
 158
 159        for (i = 0; i < 4; i++) {
 160                track->vgt_strmout_size[i] = 0;
 161                track->vgt_strmout_bo[i] = NULL;
 162                track->vgt_strmout_bo_offset[i] = 0xFFFFFFFF;
 163        }
 164        track->streamout_dirty = true;
 165        track->sx_misc_kill_all_prims = false;
 166}
 167
/*
 * Description of one surface as decoded from CS register values.  The
 * evergreen_surface_check_*() helpers fill in the "output" members from
 * the gathered ones and validate pitch/height against them.
 */
struct eg_surface {
        /* value gathered from cs */
        unsigned        nbx;            /* width in elements */
        unsigned        nby;            /* height in elements */
        unsigned        format;         /* surface format (converted to V_028C70_COLOR_* by callers) */
        unsigned        mode;           /* ARRAY_* tiling mode */
        unsigned        nbanks;         /* decoded bank count (2..16), see value_conv_check */
        unsigned        bankw;          /* decoded bank width (1..8) */
        unsigned        bankh;          /* decoded bank height (1..8) */
        unsigned        tsplit;         /* decoded tile split in bytes (64..4096) */
        unsigned        mtilea;         /* decoded macro tile aspect (1..8) */
        unsigned        nsamples;       /* sample count (callers currently pass 1) */
        /* output value */
        unsigned        bpe;            /* bytes per element */
        unsigned        layer_size;     /* size of one layer/slice in bytes */
        unsigned        palign;         /* required pitch alignment */
        unsigned        halign;         /* required height alignment */
        unsigned long   base_align;     /* required base offset alignment */
};
 187
 188static int evergreen_surface_check_linear(struct radeon_cs_parser *p,
 189                                          struct eg_surface *surf,
 190                                          const char *prefix)
 191{
 192        surf->layer_size = surf->nbx * surf->nby * surf->bpe * surf->nsamples;
 193        surf->base_align = surf->bpe;
 194        surf->palign = 1;
 195        surf->halign = 1;
 196        return 0;
 197}
 198
 199static int evergreen_surface_check_linear_aligned(struct radeon_cs_parser *p,
 200                                                  struct eg_surface *surf,
 201                                                  const char *prefix)
 202{
 203        struct evergreen_cs_track *track = p->track;
 204        unsigned palign;
 205
 206        palign = MAX(64, track->group_size / surf->bpe);
 207        surf->layer_size = surf->nbx * surf->nby * surf->bpe * surf->nsamples;
 208        surf->base_align = track->group_size;
 209        surf->palign = palign;
 210        surf->halign = 1;
 211        if (surf->nbx & (palign - 1)) {
 212                if (prefix) {
 213                        dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d\n",
 214                                 __func__, __LINE__, prefix, surf->nbx, palign);
 215                }
 216                return -EINVAL;
 217        }
 218        return 0;
 219}
 220
 221static int evergreen_surface_check_1d(struct radeon_cs_parser *p,
 222                                      struct eg_surface *surf,
 223                                      const char *prefix)
 224{
 225        struct evergreen_cs_track *track = p->track;
 226        unsigned palign;
 227
 228        palign = track->group_size / (8 * surf->bpe * surf->nsamples);
 229        palign = MAX(8, palign);
 230        surf->layer_size = surf->nbx * surf->nby * surf->bpe;
 231        surf->base_align = track->group_size;
 232        surf->palign = palign;
 233        surf->halign = 8;
 234        if ((surf->nbx & (palign - 1))) {
 235                if (prefix) {
 236                        dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d (%d %d %d)\n",
 237                                 __func__, __LINE__, prefix, surf->nbx, palign,
 238                                 track->group_size, surf->bpe, surf->nsamples);
 239                }
 240                return -EINVAL;
 241        }
 242        if ((surf->nby & (8 - 1))) {
 243                if (prefix) {
 244                        dev_warn(p->dev, "%s:%d %s height %d invalid must be aligned with 8\n",
 245                                 __func__, __LINE__, prefix, surf->nby);
 246                }
 247                return -EINVAL;
 248        }
 249        return 0;
 250}
 251
/*
 * Layout rules for ARRAY_2D_TILED_THIN1 (macro tiled) surfaces.
 * Expects surf->nbanks/bankw/bankh/mtilea/tsplit to already be decoded
 * into real values by evergreen_surface_value_conv_check().
 * Returns -EINVAL if pitch or height is not macro-tile aligned.
 */
static int evergreen_surface_check_2d(struct radeon_cs_parser *p,
                                      struct eg_surface *surf,
                                      const char *prefix)
{
        struct evergreen_cs_track *track = p->track;
        unsigned palign, halign, tileb, slice_pt;
        unsigned mtile_pr, mtile_ps, mtileb;

        /* bytes per 8x8 micro tile */
        tileb = 64 * surf->bpe * surf->nsamples;
        slice_pt = 1;
        if (tileb > surf->tsplit) {
                /* tile larger than the split size: split across slice_pt parts */
                slice_pt = tileb / surf->tsplit;
        }
        /* bytes of one split part of a micro tile */
        tileb = tileb / slice_pt;
        /* macro tile width & height */
        palign = (8 * surf->bankw * track->npipes) * surf->mtilea;
        halign = (8 * surf->bankh * surf->nbanks) / surf->mtilea;
        /* bytes per macro tile (of one split part) */
        mtileb = (palign / 8) * (halign / 8) * tileb;
        /* macro tiles per row, then per slice */
        mtile_pr = surf->nbx / palign;
        mtile_ps = (mtile_pr * surf->nby) / halign;
        surf->layer_size = mtile_ps * mtileb * slice_pt;
        /* base must be aligned on a full macro tile (same as mtileb) */
        surf->base_align = (palign / 8) * (halign / 8) * tileb;
        surf->palign = palign;
        surf->halign = halign;

        if ((surf->nbx & (palign - 1))) {
                if (prefix) {
                        dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d\n",
                                 __func__, __LINE__, prefix, surf->nbx, palign);
                }
                return -EINVAL;
        }
        if ((surf->nby & (halign - 1))) {
                if (prefix) {
                        dev_warn(p->dev, "%s:%d %s height %d invalid must be aligned with %d\n",
                                 __func__, __LINE__, prefix, surf->nby, halign);
                }
                return -EINVAL;
        }

        return 0;
}
 294
 295static int evergreen_surface_check(struct radeon_cs_parser *p,
 296                                   struct eg_surface *surf,
 297                                   const char *prefix)
 298{
 299        /* some common value computed here */
 300        surf->bpe = r600_fmt_get_blocksize(surf->format);
 301
 302        switch (surf->mode) {
 303        case ARRAY_LINEAR_GENERAL:
 304                return evergreen_surface_check_linear(p, surf, prefix);
 305        case ARRAY_LINEAR_ALIGNED:
 306                return evergreen_surface_check_linear_aligned(p, surf, prefix);
 307        case ARRAY_1D_TILED_THIN1:
 308                return evergreen_surface_check_1d(p, surf, prefix);
 309        case ARRAY_2D_TILED_THIN1:
 310                return evergreen_surface_check_2d(p, surf, prefix);
 311        default:
 312                dev_warn(p->dev, "%s:%d %s invalid array mode %d\n",
 313                                __func__, __LINE__, prefix, surf->mode);
 314                return -EINVAL;
 315        }
 316        return -EINVAL;
 317}
 318
 319static int evergreen_surface_value_conv_check(struct radeon_cs_parser *p,
 320                                              struct eg_surface *surf,
 321                                              const char *prefix)
 322{
 323        switch (surf->mode) {
 324        case ARRAY_2D_TILED_THIN1:
 325                break;
 326        case ARRAY_LINEAR_GENERAL:
 327        case ARRAY_LINEAR_ALIGNED:
 328        case ARRAY_1D_TILED_THIN1:
 329                return 0;
 330        default:
 331                dev_warn(p->dev, "%s:%d %s invalid array mode %d\n",
 332                                __func__, __LINE__, prefix, surf->mode);
 333                return -EINVAL;
 334        }
 335
 336        switch (surf->nbanks) {
 337        case 0: surf->nbanks = 2; break;
 338        case 1: surf->nbanks = 4; break;
 339        case 2: surf->nbanks = 8; break;
 340        case 3: surf->nbanks = 16; break;
 341        default:
 342                dev_warn(p->dev, "%s:%d %s invalid number of banks %d\n",
 343                         __func__, __LINE__, prefix, surf->nbanks);
 344                return -EINVAL;
 345        }
 346        switch (surf->bankw) {
 347        case 0: surf->bankw = 1; break;
 348        case 1: surf->bankw = 2; break;
 349        case 2: surf->bankw = 4; break;
 350        case 3: surf->bankw = 8; break;
 351        default:
 352                dev_warn(p->dev, "%s:%d %s invalid bankw %d\n",
 353                         __func__, __LINE__, prefix, surf->bankw);
 354                return -EINVAL;
 355        }
 356        switch (surf->bankh) {
 357        case 0: surf->bankh = 1; break;
 358        case 1: surf->bankh = 2; break;
 359        case 2: surf->bankh = 4; break;
 360        case 3: surf->bankh = 8; break;
 361        default:
 362                dev_warn(p->dev, "%s:%d %s invalid bankh %d\n",
 363                         __func__, __LINE__, prefix, surf->bankh);
 364                return -EINVAL;
 365        }
 366        switch (surf->mtilea) {
 367        case 0: surf->mtilea = 1; break;
 368        case 1: surf->mtilea = 2; break;
 369        case 2: surf->mtilea = 4; break;
 370        case 3: surf->mtilea = 8; break;
 371        default:
 372                dev_warn(p->dev, "%s:%d %s invalid macro tile aspect %d\n",
 373                         __func__, __LINE__, prefix, surf->mtilea);
 374                return -EINVAL;
 375        }
 376        switch (surf->tsplit) {
 377        case 0: surf->tsplit = 64; break;
 378        case 1: surf->tsplit = 128; break;
 379        case 2: surf->tsplit = 256; break;
 380        case 3: surf->tsplit = 512; break;
 381        case 4: surf->tsplit = 1024; break;
 382        case 5: surf->tsplit = 2048; break;
 383        case 6: surf->tsplit = 4096; break;
 384        default:
 385                dev_warn(p->dev, "%s:%d %s invalid tile split %d\n",
 386                         __func__, __LINE__, prefix, surf->tsplit);
 387                return -EINVAL;
 388        }
 389        return 0;
 390}
 391
/*
 * Validate color buffer @id against the tracked CB registers and the
 * size of the bound buffer object.  Returns 0 on success, -EINVAL if
 * the surface layout is invalid or the BO is too small.  May patch the
 * slice dword in the command stream in place to tolerate old ddx slice
 * programming (see below).
 */
static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned id)
{
        struct evergreen_cs_track *track = p->track;
        struct eg_surface surf;
        unsigned pitch, slice, mslice;
        unsigned long offset;
        int r;

        /* SLICE_MAX is a last index, hence +1 for the layer count */
        mslice = G_028C6C_SLICE_MAX(track->cb_color_view[id]) + 1;
        pitch = track->cb_color_pitch[id];
        slice = track->cb_color_slice[id];
        /* registers hold (tiles - 1); a tile is 8x8 pixels */
        surf.nbx = (pitch + 1) * 8;
        surf.nby = ((slice + 1) * 64) / surf.nbx;
        surf.mode = G_028C70_ARRAY_MODE(track->cb_color_info[id]);
        surf.format = G_028C70_FORMAT(track->cb_color_info[id]);
        surf.tsplit = G_028C74_TILE_SPLIT(track->cb_color_attrib[id]);
        surf.nbanks = G_028C74_NUM_BANKS(track->cb_color_attrib[id]);
        surf.bankw = G_028C74_BANK_WIDTH(track->cb_color_attrib[id]);
        surf.bankh = G_028C74_BANK_HEIGHT(track->cb_color_attrib[id]);
        surf.mtilea = G_028C74_MACRO_TILE_ASPECT(track->cb_color_attrib[id]);
        surf.nsamples = 1;

        if (!r600_fmt_is_valid_color(surf.format)) {
                dev_warn(p->dev, "%s:%d cb invalid format %d for %d (0x%08x)\n",
                         __func__, __LINE__, surf.format,
                        id, track->cb_color_info[id]);
                return -EINVAL;
        }

        /* decode raw tiling fields into real values */
        r = evergreen_surface_value_conv_check(p, &surf, "cb");
        if (r) {
                return r;
        }

        /* compute layer_size/base_align and check pitch/height alignment */
        r = evergreen_surface_check(p, &surf, "cb");
        if (r) {
                dev_warn(p->dev, "%s:%d cb[%d] invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
                         __func__, __LINE__, id, track->cb_color_pitch[id],
                         track->cb_color_slice[id], track->cb_color_attrib[id],
                         track->cb_color_info[id]);
                return r;
        }

        /* offset register is in units of 256 bytes */
        offset = track->cb_color_bo_offset[id] << 8;
        if (offset & (surf.base_align - 1)) {
                dev_warn(p->dev, "%s:%d cb[%d] bo base %ld not aligned with %ld\n",
                         __func__, __LINE__, id, offset, surf.base_align);
                return -EINVAL;
        }

        offset += surf.layer_size * mslice;
        if (offset > radeon_bo_size(track->cb_color_bo[id])) {
                /* old ddx are broken they allocate bo with w*h*bpp but
                 * program slice with ALIGN(h, 8), catch this and patch
                 * command stream.
                 */
                if (!surf.mode) {
                        /* volatile: we write the patched slice straight
                         * into the indirect buffer */
                        volatile u32 *ib = p->ib.ptr;
                        unsigned long tmp, nby, bsize, size, min = 0;

                        /* find the height the ddx wants */
                        if (surf.nby > 8) {
                                min = surf.nby - 8;
                        }
                        bsize = radeon_bo_size(track->cb_color_bo[id]);
                        tmp = track->cb_color_bo_offset[id] << 8;
                        /* shrink the height until the surface fits the BO
                         * (at most 8 rows, i.e. one tile of alignment) */
                        for (nby = surf.nby; nby > min; nby--) {
                                size = nby * surf.nbx * surf.bpe * surf.nsamples;
                                if ((tmp + size * mslice) <= bsize) {
                                        break;
                                }
                        }
                        if (nby > min) {
                                surf.nby = nby;
                                slice = ((nby * surf.nbx) / 64) - 1;
                                if (!evergreen_surface_check(p, &surf, "cb")) {
                                        /* check if this one works */
                                        tmp += surf.layer_size * mslice;
                                        if (tmp <= bsize) {
                                                /* patch the slice dword in the IB
                                                 * at the recorded position */
                                                ib[track->cb_color_slice_idx[id]] = slice;
                                                goto old_ddx_ok;
                                        }
                                }
                        }
                }
                dev_warn(p->dev, "%s:%d cb[%d] bo too small (layer size %d, "
                         "offset %d, max layer %d, bo size %ld, slice %d)\n",
                         __func__, __LINE__, id, surf.layer_size,
                        track->cb_color_bo_offset[id] << 8, mslice,
                        radeon_bo_size(track->cb_color_bo[id]), slice);
                dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n",
                         __func__, __LINE__, surf.nbx, surf.nby,
                        surf.mode, surf.bpe, surf.nsamples,
                        surf.bankw, surf.bankh,
                        surf.tsplit, surf.mtilea);
                return -EINVAL;
        }
old_ddx_ok:

        return 0;
}
 493
/*
 * Validate that the bound htile (hyperz metadata) buffer is large
 * enough for a depth surface of @nbx x @nby pixels.  Called only when
 * TILE_SURFACE_ENABLE is set in DB_Z_INFO.  Returns 0 on success,
 * -EINVAL if no htile BO is bound, npipes is unsupported, or the BO is
 * too small.
 */
static int evergreen_cs_track_validate_htile(struct radeon_cs_parser *p,
                                                unsigned nbx, unsigned nby)
{
        struct evergreen_cs_track *track = p->track;
        unsigned long size;

        if (track->htile_bo == NULL) {
                dev_warn(p->dev, "%s:%d htile enabled without htile surface 0x%08x\n",
                                __func__, __LINE__, track->db_z_info);
                return -EINVAL;
        }

        if (G_028ABC_LINEAR(track->htile_surface)) {
                /* pitch must be 16 htiles aligned == 16 * 8 pixel aligned */
                nbx = round_up(nbx, 16 * 8);
                /* height is npipes htiles aligned == npipes * 8 pixel aligned */
                nby = round_up(nby, track->npipes * 8);
        } else {
                /* always assume 8x8 htile */
                /* align is htile align * 8, htile align vary according to
                 * number of pipe and tile width and nby
                 */
                /* NOTE(review): per-npipes alignment table below looks
                 * hw-derived -- confirm against the evergreen addressing
                 * docs before changing */
                switch (track->npipes) {
                case 8:
                        /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
                        nbx = round_up(nbx, 64 * 8);
                        nby = round_up(nby, 64 * 8);
                        break;
                case 4:
                        /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
                        nbx = round_up(nbx, 64 * 8);
                        nby = round_up(nby, 32 * 8);
                        break;
                case 2:
                        /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
                        nbx = round_up(nbx, 32 * 8);
                        nby = round_up(nby, 32 * 8);
                        break;
                case 1:
                        /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
                        nbx = round_up(nbx, 32 * 8);
                        nby = round_up(nby, 16 * 8);
                        break;
                default:
                        dev_warn(p->dev, "%s:%d invalid num pipes %d\n",
                                        __func__, __LINE__, track->npipes);
                        return -EINVAL;
                }
        }
        /* compute number of htile */
        nbx = nbx >> 3;
        nby = nby >> 3;
        /* size must be aligned on npipes * 2K boundary */
        /* roundup() (not round_up()) because npipes * 2048 need not be a
         * power of two */
        size = roundup(nbx * nby * 4, track->npipes * (2 << 10));
        size += track->htile_offset;

        if (size > radeon_bo_size(track->htile_bo)) {
                dev_warn(p->dev, "%s:%d htile surface too small %ld for %ld (%d %d)\n",
                                __func__, __LINE__, radeon_bo_size(track->htile_bo),
                                size, nbx, nby);
                return -EINVAL;
        }
        return 0;
}
 558
/*
 * Validate stencil buffer state against the tracked DB registers and
 * the size of the bound stencil read/write buffer objects.  Returns 0
 * on success, -EINVAL on any layout, alignment or size violation.
 */
static int evergreen_cs_track_validate_stencil(struct radeon_cs_parser *p)
{
        struct evergreen_cs_track *track = p->track;
        struct eg_surface surf;
        unsigned pitch, slice, mslice;
        unsigned long offset;
        int r;

        /* SLICE_MAX is a last index, hence +1 for the layer count */
        mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1;
        pitch = G_028058_PITCH_TILE_MAX(track->db_depth_size);
        slice = track->db_depth_slice;
        /* registers hold (tiles - 1); a tile is 8x8 pixels */
        surf.nbx = (pitch + 1) * 8;
        surf.nby = ((slice + 1) * 64) / surf.nbx;
        surf.mode = G_028040_ARRAY_MODE(track->db_z_info);
        surf.format = G_028044_FORMAT(track->db_s_info);
        surf.tsplit = G_028044_TILE_SPLIT(track->db_s_info);
        surf.nbanks = G_028040_NUM_BANKS(track->db_z_info);
        surf.bankw = G_028040_BANK_WIDTH(track->db_z_info);
        surf.bankh = G_028040_BANK_HEIGHT(track->db_z_info);
        surf.mtilea = G_028040_MACRO_TILE_ASPECT(track->db_z_info);
        surf.nsamples = 1;

        /* only raw format value 1 (8-bit stencil) is accepted */
        if (surf.format != 1) {
                dev_warn(p->dev, "%s:%d stencil invalid format %d\n",
                         __func__, __LINE__, surf.format);
                return -EINVAL;
        }
        /* replace by color format so we can use same code */
        surf.format = V_028C70_COLOR_8;

        r = evergreen_surface_value_conv_check(p, &surf, "stencil");
        if (r) {
                return r;
        }

        /* first pass silent (NULL prefix): old userspace alignment bug
         * is re-checked below before reporting */
        r = evergreen_surface_check(p, &surf, NULL);
        if (r) {
                /* old userspace doesn't compute proper depth/stencil alignment
                 * check that alignment against a bigger byte per elements and
                 * only report if that alignment is wrong too.
                 */
                surf.format = V_028C70_COLOR_8_8_8_8;
                r = evergreen_surface_check(p, &surf, "stencil");
                if (r) {
                        dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
                                 __func__, __LINE__, track->db_depth_size,
                                 track->db_depth_slice, track->db_s_info, track->db_z_info);
                }
                return r;
        }

        /* stencil read buffer: offset register is in units of 256 bytes */
        offset = track->db_s_read_offset << 8;
        if (offset & (surf.base_align - 1)) {
                dev_warn(p->dev, "%s:%d stencil read bo base %ld not aligned with %ld\n",
                         __func__, __LINE__, offset, surf.base_align);
                return -EINVAL;
        }
        offset += surf.layer_size * mslice;
        if (offset > radeon_bo_size(track->db_s_read_bo)) {
                dev_warn(p->dev, "%s:%d stencil read bo too small (layer size %d, "
                         "offset %ld, max layer %d, bo size %ld)\n",
                         __func__, __LINE__, surf.layer_size,
                        (unsigned long)track->db_s_read_offset << 8, mslice,
                        radeon_bo_size(track->db_s_read_bo));
                dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
                         __func__, __LINE__, track->db_depth_size,
                         track->db_depth_slice, track->db_s_info, track->db_z_info);
                return -EINVAL;
        }

        /* stencil write buffer: same checks */
        offset = track->db_s_write_offset << 8;
        if (offset & (surf.base_align - 1)) {
                dev_warn(p->dev, "%s:%d stencil write bo base %ld not aligned with %ld\n",
                         __func__, __LINE__, offset, surf.base_align);
                return -EINVAL;
        }
        offset += surf.layer_size * mslice;
        if (offset > radeon_bo_size(track->db_s_write_bo)) {
                dev_warn(p->dev, "%s:%d stencil write bo too small (layer size %d, "
                         "offset %ld, max layer %d, bo size %ld)\n",
                         __func__, __LINE__, surf.layer_size,
                        (unsigned long)track->db_s_write_offset << 8, mslice,
                        radeon_bo_size(track->db_s_write_bo));
                return -EINVAL;
        }

        /* hyperz */
        if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) {
                r = evergreen_cs_track_validate_htile(p, surf.nbx, surf.nby);
                if (r) {
                        return r;
                }
        }

        return 0;
}
 655
 656static int evergreen_cs_track_validate_depth(struct radeon_cs_parser *p)
 657{
 658        struct evergreen_cs_track *track = p->track;
 659        struct eg_surface surf;
 660        unsigned pitch, slice, mslice;
 661        unsigned long offset;
 662        int r;
 663
 664        mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1;
 665        pitch = G_028058_PITCH_TILE_MAX(track->db_depth_size);
 666        slice = track->db_depth_slice;
 667        surf.nbx = (pitch + 1) * 8;
 668        surf.nby = ((slice + 1) * 64) / surf.nbx;
 669        surf.mode = G_028040_ARRAY_MODE(track->db_z_info);
 670        surf.format = G_028040_FORMAT(track->db_z_info);
 671        surf.tsplit = G_028040_TILE_SPLIT(track->db_z_info);
 672        surf.nbanks = G_028040_NUM_BANKS(track->db_z_info);
 673        surf.bankw = G_028040_BANK_WIDTH(track->db_z_info);
 674        surf.bankh = G_028040_BANK_HEIGHT(track->db_z_info);
 675        surf.mtilea = G_028040_MACRO_TILE_ASPECT(track->db_z_info);
 676        surf.nsamples = 1;
 677
 678        switch (surf.format) {
 679        case V_028040_Z_16:
 680                surf.format = V_028C70_COLOR_16;
 681                break;
 682        case V_028040_Z_24:
 683        case V_028040_Z_32_FLOAT:
 684                surf.format = V_028C70_COLOR_8_8_8_8;
 685                break;
 686        default:
 687                dev_warn(p->dev, "%s:%d depth invalid format %d\n",
 688                         __func__, __LINE__, surf.format);
 689                return -EINVAL;
 690        }
 691
 692        r = evergreen_surface_value_conv_check(p, &surf, "depth");
 693        if (r) {
 694                dev_warn(p->dev, "%s:%d depth invalid (0x%08x 0x%08x 0x%08x)\n",
 695                         __func__, __LINE__, track->db_depth_size,
 696                         track->db_depth_slice, track->db_z_info);
 697                return r;
 698        }
 699
 700        r = evergreen_surface_check(p, &surf, "depth");
 701        if (r) {
 702                dev_warn(p->dev, "%s:%d depth invalid (0x%08x 0x%08x 0x%08x)\n",
 703                         __func__, __LINE__, track->db_depth_size,
 704                         track->db_depth_slice, track->db_z_info);
 705                return r;
 706        }
 707
 708        offset = track->db_z_read_offset << 8;
 709        if (offset & (surf.base_align - 1)) {
 710                dev_warn(p->dev, "%s:%d stencil read bo base %ld not aligned with %ld\n",
 711                         __func__, __LINE__, offset, surf.base_align);
 712                return -EINVAL;
 713        }
 714        offset += surf.layer_size * mslice;
 715        if (offset > radeon_bo_size(track->db_z_read_bo)) {
 716                dev_warn(p->dev, "%s:%d depth read bo too small (layer size %d, "
 717                         "offset %ld, max layer %d, bo size %ld)\n",
 718                         __func__, __LINE__, surf.layer_size,
 719                        (unsigned long)track->db_z_read_offset << 8, mslice,
 720                        radeon_bo_size(track->db_z_read_bo));
 721                return -EINVAL;
 722        }
 723
 724        offset = track->db_z_write_offset << 8;
 725        if (offset & (surf.base_align - 1)) {
 726                dev_warn(p->dev, "%s:%d stencil write bo base %ld not aligned with %ld\n",
 727                         __func__, __LINE__, offset, surf.base_align);
 728                return -EINVAL;
 729        }
 730        offset += surf.layer_size * mslice;
 731        if (offset > radeon_bo_size(track->db_z_write_bo)) {
 732                dev_warn(p->dev, "%s:%d depth write bo too small (layer size %d, "
 733                         "offset %ld, max layer %d, bo size %ld)\n",
 734                         __func__, __LINE__, surf.layer_size,
 735                        (unsigned long)track->db_z_write_offset << 8, mslice,
 736                        radeon_bo_size(track->db_z_write_bo));
 737                return -EINVAL;
 738        }
 739
 740        /* hyperz */
 741        if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) {
 742                r = evergreen_cs_track_validate_htile(p, surf.nbx, surf.nby);
 743                if (r) {
 744                        return r;
 745                }
 746        }
 747
 748        return 0;
 749}
 750
/**
 * evergreen_cs_track_validate_texture() - validate a texture resource
 * @p:		parser structure holding parsing context
 * @texture:	BO backing the base level of the texture
 * @mipmap:	BO backing the mipmap chain (may be NULL when there are no mips)
 * @idx:	index of the first of the 8 resource dwords inside the IB
 *
 * Decodes the 8-dword texture resource descriptor, validates the format,
 * dimension and tiling parameters, and verifies that the base level and
 * every mip level fit inside their backing buffer objects.
 * Returns 0 on success, -EINVAL on a malformed or oversized resource.
 */
static int evergreen_cs_track_validate_texture(struct radeon_cs_parser *p,
                                               struct radeon_bo *texture,
                                               struct radeon_bo *mipmap,
                                               unsigned idx)
{
	struct eg_surface surf;
	unsigned long toffset, moffset;
	unsigned dim, llevel, mslice, width, height, depth, i;
	u32 texdw[8];
	int r;

	/* snapshot the 8-dword resource descriptor from the IB */
	texdw[0] = radeon_get_ib_value(p, idx + 0);
	texdw[1] = radeon_get_ib_value(p, idx + 1);
	texdw[2] = radeon_get_ib_value(p, idx + 2);
	texdw[3] = radeon_get_ib_value(p, idx + 3);
	texdw[4] = radeon_get_ib_value(p, idx + 4);
	texdw[5] = radeon_get_ib_value(p, idx + 5);
	texdw[6] = radeon_get_ib_value(p, idx + 6);
	texdw[7] = radeon_get_ib_value(p, idx + 7);
	/* unpack the fields this function validates */
	dim = G_030000_DIM(texdw[0]);
	llevel = G_030014_LAST_LEVEL(texdw[5]);
	mslice = G_030014_LAST_ARRAY(texdw[5]) + 1;
	width = G_030000_TEX_WIDTH(texdw[0]) + 1;
	height =  G_030004_TEX_HEIGHT(texdw[1]) + 1;
	depth = G_030004_TEX_DEPTH(texdw[1]) + 1;
	surf.format = G_03001C_DATA_FORMAT(texdw[7]);
	/* pitch is in units of 8 pixels; convert to blocks for the format */
	surf.nbx = (G_030000_PITCH(texdw[0]) + 1) * 8;
	surf.nbx = r600_fmt_get_nblocksx(surf.format, surf.nbx);
	surf.nby = r600_fmt_get_nblocksy(surf.format, height);
	surf.mode = G_030004_ARRAY_MODE(texdw[1]);
	surf.tsplit = G_030018_TILE_SPLIT(texdw[6]);
	surf.nbanks = G_03001C_NUM_BANKS(texdw[7]);
	surf.bankw = G_03001C_BANK_WIDTH(texdw[7]);
	surf.bankh = G_03001C_BANK_HEIGHT(texdw[7]);
	surf.mtilea = G_03001C_MACRO_TILE_ASPECT(texdw[7]);
	surf.nsamples = 1;
	/* base and mip addresses are stored as 256-byte aligned offsets */
	toffset = texdw[2] << 8;
	moffset = texdw[3] << 8;

	if (!r600_fmt_is_valid_texture(surf.format, p->family)) {
		dev_warn(p->dev, "%s:%d texture invalid format %d\n",
			 __func__, __LINE__, surf.format);
		return -EINVAL;
	}
	switch (dim) {
	case V_030000_SQ_TEX_DIM_1D:
	case V_030000_SQ_TEX_DIM_2D:
	case V_030000_SQ_TEX_DIM_CUBEMAP:
	case V_030000_SQ_TEX_DIM_1D_ARRAY:
	case V_030000_SQ_TEX_DIM_2D_ARRAY:
		/* only 3D textures use the depth dimension for sizing */
		depth = 1;
		break;
	case V_030000_SQ_TEX_DIM_2D_MSAA:
	case V_030000_SQ_TEX_DIM_2D_ARRAY_MSAA:
		/* for MSAA views the LAST_LEVEL field carries log2(nr samples)
		 * and there is no mip chain */
		surf.nsamples = 1 << llevel;
		llevel = 0;
		depth = 1;
		break;
	case V_030000_SQ_TEX_DIM_3D:
		break;
	default:
		dev_warn(p->dev, "%s:%d texture invalid dimension %d\n",
			 __func__, __LINE__, dim);
		return -EINVAL;
	}

	r = evergreen_surface_value_conv_check(p, &surf, "texture");
	if (r) {
		return r;
	}

	/* align height */
	/* probe pass (NULL prefix suppresses warnings) fills the surface
	 * alignment fields so nby can be rounded up; errors, if any, are
	 * reported by the checked call just below */
	evergreen_surface_check(p, &surf, NULL);
	surf.nby = ALIGN(surf.nby, surf.halign);

	r = evergreen_surface_check(p, &surf, "texture");
	if (r) {
		dev_warn(p->dev, "%s:%d texture invalid 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n",
			 __func__, __LINE__, texdw[0], texdw[1], texdw[4],
			 texdw[5], texdw[6], texdw[7]);
		return r;
	}

	/* check texture size */
	if (toffset & (surf.base_align - 1)) {
		dev_warn(p->dev, "%s:%d texture bo base %ld not aligned with %ld\n",
			 __func__, __LINE__, toffset, surf.base_align);
		return -EINVAL;
	}
	if (moffset & (surf.base_align - 1)) {
		dev_warn(p->dev, "%s:%d mipmap bo base %ld not aligned with %ld\n",
			 __func__, __LINE__, moffset, surf.base_align);
		return -EINVAL;
	}
	/* 3D textures scale with depth; everything else with the slice count */
	if (dim == SQ_TEX_DIM_3D) {
		toffset += surf.layer_size * depth;
	} else {
		toffset += surf.layer_size * mslice;
	}
	if (toffset > radeon_bo_size(texture)) {
		dev_warn(p->dev, "%s:%d texture bo too small (layer size %d, "
			 "offset %ld, max layer %d, depth %d, bo size %ld) (%d %d)\n",
			 __func__, __LINE__, surf.layer_size,
			(unsigned long)texdw[2] << 8, mslice,
			depth, radeon_bo_size(texture),
			surf.nbx, surf.nby);
		return -EINVAL;
	}

	if (!mipmap) {
		if (llevel) {
			/* mips are declared but no mip BO was relocated */
			dev_warn(p->dev, "%s:%i got NULL MIP_ADDRESS relocation\n",
				 __func__, __LINE__);
			return -EINVAL;
		} else {
			return 0; /* everything's ok */
		}
	}

	/* check mipmap size */
	/* walk levels 1..llevel, accumulating each level's size into moffset */
	for (i = 1; i <= llevel; i++) {
		unsigned w, h, d;

		w = r600_mip_minify(width, i);
		h = r600_mip_minify(height, i);
		d = r600_mip_minify(depth, i);
		surf.nbx = r600_fmt_get_nblocksx(surf.format, w);
		surf.nby = r600_fmt_get_nblocksy(surf.format, h);

		switch (surf.mode) {
		case ARRAY_2D_TILED_THIN1:
			/* small mips fall back to 1D tiling; once demoted the
			 * surface stays 1D for the remaining (smaller) levels */
			if (surf.nbx < surf.palign || surf.nby < surf.halign) {
				surf.mode = ARRAY_1D_TILED_THIN1;
			}
			/* recompute alignment */
			evergreen_surface_check(p, &surf, NULL);
			break;
		case ARRAY_LINEAR_GENERAL:
		case ARRAY_LINEAR_ALIGNED:
		case ARRAY_1D_TILED_THIN1:
			break;
		default:
			dev_warn(p->dev, "%s:%d invalid array mode %d\n",
				 __func__, __LINE__, surf.mode);
			return -EINVAL;
		}
		surf.nbx = ALIGN(surf.nbx, surf.palign);
		surf.nby = ALIGN(surf.nby, surf.halign);

		r = evergreen_surface_check(p, &surf, "mipmap");
		if (r) {
			return r;
		}

		if (dim == SQ_TEX_DIM_3D) {
			moffset += surf.layer_size * d;
		} else {
			moffset += surf.layer_size * mslice;
		}
		if (moffset > radeon_bo_size(mipmap)) {
			dev_warn(p->dev, "%s:%d mipmap [%d] bo too small (layer size %d, "
					"offset %ld, coffset %ld, max layer %d, depth %d, "
					"bo size %ld) level0 (%d %d %d)\n",
					__func__, __LINE__, i, surf.layer_size,
					(unsigned long)texdw[3] << 8, moffset, mslice,
					d, radeon_bo_size(mipmap),
					width, height, depth);
			dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n",
				 __func__, __LINE__, surf.nbx, surf.nby,
				surf.mode, surf.bpe, surf.nsamples,
				surf.bankw, surf.bankh,
				surf.tsplit, surf.mtilea);
			return -EINVAL;
		}
	}

	return 0;
}
 929
 930static int evergreen_cs_track_check(struct radeon_cs_parser *p)
 931{
 932        struct evergreen_cs_track *track = p->track;
 933        unsigned tmp, i;
 934        int r;
 935        unsigned buffer_mask = 0;
 936
 937        /* check streamout */
 938        if (track->streamout_dirty && track->vgt_strmout_config) {
 939                for (i = 0; i < 4; i++) {
 940                        if (track->vgt_strmout_config & (1 << i)) {
 941                                buffer_mask |= (track->vgt_strmout_buffer_config >> (i * 4)) & 0xf;
 942                        }
 943                }
 944
 945                for (i = 0; i < 4; i++) {
 946                        if (buffer_mask & (1 << i)) {
 947                                if (track->vgt_strmout_bo[i]) {
 948                                        u64 offset = (u64)track->vgt_strmout_bo_offset[i] +
 949                                                        (u64)track->vgt_strmout_size[i];
 950                                        if (offset > radeon_bo_size(track->vgt_strmout_bo[i])) {
 951                                                DRM_ERROR("streamout %d bo too small: 0x%llx, 0x%lx\n",
 952                                                          i, offset,
 953                                                          radeon_bo_size(track->vgt_strmout_bo[i]));
 954                                                return -EINVAL;
 955                                        }
 956                                } else {
 957                                        dev_warn(p->dev, "No buffer for streamout %d\n", i);
 958                                        return -EINVAL;
 959                                }
 960                        }
 961                }
 962                track->streamout_dirty = false;
 963        }
 964
 965        if (track->sx_misc_kill_all_prims)
 966                return 0;
 967
 968        /* check that we have a cb for each enabled target
 969         */
 970        if (track->cb_dirty) {
 971                tmp = track->cb_target_mask;
 972                for (i = 0; i < 8; i++) {
 973                        if ((tmp >> (i * 4)) & 0xF) {
 974                                /* at least one component is enabled */
 975                                if (track->cb_color_bo[i] == NULL) {
 976                                        dev_warn(p->dev, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n",
 977                                                __func__, __LINE__, track->cb_target_mask, track->cb_shader_mask, i);
 978                                        return -EINVAL;
 979                                }
 980                                /* check cb */
 981                                r = evergreen_cs_track_validate_cb(p, i);
 982                                if (r) {
 983                                        return r;
 984                                }
 985                        }
 986                }
 987                track->cb_dirty = false;
 988        }
 989
 990        if (track->db_dirty) {
 991                /* Check stencil buffer */
 992                if (G_028044_FORMAT(track->db_s_info) != V_028044_STENCIL_INVALID &&
 993                    G_028800_STENCIL_ENABLE(track->db_depth_control)) {
 994                        r = evergreen_cs_track_validate_stencil(p);
 995                        if (r)
 996                                return r;
 997                }
 998                /* Check depth buffer */
 999                if (G_028040_FORMAT(track->db_z_info) != V_028040_Z_INVALID &&
1000                    G_028800_Z_ENABLE(track->db_depth_control)) {
1001                        r = evergreen_cs_track_validate_depth(p);
1002                        if (r)
1003                                return r;
1004                }
1005                track->db_dirty = false;
1006        }
1007
1008        return 0;
1009}
1010
1011/**
1012 * evergreen_cs_packet_parse() - parse cp packet and point ib index to next packet
1013 * @parser:     parser structure holding parsing context.
1014 * @pkt:        where to store packet informations
1015 *
1016 * Assume that chunk_ib_index is properly set. Will return -EINVAL
1017 * if packet is bigger than remaining ib size. or if packets is unknown.
1018 **/
1019static int evergreen_cs_packet_parse(struct radeon_cs_parser *p,
1020                              struct radeon_cs_packet *pkt,
1021                              unsigned idx)
1022{
1023        struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
1024        uint32_t header;
1025
1026        if (idx >= ib_chunk->length_dw) {
1027                DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
1028                          idx, ib_chunk->length_dw);
1029                return -EINVAL;
1030        }
1031        header = radeon_get_ib_value(p, idx);
1032        pkt->idx = idx;
1033        pkt->type = CP_PACKET_GET_TYPE(header);
1034        pkt->count = CP_PACKET_GET_COUNT(header);
1035        pkt->one_reg_wr = 0;
1036        switch (pkt->type) {
1037        case PACKET_TYPE0:
1038                pkt->reg = CP_PACKET0_GET_REG(header);
1039                break;
1040        case PACKET_TYPE3:
1041                pkt->opcode = CP_PACKET3_GET_OPCODE(header);
1042                break;
1043        case PACKET_TYPE2:
1044                pkt->count = -1;
1045                break;
1046        default:
1047                DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
1048                return -EINVAL;
1049        }
1050        if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
1051                DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
1052                          pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
1053                return -EINVAL;
1054        }
1055        return 0;
1056}
1057
1058/**
1059 * evergreen_cs_packet_next_reloc() - parse next packet which should be reloc packet3
1060 * @parser:             parser structure holding parsing context.
1061 * @data:               pointer to relocation data
1062 * @offset_start:       starting offset
1063 * @offset_mask:        offset mask (to align start offset on)
1064 * @reloc:              reloc informations
1065 *
1066 * Check next packet is relocation packet3, do bo validation and compute
1067 * GPU offset using the provided start.
1068 **/
1069static int evergreen_cs_packet_next_reloc(struct radeon_cs_parser *p,
1070                                          struct radeon_cs_reloc **cs_reloc)
1071{
1072        struct radeon_cs_chunk *relocs_chunk;
1073        struct radeon_cs_packet p3reloc;
1074        unsigned idx;
1075        int r;
1076
1077        if (p->chunk_relocs_idx == -1) {
1078                DRM_ERROR("No relocation chunk !\n");
1079                return -EINVAL;
1080        }
1081        *cs_reloc = NULL;
1082        relocs_chunk = &p->chunks[p->chunk_relocs_idx];
1083        r = evergreen_cs_packet_parse(p, &p3reloc, p->idx);
1084        if (r) {
1085                return r;
1086        }
1087        p->idx += p3reloc.count + 2;
1088        if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
1089                DRM_ERROR("No packet3 for relocation for packet at %d.\n",
1090                          p3reloc.idx);
1091                return -EINVAL;
1092        }
1093        idx = radeon_get_ib_value(p, p3reloc.idx + 1);
1094        if (idx >= relocs_chunk->length_dw) {
1095                DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
1096                          idx, relocs_chunk->length_dw);
1097                return -EINVAL;
1098        }
1099        /* FIXME: we assume reloc size is 4 dwords */
1100        *cs_reloc = p->relocs_ptr[(idx / 4)];
1101        return 0;
1102}
1103
1104/**
1105 * evergreen_cs_packet_next_is_pkt3_nop() - test if the next packet is NOP
1106 * @p:          structure holding the parser context.
1107 *
1108 * Check if the next packet is a relocation packet3.
1109 **/
1110static bool evergreen_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
1111{
1112        struct radeon_cs_packet p3reloc;
1113        int r;
1114
1115        r = evergreen_cs_packet_parse(p, &p3reloc, p->idx);
1116        if (r) {
1117                return false;
1118        }
1119        if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
1120                return false;
1121        }
1122        return true;
1123}
1124
1125/**
1126 * evergreen_cs_packet_next_vline() - parse userspace VLINE packet
1127 * @parser:             parser structure holding parsing context.
1128 *
1129 * Userspace sends a special sequence for VLINE waits.
1130 * PACKET0 - VLINE_START_END + value
1131 * PACKET3 - WAIT_REG_MEM poll vline status reg
1132 * RELOC (P3) - crtc_id in reloc.
1133 *
1134 * This function parses this and relocates the VLINE START END
1135 * and WAIT_REG_MEM packets to the correct crtc.
1136 * It also detects a switched off crtc and nulls out the
1137 * wait in that case.
1138 */
1139static int evergreen_cs_packet_parse_vline(struct radeon_cs_parser *p)
1140{
1141        struct drm_mode_object *obj;
1142        struct drm_crtc *crtc;
1143        struct radeon_crtc *radeon_crtc;
1144        struct radeon_cs_packet p3reloc, wait_reg_mem;
1145        int crtc_id;
1146        int r;
1147        uint32_t header, h_idx, reg, wait_reg_mem_info;
1148        volatile uint32_t *ib;
1149
1150        ib = p->ib.ptr;
1151
1152        /* parse the WAIT_REG_MEM */
1153        r = evergreen_cs_packet_parse(p, &wait_reg_mem, p->idx);
1154        if (r)
1155                return r;
1156
1157        /* check its a WAIT_REG_MEM */
1158        if (wait_reg_mem.type != PACKET_TYPE3 ||
1159            wait_reg_mem.opcode != PACKET3_WAIT_REG_MEM) {
1160                DRM_ERROR("vline wait missing WAIT_REG_MEM segment\n");
1161                return -EINVAL;
1162        }
1163
1164        wait_reg_mem_info = radeon_get_ib_value(p, wait_reg_mem.idx + 1);
1165        /* bit 4 is reg (0) or mem (1) */
1166        if (wait_reg_mem_info & 0x10) {
1167                DRM_ERROR("vline WAIT_REG_MEM waiting on MEM rather than REG\n");
1168                return -EINVAL;
1169        }
1170        /* waiting for value to be equal */
1171        if ((wait_reg_mem_info & 0x7) != 0x3) {
1172                DRM_ERROR("vline WAIT_REG_MEM function not equal\n");
1173                return -EINVAL;
1174        }
1175        if ((radeon_get_ib_value(p, wait_reg_mem.idx + 2) << 2) != EVERGREEN_VLINE_STATUS) {
1176                DRM_ERROR("vline WAIT_REG_MEM bad reg\n");
1177                return -EINVAL;
1178        }
1179
1180        if (radeon_get_ib_value(p, wait_reg_mem.idx + 5) != EVERGREEN_VLINE_STAT) {
1181                DRM_ERROR("vline WAIT_REG_MEM bad bit mask\n");
1182                return -EINVAL;
1183        }
1184
1185        /* jump over the NOP */
1186        r = evergreen_cs_packet_parse(p, &p3reloc, p->idx + wait_reg_mem.count + 2);
1187        if (r)
1188                return r;
1189
1190        h_idx = p->idx - 2;
1191        p->idx += wait_reg_mem.count + 2;
1192        p->idx += p3reloc.count + 2;
1193
1194        header = radeon_get_ib_value(p, h_idx);
1195        crtc_id = radeon_get_ib_value(p, h_idx + 2 + 7 + 1);
1196        reg = CP_PACKET0_GET_REG(header);
1197        obj = drm_mode_object_find(p->rdev->ddev, crtc_id, DRM_MODE_OBJECT_CRTC);
1198        if (!obj) {
1199                DRM_ERROR("cannot find crtc %d\n", crtc_id);
1200                return -EINVAL;
1201        }
1202        crtc = obj_to_crtc(obj);
1203        radeon_crtc = to_radeon_crtc(crtc);
1204        crtc_id = radeon_crtc->crtc_id;
1205
1206        if (!crtc->enabled) {
1207                /* if the CRTC isn't enabled - we need to nop out the WAIT_REG_MEM */
1208                ib[h_idx + 2] = PACKET2(0);
1209                ib[h_idx + 3] = PACKET2(0);
1210                ib[h_idx + 4] = PACKET2(0);
1211                ib[h_idx + 5] = PACKET2(0);
1212                ib[h_idx + 6] = PACKET2(0);
1213                ib[h_idx + 7] = PACKET2(0);
1214                ib[h_idx + 8] = PACKET2(0);
1215        } else {
1216                switch (reg) {
1217                case EVERGREEN_VLINE_START_END:
1218                        header &= ~R600_CP_PACKET0_REG_MASK;
1219                        header |= (EVERGREEN_VLINE_START_END + radeon_crtc->crtc_offset) >> 2;
1220                        ib[h_idx] = header;
1221                        ib[h_idx + 4] = (EVERGREEN_VLINE_STATUS + radeon_crtc->crtc_offset) >> 2;
1222                        break;
1223                default:
1224                        DRM_ERROR("unknown crtc reloc\n");
1225                        return -EINVAL;
1226                }
1227        }
1228        return 0;
1229}
1230
1231static int evergreen_packet0_check(struct radeon_cs_parser *p,
1232                                   struct radeon_cs_packet *pkt,
1233                                   unsigned idx, unsigned reg)
1234{
1235        int r;
1236
1237        switch (reg) {
1238        case EVERGREEN_VLINE_START_END:
1239                r = evergreen_cs_packet_parse_vline(p);
1240                if (r) {
1241                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1242                                        idx, reg);
1243                        return r;
1244                }
1245                break;
1246        default:
1247                printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
1248                       reg, idx);
1249                return -EINVAL;
1250        }
1251        return 0;
1252}
1253
1254static int evergreen_cs_parse_packet0(struct radeon_cs_parser *p,
1255                                      struct radeon_cs_packet *pkt)
1256{
1257        unsigned reg, i;
1258        unsigned idx;
1259        int r;
1260
1261        idx = pkt->idx + 1;
1262        reg = pkt->reg;
1263        for (i = 0; i <= pkt->count; i++, idx++, reg += 4) {
1264                r = evergreen_packet0_check(p, pkt, idx, reg);
1265                if (r) {
1266                        return r;
1267                }
1268        }
1269        return 0;
1270}
1271
1272/**
1273 * evergreen_cs_check_reg() - check if register is authorized or not
1274 * @parser: parser structure holding parsing context
1275 * @reg: register we are testing
1276 * @idx: index into the cs buffer
1277 *
1278 * This function will test against evergreen_reg_safe_bm and return 0
 * if the register is safe. If the register is not flagged as safe this
 * function will test it against a list of registers needing special handling.
1281 */
1282static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
1283{
1284        struct evergreen_cs_track *track = (struct evergreen_cs_track *)p->track;
1285        struct radeon_cs_reloc *reloc;
1286        u32 last_reg;
1287        u32 m, i, tmp, *ib;
1288        int r;
1289
1290        if (p->rdev->family >= CHIP_CAYMAN)
1291                last_reg = ARRAY_SIZE(cayman_reg_safe_bm);
1292        else
1293                last_reg = ARRAY_SIZE(evergreen_reg_safe_bm);
1294
1295        i = (reg >> 7);
1296        if (i >= last_reg) {
1297                dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
1298                return -EINVAL;
1299        }
1300        m = 1 << ((reg >> 2) & 31);
1301        if (p->rdev->family >= CHIP_CAYMAN) {
1302                if (!(cayman_reg_safe_bm[i] & m))
1303                        return 0;
1304        } else {
1305                if (!(evergreen_reg_safe_bm[i] & m))
1306                        return 0;
1307        }
1308        ib = p->ib.ptr;
1309        switch (reg) {
1310        /* force following reg to 0 in an attempt to disable out buffer
1311         * which will need us to better understand how it works to perform
1312         * security check on it (Jerome)
1313         */
1314        case SQ_ESGS_RING_SIZE:
1315        case SQ_GSVS_RING_SIZE:
1316        case SQ_ESTMP_RING_SIZE:
1317        case SQ_GSTMP_RING_SIZE:
1318        case SQ_HSTMP_RING_SIZE:
1319        case SQ_LSTMP_RING_SIZE:
1320        case SQ_PSTMP_RING_SIZE:
1321        case SQ_VSTMP_RING_SIZE:
1322        case SQ_ESGS_RING_ITEMSIZE:
1323        case SQ_ESTMP_RING_ITEMSIZE:
1324        case SQ_GSTMP_RING_ITEMSIZE:
1325        case SQ_GSVS_RING_ITEMSIZE:
1326        case SQ_GS_VERT_ITEMSIZE:
1327        case SQ_GS_VERT_ITEMSIZE_1:
1328        case SQ_GS_VERT_ITEMSIZE_2:
1329        case SQ_GS_VERT_ITEMSIZE_3:
1330        case SQ_GSVS_RING_OFFSET_1:
1331        case SQ_GSVS_RING_OFFSET_2:
1332        case SQ_GSVS_RING_OFFSET_3:
1333        case SQ_HSTMP_RING_ITEMSIZE:
1334        case SQ_LSTMP_RING_ITEMSIZE:
1335        case SQ_PSTMP_RING_ITEMSIZE:
1336        case SQ_VSTMP_RING_ITEMSIZE:
1337        case VGT_TF_RING_SIZE:
1338                /* get value to populate the IB don't remove */
1339                /*tmp =radeon_get_ib_value(p, idx);
1340                  ib[idx] = 0;*/
1341                break;
1342        case SQ_ESGS_RING_BASE:
1343        case SQ_GSVS_RING_BASE:
1344        case SQ_ESTMP_RING_BASE:
1345        case SQ_GSTMP_RING_BASE:
1346        case SQ_HSTMP_RING_BASE:
1347        case SQ_LSTMP_RING_BASE:
1348        case SQ_PSTMP_RING_BASE:
1349        case SQ_VSTMP_RING_BASE:
1350                r = evergreen_cs_packet_next_reloc(p, &reloc);
1351                if (r) {
1352                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1353                                        "0x%04X\n", reg);
1354                        return -EINVAL;
1355                }
1356                ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1357                break;
1358        case DB_DEPTH_CONTROL:
1359                track->db_depth_control = radeon_get_ib_value(p, idx);
1360                track->db_dirty = true;
1361                break;
1362        case CAYMAN_DB_EQAA:
1363                if (p->rdev->family < CHIP_CAYMAN) {
1364                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1365                                 "0x%04X\n", reg);
1366                        return -EINVAL;
1367                }
1368                break;
1369        case CAYMAN_DB_DEPTH_INFO:
1370                if (p->rdev->family < CHIP_CAYMAN) {
1371                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1372                                 "0x%04X\n", reg);
1373                        return -EINVAL;
1374                }
1375                break;
1376        case DB_Z_INFO:
1377                track->db_z_info = radeon_get_ib_value(p, idx);
1378                if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1379                        r = evergreen_cs_packet_next_reloc(p, &reloc);
1380                        if (r) {
1381                                dev_warn(p->dev, "bad SET_CONTEXT_REG "
1382                                                "0x%04X\n", reg);
1383                                return -EINVAL;
1384                        }
1385                        ib[idx] &= ~Z_ARRAY_MODE(0xf);
1386                        track->db_z_info &= ~Z_ARRAY_MODE(0xf);
1387                        ib[idx] |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
1388                        track->db_z_info |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
1389                        if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) {
1390                                unsigned bankw, bankh, mtaspect, tile_split;
1391
1392                                evergreen_tiling_fields(reloc->lobj.tiling_flags,
1393                                                        &bankw, &bankh, &mtaspect,
1394                                                        &tile_split);
1395                                ib[idx] |= DB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
1396                                ib[idx] |= DB_TILE_SPLIT(tile_split) |
1397                                                DB_BANK_WIDTH(bankw) |
1398                                                DB_BANK_HEIGHT(bankh) |
1399                                                DB_MACRO_TILE_ASPECT(mtaspect);
1400                        }
1401                }
1402                track->db_dirty = true;
1403                break;
1404        case DB_STENCIL_INFO:
1405                track->db_s_info = radeon_get_ib_value(p, idx);
1406                track->db_dirty = true;
1407                break;
1408        case DB_DEPTH_VIEW:
1409                track->db_depth_view = radeon_get_ib_value(p, idx);
1410                track->db_dirty = true;
1411                break;
1412        case DB_DEPTH_SIZE:
1413                track->db_depth_size = radeon_get_ib_value(p, idx);
1414                track->db_dirty = true;
1415                break;
1416        case R_02805C_DB_DEPTH_SLICE:
1417                track->db_depth_slice = radeon_get_ib_value(p, idx);
1418                track->db_dirty = true;
1419                break;
1420        case DB_Z_READ_BASE:
1421                r = evergreen_cs_packet_next_reloc(p, &reloc);
1422                if (r) {
1423                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1424                                        "0x%04X\n", reg);
1425                        return -EINVAL;
1426                }
1427                track->db_z_read_offset = radeon_get_ib_value(p, idx);
1428                ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1429                track->db_z_read_bo = reloc->robj;
1430                track->db_dirty = true;
1431                break;
1432        case DB_Z_WRITE_BASE:
1433                r = evergreen_cs_packet_next_reloc(p, &reloc);
1434                if (r) {
1435                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1436                                        "0x%04X\n", reg);
1437                        return -EINVAL;
1438                }
1439                track->db_z_write_offset = radeon_get_ib_value(p, idx);
1440                ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1441                track->db_z_write_bo = reloc->robj;
1442                track->db_dirty = true;
1443                break;
1444        case DB_STENCIL_READ_BASE:
1445                r = evergreen_cs_packet_next_reloc(p, &reloc);
1446                if (r) {
1447                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1448                                        "0x%04X\n", reg);
1449                        return -EINVAL;
1450                }
1451                track->db_s_read_offset = radeon_get_ib_value(p, idx);
1452                ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1453                track->db_s_read_bo = reloc->robj;
1454                track->db_dirty = true;
1455                break;
1456        case DB_STENCIL_WRITE_BASE:
1457                r = evergreen_cs_packet_next_reloc(p, &reloc);
1458                if (r) {
1459                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1460                                        "0x%04X\n", reg);
1461                        return -EINVAL;
1462                }
1463                track->db_s_write_offset = radeon_get_ib_value(p, idx);
1464                ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1465                track->db_s_write_bo = reloc->robj;
1466                track->db_dirty = true;
1467                break;
1468        case VGT_STRMOUT_CONFIG:
1469                track->vgt_strmout_config = radeon_get_ib_value(p, idx);
1470                track->streamout_dirty = true;
1471                break;
1472        case VGT_STRMOUT_BUFFER_CONFIG:
1473                track->vgt_strmout_buffer_config = radeon_get_ib_value(p, idx);
1474                track->streamout_dirty = true;
1475                break;
1476        case VGT_STRMOUT_BUFFER_BASE_0:
1477        case VGT_STRMOUT_BUFFER_BASE_1:
1478        case VGT_STRMOUT_BUFFER_BASE_2:
1479        case VGT_STRMOUT_BUFFER_BASE_3:
1480                r = evergreen_cs_packet_next_reloc(p, &reloc);
1481                if (r) {
1482                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1483                                        "0x%04X\n", reg);
1484                        return -EINVAL;
1485                }
1486                tmp = (reg - VGT_STRMOUT_BUFFER_BASE_0) / 16;
1487                track->vgt_strmout_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8;
1488                ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1489                track->vgt_strmout_bo[tmp] = reloc->robj;
1490                track->streamout_dirty = true;
1491                break;
1492        case VGT_STRMOUT_BUFFER_SIZE_0:
1493        case VGT_STRMOUT_BUFFER_SIZE_1:
1494        case VGT_STRMOUT_BUFFER_SIZE_2:
1495        case VGT_STRMOUT_BUFFER_SIZE_3:
1496                tmp = (reg - VGT_STRMOUT_BUFFER_SIZE_0) / 16;
1497                /* size in register is DWs, convert to bytes */
1498                track->vgt_strmout_size[tmp] = radeon_get_ib_value(p, idx) * 4;
1499                track->streamout_dirty = true;
1500                break;
1501        case CP_COHER_BASE:
1502                r = evergreen_cs_packet_next_reloc(p, &reloc);
1503                if (r) {
1504                        dev_warn(p->dev, "missing reloc for CP_COHER_BASE "
1505                                        "0x%04X\n", reg);
1506                        return -EINVAL;
1507                }
1508                ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1509        case CB_TARGET_MASK:
1510                track->cb_target_mask = radeon_get_ib_value(p, idx);
1511                track->cb_dirty = true;
1512                break;
1513        case CB_SHADER_MASK:
1514                track->cb_shader_mask = radeon_get_ib_value(p, idx);
1515                track->cb_dirty = true;
1516                break;
1517        case PA_SC_AA_CONFIG:
1518                if (p->rdev->family >= CHIP_CAYMAN) {
1519                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1520                                 "0x%04X\n", reg);
1521                        return -EINVAL;
1522                }
1523                tmp = radeon_get_ib_value(p, idx) & MSAA_NUM_SAMPLES_MASK;
1524                track->nsamples = 1 << tmp;
1525                break;
1526        case CAYMAN_PA_SC_AA_CONFIG:
1527                if (p->rdev->family < CHIP_CAYMAN) {
1528                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1529                                 "0x%04X\n", reg);
1530                        return -EINVAL;
1531                }
1532                tmp = radeon_get_ib_value(p, idx) & CAYMAN_MSAA_NUM_SAMPLES_MASK;
1533                track->nsamples = 1 << tmp;
1534                break;
1535        case CB_COLOR0_VIEW:
1536        case CB_COLOR1_VIEW:
1537        case CB_COLOR2_VIEW:
1538        case CB_COLOR3_VIEW:
1539        case CB_COLOR4_VIEW:
1540        case CB_COLOR5_VIEW:
1541        case CB_COLOR6_VIEW:
1542        case CB_COLOR7_VIEW:
1543                tmp = (reg - CB_COLOR0_VIEW) / 0x3c;
1544                track->cb_color_view[tmp] = radeon_get_ib_value(p, idx);
1545                track->cb_dirty = true;
1546                break;
1547        case CB_COLOR8_VIEW:
1548        case CB_COLOR9_VIEW:
1549        case CB_COLOR10_VIEW:
1550        case CB_COLOR11_VIEW:
1551                tmp = ((reg - CB_COLOR8_VIEW) / 0x1c) + 8;
1552                track->cb_color_view[tmp] = radeon_get_ib_value(p, idx);
1553                track->cb_dirty = true;
1554                break;
1555        case CB_COLOR0_INFO:
1556        case CB_COLOR1_INFO:
1557        case CB_COLOR2_INFO:
1558        case CB_COLOR3_INFO:
1559        case CB_COLOR4_INFO:
1560        case CB_COLOR5_INFO:
1561        case CB_COLOR6_INFO:
1562        case CB_COLOR7_INFO:
1563                tmp = (reg - CB_COLOR0_INFO) / 0x3c;
1564                track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
1565                if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1566                        r = evergreen_cs_packet_next_reloc(p, &reloc);
1567                        if (r) {
1568                                dev_warn(p->dev, "bad SET_CONTEXT_REG "
1569                                                "0x%04X\n", reg);
1570                                return -EINVAL;
1571                        }
1572                        ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
1573                        track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
1574                }
1575                track->cb_dirty = true;
1576                break;
1577        case CB_COLOR8_INFO:
1578        case CB_COLOR9_INFO:
1579        case CB_COLOR10_INFO:
1580        case CB_COLOR11_INFO:
1581                tmp = ((reg - CB_COLOR8_INFO) / 0x1c) + 8;
1582                track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
1583                if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1584                        r = evergreen_cs_packet_next_reloc(p, &reloc);
1585                        if (r) {
1586                                dev_warn(p->dev, "bad SET_CONTEXT_REG "
1587                                                "0x%04X\n", reg);
1588                                return -EINVAL;
1589                        }
1590                        ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
1591                        track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
1592                }
1593                track->cb_dirty = true;
1594                break;
1595        case CB_COLOR0_PITCH:
1596        case CB_COLOR1_PITCH:
1597        case CB_COLOR2_PITCH:
1598        case CB_COLOR3_PITCH:
1599        case CB_COLOR4_PITCH:
1600        case CB_COLOR5_PITCH:
1601        case CB_COLOR6_PITCH:
1602        case CB_COLOR7_PITCH:
1603                tmp = (reg - CB_COLOR0_PITCH) / 0x3c;
1604                track->cb_color_pitch[tmp] = radeon_get_ib_value(p, idx);
1605                track->cb_dirty = true;
1606                break;
1607        case CB_COLOR8_PITCH:
1608        case CB_COLOR9_PITCH:
1609        case CB_COLOR10_PITCH:
1610        case CB_COLOR11_PITCH:
1611                tmp = ((reg - CB_COLOR8_PITCH) / 0x1c) + 8;
1612                track->cb_color_pitch[tmp] = radeon_get_ib_value(p, idx);
1613                track->cb_dirty = true;
1614                break;
1615        case CB_COLOR0_SLICE:
1616        case CB_COLOR1_SLICE:
1617        case CB_COLOR2_SLICE:
1618        case CB_COLOR3_SLICE:
1619        case CB_COLOR4_SLICE:
1620        case CB_COLOR5_SLICE:
1621        case CB_COLOR6_SLICE:
1622        case CB_COLOR7_SLICE:
1623                tmp = (reg - CB_COLOR0_SLICE) / 0x3c;
1624                track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx);
1625                track->cb_color_slice_idx[tmp] = idx;
1626                track->cb_dirty = true;
1627                break;
1628        case CB_COLOR8_SLICE:
1629        case CB_COLOR9_SLICE:
1630        case CB_COLOR10_SLICE:
1631        case CB_COLOR11_SLICE:
1632                tmp = ((reg - CB_COLOR8_SLICE) / 0x1c) + 8;
1633                track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx);
1634                track->cb_color_slice_idx[tmp] = idx;
1635                track->cb_dirty = true;
1636                break;
1637        case CB_COLOR0_ATTRIB:
1638        case CB_COLOR1_ATTRIB:
1639        case CB_COLOR2_ATTRIB:
1640        case CB_COLOR3_ATTRIB:
1641        case CB_COLOR4_ATTRIB:
1642        case CB_COLOR5_ATTRIB:
1643        case CB_COLOR6_ATTRIB:
1644        case CB_COLOR7_ATTRIB:
1645                r = evergreen_cs_packet_next_reloc(p, &reloc);
1646                if (r) {
1647                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1648                                        "0x%04X\n", reg);
1649                        return -EINVAL;
1650                }
1651                if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1652                        if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) {
1653                                unsigned bankw, bankh, mtaspect, tile_split;
1654
1655                                evergreen_tiling_fields(reloc->lobj.tiling_flags,
1656                                                        &bankw, &bankh, &mtaspect,
1657                                                        &tile_split);
1658                                ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
1659                                ib[idx] |= CB_TILE_SPLIT(tile_split) |
1660                                           CB_BANK_WIDTH(bankw) |
1661                                           CB_BANK_HEIGHT(bankh) |
1662                                           CB_MACRO_TILE_ASPECT(mtaspect);
1663                        }
1664                }
1665                tmp = ((reg - CB_COLOR0_ATTRIB) / 0x3c);
1666                track->cb_color_attrib[tmp] = ib[idx];
1667                track->cb_dirty = true;
1668                break;
1669        case CB_COLOR8_ATTRIB:
1670        case CB_COLOR9_ATTRIB:
1671        case CB_COLOR10_ATTRIB:
1672        case CB_COLOR11_ATTRIB:
1673                r = evergreen_cs_packet_next_reloc(p, &reloc);
1674                if (r) {
1675                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1676                                        "0x%04X\n", reg);
1677                        return -EINVAL;
1678                }
1679                if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1680                        if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) {
1681                                unsigned bankw, bankh, mtaspect, tile_split;
1682
1683                                evergreen_tiling_fields(reloc->lobj.tiling_flags,
1684                                                        &bankw, &bankh, &mtaspect,
1685                                                        &tile_split);
1686                                ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
1687                                ib[idx] |= CB_TILE_SPLIT(tile_split) |
1688                                           CB_BANK_WIDTH(bankw) |
1689                                           CB_BANK_HEIGHT(bankh) |
1690                                           CB_MACRO_TILE_ASPECT(mtaspect);
1691                        }
1692                }
1693                tmp = ((reg - CB_COLOR8_ATTRIB) / 0x1c) + 8;
1694                track->cb_color_attrib[tmp] = ib[idx];
1695                track->cb_dirty = true;
1696                break;
1697        case CB_COLOR0_FMASK:
1698        case CB_COLOR1_FMASK:
1699        case CB_COLOR2_FMASK:
1700        case CB_COLOR3_FMASK:
1701        case CB_COLOR4_FMASK:
1702        case CB_COLOR5_FMASK:
1703        case CB_COLOR6_FMASK:
1704        case CB_COLOR7_FMASK:
1705                tmp = (reg - CB_COLOR0_FMASK) / 0x3c;
1706                r = evergreen_cs_packet_next_reloc(p, &reloc);
1707                if (r) {
1708                        dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
1709                        return -EINVAL;
1710                }
1711                ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1712                track->cb_color_fmask_bo[tmp] = reloc->robj;
1713                break;
1714        case CB_COLOR0_CMASK:
1715        case CB_COLOR1_CMASK:
1716        case CB_COLOR2_CMASK:
1717        case CB_COLOR3_CMASK:
1718        case CB_COLOR4_CMASK:
1719        case CB_COLOR5_CMASK:
1720        case CB_COLOR6_CMASK:
1721        case CB_COLOR7_CMASK:
1722                tmp = (reg - CB_COLOR0_CMASK) / 0x3c;
1723                r = evergreen_cs_packet_next_reloc(p, &reloc);
1724                if (r) {
1725                        dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
1726                        return -EINVAL;
1727                }
1728                ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1729                track->cb_color_cmask_bo[tmp] = reloc->robj;
1730                break;
1731        case CB_COLOR0_FMASK_SLICE:
1732        case CB_COLOR1_FMASK_SLICE:
1733        case CB_COLOR2_FMASK_SLICE:
1734        case CB_COLOR3_FMASK_SLICE:
1735        case CB_COLOR4_FMASK_SLICE:
1736        case CB_COLOR5_FMASK_SLICE:
1737        case CB_COLOR6_FMASK_SLICE:
1738        case CB_COLOR7_FMASK_SLICE:
1739                tmp = (reg - CB_COLOR0_FMASK_SLICE) / 0x3c;
1740                track->cb_color_fmask_slice[tmp] = radeon_get_ib_value(p, idx);
1741                break;
1742        case CB_COLOR0_CMASK_SLICE:
1743        case CB_COLOR1_CMASK_SLICE:
1744        case CB_COLOR2_CMASK_SLICE:
1745        case CB_COLOR3_CMASK_SLICE:
1746        case CB_COLOR4_CMASK_SLICE:
1747        case CB_COLOR5_CMASK_SLICE:
1748        case CB_COLOR6_CMASK_SLICE:
1749        case CB_COLOR7_CMASK_SLICE:
1750                tmp = (reg - CB_COLOR0_CMASK_SLICE) / 0x3c;
1751                track->cb_color_cmask_slice[tmp] = radeon_get_ib_value(p, idx);
1752                break;
1753        case CB_COLOR0_BASE:
1754        case CB_COLOR1_BASE:
1755        case CB_COLOR2_BASE:
1756        case CB_COLOR3_BASE:
1757        case CB_COLOR4_BASE:
1758        case CB_COLOR5_BASE:
1759        case CB_COLOR6_BASE:
1760        case CB_COLOR7_BASE:
1761                r = evergreen_cs_packet_next_reloc(p, &reloc);
1762                if (r) {
1763                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1764                                        "0x%04X\n", reg);
1765                        return -EINVAL;
1766                }
1767                tmp = (reg - CB_COLOR0_BASE) / 0x3c;
1768                track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx);
1769                ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1770                track->cb_color_bo[tmp] = reloc->robj;
1771                track->cb_dirty = true;
1772                break;
1773        case CB_COLOR8_BASE:
1774        case CB_COLOR9_BASE:
1775        case CB_COLOR10_BASE:
1776        case CB_COLOR11_BASE:
1777                r = evergreen_cs_packet_next_reloc(p, &reloc);
1778                if (r) {
1779                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1780                                        "0x%04X\n", reg);
1781                        return -EINVAL;
1782                }
1783                tmp = ((reg - CB_COLOR8_BASE) / 0x1c) + 8;
1784                track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx);
1785                ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1786                track->cb_color_bo[tmp] = reloc->robj;
1787                track->cb_dirty = true;
1788                break;
1789        case DB_HTILE_DATA_BASE:
1790                r = evergreen_cs_packet_next_reloc(p, &reloc);
1791                if (r) {
1792                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1793                                        "0x%04X\n", reg);
1794                        return -EINVAL;
1795                }
1796                track->htile_offset = radeon_get_ib_value(p, idx);
1797                ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1798                track->htile_bo = reloc->robj;
1799                track->db_dirty = true;
1800                break;
1801        case DB_HTILE_SURFACE:
1802                /* 8x8 only */
1803                track->htile_surface = radeon_get_ib_value(p, idx);
1804                /* force 8x8 htile width and height */
1805                ib[idx] |= 3;
1806                track->db_dirty = true;
1807                break;
1808        case CB_IMMED0_BASE:
1809        case CB_IMMED1_BASE:
1810        case CB_IMMED2_BASE:
1811        case CB_IMMED3_BASE:
1812        case CB_IMMED4_BASE:
1813        case CB_IMMED5_BASE:
1814        case CB_IMMED6_BASE:
1815        case CB_IMMED7_BASE:
1816        case CB_IMMED8_BASE:
1817        case CB_IMMED9_BASE:
1818        case CB_IMMED10_BASE:
1819        case CB_IMMED11_BASE:
1820        case SQ_PGM_START_FS:
1821        case SQ_PGM_START_ES:
1822        case SQ_PGM_START_VS:
1823        case SQ_PGM_START_GS:
1824        case SQ_PGM_START_PS:
1825        case SQ_PGM_START_HS:
1826        case SQ_PGM_START_LS:
1827        case SQ_CONST_MEM_BASE:
1828        case SQ_ALU_CONST_CACHE_GS_0:
1829        case SQ_ALU_CONST_CACHE_GS_1:
1830        case SQ_ALU_CONST_CACHE_GS_2:
1831        case SQ_ALU_CONST_CACHE_GS_3:
1832        case SQ_ALU_CONST_CACHE_GS_4:
1833        case SQ_ALU_CONST_CACHE_GS_5:
1834        case SQ_ALU_CONST_CACHE_GS_6:
1835        case SQ_ALU_CONST_CACHE_GS_7:
1836        case SQ_ALU_CONST_CACHE_GS_8:
1837        case SQ_ALU_CONST_CACHE_GS_9:
1838        case SQ_ALU_CONST_CACHE_GS_10:
1839        case SQ_ALU_CONST_CACHE_GS_11:
1840        case SQ_ALU_CONST_CACHE_GS_12:
1841        case SQ_ALU_CONST_CACHE_GS_13:
1842        case SQ_ALU_CONST_CACHE_GS_14:
1843        case SQ_ALU_CONST_CACHE_GS_15:
1844        case SQ_ALU_CONST_CACHE_PS_0:
1845        case SQ_ALU_CONST_CACHE_PS_1:
1846        case SQ_ALU_CONST_CACHE_PS_2:
1847        case SQ_ALU_CONST_CACHE_PS_3:
1848        case SQ_ALU_CONST_CACHE_PS_4:
1849        case SQ_ALU_CONST_CACHE_PS_5:
1850        case SQ_ALU_CONST_CACHE_PS_6:
1851        case SQ_ALU_CONST_CACHE_PS_7:
1852        case SQ_ALU_CONST_CACHE_PS_8:
1853        case SQ_ALU_CONST_CACHE_PS_9:
1854        case SQ_ALU_CONST_CACHE_PS_10:
1855        case SQ_ALU_CONST_CACHE_PS_11:
1856        case SQ_ALU_CONST_CACHE_PS_12:
1857        case SQ_ALU_CONST_CACHE_PS_13:
1858        case SQ_ALU_CONST_CACHE_PS_14:
1859        case SQ_ALU_CONST_CACHE_PS_15:
1860        case SQ_ALU_CONST_CACHE_VS_0:
1861        case SQ_ALU_CONST_CACHE_VS_1:
1862        case SQ_ALU_CONST_CACHE_VS_2:
1863        case SQ_ALU_CONST_CACHE_VS_3:
1864        case SQ_ALU_CONST_CACHE_VS_4:
1865        case SQ_ALU_CONST_CACHE_VS_5:
1866        case SQ_ALU_CONST_CACHE_VS_6:
1867        case SQ_ALU_CONST_CACHE_VS_7:
1868        case SQ_ALU_CONST_CACHE_VS_8:
1869        case SQ_ALU_CONST_CACHE_VS_9:
1870        case SQ_ALU_CONST_CACHE_VS_10:
1871        case SQ_ALU_CONST_CACHE_VS_11:
1872        case SQ_ALU_CONST_CACHE_VS_12:
1873        case SQ_ALU_CONST_CACHE_VS_13:
1874        case SQ_ALU_CONST_CACHE_VS_14:
1875        case SQ_ALU_CONST_CACHE_VS_15:
1876        case SQ_ALU_CONST_CACHE_HS_0:
1877        case SQ_ALU_CONST_CACHE_HS_1:
1878        case SQ_ALU_CONST_CACHE_HS_2:
1879        case SQ_ALU_CONST_CACHE_HS_3:
1880        case SQ_ALU_CONST_CACHE_HS_4:
1881        case SQ_ALU_CONST_CACHE_HS_5:
1882        case SQ_ALU_CONST_CACHE_HS_6:
1883        case SQ_ALU_CONST_CACHE_HS_7:
1884        case SQ_ALU_CONST_CACHE_HS_8:
1885        case SQ_ALU_CONST_CACHE_HS_9:
1886        case SQ_ALU_CONST_CACHE_HS_10:
1887        case SQ_ALU_CONST_CACHE_HS_11:
1888        case SQ_ALU_CONST_CACHE_HS_12:
1889        case SQ_ALU_CONST_CACHE_HS_13:
1890        case SQ_ALU_CONST_CACHE_HS_14:
1891        case SQ_ALU_CONST_CACHE_HS_15:
1892        case SQ_ALU_CONST_CACHE_LS_0:
1893        case SQ_ALU_CONST_CACHE_LS_1:
1894        case SQ_ALU_CONST_CACHE_LS_2:
1895        case SQ_ALU_CONST_CACHE_LS_3:
1896        case SQ_ALU_CONST_CACHE_LS_4:
1897        case SQ_ALU_CONST_CACHE_LS_5:
1898        case SQ_ALU_CONST_CACHE_LS_6:
1899        case SQ_ALU_CONST_CACHE_LS_7:
1900        case SQ_ALU_CONST_CACHE_LS_8:
1901        case SQ_ALU_CONST_CACHE_LS_9:
1902        case SQ_ALU_CONST_CACHE_LS_10:
1903        case SQ_ALU_CONST_CACHE_LS_11:
1904        case SQ_ALU_CONST_CACHE_LS_12:
1905        case SQ_ALU_CONST_CACHE_LS_13:
1906        case SQ_ALU_CONST_CACHE_LS_14:
1907        case SQ_ALU_CONST_CACHE_LS_15:
1908                r = evergreen_cs_packet_next_reloc(p, &reloc);
1909                if (r) {
1910                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1911                                        "0x%04X\n", reg);
1912                        return -EINVAL;
1913                }
1914                ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1915                break;
1916        case SX_MEMORY_EXPORT_BASE:
1917                if (p->rdev->family >= CHIP_CAYMAN) {
1918                        dev_warn(p->dev, "bad SET_CONFIG_REG "
1919                                 "0x%04X\n", reg);
1920                        return -EINVAL;
1921                }
1922                r = evergreen_cs_packet_next_reloc(p, &reloc);
1923                if (r) {
1924                        dev_warn(p->dev, "bad SET_CONFIG_REG "
1925                                        "0x%04X\n", reg);
1926                        return -EINVAL;
1927                }
1928                ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1929                break;
1930        case CAYMAN_SX_SCATTER_EXPORT_BASE:
1931                if (p->rdev->family < CHIP_CAYMAN) {
1932                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1933                                 "0x%04X\n", reg);
1934                        return -EINVAL;
1935                }
1936                r = evergreen_cs_packet_next_reloc(p, &reloc);
1937                if (r) {
1938                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1939                                        "0x%04X\n", reg);
1940                        return -EINVAL;
1941                }
1942                ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1943                break;
1944        case SX_MISC:
1945                track->sx_misc_kill_all_prims = (radeon_get_ib_value(p, idx) & 0x1) != 0;
1946                break;
1947        default:
1948                dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
1949                return -EINVAL;
1950        }
1951        return 0;
1952}
1953
1954static bool evergreen_is_safe_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
1955{
1956        u32 last_reg, m, i;
1957
1958        if (p->rdev->family >= CHIP_CAYMAN)
1959                last_reg = ARRAY_SIZE(cayman_reg_safe_bm);
1960        else
1961                last_reg = ARRAY_SIZE(evergreen_reg_safe_bm);
1962
1963        i = (reg >> 7);
1964        if (i >= last_reg) {
1965                dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
1966                return false;
1967        }
1968        m = 1 << ((reg >> 2) & 31);
1969        if (p->rdev->family >= CHIP_CAYMAN) {
1970                if (!(cayman_reg_safe_bm[i] & m))
1971                        return true;
1972        } else {
1973                if (!(evergreen_reg_safe_bm[i] & m))
1974                        return true;
1975        }
1976        dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
1977        return false;
1978}
1979
1980static int evergreen_packet3_check(struct radeon_cs_parser *p,
1981                                   struct radeon_cs_packet *pkt)
1982{
1983        struct radeon_cs_reloc *reloc;
1984        struct evergreen_cs_track *track;
1985        volatile u32 *ib;
1986        unsigned idx;
1987        unsigned i;
1988        unsigned start_reg, end_reg, reg;
1989        int r;
1990        u32 idx_value;
1991
1992        track = (struct evergreen_cs_track *)p->track;
1993        ib = p->ib.ptr;
1994        idx = pkt->idx + 1;
1995        idx_value = radeon_get_ib_value(p, idx);
1996
1997        switch (pkt->opcode) {
1998        case PACKET3_SET_PREDICATION:
1999        {
2000                int pred_op;
2001                int tmp;
2002                uint64_t offset;
2003
2004                if (pkt->count != 1) {
2005                        DRM_ERROR("bad SET PREDICATION\n");
2006                        return -EINVAL;
2007                }
2008
2009                tmp = radeon_get_ib_value(p, idx + 1);
2010                pred_op = (tmp >> 16) & 0x7;
2011
2012                /* for the clear predicate operation */
2013                if (pred_op == 0)
2014                        return 0;
2015
2016                if (pred_op > 2) {
2017                        DRM_ERROR("bad SET PREDICATION operation %d\n", pred_op);
2018                        return -EINVAL;
2019                }
2020
2021                r = evergreen_cs_packet_next_reloc(p, &reloc);
2022                if (r) {
2023                        DRM_ERROR("bad SET PREDICATION\n");
2024                        return -EINVAL;
2025                }
2026
2027                offset = reloc->lobj.gpu_offset +
2028                         (idx_value & 0xfffffff0) +
2029                         ((u64)(tmp & 0xff) << 32);
2030
2031                ib[idx + 0] = offset;
2032                ib[idx + 1] = (tmp & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2033        }
2034        break;
2035        case PACKET3_CONTEXT_CONTROL:
2036                if (pkt->count != 1) {
2037                        DRM_ERROR("bad CONTEXT_CONTROL\n");
2038                        return -EINVAL;
2039                }
2040                break;
2041        case PACKET3_INDEX_TYPE:
2042        case PACKET3_NUM_INSTANCES:
2043        case PACKET3_CLEAR_STATE:
2044                if (pkt->count) {
2045                        DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n");
2046                        return -EINVAL;
2047                }
2048                break;
2049        case CAYMAN_PACKET3_DEALLOC_STATE:
2050                if (p->rdev->family < CHIP_CAYMAN) {
2051                        DRM_ERROR("bad PACKET3_DEALLOC_STATE\n");
2052                        return -EINVAL;
2053                }
2054                if (pkt->count) {
2055                        DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n");
2056                        return -EINVAL;
2057                }
2058                break;
2059        case PACKET3_INDEX_BASE:
2060        {
2061                uint64_t offset;
2062
2063                if (pkt->count != 1) {
2064                        DRM_ERROR("bad INDEX_BASE\n");
2065                        return -EINVAL;
2066                }
2067                r = evergreen_cs_packet_next_reloc(p, &reloc);
2068                if (r) {
2069                        DRM_ERROR("bad INDEX_BASE\n");
2070                        return -EINVAL;
2071                }
2072
2073                offset = reloc->lobj.gpu_offset +
2074                         idx_value +
2075                         ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
2076
2077                ib[idx+0] = offset;
2078                ib[idx+1] = upper_32_bits(offset) & 0xff;
2079
2080                r = evergreen_cs_track_check(p);
2081                if (r) {
2082                        dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2083                        return r;
2084                }
2085                break;
2086        }
2087        case PACKET3_DRAW_INDEX:
2088        {
2089                uint64_t offset;
2090                if (pkt->count != 3) {
2091                        DRM_ERROR("bad DRAW_INDEX\n");
2092                        return -EINVAL;
2093                }
2094                r = evergreen_cs_packet_next_reloc(p, &reloc);
2095                if (r) {
2096                        DRM_ERROR("bad DRAW_INDEX\n");
2097                        return -EINVAL;
2098                }
2099
2100                offset = reloc->lobj.gpu_offset +
2101                         idx_value +
2102                         ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
2103
2104                ib[idx+0] = offset;
2105                ib[idx+1] = upper_32_bits(offset) & 0xff;
2106
2107                r = evergreen_cs_track_check(p);
2108                if (r) {
2109                        dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2110                        return r;
2111                }
2112                break;
2113        }
2114        case PACKET3_DRAW_INDEX_2:
2115        {
2116                uint64_t offset;
2117
2118                if (pkt->count != 4) {
2119                        DRM_ERROR("bad DRAW_INDEX_2\n");
2120                        return -EINVAL;
2121                }
2122                r = evergreen_cs_packet_next_reloc(p, &reloc);
2123                if (r) {
2124                        DRM_ERROR("bad DRAW_INDEX_2\n");
2125                        return -EINVAL;
2126                }
2127
2128                offset = reloc->lobj.gpu_offset +
2129                         radeon_get_ib_value(p, idx+1) +
2130                         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2131
2132                ib[idx+1] = offset;
2133                ib[idx+2] = upper_32_bits(offset) & 0xff;
2134
2135                r = evergreen_cs_track_check(p);
2136                if (r) {
2137                        dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2138                        return r;
2139                }
2140                break;
2141        }
2142        case PACKET3_DRAW_INDEX_AUTO:
2143                if (pkt->count != 1) {
2144                        DRM_ERROR("bad DRAW_INDEX_AUTO\n");
2145                        return -EINVAL;
2146                }
2147                r = evergreen_cs_track_check(p);
2148                if (r) {
2149                        dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
2150                        return r;
2151                }
2152                break;
2153        case PACKET3_DRAW_INDEX_MULTI_AUTO:
2154                if (pkt->count != 2) {
2155                        DRM_ERROR("bad DRAW_INDEX_MULTI_AUTO\n");
2156                        return -EINVAL;
2157                }
2158                r = evergreen_cs_track_check(p);
2159                if (r) {
2160                        dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
2161                        return r;
2162                }
2163                break;
2164        case PACKET3_DRAW_INDEX_IMMD:
2165                if (pkt->count < 2) {
2166                        DRM_ERROR("bad DRAW_INDEX_IMMD\n");
2167                        return -EINVAL;
2168                }
2169                r = evergreen_cs_track_check(p);
2170                if (r) {
2171                        dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2172                        return r;
2173                }
2174                break;
2175        case PACKET3_DRAW_INDEX_OFFSET:
2176                if (pkt->count != 2) {
2177                        DRM_ERROR("bad DRAW_INDEX_OFFSET\n");
2178                        return -EINVAL;
2179                }
2180                r = evergreen_cs_track_check(p);
2181                if (r) {
2182                        dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2183                        return r;
2184                }
2185                break;
2186        case PACKET3_DRAW_INDEX_OFFSET_2:
2187                if (pkt->count != 3) {
2188                        DRM_ERROR("bad DRAW_INDEX_OFFSET_2\n");
2189                        return -EINVAL;
2190                }
2191                r = evergreen_cs_track_check(p);
2192                if (r) {
2193                        dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2194                        return r;
2195                }
2196                break;
2197        case PACKET3_DISPATCH_DIRECT:
2198                if (pkt->count != 3) {
2199                        DRM_ERROR("bad DISPATCH_DIRECT\n");
2200                        return -EINVAL;
2201                }
2202                r = evergreen_cs_track_check(p);
2203                if (r) {
2204                        dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
2205                        return r;
2206                }
2207                break;
2208        case PACKET3_DISPATCH_INDIRECT:
2209                if (pkt->count != 1) {
2210                        DRM_ERROR("bad DISPATCH_INDIRECT\n");
2211                        return -EINVAL;
2212                }
2213                r = evergreen_cs_packet_next_reloc(p, &reloc);
2214                if (r) {
2215                        DRM_ERROR("bad DISPATCH_INDIRECT\n");
2216                        return -EINVAL;
2217                }
2218                ib[idx+0] = idx_value + (u32)(reloc->lobj.gpu_offset & 0xffffffff);
2219                r = evergreen_cs_track_check(p);
2220                if (r) {
2221                        dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2222                        return r;
2223                }
2224                break;
2225        case PACKET3_WAIT_REG_MEM:
2226                if (pkt->count != 5) {
2227                        DRM_ERROR("bad WAIT_REG_MEM\n");
2228                        return -EINVAL;
2229                }
2230                /* bit 4 is reg (0) or mem (1) */
2231                if (idx_value & 0x10) {
2232                        uint64_t offset;
2233
2234                        r = evergreen_cs_packet_next_reloc(p, &reloc);
2235                        if (r) {
2236                                DRM_ERROR("bad WAIT_REG_MEM\n");
2237                                return -EINVAL;
2238                        }
2239
2240                        offset = reloc->lobj.gpu_offset +
2241                                 (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2242                                 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2243
2244                        ib[idx+1] = (ib[idx+1] & 0x3) | (offset & 0xfffffffc);
2245                        ib[idx+2] = upper_32_bits(offset) & 0xff;
2246                }
2247                break;
2248        case PACKET3_CP_DMA:
2249        {
2250                u32 command, size, info;
2251                u64 offset, tmp;
2252                if (pkt->count != 4) {
2253                        DRM_ERROR("bad CP DMA\n");
2254                        return -EINVAL;
2255                }
2256                command = radeon_get_ib_value(p, idx+4);
2257                size = command & 0x1fffff;
2258                info = radeon_get_ib_value(p, idx+1);
2259                if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
2260                    (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
2261                    ((((info & 0x00300000) >> 20) == 0) &&
2262                     (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
2263                    ((((info & 0x60000000) >> 29) == 0) &&
2264                     (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
2265                        /* non mem to mem copies requires dw aligned count */
2266                        if (size % 4) {
2267                                DRM_ERROR("CP DMA command requires dw count alignment\n");
2268                                return -EINVAL;
2269                        }
2270                }
2271                if (command & PACKET3_CP_DMA_CMD_SAS) {
2272                        /* src address space is register */
2273                        /* GDS is ok */
2274                        if (((info & 0x60000000) >> 29) != 1) {
2275                                DRM_ERROR("CP DMA SAS not supported\n");
2276                                return -EINVAL;
2277                        }
2278                } else {
2279                        if (command & PACKET3_CP_DMA_CMD_SAIC) {
2280                                DRM_ERROR("CP DMA SAIC only supported for registers\n");
2281                                return -EINVAL;
2282                        }
2283                        /* src address space is memory */
2284                        if (((info & 0x60000000) >> 29) == 0) {
2285                                r = evergreen_cs_packet_next_reloc(p, &reloc);
2286                                if (r) {
2287                                        DRM_ERROR("bad CP DMA SRC\n");
2288                                        return -EINVAL;
2289                                }
2290
2291                                tmp = radeon_get_ib_value(p, idx) +
2292                                        ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
2293
2294                                offset = reloc->lobj.gpu_offset + tmp;
2295
2296                                if ((tmp + size) > radeon_bo_size(reloc->robj)) {
2297                                        dev_warn(p->dev, "CP DMA src buffer too small (%llu %lu)\n",
2298                                                 tmp + size, radeon_bo_size(reloc->robj));
2299                                        return -EINVAL;
2300                                }
2301
2302                                ib[idx] = offset;
2303                                ib[idx+1] = (ib[idx+1] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2304                        } else if (((info & 0x60000000) >> 29) != 2) {
2305                                DRM_ERROR("bad CP DMA SRC_SEL\n");
2306                                return -EINVAL;
2307                        }
2308                }
2309                if (command & PACKET3_CP_DMA_CMD_DAS) {
2310                        /* dst address space is register */
2311                        /* GDS is ok */
2312                        if (((info & 0x00300000) >> 20) != 1) {
2313                                DRM_ERROR("CP DMA DAS not supported\n");
2314                                return -EINVAL;
2315                        }
2316                } else {
2317                        /* dst address space is memory */
2318                        if (command & PACKET3_CP_DMA_CMD_DAIC) {
2319                                DRM_ERROR("CP DMA DAIC only supported for registers\n");
2320                                return -EINVAL;
2321                        }
2322                        if (((info & 0x00300000) >> 20) == 0) {
2323                                r = evergreen_cs_packet_next_reloc(p, &reloc);
2324                                if (r) {
2325                                        DRM_ERROR("bad CP DMA DST\n");
2326                                        return -EINVAL;
2327                                }
2328
2329                                tmp = radeon_get_ib_value(p, idx+2) +
2330                                        ((u64)(radeon_get_ib_value(p, idx+3) & 0xff) << 32);
2331
2332                                offset = reloc->lobj.gpu_offset + tmp;
2333
2334                                if ((tmp + size) > radeon_bo_size(reloc->robj)) {
2335                                        dev_warn(p->dev, "CP DMA dst buffer too small (%llu %lu)\n",
2336                                                 tmp + size, radeon_bo_size(reloc->robj));
2337                                        return -EINVAL;
2338                                }
2339
2340                                ib[idx+2] = offset;
2341                                ib[idx+3] = upper_32_bits(offset) & 0xff;
2342                        } else {
2343                                DRM_ERROR("bad CP DMA DST_SEL\n");
2344                                return -EINVAL;
2345                        }
2346                }
2347                break;
2348        }
2349        case PACKET3_SURFACE_SYNC:
2350                if (pkt->count != 3) {
2351                        DRM_ERROR("bad SURFACE_SYNC\n");
2352                        return -EINVAL;
2353                }
2354                /* 0xffffffff/0x0 is flush all cache flag */
2355                if (radeon_get_ib_value(p, idx + 1) != 0xffffffff ||
2356                    radeon_get_ib_value(p, idx + 2) != 0) {
2357                        r = evergreen_cs_packet_next_reloc(p, &reloc);
2358                        if (r) {
2359                                DRM_ERROR("bad SURFACE_SYNC\n");
2360                                return -EINVAL;
2361                        }
2362                        ib[idx+2] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
2363                }
2364                break;
2365        case PACKET3_EVENT_WRITE:
2366                if (pkt->count != 2 && pkt->count != 0) {
2367                        DRM_ERROR("bad EVENT_WRITE\n");
2368                        return -EINVAL;
2369                }
2370                if (pkt->count) {
2371                        uint64_t offset;
2372
2373                        r = evergreen_cs_packet_next_reloc(p, &reloc);
2374                        if (r) {
2375                                DRM_ERROR("bad EVENT_WRITE\n");
2376                                return -EINVAL;
2377                        }
2378                        offset = reloc->lobj.gpu_offset +
2379                                 (radeon_get_ib_value(p, idx+1) & 0xfffffff8) +
2380                                 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2381
2382                        ib[idx+1] = offset & 0xfffffff8;
2383                        ib[idx+2] = upper_32_bits(offset) & 0xff;
2384                }
2385                break;
2386        case PACKET3_EVENT_WRITE_EOP:
2387        {
2388                uint64_t offset;
2389
2390                if (pkt->count != 4) {
2391                        DRM_ERROR("bad EVENT_WRITE_EOP\n");
2392                        return -EINVAL;
2393                }
2394                r = evergreen_cs_packet_next_reloc(p, &reloc);
2395                if (r) {
2396                        DRM_ERROR("bad EVENT_WRITE_EOP\n");
2397                        return -EINVAL;
2398                }
2399
2400                offset = reloc->lobj.gpu_offset +
2401                         (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2402                         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2403
2404                ib[idx+1] = offset & 0xfffffffc;
2405                ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2406                break;
2407        }
2408        case PACKET3_EVENT_WRITE_EOS:
2409        {
2410                uint64_t offset;
2411
2412                if (pkt->count != 3) {
2413                        DRM_ERROR("bad EVENT_WRITE_EOS\n");
2414                        return -EINVAL;
2415                }
2416                r = evergreen_cs_packet_next_reloc(p, &reloc);
2417                if (r) {
2418                        DRM_ERROR("bad EVENT_WRITE_EOS\n");
2419                        return -EINVAL;
2420                }
2421
2422                offset = reloc->lobj.gpu_offset +
2423                         (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2424                         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2425
2426                ib[idx+1] = offset & 0xfffffffc;
2427                ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2428                break;
2429        }
2430        case PACKET3_SET_CONFIG_REG:
2431                start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
2432                end_reg = 4 * pkt->count + start_reg - 4;
2433                if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
2434                    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
2435                    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
2436                        DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
2437                        return -EINVAL;
2438                }
2439                for (i = 0; i < pkt->count; i++) {
2440                        reg = start_reg + (4 * i);
2441                        r = evergreen_cs_check_reg(p, reg, idx+1+i);
2442                        if (r)
2443                                return r;
2444                }
2445                break;
2446        case PACKET3_SET_CONTEXT_REG:
2447                start_reg = (idx_value << 2) + PACKET3_SET_CONTEXT_REG_START;
2448                end_reg = 4 * pkt->count + start_reg - 4;
2449                if ((start_reg < PACKET3_SET_CONTEXT_REG_START) ||
2450                    (start_reg >= PACKET3_SET_CONTEXT_REG_END) ||
2451                    (end_reg >= PACKET3_SET_CONTEXT_REG_END)) {
2452                        DRM_ERROR("bad PACKET3_SET_CONTEXT_REG\n");
2453                        return -EINVAL;
2454                }
2455                for (i = 0; i < pkt->count; i++) {
2456                        reg = start_reg + (4 * i);
2457                        r = evergreen_cs_check_reg(p, reg, idx+1+i);
2458                        if (r)
2459                                return r;
2460                }
2461                break;
2462        case PACKET3_SET_RESOURCE:
2463                if (pkt->count % 8) {
2464                        DRM_ERROR("bad SET_RESOURCE\n");
2465                        return -EINVAL;
2466                }
2467                start_reg = (idx_value << 2) + PACKET3_SET_RESOURCE_START;
2468                end_reg = 4 * pkt->count + start_reg - 4;
2469                if ((start_reg < PACKET3_SET_RESOURCE_START) ||
2470                    (start_reg >= PACKET3_SET_RESOURCE_END) ||
2471                    (end_reg >= PACKET3_SET_RESOURCE_END)) {
2472                        DRM_ERROR("bad SET_RESOURCE\n");
2473                        return -EINVAL;
2474                }
2475                for (i = 0; i < (pkt->count / 8); i++) {
2476                        struct radeon_bo *texture, *mipmap;
2477                        u32 toffset, moffset;
2478                        u32 size, offset, mip_address, tex_dim;
2479
2480                        switch (G__SQ_CONSTANT_TYPE(radeon_get_ib_value(p, idx+1+(i*8)+7))) {
2481                        case SQ_TEX_VTX_VALID_TEXTURE:
2482                                /* tex base */
2483                                r = evergreen_cs_packet_next_reloc(p, &reloc);
2484                                if (r) {
2485                                        DRM_ERROR("bad SET_RESOURCE (tex)\n");
2486                                        return -EINVAL;
2487                                }
2488                                if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
2489                                        ib[idx+1+(i*8)+1] |=
2490                                                TEX_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
2491                                        if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) {
2492                                                unsigned bankw, bankh, mtaspect, tile_split;
2493
2494                                                evergreen_tiling_fields(reloc->lobj.tiling_flags,
2495                                                                        &bankw, &bankh, &mtaspect,
2496                                                                        &tile_split);
2497                                                ib[idx+1+(i*8)+6] |= TEX_TILE_SPLIT(tile_split);
2498                                                ib[idx+1+(i*8)+7] |=
2499                                                        TEX_BANK_WIDTH(bankw) |
2500                                                        TEX_BANK_HEIGHT(bankh) |
2501                                                        MACRO_TILE_ASPECT(mtaspect) |
2502                                                        TEX_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
2503                                        }
2504                                }
2505                                texture = reloc->robj;
2506                                toffset = (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
2507
2508                                /* tex mip base */
2509                                tex_dim = ib[idx+1+(i*8)+0] & 0x7;
2510                                mip_address = ib[idx+1+(i*8)+3];
2511
2512                                if ((tex_dim == SQ_TEX_DIM_2D_MSAA || tex_dim == SQ_TEX_DIM_2D_ARRAY_MSAA) &&
2513                                    !mip_address &&
2514                                    !evergreen_cs_packet_next_is_pkt3_nop(p)) {
2515                                        /* MIP_ADDRESS should point to FMASK for an MSAA texture.
2516                                         * It should be 0 if FMASK is disabled. */
2517                                        moffset = 0;
2518                                        mipmap = NULL;
2519                                } else {
2520                                        r = evergreen_cs_packet_next_reloc(p, &reloc);
2521                                        if (r) {
2522                                                DRM_ERROR("bad SET_RESOURCE (tex)\n");
2523                                                return -EINVAL;
2524                                        }
2525                                        moffset = (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
2526                                        mipmap = reloc->robj;
2527                                }
2528
2529                                r = evergreen_cs_track_validate_texture(p, texture, mipmap, idx+1+(i*8));
2530                                if (r)
2531                                        return r;
2532                                ib[idx+1+(i*8)+2] += toffset;
2533                                ib[idx+1+(i*8)+3] += moffset;
2534                                break;
2535                        case SQ_TEX_VTX_VALID_BUFFER:
2536                        {
2537                                uint64_t offset64;
2538                                /* vtx base */
2539                                r = evergreen_cs_packet_next_reloc(p, &reloc);
2540                                if (r) {
2541                                        DRM_ERROR("bad SET_RESOURCE (vtx)\n");
2542                                        return -EINVAL;
2543                                }
2544                                offset = radeon_get_ib_value(p, idx+1+(i*8)+0);
2545                                size = radeon_get_ib_value(p, idx+1+(i*8)+1);
2546                                if (p->rdev && (size + offset) > radeon_bo_size(reloc->robj)) {
2547                                        /* force size to size of the buffer */
2548                                        dev_warn(p->dev, "vbo resource seems too big for the bo\n");
2549                                        ib[idx+1+(i*8)+1] = radeon_bo_size(reloc->robj) - offset;
2550                                }
2551
2552                                offset64 = reloc->lobj.gpu_offset + offset;
2553                                ib[idx+1+(i*8)+0] = offset64;
2554                                ib[idx+1+(i*8)+2] = (ib[idx+1+(i*8)+2] & 0xffffff00) |
2555                                                    (upper_32_bits(offset64) & 0xff);
2556                                break;
2557                        }
2558                        case SQ_TEX_VTX_INVALID_TEXTURE:
2559                        case SQ_TEX_VTX_INVALID_BUFFER:
2560                        default:
2561                                DRM_ERROR("bad SET_RESOURCE\n");
2562                                return -EINVAL;
2563                        }
2564                }
2565                break;
2566        case PACKET3_SET_ALU_CONST:
2567                /* XXX fix me ALU const buffers only */
2568                break;
2569        case PACKET3_SET_BOOL_CONST:
2570                start_reg = (idx_value << 2) + PACKET3_SET_BOOL_CONST_START;
2571                end_reg = 4 * pkt->count + start_reg - 4;
2572                if ((start_reg < PACKET3_SET_BOOL_CONST_START) ||
2573                    (start_reg >= PACKET3_SET_BOOL_CONST_END) ||
2574                    (end_reg >= PACKET3_SET_BOOL_CONST_END)) {
2575                        DRM_ERROR("bad SET_BOOL_CONST\n");
2576                        return -EINVAL;
2577                }
2578                break;
2579        case PACKET3_SET_LOOP_CONST:
2580                start_reg = (idx_value << 2) + PACKET3_SET_LOOP_CONST_START;
2581                end_reg = 4 * pkt->count + start_reg - 4;
2582                if ((start_reg < PACKET3_SET_LOOP_CONST_START) ||
2583                    (start_reg >= PACKET3_SET_LOOP_CONST_END) ||
2584                    (end_reg >= PACKET3_SET_LOOP_CONST_END)) {
2585                        DRM_ERROR("bad SET_LOOP_CONST\n");
2586                        return -EINVAL;
2587                }
2588                break;
2589        case PACKET3_SET_CTL_CONST:
2590                start_reg = (idx_value << 2) + PACKET3_SET_CTL_CONST_START;
2591                end_reg = 4 * pkt->count + start_reg - 4;
2592                if ((start_reg < PACKET3_SET_CTL_CONST_START) ||
2593                    (start_reg >= PACKET3_SET_CTL_CONST_END) ||
2594                    (end_reg >= PACKET3_SET_CTL_CONST_END)) {
2595                        DRM_ERROR("bad SET_CTL_CONST\n");
2596                        return -EINVAL;
2597                }
2598                break;
2599        case PACKET3_SET_SAMPLER:
2600                if (pkt->count % 3) {
2601                        DRM_ERROR("bad SET_SAMPLER\n");
2602                        return -EINVAL;
2603                }
2604                start_reg = (idx_value << 2) + PACKET3_SET_SAMPLER_START;
2605                end_reg = 4 * pkt->count + start_reg - 4;
2606                if ((start_reg < PACKET3_SET_SAMPLER_START) ||
2607                    (start_reg >= PACKET3_SET_SAMPLER_END) ||
2608                    (end_reg >= PACKET3_SET_SAMPLER_END)) {
2609                        DRM_ERROR("bad SET_SAMPLER\n");
2610                        return -EINVAL;
2611                }
2612                break;
2613        case PACKET3_STRMOUT_BUFFER_UPDATE:
2614                if (pkt->count != 4) {
2615                        DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (invalid count)\n");
2616                        return -EINVAL;
2617                }
2618                /* Updating memory at DST_ADDRESS. */
2619                if (idx_value & 0x1) {
2620                        u64 offset;
2621                        r = evergreen_cs_packet_next_reloc(p, &reloc);
2622                        if (r) {
2623                                DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing dst reloc)\n");
2624                                return -EINVAL;
2625                        }
2626                        offset = radeon_get_ib_value(p, idx+1);
2627                        offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2628                        if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2629                                DRM_ERROR("bad STRMOUT_BUFFER_UPDATE dst bo too small: 0x%llx, 0x%lx\n",
2630                                          offset + 4, radeon_bo_size(reloc->robj));
2631                                return -EINVAL;
2632                        }
2633                        offset += reloc->lobj.gpu_offset;
2634                        ib[idx+1] = offset;
2635                        ib[idx+2] = upper_32_bits(offset) & 0xff;
2636                }
2637                /* Reading data from SRC_ADDRESS. */
2638                if (((idx_value >> 1) & 0x3) == 2) {
2639                        u64 offset;
2640                        r = evergreen_cs_packet_next_reloc(p, &reloc);
2641                        if (r) {
2642                                DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing src reloc)\n");
2643                                return -EINVAL;
2644                        }
2645                        offset = radeon_get_ib_value(p, idx+3);
2646                        offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2647                        if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2648                                DRM_ERROR("bad STRMOUT_BUFFER_UPDATE src bo too small: 0x%llx, 0x%lx\n",
2649                                          offset + 4, radeon_bo_size(reloc->robj));
2650                                return -EINVAL;
2651                        }
2652                        offset += reloc->lobj.gpu_offset;
2653                        ib[idx+3] = offset;
2654                        ib[idx+4] = upper_32_bits(offset) & 0xff;
2655                }
2656                break;
2657        case PACKET3_MEM_WRITE:
2658        {
2659                u64 offset;
2660
2661                if (pkt->count != 3) {
2662                        DRM_ERROR("bad MEM_WRITE (invalid count)\n");
2663                        return -EINVAL;
2664                }
2665                r = evergreen_cs_packet_next_reloc(p, &reloc);
2666                if (r) {
2667                        DRM_ERROR("bad MEM_WRITE (missing reloc)\n");
2668                        return -EINVAL;
2669                }
2670                offset = radeon_get_ib_value(p, idx+0);
2671                offset += ((u64)(radeon_get_ib_value(p, idx+1) & 0xff)) << 32UL;
2672                if (offset & 0x7) {
2673                        DRM_ERROR("bad MEM_WRITE (address not qwords aligned)\n");
2674                        return -EINVAL;
2675                }
2676                if ((offset + 8) > radeon_bo_size(reloc->robj)) {
2677                        DRM_ERROR("bad MEM_WRITE bo too small: 0x%llx, 0x%lx\n",
2678                                  offset + 8, radeon_bo_size(reloc->robj));
2679                        return -EINVAL;
2680                }
2681                offset += reloc->lobj.gpu_offset;
2682                ib[idx+0] = offset;
2683                ib[idx+1] = upper_32_bits(offset) & 0xff;
2684                break;
2685        }
2686        case PACKET3_COPY_DW:
2687                if (pkt->count != 4) {
2688                        DRM_ERROR("bad COPY_DW (invalid count)\n");
2689                        return -EINVAL;
2690                }
2691                if (idx_value & 0x1) {
2692                        u64 offset;
2693                        /* SRC is memory. */
2694                        r = evergreen_cs_packet_next_reloc(p, &reloc);
2695                        if (r) {
2696                                DRM_ERROR("bad COPY_DW (missing src reloc)\n");
2697                                return -EINVAL;
2698                        }
2699                        offset = radeon_get_ib_value(p, idx+1);
2700                        offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2701                        if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2702                                DRM_ERROR("bad COPY_DW src bo too small: 0x%llx, 0x%lx\n",
2703                                          offset + 4, radeon_bo_size(reloc->robj));
2704                                return -EINVAL;
2705                        }
2706                        offset += reloc->lobj.gpu_offset;
2707                        ib[idx+1] = offset;
2708                        ib[idx+2] = upper_32_bits(offset) & 0xff;
2709                } else {
2710                        /* SRC is a reg. */
2711                        reg = radeon_get_ib_value(p, idx+1) << 2;
2712                        if (!evergreen_is_safe_reg(p, reg, idx+1))
2713                                return -EINVAL;
2714                }
2715                if (idx_value & 0x2) {
2716                        u64 offset;
2717                        /* DST is memory. */
2718                        r = evergreen_cs_packet_next_reloc(p, &reloc);
2719                        if (r) {
2720                                DRM_ERROR("bad COPY_DW (missing dst reloc)\n");
2721                                return -EINVAL;
2722                        }
2723                        offset = radeon_get_ib_value(p, idx+3);
2724                        offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2725                        if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2726                                DRM_ERROR("bad COPY_DW dst bo too small: 0x%llx, 0x%lx\n",
2727                                          offset + 4, radeon_bo_size(reloc->robj));
2728                                return -EINVAL;
2729                        }
2730                        offset += reloc->lobj.gpu_offset;
2731                        ib[idx+3] = offset;
2732                        ib[idx+4] = upper_32_bits(offset) & 0xff;
2733                } else {
2734                        /* DST is a reg. */
2735                        reg = radeon_get_ib_value(p, idx+3) << 2;
2736                        if (!evergreen_is_safe_reg(p, reg, idx+3))
2737                                return -EINVAL;
2738                }
2739                break;
2740        case PACKET3_NOP:
2741                break;
2742        default:
2743                DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
2744                return -EINVAL;
2745        }
2746        return 0;
2747}
2748
2749int evergreen_cs_parse(struct radeon_cs_parser *p)
2750{
2751        struct radeon_cs_packet pkt;
2752        struct evergreen_cs_track *track;
2753        u32 tmp;
2754        int r;
2755
2756        if (p->track == NULL) {
2757                /* initialize tracker, we are in kms */
2758                track = kzalloc(sizeof(*track), GFP_KERNEL);
2759                if (track == NULL)
2760                        return -ENOMEM;
2761                evergreen_cs_track_init(track);
2762                if (p->rdev->family >= CHIP_CAYMAN)
2763                        tmp = p->rdev->config.cayman.tile_config;
2764                else
2765                        tmp = p->rdev->config.evergreen.tile_config;
2766
2767                switch (tmp & 0xf) {
2768                case 0:
2769                        track->npipes = 1;
2770                        break;
2771                case 1:
2772                default:
2773                        track->npipes = 2;
2774                        break;
2775                case 2:
2776                        track->npipes = 4;
2777                        break;
2778                case 3:
2779                        track->npipes = 8;
2780                        break;
2781                }
2782
2783                switch ((tmp & 0xf0) >> 4) {
2784                case 0:
2785                        track->nbanks = 4;
2786                        break;
2787                case 1:
2788                default:
2789                        track->nbanks = 8;
2790                        break;
2791                case 2:
2792                        track->nbanks = 16;
2793                        break;
2794                }
2795
2796                switch ((tmp & 0xf00) >> 8) {
2797                case 0:
2798                        track->group_size = 256;
2799                        break;
2800                case 1:
2801                default:
2802                        track->group_size = 512;
2803                        break;
2804                }
2805
2806                switch ((tmp & 0xf000) >> 12) {
2807                case 0:
2808                        track->row_size = 1;
2809                        break;
2810                case 1:
2811                default:
2812                        track->row_size = 2;
2813                        break;
2814                case 2:
2815                        track->row_size = 4;
2816                        break;
2817                }
2818
2819                p->track = track;
2820        }
2821        do {
2822                r = evergreen_cs_packet_parse(p, &pkt, p->idx);
2823                if (r) {
2824                        kfree(p->track);
2825                        p->track = NULL;
2826                        return r;
2827                }
2828                p->idx += pkt.count + 2;
2829                switch (pkt.type) {
2830                case PACKET_TYPE0:
2831                        r = evergreen_cs_parse_packet0(p, &pkt);
2832                        break;
2833                case PACKET_TYPE2:
2834                        break;
2835                case PACKET_TYPE3:
2836                        r = evergreen_packet3_check(p, &pkt);
2837                        break;
2838                default:
2839                        DRM_ERROR("Unknown packet type %d !\n", pkt.type);
2840                        kfree(p->track);
2841                        p->track = NULL;
2842                        return -EINVAL;
2843                }
2844                if (r) {
2845                        kfree(p->track);
2846                        p->track = NULL;
2847                        return r;
2848                }
2849        } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
2850#if 0
2851        for (r = 0; r < p->ib.length_dw; r++) {
2852                printk(KERN_INFO "%05d  0x%08X\n", r, p->ib.ptr[r]);
2853                mdelay(1);
2854        }
2855#endif
2856        kfree(p->track);
2857        p->track = NULL;
2858        return 0;
2859}
2860
2861/*
2862 *  DMA
2863 */
2864
/* Field extraction helpers for the 32-bit DMA packet header word. */
#define GET_DMA_CMD(h) (((h) & 0xf0000000) >> 28)	/* bits 31:28 - packet opcode (e.g. DMA_PACKET_WRITE/COPY) */
#define GET_DMA_COUNT(h) ((h) & 0x000fffff)		/* bits 19:0  - transfer count */
#define GET_DMA_T(h) (((h) & 0x00800000) >> 23)		/* bit  23    - tiled flag */
#define GET_DMA_NEW(h) (((h) & 0x04000000) >> 26)	/* bit  26    - "new" packet encoding flag */
#define GET_DMA_MISC(h) (((h) & 0x0700000) >> 20)	/* bits 22:20 - misc sub-operation selector */
2870
2871/**
2872 * evergreen_dma_cs_parse() - parse the DMA IB
2873 * @p:          parser structure holding parsing context.
2874 *
2875 * Parses the DMA IB from the CS ioctl and updates
2876 * the GPU addresses based on the reloc information and
2877 * checks for errors. (Evergreen-Cayman)
2878 * Returns 0 for success and an error on failure.
2879 **/
2880int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
2881{
2882        struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
2883        struct radeon_cs_reloc *src_reloc, *dst_reloc, *dst2_reloc;
2884        u32 header, cmd, count, tiled, new_cmd, misc;
2885        volatile u32 *ib = p->ib.ptr;
2886        u32 idx, idx_value;
2887        u64 src_offset, dst_offset, dst2_offset;
2888        int r;
2889
2890        do {
2891                if (p->idx >= ib_chunk->length_dw) {
2892                        DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
2893                                  p->idx, ib_chunk->length_dw);
2894                        return -EINVAL;
2895                }
2896                idx = p->idx;
2897                header = radeon_get_ib_value(p, idx);
2898                cmd = GET_DMA_CMD(header);
2899                count = GET_DMA_COUNT(header);
2900                tiled = GET_DMA_T(header);
2901                new_cmd = GET_DMA_NEW(header);
2902                misc = GET_DMA_MISC(header);
2903
2904                switch (cmd) {
2905                case DMA_PACKET_WRITE:
2906                        r = r600_dma_cs_next_reloc(p, &dst_reloc);
2907                        if (r) {
2908                                DRM_ERROR("bad DMA_PACKET_WRITE\n");
2909                                return -EINVAL;
2910                        }
2911                        if (tiled) {
2912                                dst_offset = radeon_get_ib_value(p, idx+1);
2913                                dst_offset <<= 8;
2914
2915                                ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
2916                                p->idx += count + 7;
2917                        } else {
2918                                dst_offset = radeon_get_ib_value(p, idx+1);
2919                                dst_offset |= ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2920
2921                                ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
2922                                ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
2923                                p->idx += count + 3;
2924                        }
2925                        if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2926                                dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n",
2927                                         dst_offset, radeon_bo_size(dst_reloc->robj));
2928                                return -EINVAL;
2929                        }
2930                        break;
2931                case DMA_PACKET_COPY:
2932                        r = r600_dma_cs_next_reloc(p, &src_reloc);
2933                        if (r) {
2934                                DRM_ERROR("bad DMA_PACKET_COPY\n");
2935                                return -EINVAL;
2936                        }
2937                        r = r600_dma_cs_next_reloc(p, &dst_reloc);
2938                        if (r) {
2939                                DRM_ERROR("bad DMA_PACKET_COPY\n");
2940                                return -EINVAL;
2941                        }
2942                        if (tiled) {
2943                                idx_value = radeon_get_ib_value(p, idx + 2);
2944                                if (new_cmd) {
2945                                        switch (misc) {
2946                                        case 0:
2947                                                /* L2T, frame to fields */
2948                                                if (idx_value & (1 << 31)) {
2949                                                        DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
2950                                                        return -EINVAL;
2951                                                }
2952                                                r = r600_dma_cs_next_reloc(p, &dst2_reloc);
2953                                                if (r) {
2954                                                        DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
2955                                                        return -EINVAL;
2956                                                }
2957                                                dst_offset = radeon_get_ib_value(p, idx+1);
2958                                                dst_offset <<= 8;
2959                                                dst2_offset = radeon_get_ib_value(p, idx+2);
2960                                                dst2_offset <<= 8;
2961                                                src_offset = radeon_get_ib_value(p, idx+8);
2962                                                src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
2963                                                if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2964                                                        dev_warn(p->dev, "DMA L2T, frame to fields src buffer too small (%llu %lu)\n",
2965                                                                 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2966                                                        return -EINVAL;
2967                                                }
2968                                                if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2969                                                        dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n",
2970                                                                 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2971                                                        return -EINVAL;
2972                                                }
2973                                                if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
2974                                                        dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n",
2975                                                                 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
2976                                                        return -EINVAL;
2977                                                }
2978                                                ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
2979                                                ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
2980                                                ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
2981                                                ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
2982                                                p->idx += 10;
2983                                                break;
2984                                        case 1:
2985                                                /* L2T, T2L partial */
2986                                                if (p->family < CHIP_CAYMAN) {
2987                                                        DRM_ERROR("L2T, T2L Partial is cayman only !\n");
2988                                                        return -EINVAL;
2989                                                }
2990                                                /* detile bit */
2991                                                if (idx_value & (1 << 31)) {
2992                                                        /* tiled src, linear dst */
2993                                                        ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
2994
2995                                                        ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
2996                                                        ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
2997                                                } else {
2998                                                        /* linear src, tiled dst */
2999                                                        ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3000                                                        ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3001
3002                                                        ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3003                                                }
3004                                                p->idx += 12;
3005                                                break;
3006                                        case 3:
3007                                                /* L2T, broadcast */
3008                                                if (idx_value & (1 << 31)) {
3009                                                        DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3010                                                        return -EINVAL;
3011                                                }
3012                                                r = r600_dma_cs_next_reloc(p, &dst2_reloc);
3013                                                if (r) {
3014                                                        DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3015                                                        return -EINVAL;
3016                                                }
3017                                                dst_offset = radeon_get_ib_value(p, idx+1);
3018                                                dst_offset <<= 8;
3019                                                dst2_offset = radeon_get_ib_value(p, idx+2);
3020                                                dst2_offset <<= 8;
3021                                                src_offset = radeon_get_ib_value(p, idx+8);
3022                                                src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
3023                                                if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3024                                                        dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
3025                                                                 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3026                                                        return -EINVAL;
3027                                                }
3028                                                if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3029                                                        dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
3030                                                                 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3031                                                        return -EINVAL;
3032                                                }
3033                                                if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3034                                                        dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
3035                                                                 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3036                                                        return -EINVAL;
3037                                                }
3038                                                ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3039                                                ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
3040                                                ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3041                                                ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3042                                                p->idx += 10;
3043                                                break;
3044                                        case 4:
3045                                                /* L2T, T2L */
3046                                                /* detile bit */
3047                                                if (idx_value & (1 << 31)) {
3048                                                        /* tiled src, linear dst */
3049                                                        src_offset = radeon_get_ib_value(p, idx+1);
3050                                                        src_offset <<= 8;
3051                                                        ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
3052
3053                                                        dst_offset = radeon_get_ib_value(p, idx+7);
3054                                                        dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
3055                                                        ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3056                                                        ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3057                                                } else {
3058                                                        /* linear src, tiled dst */
3059                                                        src_offset = radeon_get_ib_value(p, idx+7);
3060                                                        src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
3061                                                        ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3062                                                        ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3063
3064                                                        dst_offset = radeon_get_ib_value(p, idx+1);
3065                                                        dst_offset <<= 8;
3066                                                        ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3067                                                }
3068                                                if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3069                                                        dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%llu %lu)\n",
3070                                                                 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3071                                                        return -EINVAL;
3072                                                }
3073                                                if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3074                                                        dev_warn(p->dev, "DMA L2T, T2L dst buffer too small (%llu %lu)\n",
3075                                                                 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3076                                                        return -EINVAL;
3077                                                }
3078                                                p->idx += 9;
3079                                                break;
3080                                        case 5:
3081                                                /* T2T partial */
3082                                                if (p->family < CHIP_CAYMAN) {
3083                                                        DRM_ERROR("L2T, T2L Partial is cayman only !\n");
3084                                                        return -EINVAL;
3085                                                }
3086                                                ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
3087                                                ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3088                                                p->idx += 13;
3089                                                break;
3090                                        case 7:
3091                                                /* L2T, broadcast */
3092                                                if (idx_value & (1 << 31)) {
3093                                                        DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3094                                                        return -EINVAL;
3095                                                }
3096                                                r = r600_dma_cs_next_reloc(p, &dst2_reloc);
3097                                                if (r) {
3098                                                        DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3099                                                        return -EINVAL;
3100                                                }
3101                                                dst_offset = radeon_get_ib_value(p, idx+1);
3102                                                dst_offset <<= 8;
3103                                                dst2_offset = radeon_get_ib_value(p, idx+2);
3104                                                dst2_offset <<= 8;
3105                                                src_offset = radeon_get_ib_value(p, idx+8);
3106                                                src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
3107                                                if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3108                                                        dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
3109                                                                 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3110                                                        return -EINVAL;
3111                                                }
3112                                                if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3113                                                        dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
3114                                                                 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3115                                                        return -EINVAL;
3116                                                }
3117                                                if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3118                                                        dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
3119                                                                 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3120                                                        return -EINVAL;
3121                                                }
3122                                                ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3123                                                ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
3124                                                ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3125                                                ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3126                                                p->idx += 10;
3127                                                break;
3128                                        default:
3129                                                DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
3130                                                return -EINVAL;
3131                                        }
3132                                } else {
3133                                        switch (misc) {
3134                                        case 0:
3135                                                /* detile bit */
3136                                                if (idx_value & (1 << 31)) {
3137                                                        /* tiled src, linear dst */
3138                                                        src_offset = radeon_get_ib_value(p, idx+1);
3139                                                        src_offset <<= 8;
3140                                                        ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
3141
3142                                                        dst_offset = radeon_get_ib_value(p, idx+7);
3143                                                        dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
3144                                                        ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3145                                                        ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3146                                                } else {
3147                                                        /* linear src, tiled dst */
3148                                                        src_offset = radeon_get_ib_value(p, idx+7);
3149                                                        src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
3150                                                        ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3151                                                        ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3152
3153                                                        dst_offset = radeon_get_ib_value(p, idx+1);
3154                                                        dst_offset <<= 8;
3155                                                        ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3156                                                }
3157                                                if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3158                                                        dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
3159                                                                 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3160                                                        return -EINVAL;
3161                                                }
3162                                                if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3163                                                        dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
3164                                                                 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3165                                                        return -EINVAL;
3166                                                }
3167                                                p->idx += 9;
3168                                                break;
3169                                        default:
3170                                                DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
3171                                                return -EINVAL;
3172                                        }
3173                                }
3174                        } else {
3175                                if (new_cmd) {
3176                                        switch (misc) {
3177                                        case 0:
3178                                                /* L2L, byte */
3179                                                src_offset = radeon_get_ib_value(p, idx+2);
3180                                                src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
3181                                                dst_offset = radeon_get_ib_value(p, idx+1);
3182                                                dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;
3183                                                if ((src_offset + count) > radeon_bo_size(src_reloc->robj)) {
3184                                                        dev_warn(p->dev, "DMA L2L, byte src buffer too small (%llu %lu)\n",
3185                                                                 src_offset + count, radeon_bo_size(src_reloc->robj));
3186                                                        return -EINVAL;
3187                                                }
3188                                                if ((dst_offset + count) > radeon_bo_size(dst_reloc->robj)) {
3189                                                        dev_warn(p->dev, "DMA L2L, byte dst buffer too small (%llu %lu)\n",
3190                                                                 dst_offset + count, radeon_bo_size(dst_reloc->robj));
3191                                                        return -EINVAL;
3192                                                }
3193                                                ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
3194                                                ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
3195                                                ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3196                                                ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3197                                                p->idx += 5;
3198                                                break;
3199                                        case 1:
3200                                                /* L2L, partial */
3201                                                if (p->family < CHIP_CAYMAN) {
3202                                                        DRM_ERROR("L2L Partial is cayman only !\n");
3203                                                        return -EINVAL;
3204                                                }
3205                                                ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
3206                                                ib[idx+2] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3207                                                ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
3208                                                ib[idx+5] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3209
3210                                                p->idx += 9;
3211                                                break;
3212                                        case 4:
3213                                                /* L2L, dw, broadcast */
3214                                                r = r600_dma_cs_next_reloc(p, &dst2_reloc);
3215                                                if (r) {
3216                                                        DRM_ERROR("bad L2L, dw, broadcast DMA_PACKET_COPY\n");
3217                                                        return -EINVAL;
3218                                                }
3219                                                dst_offset = radeon_get_ib_value(p, idx+1);
3220                                                dst_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
3221                                                dst2_offset = radeon_get_ib_value(p, idx+2);
3222                                                dst2_offset |= ((u64)(radeon_get_ib_value(p, idx+5) & 0xff)) << 32;
3223                                                src_offset = radeon_get_ib_value(p, idx+3);
3224                                                src_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32;
3225                                                if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3226                                                        dev_warn(p->dev, "DMA L2L, dw, broadcast src buffer too small (%llu %lu)\n",
3227                                                                 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3228                                                        return -EINVAL;
3229                                                }
3230                                                if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3231                                                        dev_warn(p->dev, "DMA L2L, dw, broadcast dst buffer too small (%llu %lu)\n",
3232                                                                 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3233                                                        return -EINVAL;
3234                                                }
3235                                                if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3236                                                        dev_warn(p->dev, "DMA L2L, dw, broadcast dst2 buffer too small (%llu %lu)\n",
3237                                                                 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3238                                                        return -EINVAL;
3239                                                }
3240                                                ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3241                                                ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset & 0xfffffffc);
3242                                                ib[idx+3] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3243                                                ib[idx+4] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3244                                                ib[idx+5] += upper_32_bits(dst2_reloc->lobj.gpu_offset) & 0xff;
3245                                                ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3246                                                p->idx += 7;
3247                                                break;
3248                                        default:
3249                                                DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
3250                                                return -EINVAL;
3251                                        }
3252                                } else {
3253                                        /* L2L, dw */
3254                                        src_offset = radeon_get_ib_value(p, idx+2);
3255                                        src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
3256                                        dst_offset = radeon_get_ib_value(p, idx+1);
3257                                        dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;
3258                                        if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3259                                                dev_warn(p->dev, "DMA L2L, dw src buffer too small (%llu %lu)\n",
3260                                                         src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3261                                                return -EINVAL;
3262                                        }
3263                                        if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3264                                                dev_warn(p->dev, "DMA L2L, dw dst buffer too small (%llu %lu)\n",
3265                                                         dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3266                                                return -EINVAL;
3267                                        }
3268                                        ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3269                                        ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3270                                        ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3271                                        ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3272                                        p->idx += 5;
3273                                }
3274                        }
3275                        break;
3276                case DMA_PACKET_CONSTANT_FILL:
3277                        r = r600_dma_cs_next_reloc(p, &dst_reloc);
3278                        if (r) {
3279                                DRM_ERROR("bad DMA_PACKET_CONSTANT_FILL\n");
3280                                return -EINVAL;
3281                        }
3282                        dst_offset = radeon_get_ib_value(p, idx+1);
3283                        dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0x00ff0000)) << 16;
3284                        if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3285                                dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n",
3286                                         dst_offset, radeon_bo_size(dst_reloc->robj));
3287                                return -EINVAL;
3288                        }
3289                        ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3290                        ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) << 16) & 0x00ff0000;
3291                        p->idx += 4;
3292                        break;
3293                case DMA_PACKET_NOP:
3294                        p->idx += 1;
3295                        break;
3296                default:
3297                        DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
3298                        return -EINVAL;
3299                }
3300        } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
3301#if 0
3302        for (r = 0; r < p->ib->length_dw; r++) {
3303                printk(KERN_INFO "%05d  0x%08X\n", r, p->ib.ptr[r]);
3304                mdelay(1);
3305        }
3306#endif
3307        return 0;
3308}
3309
/* vm parser */

/**
 * evergreen_vm_reg_valid - whitelist check for registers written from a VM IB
 * @reg: byte offset of the register being written
 *
 * Decides whether a userspace command stream running under VM protection is
 * allowed to write @reg.  Context registers (offset 0x28000 and above) are
 * always permitted; every other register must appear in the explicit
 * config-register whitelist below.  Anything not listed is rejected with a
 * DRM_ERROR and a false return.
 *
 * NOTE(review): the case list is a security boundary — adding a register here
 * exposes it to unprivileged userspace, so keep changes deliberate.
 */
static bool evergreen_vm_reg_valid(u32 reg)
{
	/* context regs are fine */
	if (reg >= 0x28000)
		return true;

	/* check config regs */
	switch (reg) {
	case WAIT_UNTIL:
	case GRBM_GFX_INDEX:
	case CP_STRMOUT_CNTL:
	case CP_COHER_CNTL:
	case CP_COHER_SIZE:
	/* VGT draw/compute state registers */
	case VGT_VTX_VECT_EJECT_REG:
	case VGT_CACHE_INVALIDATION:
	case VGT_GS_VERTEX_REUSE:
	case VGT_PRIMITIVE_TYPE:
	case VGT_INDEX_TYPE:
	case VGT_NUM_INDICES:
	case VGT_NUM_INSTANCES:
	case VGT_COMPUTE_DIM_X:
	case VGT_COMPUTE_DIM_Y:
	case VGT_COMPUTE_DIM_Z:
	case VGT_COMPUTE_START_X:
	case VGT_COMPUTE_START_Y:
	case VGT_COMPUTE_START_Z:
	case VGT_COMPUTE_INDEX:
	case VGT_COMPUTE_THREAD_GROUP_SIZE:
	case VGT_HS_OFFCHIP_PARAM:
	case PA_CL_ENHANCE:
	case PA_SU_LINE_STIPPLE_VALUE:
	case PA_SC_LINE_STIPPLE_STATE:
	case PA_SC_ENHANCE:
	/* SQ (shader) configuration registers */
	case SQ_DYN_GPR_CNTL_PS_FLUSH_REQ:
	case SQ_DYN_GPR_SIMD_LOCK_EN:
	case SQ_CONFIG:
	case SQ_GPR_RESOURCE_MGMT_1:
	case SQ_GLOBAL_GPR_RESOURCE_MGMT_1:
	case SQ_GLOBAL_GPR_RESOURCE_MGMT_2:
	case SQ_CONST_MEM_BASE:
	case SQ_STATIC_THREAD_MGMT_1:
	case SQ_STATIC_THREAD_MGMT_2:
	case SQ_STATIC_THREAD_MGMT_3:
	case SPI_CONFIG_CNTL:
	case SPI_CONFIG_CNTL_1:
	case TA_CNTL_AUX:
	case DB_DEBUG:
	case DB_DEBUG2:
	case DB_DEBUG3:
	case DB_DEBUG4:
	case DB_WATERMARKS:
	/* TD border color registers for each shader stage */
	case TD_PS_BORDER_COLOR_INDEX:
	case TD_PS_BORDER_COLOR_RED:
	case TD_PS_BORDER_COLOR_GREEN:
	case TD_PS_BORDER_COLOR_BLUE:
	case TD_PS_BORDER_COLOR_ALPHA:
	case TD_VS_BORDER_COLOR_INDEX:
	case TD_VS_BORDER_COLOR_RED:
	case TD_VS_BORDER_COLOR_GREEN:
	case TD_VS_BORDER_COLOR_BLUE:
	case TD_VS_BORDER_COLOR_ALPHA:
	case TD_GS_BORDER_COLOR_INDEX:
	case TD_GS_BORDER_COLOR_RED:
	case TD_GS_BORDER_COLOR_GREEN:
	case TD_GS_BORDER_COLOR_BLUE:
	case TD_GS_BORDER_COLOR_ALPHA:
	case TD_HS_BORDER_COLOR_INDEX:
	case TD_HS_BORDER_COLOR_RED:
	case TD_HS_BORDER_COLOR_GREEN:
	case TD_HS_BORDER_COLOR_BLUE:
	case TD_HS_BORDER_COLOR_ALPHA:
	case TD_LS_BORDER_COLOR_INDEX:
	case TD_LS_BORDER_COLOR_RED:
	case TD_LS_BORDER_COLOR_GREEN:
	case TD_LS_BORDER_COLOR_BLUE:
	case TD_LS_BORDER_COLOR_ALPHA:
	case TD_CS_BORDER_COLOR_INDEX:
	case TD_CS_BORDER_COLOR_RED:
	case TD_CS_BORDER_COLOR_GREEN:
	case TD_CS_BORDER_COLOR_BLUE:
	case TD_CS_BORDER_COLOR_ALPHA:
	/* SQ ring size/itemsize/base registers */
	case SQ_ESGS_RING_SIZE:
	case SQ_GSVS_RING_SIZE:
	case SQ_ESTMP_RING_SIZE:
	case SQ_GSTMP_RING_SIZE:
	case SQ_HSTMP_RING_SIZE:
	case SQ_LSTMP_RING_SIZE:
	case SQ_PSTMP_RING_SIZE:
	case SQ_VSTMP_RING_SIZE:
	case SQ_ESGS_RING_ITEMSIZE:
	case SQ_ESTMP_RING_ITEMSIZE:
	case SQ_GSTMP_RING_ITEMSIZE:
	case SQ_GSVS_RING_ITEMSIZE:
	case SQ_GS_VERT_ITEMSIZE:
	case SQ_GS_VERT_ITEMSIZE_1:
	case SQ_GS_VERT_ITEMSIZE_2:
	case SQ_GS_VERT_ITEMSIZE_3:
	case SQ_GSVS_RING_OFFSET_1:
	case SQ_GSVS_RING_OFFSET_2:
	case SQ_GSVS_RING_OFFSET_3:
	case SQ_HSTMP_RING_ITEMSIZE:
	case SQ_LSTMP_RING_ITEMSIZE:
	case SQ_PSTMP_RING_ITEMSIZE:
	case SQ_VSTMP_RING_ITEMSIZE:
	case VGT_TF_RING_SIZE:
	case SQ_ESGS_RING_BASE:
	case SQ_GSVS_RING_BASE:
	case SQ_ESTMP_RING_BASE:
	case SQ_GSTMP_RING_BASE:
	case SQ_HSTMP_RING_BASE:
	case SQ_LSTMP_RING_BASE:
	case SQ_PSTMP_RING_BASE:
	case SQ_VSTMP_RING_BASE:
	/* cayman-only registers */
	case CAYMAN_VGT_OFFCHIP_LDS_BASE:
	case CAYMAN_SQ_EX_ALLOC_TABLE_SLOTS:
		return true;
	default:
		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
		return false;
	}
}
3432
3433static int evergreen_vm_packet3_check(struct radeon_device *rdev,
3434                                      u32 *ib, struct radeon_cs_packet *pkt)
3435{
3436        u32 idx = pkt->idx + 1;
3437        u32 idx_value = ib[idx];
3438        u32 start_reg, end_reg, reg, i;
3439        u32 command, info;
3440
3441        switch (pkt->opcode) {
3442        case PACKET3_NOP:
3443        case PACKET3_SET_BASE:
3444        case PACKET3_CLEAR_STATE:
3445        case PACKET3_INDEX_BUFFER_SIZE:
3446        case PACKET3_DISPATCH_DIRECT:
3447        case PACKET3_DISPATCH_INDIRECT:
3448        case PACKET3_MODE_CONTROL:
3449        case PACKET3_SET_PREDICATION:
3450        case PACKET3_COND_EXEC:
3451        case PACKET3_PRED_EXEC:
3452        case PACKET3_DRAW_INDIRECT:
3453        case PACKET3_DRAW_INDEX_INDIRECT:
3454        case PACKET3_INDEX_BASE:
3455        case PACKET3_DRAW_INDEX_2:
3456        case PACKET3_CONTEXT_CONTROL:
3457        case PACKET3_DRAW_INDEX_OFFSET:
3458        case PACKET3_INDEX_TYPE:
3459        case PACKET3_DRAW_INDEX:
3460        case PACKET3_DRAW_INDEX_AUTO:
3461        case PACKET3_DRAW_INDEX_IMMD:
3462        case PACKET3_NUM_INSTANCES:
3463        case PACKET3_DRAW_INDEX_MULTI_AUTO:
3464        case PACKET3_STRMOUT_BUFFER_UPDATE:
3465        case PACKET3_DRAW_INDEX_OFFSET_2:
3466        case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
3467        case PACKET3_MPEG_INDEX:
3468        case PACKET3_WAIT_REG_MEM:
3469        case PACKET3_MEM_WRITE:
3470        case PACKET3_SURFACE_SYNC:
3471        case PACKET3_EVENT_WRITE:
3472        case PACKET3_EVENT_WRITE_EOP:
3473        case PACKET3_EVENT_WRITE_EOS:
3474        case PACKET3_SET_CONTEXT_REG:
3475        case PACKET3_SET_BOOL_CONST:
3476        case PACKET3_SET_LOOP_CONST:
3477        case PACKET3_SET_RESOURCE:
3478        case PACKET3_SET_SAMPLER:
3479        case PACKET3_SET_CTL_CONST:
3480        case PACKET3_SET_RESOURCE_OFFSET:
3481        case PACKET3_SET_CONTEXT_REG_INDIRECT:
3482        case PACKET3_SET_RESOURCE_INDIRECT:
3483        case CAYMAN_PACKET3_DEALLOC_STATE:
3484                break;
3485        case PACKET3_COND_WRITE:
3486                if (idx_value & 0x100) {
3487                        reg = ib[idx + 5] * 4;
3488                        if (!evergreen_vm_reg_valid(reg))
3489                                return -EINVAL;
3490                }
3491                break;
3492        case PACKET3_COPY_DW:
3493                if (idx_value & 0x2) {
3494                        reg = ib[idx + 3] * 4;
3495                        if (!evergreen_vm_reg_valid(reg))
3496                                return -EINVAL;
3497                }
3498                break;
3499        case PACKET3_SET_CONFIG_REG:
3500                start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
3501                end_reg = 4 * pkt->count + start_reg - 4;
3502                if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
3503                    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
3504                    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
3505                        DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
3506                        return -EINVAL;
3507                }
3508                for (i = 0; i < pkt->count; i++) {
3509                        reg = start_reg + (4 * i);
3510                        if (!evergreen_vm_reg_valid(reg))
3511                                return -EINVAL;
3512                }
3513                break;
3514        case PACKET3_CP_DMA:
3515                command = ib[idx + 4];
3516                info = ib[idx + 1];
3517                if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
3518                    (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
3519                    ((((info & 0x00300000) >> 20) == 0) &&
3520                     (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
3521                    ((((info & 0x60000000) >> 29) == 0) &&
3522                     (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
3523                        /* non mem to mem copies requires dw aligned count */
3524                        if ((command & 0x1fffff) % 4) {
3525                                DRM_ERROR("CP DMA command requires dw count alignment\n");
3526                                return -EINVAL;
3527                        }
3528                }
3529                if (command & PACKET3_CP_DMA_CMD_SAS) {
3530                        /* src address space is register */
3531                        if (((info & 0x60000000) >> 29) == 0) {
3532                                start_reg = idx_value << 2;
3533                                if (command & PACKET3_CP_DMA_CMD_SAIC) {
3534                                        reg = start_reg;
3535                                        if (!evergreen_vm_reg_valid(reg)) {
3536                                                DRM_ERROR("CP DMA Bad SRC register\n");
3537                                                return -EINVAL;
3538                                        }
3539                                } else {
3540                                        for (i = 0; i < (command & 0x1fffff); i++) {
3541                                                reg = start_reg + (4 * i);
3542                                                if (!evergreen_vm_reg_valid(reg)) {
3543                                                        DRM_ERROR("CP DMA Bad SRC register\n");
3544                                                        return -EINVAL;
3545                                                }
3546                                        }
3547                                }
3548                        }
3549                }
3550                if (command & PACKET3_CP_DMA_CMD_DAS) {
3551                        /* dst address space is register */
3552                        if (((info & 0x00300000) >> 20) == 0) {
3553                                start_reg = ib[idx + 2];
3554                                if (command & PACKET3_CP_DMA_CMD_DAIC) {
3555                                        reg = start_reg;
3556                                        if (!evergreen_vm_reg_valid(reg)) {
3557                                                DRM_ERROR("CP DMA Bad DST register\n");
3558                                                return -EINVAL;
3559                                        }
3560                                } else {
3561                                        for (i = 0; i < (command & 0x1fffff); i++) {
3562                                                reg = start_reg + (4 * i);
3563                                                if (!evergreen_vm_reg_valid(reg)) {
3564                                                        DRM_ERROR("CP DMA Bad DST register\n");
3565                                                        return -EINVAL;
3566                                                }
3567                                        }
3568                                }
3569                        }
3570                }
3571                break;
3572        default:
3573                return -EINVAL;
3574        }
3575        return 0;
3576}
3577
3578int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3579{
3580        int ret = 0;
3581        u32 idx = 0;
3582        struct radeon_cs_packet pkt;
3583
3584        do {
3585                pkt.idx = idx;
3586                pkt.type = CP_PACKET_GET_TYPE(ib->ptr[idx]);
3587                pkt.count = CP_PACKET_GET_COUNT(ib->ptr[idx]);
3588                pkt.one_reg_wr = 0;
3589                switch (pkt.type) {
3590                case PACKET_TYPE0:
3591                        dev_err(rdev->dev, "Packet0 not allowed!\n");
3592                        ret = -EINVAL;
3593                        break;
3594                case PACKET_TYPE2:
3595                        idx += 1;
3596                        break;
3597                case PACKET_TYPE3:
3598                        pkt.opcode = CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
3599                        ret = evergreen_vm_packet3_check(rdev, ib->ptr, &pkt);
3600                        idx += pkt.count + 2;
3601                        break;
3602                default:
3603                        dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
3604                        ret = -EINVAL;
3605                        break;
3606                }
3607                if (ret)
3608                        break;
3609        } while (idx < ib->length_dw);
3610
3611        return ret;
3612}
3613
3614/**
3615 * evergreen_dma_ib_parse() - parse the DMA IB for VM
3616 * @rdev: radeon_device pointer
3617 * @ib: radeon_ib pointer
3618 *
3619 * Parses the DMA IB from the VM CS ioctl
3620 * checks for errors. (Cayman-SI)
3621 * Returns 0 for success and an error on failure.
3622 **/
3623int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3624{
3625        u32 idx = 0;
3626        u32 header, cmd, count, tiled, new_cmd, misc;
3627
3628        do {
3629                header = ib->ptr[idx];
3630                cmd = GET_DMA_CMD(header);
3631                count = GET_DMA_COUNT(header);
3632                tiled = GET_DMA_T(header);
3633                new_cmd = GET_DMA_NEW(header);
3634                misc = GET_DMA_MISC(header);
3635
3636                switch (cmd) {
3637                case DMA_PACKET_WRITE:
3638                        if (tiled)
3639                                idx += count + 7;
3640                        else
3641                                idx += count + 3;
3642                        break;
3643                case DMA_PACKET_COPY:
3644                        if (tiled) {
3645                                if (new_cmd) {
3646                                        switch (misc) {
3647                                        case 0:
3648                                                /* L2T, frame to fields */
3649                                                idx += 10;
3650                                                break;
3651                                        case 1:
3652                                                /* L2T, T2L partial */
3653                                                idx += 12;
3654                                                break;
3655                                        case 3:
3656                                                /* L2T, broadcast */
3657                                                idx += 10;
3658                                                break;
3659                                        case 4:
3660                                                /* L2T, T2L */
3661                                                idx += 9;
3662                                                break;
3663                                        case 5:
3664                                                /* T2T partial */
3665                                                idx += 13;
3666                                                break;
3667                                        case 7:
3668                                                /* L2T, broadcast */
3669                                                idx += 10;
3670                                                break;
3671                                        default:
3672                                                DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
3673                                                return -EINVAL;
3674                                        }
3675                                } else {
3676                                        switch (misc) {
3677                                        case 0:
3678                                                idx += 9;
3679                                                break;
3680                                        default:
3681                                                DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
3682                                                return -EINVAL;
3683                                        }
3684                                }
3685                        } else {
3686                                if (new_cmd) {
3687                                        switch (misc) {
3688                                        case 0:
3689                                                /* L2L, byte */
3690                                                idx += 5;
3691                                                break;
3692                                        case 1:
3693                                                /* L2L, partial */
3694                                                idx += 9;
3695                                                break;
3696                                        case 4:
3697                                                /* L2L, dw, broadcast */
3698                                                idx += 7;
3699                                                break;
3700                                        default:
3701                                                DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
3702                                                return -EINVAL;
3703                                        }
3704                                } else {
3705                                        /* L2L, dw */
3706                                        idx += 5;
3707                                }
3708                        }
3709                        break;
3710                case DMA_PACKET_CONSTANT_FILL:
3711                        idx += 4;
3712                        break;
3713                case DMA_PACKET_NOP:
3714                        idx += 1;
3715                        break;
3716                default:
3717                        DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
3718                        return -EINVAL;
3719                }
3720        } while (idx < ib->length_dw);
3721
3722        return 0;
3723}
3724