linux/drivers/gpu/drm/radeon/evergreen_cs.c
/*
 * Copyright 2010 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <drm/drmP.h>
#include "radeon.h"
#include "evergreend.h"
#include "evergreen_reg_safe.h"
#include "cayman_reg_safe.h"

#define MAX(a,b)                   (((a)>(b))?(a):(b))
#define MIN(a,b)                   (((a)<(b))?(a):(b))

#define REG_SAFE_BM_SIZE ARRAY_SIZE(evergreen_reg_safe_bm)

int r600_dma_cs_next_reloc(struct radeon_cs_parser *p,
                           struct radeon_bo_list **cs_reloc);
struct evergreen_cs_track {
        u32                     group_size;
        u32                     nbanks;
        u32                     npipes;
        u32                     row_size;
        /* value we track */
        u32                     nsamples;               /* unused */
        struct radeon_bo        *cb_color_bo[12];
        u32                     cb_color_bo_offset[12];
        struct radeon_bo        *cb_color_fmask_bo[8];  /* unused */
        struct radeon_bo        *cb_color_cmask_bo[8];  /* unused */
        u32                     cb_color_info[12];
        u32                     cb_color_view[12];
        u32                     cb_color_pitch[12];
        u32                     cb_color_slice[12];
        u32                     cb_color_slice_idx[12];
        u32                     cb_color_attrib[12];
        u32                     cb_color_cmask_slice[8];/* unused */
        u32                     cb_color_fmask_slice[8];/* unused */
        u32                     cb_target_mask;
        u32                     cb_shader_mask; /* unused */
        u32                     vgt_strmout_config;
        u32                     vgt_strmout_buffer_config;
        struct radeon_bo        *vgt_strmout_bo[4];
        u32                     vgt_strmout_bo_offset[4];
        u32                     vgt_strmout_size[4];
        u32                     db_depth_control;
        u32                     db_depth_view;
        u32                     db_depth_slice;
        u32                     db_depth_size;
        u32                     db_z_info;
        u32                     db_z_read_offset;
        u32                     db_z_write_offset;
        struct radeon_bo        *db_z_read_bo;
        struct radeon_bo        *db_z_write_bo;
        u32                     db_s_info;
        u32                     db_s_read_offset;
        u32                     db_s_write_offset;
        struct radeon_bo        *db_s_read_bo;
        struct radeon_bo        *db_s_write_bo;
        bool                    sx_misc_kill_all_prims;
        bool                    cb_dirty;
        bool                    db_dirty;
        bool                    streamout_dirty;
        u32                     htile_offset;
        u32                     htile_surface;
        struct radeon_bo        *htile_bo;
        unsigned long           indirect_draw_buffer_size;
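        /* per-ASIC register bitmap (from evergreen_reg_safe.h or
         * cayman_reg_safe.h) consulted when checking generic register
         * writes
         */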
        const unsigned          *reg_safe_bm;
};

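/* map RADEON_TILING_* flags to the hw array mode; macro tiling takes
 * precedence over micro tiling
 */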
static u32 evergreen_cs_get_aray_mode(u32 tiling_flags)
{
        if (tiling_flags & RADEON_TILING_MACRO)
                return ARRAY_2D_TILED_THIN1;
        else if (tiling_flags & RADEON_TILING_MICRO)
                return ARRAY_1D_TILED_THIN1;
        else
                return ARRAY_LINEAR_GENERAL;
}

static u32 evergreen_cs_get_num_banks(u32 nbanks)
{
        switch (nbanks) {
        case 2:
                return ADDR_SURF_2_BANK;
        case 4:
                return ADDR_SURF_4_BANK;
        case 8:
        default:
                return ADDR_SURF_8_BANK;
        case 16:
                return ADDR_SURF_16_BANK;
        }
}

static void evergreen_cs_track_init(struct evergreen_cs_track *track)
{
        int i;

        for (i = 0; i < 8; i++) {
                track->cb_color_fmask_bo[i] = NULL;
                track->cb_color_cmask_bo[i] = NULL;
                track->cb_color_cmask_slice[i] = 0;
                track->cb_color_fmask_slice[i] = 0;
        }

        for (i = 0; i < 12; i++) {
                track->cb_color_bo[i] = NULL;
                track->cb_color_bo_offset[i] = 0xFFFFFFFF;
                track->cb_color_info[i] = 0;
                track->cb_color_view[i] = 0xFFFFFFFF;
                track->cb_color_pitch[i] = 0;
                track->cb_color_slice[i] = 0xfffffff;
                track->cb_color_slice_idx[i] = 0;
        }
        track->cb_target_mask = 0xFFFFFFFF;
        track->cb_shader_mask = 0xFFFFFFFF;
        track->cb_dirty = true;

        track->db_depth_slice = 0xffffffff;
        track->db_depth_view = 0xFFFFC000;
        track->db_depth_size = 0xFFFFFFFF;
        track->db_depth_control = 0xFFFFFFFF;
        track->db_z_info = 0xFFFFFFFF;
        track->db_z_read_offset = 0xFFFFFFFF;
        track->db_z_write_offset = 0xFFFFFFFF;
        track->db_z_read_bo = NULL;
        track->db_z_write_bo = NULL;
        track->db_s_info = 0xFFFFFFFF;
        track->db_s_read_offset = 0xFFFFFFFF;
        track->db_s_write_offset = 0xFFFFFFFF;
        track->db_s_read_bo = NULL;
        track->db_s_write_bo = NULL;
        track->db_dirty = true;
        track->htile_bo = NULL;
        track->htile_offset = 0xFFFFFFFF;
        track->htile_surface = 0;

        for (i = 0; i < 4; i++) {
                track->vgt_strmout_size[i] = 0;
                track->vgt_strmout_bo[i] = NULL;
                track->vgt_strmout_bo_offset[i] = 0xFFFFFFFF;
        }
        track->streamout_dirty = true;
        track->sx_misc_kill_all_prims = false;
}

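/* surface description decoded from the command stream; the
 * evergreen_surface_check_*() helpers below fill in the output fields
 * (bpe, layer_size, alignments)
 */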
struct eg_surface {
        /* value gathered from cs */
        unsigned        nbx;
        unsigned        nby;
        unsigned        format;
        unsigned        mode;
        unsigned        nbanks;
        unsigned        bankw;
        unsigned        bankh;
        unsigned        tsplit;
        unsigned        mtilea;
        unsigned        nsamples;
        /* output value */
        unsigned        bpe;
        unsigned        layer_size;
        unsigned        palign;
        unsigned        halign;
        unsigned long   base_align;
};

static int evergreen_surface_check_linear(struct radeon_cs_parser *p,
                                          struct eg_surface *surf,
                                          const char *prefix)
{
        surf->layer_size = surf->nbx * surf->nby * surf->bpe * surf->nsamples;
        surf->base_align = surf->bpe;
        surf->palign = 1;
        surf->halign = 1;
        return 0;
}

static int evergreen_surface_check_linear_aligned(struct radeon_cs_parser *p,
                                                  struct eg_surface *surf,
                                                  const char *prefix)
{
        struct evergreen_cs_track *track = p->track;
        unsigned palign;

        palign = MAX(64, track->group_size / surf->bpe);
        surf->layer_size = surf->nbx * surf->nby * surf->bpe * surf->nsamples;
        surf->base_align = track->group_size;
        surf->palign = palign;
        surf->halign = 1;
        if (surf->nbx & (palign - 1)) {
                if (prefix) {
                        dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d\n",
                                 __func__, __LINE__, prefix, surf->nbx, palign);
                }
                return -EINVAL;
        }
        return 0;
}

static int evergreen_surface_check_1d(struct radeon_cs_parser *p,
                                      struct eg_surface *surf,
                                      const char *prefix)
{
        struct evergreen_cs_track *track = p->track;
        unsigned palign;

        palign = track->group_size / (8 * surf->bpe * surf->nsamples);
        palign = MAX(8, palign);
        surf->layer_size = surf->nbx * surf->nby * surf->bpe;
        surf->base_align = track->group_size;
        surf->palign = palign;
        surf->halign = 8;
        if ((surf->nbx & (palign - 1))) {
                if (prefix) {
                        dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d (%d %d %d)\n",
                                 __func__, __LINE__, prefix, surf->nbx, palign,
                                 track->group_size, surf->bpe, surf->nsamples);
                }
                return -EINVAL;
        }
        if ((surf->nby & (8 - 1))) {
                if (prefix) {
                        dev_warn(p->dev, "%s:%d %s height %d invalid must be aligned with 8\n",
                                 __func__, __LINE__, prefix, surf->nby);
                }
                return -EINVAL;
        }
        return 0;
}

static int evergreen_surface_check_2d(struct radeon_cs_parser *p,
                                      struct eg_surface *surf,
                                      const char *prefix)
{
        struct evergreen_cs_track *track = p->track;
        unsigned palign, halign, tileb, slice_pt;
        unsigned mtile_pr, mtile_ps, mtileb;

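        /* tileb is the number of bytes in one 8x8 micro tile; a micro
         * tile larger than the tile split size is spread over slice_pt
         * slices
         */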
        tileb = 64 * surf->bpe * surf->nsamples;
        slice_pt = 1;
        if (tileb > surf->tsplit) {
                slice_pt = tileb / surf->tsplit;
        }
        tileb = tileb / slice_pt;
        /* macro tile width & height */
        palign = (8 * surf->bankw * track->npipes) * surf->mtilea;
        halign = (8 * surf->bankh * surf->nbanks) / surf->mtilea;
        mtileb = (palign / 8) * (halign / 8) * tileb;
        mtile_pr = surf->nbx / palign;
        mtile_ps = (mtile_pr * surf->nby) / halign;
        surf->layer_size = mtile_ps * mtileb * slice_pt;
        surf->base_align = (palign / 8) * (halign / 8) * tileb;
        surf->palign = palign;
        surf->halign = halign;

        if ((surf->nbx & (palign - 1))) {
                if (prefix) {
                        dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d\n",
                                 __func__, __LINE__, prefix, surf->nbx, palign);
                }
                return -EINVAL;
        }
        if ((surf->nby & (halign - 1))) {
                if (prefix) {
                        dev_warn(p->dev, "%s:%d %s height %d invalid must be aligned with %d\n",
                                 __func__, __LINE__, prefix, surf->nby, halign);
                }
                return -EINVAL;
        }

        return 0;
}

static int evergreen_surface_check(struct radeon_cs_parser *p,
                                   struct eg_surface *surf,
                                   const char *prefix)
{
        /* some common value computed here */
        surf->bpe = r600_fmt_get_blocksize(surf->format);

        switch (surf->mode) {
        case ARRAY_LINEAR_GENERAL:
                return evergreen_surface_check_linear(p, surf, prefix);
        case ARRAY_LINEAR_ALIGNED:
                return evergreen_surface_check_linear_aligned(p, surf, prefix);
        case ARRAY_1D_TILED_THIN1:
                return evergreen_surface_check_1d(p, surf, prefix);
        case ARRAY_2D_TILED_THIN1:
                return evergreen_surface_check_2d(p, surf, prefix);
        default:
                dev_warn(p->dev, "%s:%d %s invalid array mode %d\n",
                                __func__, __LINE__, prefix, surf->mode);
                return -EINVAL;
        }
        return -EINVAL;
}

static int evergreen_surface_value_conv_check(struct radeon_cs_parser *p,
                                              struct eg_surface *surf,
                                              const char *prefix)
{
        switch (surf->mode) {
        case ARRAY_2D_TILED_THIN1:
                break;
        case ARRAY_LINEAR_GENERAL:
        case ARRAY_LINEAR_ALIGNED:
        case ARRAY_1D_TILED_THIN1:
                return 0;
        default:
                dev_warn(p->dev, "%s:%d %s invalid array mode %d\n",
                                __func__, __LINE__, prefix, surf->mode);
                return -EINVAL;
        }

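        /* the fields below are log2-style hw encodings; convert them to
         * actual counts/sizes (e.g. NUM_BANKS 0..3 -> 2/4/8/16 banks,
         * TILE_SPLIT 0..6 -> 64..4096 bytes)
         */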
        switch (surf->nbanks) {
        case 0: surf->nbanks = 2; break;
        case 1: surf->nbanks = 4; break;
        case 2: surf->nbanks = 8; break;
        case 3: surf->nbanks = 16; break;
        default:
                dev_warn(p->dev, "%s:%d %s invalid number of banks %d\n",
                         __func__, __LINE__, prefix, surf->nbanks);
                return -EINVAL;
        }
        switch (surf->bankw) {
        case 0: surf->bankw = 1; break;
        case 1: surf->bankw = 2; break;
        case 2: surf->bankw = 4; break;
        case 3: surf->bankw = 8; break;
        default:
                dev_warn(p->dev, "%s:%d %s invalid bankw %d\n",
                         __func__, __LINE__, prefix, surf->bankw);
                return -EINVAL;
        }
        switch (surf->bankh) {
        case 0: surf->bankh = 1; break;
        case 1: surf->bankh = 2; break;
        case 2: surf->bankh = 4; break;
        case 3: surf->bankh = 8; break;
        default:
                dev_warn(p->dev, "%s:%d %s invalid bankh %d\n",
                         __func__, __LINE__, prefix, surf->bankh);
                return -EINVAL;
        }
        switch (surf->mtilea) {
        case 0: surf->mtilea = 1; break;
        case 1: surf->mtilea = 2; break;
        case 2: surf->mtilea = 4; break;
        case 3: surf->mtilea = 8; break;
        default:
                dev_warn(p->dev, "%s:%d %s invalid macro tile aspect %d\n",
                         __func__, __LINE__, prefix, surf->mtilea);
                return -EINVAL;
        }
        switch (surf->tsplit) {
        case 0: surf->tsplit = 64; break;
        case 1: surf->tsplit = 128; break;
        case 2: surf->tsplit = 256; break;
        case 3: surf->tsplit = 512; break;
        case 4: surf->tsplit = 1024; break;
        case 5: surf->tsplit = 2048; break;
        case 6: surf->tsplit = 4096; break;
        default:
                dev_warn(p->dev, "%s:%d %s invalid tile split %d\n",
                         __func__, __LINE__, prefix, surf->tsplit);
                return -EINVAL;
        }
        return 0;
}

static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned id)
{
        struct evergreen_cs_track *track = p->track;
        struct eg_surface surf;
        unsigned pitch, slice, mslice;
        unsigned long offset;
        int r;

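        /* pitch is programmed in units of 8 pixels and slice in units of
         * 64 pixels, both stored minus one
         */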
        mslice = G_028C6C_SLICE_MAX(track->cb_color_view[id]) + 1;
        pitch = track->cb_color_pitch[id];
        slice = track->cb_color_slice[id];
        surf.nbx = (pitch + 1) * 8;
        surf.nby = ((slice + 1) * 64) / surf.nbx;
        surf.mode = G_028C70_ARRAY_MODE(track->cb_color_info[id]);
        surf.format = G_028C70_FORMAT(track->cb_color_info[id]);
        surf.tsplit = G_028C74_TILE_SPLIT(track->cb_color_attrib[id]);
        surf.nbanks = G_028C74_NUM_BANKS(track->cb_color_attrib[id]);
        surf.bankw = G_028C74_BANK_WIDTH(track->cb_color_attrib[id]);
        surf.bankh = G_028C74_BANK_HEIGHT(track->cb_color_attrib[id]);
        surf.mtilea = G_028C74_MACRO_TILE_ASPECT(track->cb_color_attrib[id]);
        surf.nsamples = 1;

        if (!r600_fmt_is_valid_color(surf.format)) {
                dev_warn(p->dev, "%s:%d cb invalid format %d for %d (0x%08x)\n",
                         __func__, __LINE__, surf.format,
                        id, track->cb_color_info[id]);
                return -EINVAL;
        }

        r = evergreen_surface_value_conv_check(p, &surf, "cb");
        if (r) {
                return r;
        }

        r = evergreen_surface_check(p, &surf, "cb");
        if (r) {
                dev_warn(p->dev, "%s:%d cb[%d] invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
                         __func__, __LINE__, id, track->cb_color_pitch[id],
                         track->cb_color_slice[id], track->cb_color_attrib[id],
                         track->cb_color_info[id]);
                return r;
        }

        offset = track->cb_color_bo_offset[id] << 8;
        if (offset & (surf.base_align - 1)) {
                dev_warn(p->dev, "%s:%d cb[%d] bo base %ld not aligned with %ld\n",
                         __func__, __LINE__, id, offset, surf.base_align);
                return -EINVAL;
        }

        offset += surf.layer_size * mslice;
        if (offset > radeon_bo_size(track->cb_color_bo[id])) {
                /* old ddx are broken they allocate bo with w*h*bpp but
                 * program slice with ALIGN(h, 8), catch this and patch
                 * command stream.
                 */
                if (!surf.mode) {
                        uint32_t *ib = p->ib.ptr;
                        unsigned long tmp, nby, bsize, size, min = 0;

                        /* find the height the ddx wants */
                        if (surf.nby > 8) {
                                min = surf.nby - 8;
                        }
                        bsize = radeon_bo_size(track->cb_color_bo[id]);
                        tmp = track->cb_color_bo_offset[id] << 8;
                        for (nby = surf.nby; nby > min; nby--) {
                                size = nby * surf.nbx * surf.bpe * surf.nsamples;
                                if ((tmp + size * mslice) <= bsize) {
                                        break;
                                }
                        }
                        if (nby > min) {
                                surf.nby = nby;
                                slice = ((nby * surf.nbx) / 64) - 1;
                                if (!evergreen_surface_check(p, &surf, "cb")) {
                                        /* check if this one works */
                                        tmp += surf.layer_size * mslice;
                                        if (tmp <= bsize) {
                                                ib[track->cb_color_slice_idx[id]] = slice;
                                                goto old_ddx_ok;
                                        }
                                }
                        }
                }
                dev_warn(p->dev, "%s:%d cb[%d] bo too small (layer size %d, "
                         "offset %d, max layer %d, bo size %ld, slice %d)\n",
                         __func__, __LINE__, id, surf.layer_size,
                        track->cb_color_bo_offset[id] << 8, mslice,
                        radeon_bo_size(track->cb_color_bo[id]), slice);
                dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n",
                         __func__, __LINE__, surf.nbx, surf.nby,
                        surf.mode, surf.bpe, surf.nsamples,
                        surf.bankw, surf.bankh,
                        surf.tsplit, surf.mtilea);
                return -EINVAL;
        }
old_ddx_ok:

        return 0;
}

static int evergreen_cs_track_validate_htile(struct radeon_cs_parser *p,
                                                unsigned nbx, unsigned nby)
{
        struct evergreen_cs_track *track = p->track;
        unsigned long size;

        if (track->htile_bo == NULL) {
                dev_warn(p->dev, "%s:%d htile enabled without htile surface 0x%08x\n",
                                __func__, __LINE__, track->db_z_info);
                return -EINVAL;
        }

        if (G_028ABC_LINEAR(track->htile_surface)) {
                /* pitch must be 16 htiles aligned == 16 * 8 pixel aligned */
                nbx = round_up(nbx, 16 * 8);
                /* height is npipes htiles aligned == npipes * 8 pixel aligned */
                nby = round_up(nby, track->npipes * 8);
        } else {
                /* always assume 8x8 htile */
                /* align is htile align * 8, htile align vary according to
                 * number of pipe and tile width and nby
                 */
                switch (track->npipes) {
                case 8:
                        /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8 */
                        nbx = round_up(nbx, 64 * 8);
                        nby = round_up(nby, 64 * 8);
                        break;
                case 4:
                        /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8 */
                        nbx = round_up(nbx, 64 * 8);
                        nby = round_up(nby, 32 * 8);
                        break;
                case 2:
                        /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8 */
                        nbx = round_up(nbx, 32 * 8);
                        nby = round_up(nby, 32 * 8);
                        break;
                case 1:
                        /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8 */
                        nbx = round_up(nbx, 32 * 8);
                        nby = round_up(nby, 16 * 8);
                        break;
                default:
                        dev_warn(p->dev, "%s:%d invalid num pipes %d\n",
                                        __func__, __LINE__, track->npipes);
                        return -EINVAL;
                }
        }
        /* compute number of htile */
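        /* each htile is 4 bytes and covers an 8x8 pixel block */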
        nbx = nbx >> 3;
        nby = nby >> 3;
        /* size must be aligned on npipes * 2K boundary */
        size = roundup(nbx * nby * 4, track->npipes * (2 << 10));
        size += track->htile_offset;

        if (size > radeon_bo_size(track->htile_bo)) {
                dev_warn(p->dev, "%s:%d htile surface too small %ld for %ld (%d %d)\n",
                                __func__, __LINE__, radeon_bo_size(track->htile_bo),
                                size, nbx, nby);
                return -EINVAL;
        }
        return 0;
}

static int evergreen_cs_track_validate_stencil(struct radeon_cs_parser *p)
{
        struct evergreen_cs_track *track = p->track;
        struct eg_surface surf;
        unsigned pitch, slice, mslice;
        unsigned long offset;
        int r;

        mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1;
        pitch = G_028058_PITCH_TILE_MAX(track->db_depth_size);
        slice = track->db_depth_slice;
        surf.nbx = (pitch + 1) * 8;
        surf.nby = ((slice + 1) * 64) / surf.nbx;
        surf.mode = G_028040_ARRAY_MODE(track->db_z_info);
        surf.format = G_028044_FORMAT(track->db_s_info);
        surf.tsplit = G_028044_TILE_SPLIT(track->db_s_info);
        surf.nbanks = G_028040_NUM_BANKS(track->db_z_info);
        surf.bankw = G_028040_BANK_WIDTH(track->db_z_info);
        surf.bankh = G_028040_BANK_HEIGHT(track->db_z_info);
        surf.mtilea = G_028040_MACRO_TILE_ASPECT(track->db_z_info);
        surf.nsamples = 1;

        if (surf.format != 1) {
                dev_warn(p->dev, "%s:%d stencil invalid format %d\n",
                         __func__, __LINE__, surf.format);
                return -EINVAL;
        }
        /* replace by color format so we can use same code */
        surf.format = V_028C70_COLOR_8;

        r = evergreen_surface_value_conv_check(p, &surf, "stencil");
        if (r) {
                return r;
        }

        r = evergreen_surface_check(p, &surf, NULL);
        if (r) {
                /* old userspace doesn't compute proper depth/stencil alignment
                 * check that alignment against a bigger byte per elements and
                 * only report if that alignment is wrong too.
                 */
                surf.format = V_028C70_COLOR_8_8_8_8;
                r = evergreen_surface_check(p, &surf, "stencil");
                if (r) {
                        dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
                                 __func__, __LINE__, track->db_depth_size,
                                 track->db_depth_slice, track->db_s_info, track->db_z_info);
                }
                return r;
        }

        offset = track->db_s_read_offset << 8;
        if (offset & (surf.base_align - 1)) {
                dev_warn(p->dev, "%s:%d stencil read bo base %ld not aligned with %ld\n",
                         __func__, __LINE__, offset, surf.base_align);
                return -EINVAL;
        }
        offset += surf.layer_size * mslice;
        if (offset > radeon_bo_size(track->db_s_read_bo)) {
                dev_warn(p->dev, "%s:%d stencil read bo too small (layer size %d, "
                         "offset %ld, max layer %d, bo size %ld)\n",
                         __func__, __LINE__, surf.layer_size,
                        (unsigned long)track->db_s_read_offset << 8, mslice,
                        radeon_bo_size(track->db_s_read_bo));
                dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
                         __func__, __LINE__, track->db_depth_size,
                         track->db_depth_slice, track->db_s_info, track->db_z_info);
                return -EINVAL;
        }

        offset = track->db_s_write_offset << 8;
        if (offset & (surf.base_align - 1)) {
                dev_warn(p->dev, "%s:%d stencil write bo base %ld not aligned with %ld\n",
                         __func__, __LINE__, offset, surf.base_align);
                return -EINVAL;
        }
        offset += surf.layer_size * mslice;
        if (offset > radeon_bo_size(track->db_s_write_bo)) {
                dev_warn(p->dev, "%s:%d stencil write bo too small (layer size %d, "
                         "offset %ld, max layer %d, bo size %ld)\n",
                         __func__, __LINE__, surf.layer_size,
                        (unsigned long)track->db_s_write_offset << 8, mslice,
                        radeon_bo_size(track->db_s_write_bo));
                return -EINVAL;
        }

        /* hyperz */
        if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) {
                r = evergreen_cs_track_validate_htile(p, surf.nbx, surf.nby);
                if (r) {
                        return r;
                }
        }

        return 0;
}

static int evergreen_cs_track_validate_depth(struct radeon_cs_parser *p)
{
        struct evergreen_cs_track *track = p->track;
        struct eg_surface surf;
        unsigned pitch, slice, mslice;
        unsigned long offset;
        int r;

        mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1;
        pitch = G_028058_PITCH_TILE_MAX(track->db_depth_size);
        slice = track->db_depth_slice;
        surf.nbx = (pitch + 1) * 8;
        surf.nby = ((slice + 1) * 64) / surf.nbx;
        surf.mode = G_028040_ARRAY_MODE(track->db_z_info);
        surf.format = G_028040_FORMAT(track->db_z_info);
        surf.tsplit = G_028040_TILE_SPLIT(track->db_z_info);
        surf.nbanks = G_028040_NUM_BANKS(track->db_z_info);
        surf.bankw = G_028040_BANK_WIDTH(track->db_z_info);
        surf.bankh = G_028040_BANK_HEIGHT(track->db_z_info);
        surf.mtilea = G_028040_MACRO_TILE_ASPECT(track->db_z_info);
        surf.nsamples = 1;

        switch (surf.format) {
        case V_028040_Z_16:
                surf.format = V_028C70_COLOR_16;
                break;
        case V_028040_Z_24:
        case V_028040_Z_32_FLOAT:
                surf.format = V_028C70_COLOR_8_8_8_8;
                break;
        default:
                dev_warn(p->dev, "%s:%d depth invalid format %d\n",
                         __func__, __LINE__, surf.format);
                return -EINVAL;
        }

        r = evergreen_surface_value_conv_check(p, &surf, "depth");
        if (r) {
                dev_warn(p->dev, "%s:%d depth invalid (0x%08x 0x%08x 0x%08x)\n",
                         __func__, __LINE__, track->db_depth_size,
                         track->db_depth_slice, track->db_z_info);
                return r;
        }

        r = evergreen_surface_check(p, &surf, "depth");
        if (r) {
                dev_warn(p->dev, "%s:%d depth invalid (0x%08x 0x%08x 0x%08x)\n",
                         __func__, __LINE__, track->db_depth_size,
                         track->db_depth_slice, track->db_z_info);
                return r;
        }

        offset = track->db_z_read_offset << 8;
        if (offset & (surf.base_align - 1)) {
                dev_warn(p->dev, "%s:%d depth read bo base %ld not aligned with %ld\n",
                         __func__, __LINE__, offset, surf.base_align);
                return -EINVAL;
        }
        offset += surf.layer_size * mslice;
        if (offset > radeon_bo_size(track->db_z_read_bo)) {
                dev_warn(p->dev, "%s:%d depth read bo too small (layer size %d, "
                         "offset %ld, max layer %d, bo size %ld)\n",
                         __func__, __LINE__, surf.layer_size,
                        (unsigned long)track->db_z_read_offset << 8, mslice,
                        radeon_bo_size(track->db_z_read_bo));
                return -EINVAL;
        }

        offset = track->db_z_write_offset << 8;
        if (offset & (surf.base_align - 1)) {
                dev_warn(p->dev, "%s:%d depth write bo base %ld not aligned with %ld\n",
                         __func__, __LINE__, offset, surf.base_align);
                return -EINVAL;
        }
        offset += surf.layer_size * mslice;
        if (offset > radeon_bo_size(track->db_z_write_bo)) {
                dev_warn(p->dev, "%s:%d depth write bo too small (layer size %d, "
                         "offset %ld, max layer %d, bo size %ld)\n",
                         __func__, __LINE__, surf.layer_size,
                        (unsigned long)track->db_z_write_offset << 8, mslice,
                        radeon_bo_size(track->db_z_write_bo));
                return -EINVAL;
        }

        /* hyperz */
        if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) {
                r = evergreen_cs_track_validate_htile(p, surf.nbx, surf.nby);
                if (r) {
                        return r;
                }
        }

        return 0;
}

static int evergreen_cs_track_validate_texture(struct radeon_cs_parser *p,
                                               struct radeon_bo *texture,
                                               struct radeon_bo *mipmap,
                                               unsigned idx)
{
        struct eg_surface surf;
        unsigned long toffset, moffset;
        unsigned dim, llevel, mslice, width, height, depth, i;
        u32 texdw[8];
        int r;

        texdw[0] = radeon_get_ib_value(p, idx + 0);
        texdw[1] = radeon_get_ib_value(p, idx + 1);
        texdw[2] = radeon_get_ib_value(p, idx + 2);
        texdw[3] = radeon_get_ib_value(p, idx + 3);
        texdw[4] = radeon_get_ib_value(p, idx + 4);
        texdw[5] = radeon_get_ib_value(p, idx + 5);
        texdw[6] = radeon_get_ib_value(p, idx + 6);
        texdw[7] = radeon_get_ib_value(p, idx + 7);
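        /* texdw[0..7] are the eight dwords of the texture resource
         * descriptor; decode the fields that need validation
         */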
        dim = G_030000_DIM(texdw[0]);
        llevel = G_030014_LAST_LEVEL(texdw[5]);
        mslice = G_030014_LAST_ARRAY(texdw[5]) + 1;
        width = G_030000_TEX_WIDTH(texdw[0]) + 1;
        height = G_030004_TEX_HEIGHT(texdw[1]) + 1;
        depth = G_030004_TEX_DEPTH(texdw[1]) + 1;
        surf.format = G_03001C_DATA_FORMAT(texdw[7]);
        surf.nbx = (G_030000_PITCH(texdw[0]) + 1) * 8;
        surf.nbx = r600_fmt_get_nblocksx(surf.format, surf.nbx);
        surf.nby = r600_fmt_get_nblocksy(surf.format, height);
        surf.mode = G_030004_ARRAY_MODE(texdw[1]);
        surf.tsplit = G_030018_TILE_SPLIT(texdw[6]);
        surf.nbanks = G_03001C_NUM_BANKS(texdw[7]);
        surf.bankw = G_03001C_BANK_WIDTH(texdw[7]);
        surf.bankh = G_03001C_BANK_HEIGHT(texdw[7]);
        surf.mtilea = G_03001C_MACRO_TILE_ASPECT(texdw[7]);
        surf.nsamples = 1;
        toffset = texdw[2] << 8;
        moffset = texdw[3] << 8;

        if (!r600_fmt_is_valid_texture(surf.format, p->family)) {
                dev_warn(p->dev, "%s:%d texture invalid format %d\n",
                         __func__, __LINE__, surf.format);
                return -EINVAL;
        }
        switch (dim) {
        case V_030000_SQ_TEX_DIM_1D:
        case V_030000_SQ_TEX_DIM_2D:
        case V_030000_SQ_TEX_DIM_CUBEMAP:
        case V_030000_SQ_TEX_DIM_1D_ARRAY:
        case V_030000_SQ_TEX_DIM_2D_ARRAY:
                depth = 1;
                break;
        case V_030000_SQ_TEX_DIM_2D_MSAA:
        case V_030000_SQ_TEX_DIM_2D_ARRAY_MSAA:
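                /* for MSAA resources LAST_LEVEL holds log2(sample count) */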
                surf.nsamples = 1 << llevel;
                llevel = 0;
                depth = 1;
                break;
        case V_030000_SQ_TEX_DIM_3D:
                break;
        default:
                dev_warn(p->dev, "%s:%d texture invalid dimension %d\n",
                         __func__, __LINE__, dim);
                return -EINVAL;
        }

        r = evergreen_surface_value_conv_check(p, &surf, "texture");
        if (r) {
                return r;
        }

        /* align height */
        evergreen_surface_check(p, &surf, NULL);
        surf.nby = ALIGN(surf.nby, surf.halign);

        r = evergreen_surface_check(p, &surf, "texture");
        if (r) {
                dev_warn(p->dev, "%s:%d texture invalid 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n",
                         __func__, __LINE__, texdw[0], texdw[1], texdw[4],
                         texdw[5], texdw[6], texdw[7]);
                return r;
        }

        /* check texture size */
        if (toffset & (surf.base_align - 1)) {
                dev_warn(p->dev, "%s:%d texture bo base %ld not aligned with %ld\n",
                         __func__, __LINE__, toffset, surf.base_align);
                return -EINVAL;
        }
        if (surf.nsamples <= 1 && moffset & (surf.base_align - 1)) {
                dev_warn(p->dev, "%s:%d mipmap bo base %ld not aligned with %ld\n",
                         __func__, __LINE__, moffset, surf.base_align);
                return -EINVAL;
        }
        if (dim == SQ_TEX_DIM_3D) {
                toffset += surf.layer_size * depth;
        } else {
                toffset += surf.layer_size * mslice;
        }
        if (toffset > radeon_bo_size(texture)) {
                dev_warn(p->dev, "%s:%d texture bo too small (layer size %d, "
                         "offset %ld, max layer %d, depth %d, bo size %ld) (%d %d)\n",
                         __func__, __LINE__, surf.layer_size,
                        (unsigned long)texdw[2] << 8, mslice,
                        depth, radeon_bo_size(texture),
                        surf.nbx, surf.nby);
                return -EINVAL;
        }

        if (!mipmap) {
                if (llevel) {
                        dev_warn(p->dev, "%s:%i got NULL MIP_ADDRESS relocation\n",
                                 __func__, __LINE__);
                        return -EINVAL;
                } else {
                        return 0; /* everything's ok */
                }
        }

        /* check mipmap size */
        for (i = 1; i <= llevel; i++) {
                unsigned w, h, d;

                w = r600_mip_minify(width, i);
                h = r600_mip_minify(height, i);
                d = r600_mip_minify(depth, i);
                surf.nbx = r600_fmt_get_nblocksx(surf.format, w);
                surf.nby = r600_fmt_get_nblocksy(surf.format, h);

                switch (surf.mode) {
                case ARRAY_2D_TILED_THIN1:
                        if (surf.nbx < surf.palign || surf.nby < surf.halign) {
                                surf.mode = ARRAY_1D_TILED_THIN1;
                        }
                        /* recompute alignment */
                        evergreen_surface_check(p, &surf, NULL);
                        break;
                case ARRAY_LINEAR_GENERAL:
                case ARRAY_LINEAR_ALIGNED:
                case ARRAY_1D_TILED_THIN1:
                        break;
                default:
                        dev_warn(p->dev, "%s:%d invalid array mode %d\n",
                                 __func__, __LINE__, surf.mode);
                        return -EINVAL;
                }
                surf.nbx = ALIGN(surf.nbx, surf.palign);
                surf.nby = ALIGN(surf.nby, surf.halign);

                r = evergreen_surface_check(p, &surf, "mipmap");
                if (r) {
                        return r;
                }

                if (dim == SQ_TEX_DIM_3D) {
                        moffset += surf.layer_size * d;
                } else {
                        moffset += surf.layer_size * mslice;
                }
                if (moffset > radeon_bo_size(mipmap)) {
                        dev_warn(p->dev, "%s:%d mipmap [%d] bo too small (layer size %d, "
                                        "offset %ld, coffset %ld, max layer %d, depth %d, "
                                        "bo size %ld) level0 (%d %d %d)\n",
                                        __func__, __LINE__, i, surf.layer_size,
                                        (unsigned long)texdw[3] << 8, moffset, mslice,
                                        d, radeon_bo_size(mipmap),
                                        width, height, depth);
                        dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n",
                                 __func__, __LINE__, surf.nbx, surf.nby,
                                surf.mode, surf.bpe, surf.nsamples,
                                surf.bankw, surf.bankh,
                                surf.tsplit, surf.mtilea);
                        return -EINVAL;
                }
        }

        return 0;
}

static int evergreen_cs_track_check(struct radeon_cs_parser *p)
{
        struct evergreen_cs_track *track = p->track;
        unsigned tmp, i;
        int r;
        unsigned buffer_mask = 0;

        /* check streamout */
        if (track->streamout_dirty && track->vgt_strmout_config) {
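                /* each enabled stream selects its buffers through a
                 * 4-bit mask in VGT_STRMOUT_BUFFER_CONFIG
                 */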
                for (i = 0; i < 4; i++) {
                        if (track->vgt_strmout_config & (1 << i)) {
                                buffer_mask |= (track->vgt_strmout_buffer_config >> (i * 4)) & 0xf;
                        }
                }

                for (i = 0; i < 4; i++) {
                        if (buffer_mask & (1 << i)) {
                                if (track->vgt_strmout_bo[i]) {
                                        u64 offset = (u64)track->vgt_strmout_bo_offset[i] +
                                                        (u64)track->vgt_strmout_size[i];
                                        if (offset > radeon_bo_size(track->vgt_strmout_bo[i])) {
                                                DRM_ERROR("streamout %d bo too small: 0x%llx, 0x%lx\n",
                                                          i, offset,
                                                          radeon_bo_size(track->vgt_strmout_bo[i]));
                                                return -EINVAL;
                                        }
                                } else {
                                        dev_warn(p->dev, "No buffer for streamout %d\n", i);
                                        return -EINVAL;
                                }
                        }
                }
                track->streamout_dirty = false;
        }

        if (track->sx_misc_kill_all_prims)
                return 0;

        /* check that we have a cb for each enabled target
         */
        if (track->cb_dirty) {
                tmp = track->cb_target_mask;
                for (i = 0; i < 8; i++) {
                        u32 format = G_028C70_FORMAT(track->cb_color_info[i]);

                        if (format != V_028C70_COLOR_INVALID &&
                            (tmp >> (i * 4)) & 0xF) {
                                /* at least one component is enabled */
                                if (track->cb_color_bo[i] == NULL) {
                                        dev_warn(p->dev, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n",
                                                __func__, __LINE__, track->cb_target_mask, track->cb_shader_mask, i);
                                        return -EINVAL;
                                }
                                /* check cb */
                                r = evergreen_cs_track_validate_cb(p, i);
                                if (r) {
                                        return r;
                                }
                        }
                }
                track->cb_dirty = false;
        }

        if (track->db_dirty) {
                /* Check stencil buffer */
                if (G_028044_FORMAT(track->db_s_info) != V_028044_STENCIL_INVALID &&
                    G_028800_STENCIL_ENABLE(track->db_depth_control)) {
                        r = evergreen_cs_track_validate_stencil(p);
                        if (r)
                                return r;
                }
                /* Check depth buffer */
                if (G_028040_FORMAT(track->db_z_info) != V_028040_Z_INVALID &&
                    G_028800_Z_ENABLE(track->db_depth_control)) {
                        r = evergreen_cs_track_validate_depth(p);
                        if (r)
                                return r;
                }
                track->db_dirty = false;
        }

        return 0;
}

/**
 * evergreen_cs_packet_parse_vline() - parse userspace VLINE packet
 * @p: parser structure holding parsing context.
 *
 * This is an Evergreen(+)-specific function for parsing VLINE packets.
 * Real work is done by r600_cs_common_vline_parse function.
 * Here we just set up ASIC-specific register table and call
 * the common implementation function.
 */
static int evergreen_cs_packet_parse_vline(struct radeon_cs_parser *p)
{
        static uint32_t vline_start_end[6] = {
                EVERGREEN_VLINE_START_END + EVERGREEN_CRTC0_REGISTER_OFFSET,
                EVERGREEN_VLINE_START_END + EVERGREEN_CRTC1_REGISTER_OFFSET,
                EVERGREEN_VLINE_START_END + EVERGREEN_CRTC2_REGISTER_OFFSET,
                EVERGREEN_VLINE_START_END + EVERGREEN_CRTC3_REGISTER_OFFSET,
                EVERGREEN_VLINE_START_END + EVERGREEN_CRTC4_REGISTER_OFFSET,
                EVERGREEN_VLINE_START_END + EVERGREEN_CRTC5_REGISTER_OFFSET
        };
        static uint32_t vline_status[6] = {
                EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
                EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
                EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
                EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
                EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
                EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET
        };

        return r600_cs_common_vline_parse(p, vline_start_end, vline_status);
}

static int evergreen_packet0_check(struct radeon_cs_parser *p,
                                   struct radeon_cs_packet *pkt,
                                   unsigned idx, unsigned reg)
{
        int r;

        switch (reg) {
        case EVERGREEN_VLINE_START_END:
                r = evergreen_cs_packet_parse_vline(p);
                if (r) {
                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
                                        idx, reg);
                        return r;
                }
                break;
        default:
                printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
                       reg, idx);
                return -EINVAL;
        }
        return 0;
}

static int evergreen_cs_parse_packet0(struct radeon_cs_parser *p,
                                      struct radeon_cs_packet *pkt)
{
        unsigned reg, i;
        unsigned idx;
        int r;

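        /* a type-0 packet writes pkt->count + 1 consecutive registers
         * starting at pkt->reg
         */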
        idx = pkt->idx + 1;
        reg = pkt->reg;
        for (i = 0; i <= pkt->count; i++, idx++, reg += 4) {
                r = evergreen_packet0_check(p, pkt, idx, reg);
                if (r) {
                        return r;
                }
        }
        return 0;
}

/**
 * evergreen_cs_handle_reg() - process registers that need special handling.
 * @p: parser structure holding parsing context
 * @reg: register we are testing
 * @idx: index into the cs buffer
 */
static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
{
        struct evergreen_cs_track *track = (struct evergreen_cs_track *)p->track;
        struct radeon_bo_list *reloc;
        u32 tmp, *ib;
        int r;

        ib = p->ib.ptr;
        switch (reg) {
        /* force following reg to 0 in an attempt to disable out buffer
         * which will need us to better understand how it works to perform
         * security check on it (Jerome)
         */
        case SQ_ESGS_RING_SIZE:
        case SQ_GSVS_RING_SIZE:
        case SQ_ESTMP_RING_SIZE:
        case SQ_GSTMP_RING_SIZE:
        case SQ_HSTMP_RING_SIZE:
        case SQ_LSTMP_RING_SIZE:
        case SQ_PSTMP_RING_SIZE:
        case SQ_VSTMP_RING_SIZE:
        case SQ_ESGS_RING_ITEMSIZE:
        case SQ_ESTMP_RING_ITEMSIZE:
        case SQ_GSTMP_RING_ITEMSIZE:
        case SQ_GSVS_RING_ITEMSIZE:
        case SQ_GS_VERT_ITEMSIZE:
        case SQ_GS_VERT_ITEMSIZE_1:
        case SQ_GS_VERT_ITEMSIZE_2:
        case SQ_GS_VERT_ITEMSIZE_3:
        case SQ_GSVS_RING_OFFSET_1:
        case SQ_GSVS_RING_OFFSET_2:
        case SQ_GSVS_RING_OFFSET_3:
        case SQ_HSTMP_RING_ITEMSIZE:
        case SQ_LSTMP_RING_ITEMSIZE:
        case SQ_PSTMP_RING_ITEMSIZE:
        case SQ_VSTMP_RING_ITEMSIZE:
        case VGT_TF_RING_SIZE:
                /* get value to populate the IB don't remove */
                /*tmp =radeon_get_ib_value(p, idx);
                  ib[idx] = 0;*/
                break;
        case SQ_ESGS_RING_BASE:
        case SQ_GSVS_RING_BASE:
        case SQ_ESTMP_RING_BASE:
        case SQ_GSTMP_RING_BASE:
        case SQ_HSTMP_RING_BASE:
        case SQ_LSTMP_RING_BASE:
        case SQ_PSTMP_RING_BASE:
        case SQ_VSTMP_RING_BASE:
                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
                if (r) {
                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
                                        "0x%04X\n", reg);
                        return -EINVAL;
                }
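                /* ring base registers hold a 256-byte aligned address
                 * (gpu address >> 8); patch in the relocated bo address
                 */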
1149                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1150                break;
1151        case DB_DEPTH_CONTROL:
1152                track->db_depth_control = radeon_get_ib_value(p, idx);
1153                track->db_dirty = true;
1154                break;
1155        case CAYMAN_DB_EQAA:
1156                if (p->rdev->family < CHIP_CAYMAN) {
1157                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1158                                 "0x%04X\n", reg);
1159                        return -EINVAL;
1160                }
1161                break;
1162        case CAYMAN_DB_DEPTH_INFO:
1163                if (p->rdev->family < CHIP_CAYMAN) {
1164                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1165                                 "0x%04X\n", reg);
1166                        return -EINVAL;
1167                }
1168                break;
1169        case DB_Z_INFO:
1170                track->db_z_info = radeon_get_ib_value(p, idx);
1171                if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1172                        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1173                        if (r) {
1174                                dev_warn(p->dev, "bad SET_CONTEXT_REG "
1175                                                "0x%04X\n", reg);
1176                                return -EINVAL;
1177                        }
1178                        ib[idx] &= ~Z_ARRAY_MODE(0xf);
1179                        track->db_z_info &= ~Z_ARRAY_MODE(0xf);
1180                        ib[idx] |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1181                        track->db_z_info |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1182                        if (reloc->tiling_flags & RADEON_TILING_MACRO) {
1183                                unsigned bankw, bankh, mtaspect, tile_split;
1184
1185                                evergreen_tiling_fields(reloc->tiling_flags,
1186                                                        &bankw, &bankh, &mtaspect,
1187                                                        &tile_split);
1188                                ib[idx] |= DB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
1189                                ib[idx] |= DB_TILE_SPLIT(tile_split) |
1190                                                DB_BANK_WIDTH(bankw) |
1191                                                DB_BANK_HEIGHT(bankh) |
1192                                                DB_MACRO_TILE_ASPECT(mtaspect);
1193                        }
1194                }
1195                track->db_dirty = true;
1196                break;
1197        case DB_STENCIL_INFO:
1198                track->db_s_info = radeon_get_ib_value(p, idx);
1199                track->db_dirty = true;
1200                break;
1201        case DB_DEPTH_VIEW:
1202                track->db_depth_view = radeon_get_ib_value(p, idx);
1203                track->db_dirty = true;
1204                break;
1205        case DB_DEPTH_SIZE:
1206                track->db_depth_size = radeon_get_ib_value(p, idx);
1207                track->db_dirty = true;
1208                break;
1209        case R_02805C_DB_DEPTH_SLICE:
1210                track->db_depth_slice = radeon_get_ib_value(p, idx);
1211                track->db_dirty = true;
1212                break;
1213        case DB_Z_READ_BASE:
1214                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1215                if (r) {
1216                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1217                                        "0x%04X\n", reg);
1218                        return -EINVAL;
1219                }
1220                track->db_z_read_offset = radeon_get_ib_value(p, idx);
1221                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1222                track->db_z_read_bo = reloc->robj;
1223                track->db_dirty = true;
1224                break;
1225        case DB_Z_WRITE_BASE:
1226                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1227                if (r) {
1228                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1229                                        "0x%04X\n", reg);
1230                        return -EINVAL;
1231                }
1232                track->db_z_write_offset = radeon_get_ib_value(p, idx);
1233                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1234                track->db_z_write_bo = reloc->robj;
1235                track->db_dirty = true;
1236                break;
1237        case DB_STENCIL_READ_BASE:
1238                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1239                if (r) {
1240                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1241                                        "0x%04X\n", reg);
1242                        return -EINVAL;
1243                }
1244                track->db_s_read_offset = radeon_get_ib_value(p, idx);
1245                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1246                track->db_s_read_bo = reloc->robj;
1247                track->db_dirty = true;
1248                break;
1249        case DB_STENCIL_WRITE_BASE:
1250                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1251                if (r) {
1252                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1253                                        "0x%04X\n", reg);
1254                        return -EINVAL;
1255                }
1256                track->db_s_write_offset = radeon_get_ib_value(p, idx);
1257                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1258                track->db_s_write_bo = reloc->robj;
1259                track->db_dirty = true;
1260                break;
1261        case VGT_STRMOUT_CONFIG:
1262                track->vgt_strmout_config = radeon_get_ib_value(p, idx);
1263                track->streamout_dirty = true;
1264                break;
1265        case VGT_STRMOUT_BUFFER_CONFIG:
1266                track->vgt_strmout_buffer_config = radeon_get_ib_value(p, idx);
1267                track->streamout_dirty = true;
1268                break;
1269        case VGT_STRMOUT_BUFFER_BASE_0:
1270        case VGT_STRMOUT_BUFFER_BASE_1:
1271        case VGT_STRMOUT_BUFFER_BASE_2:
1272        case VGT_STRMOUT_BUFFER_BASE_3:
1273                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1274                if (r) {
1275                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1276                                        "0x%04X\n", reg);
1277                        return -EINVAL;
1278                }
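                /* the four VGT_STRMOUT_BUFFER_BASE registers are 16 bytes
                 * apart, so the offset from BASE_0 divided by 16 selects the
                 * stream-out buffer (0-3) */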
1279                tmp = (reg - VGT_STRMOUT_BUFFER_BASE_0) / 16;
1280                track->vgt_strmout_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8;
1281                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1282                track->vgt_strmout_bo[tmp] = reloc->robj;
1283                track->streamout_dirty = true;
1284                break;
1285        case VGT_STRMOUT_BUFFER_SIZE_0:
1286        case VGT_STRMOUT_BUFFER_SIZE_1:
1287        case VGT_STRMOUT_BUFFER_SIZE_2:
1288        case VGT_STRMOUT_BUFFER_SIZE_3:
1289                tmp = (reg - VGT_STRMOUT_BUFFER_SIZE_0) / 16;
1290                /* size in register is DWs, convert to bytes */
1291                track->vgt_strmout_size[tmp] = radeon_get_ib_value(p, idx) * 4;
1292                track->streamout_dirty = true;
1293                break;
1294        case CP_COHER_BASE:
1295                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1296                if (r) {
1297                        dev_warn(p->dev, "missing reloc for CP_COHER_BASE "
1298                                        "0x%04X\n", reg);
1299                        return -EINVAL;
1300                }
1301                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
                break;
1302        case CB_TARGET_MASK:
1303                track->cb_target_mask = radeon_get_ib_value(p, idx);
1304                track->cb_dirty = true;
1305                break;
1306        case CB_SHADER_MASK:
1307                track->cb_shader_mask = radeon_get_ib_value(p, idx);
1308                track->cb_dirty = true;
1309                break;
1310        case PA_SC_AA_CONFIG:
1311                if (p->rdev->family >= CHIP_CAYMAN) {
1312                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1313                                 "0x%04X\n", reg);
1314                        return -EINVAL;
1315                }
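                /* the field stores log2 of the sample count, hence 1 << tmp */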
1316                tmp = radeon_get_ib_value(p, idx) & MSAA_NUM_SAMPLES_MASK;
1317                track->nsamples = 1 << tmp;
1318                break;
1319        case CAYMAN_PA_SC_AA_CONFIG:
1320                if (p->rdev->family < CHIP_CAYMAN) {
1321                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1322                                 "0x%04X\n", reg);
1323                        return -EINVAL;
1324                }
1325                tmp = radeon_get_ib_value(p, idx) & CAYMAN_MSAA_NUM_SAMPLES_MASK;
1326                track->nsamples = 1 << tmp;
1327                break;
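        /*
         * The CB_COLOR0..7 register blocks are 0x3c bytes apart, while the
         * CB_COLOR8..11 blocks (which, as the cases below show, carry no
         * FMASK/CMASK registers) are only 0x1c apart; hence the two strides
         * used to recover the render target index.
         */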
1328        case CB_COLOR0_VIEW:
1329        case CB_COLOR1_VIEW:
1330        case CB_COLOR2_VIEW:
1331        case CB_COLOR3_VIEW:
1332        case CB_COLOR4_VIEW:
1333        case CB_COLOR5_VIEW:
1334        case CB_COLOR6_VIEW:
1335        case CB_COLOR7_VIEW:
1336                tmp = (reg - CB_COLOR0_VIEW) / 0x3c;
1337                track->cb_color_view[tmp] = radeon_get_ib_value(p, idx);
1338                track->cb_dirty = true;
1339                break;
1340        case CB_COLOR8_VIEW:
1341        case CB_COLOR9_VIEW:
1342        case CB_COLOR10_VIEW:
1343        case CB_COLOR11_VIEW:
1344                tmp = ((reg - CB_COLOR8_VIEW) / 0x1c) + 8;
1345                track->cb_color_view[tmp] = radeon_get_ib_value(p, idx);
1346                track->cb_dirty = true;
1347                break;
1348        case CB_COLOR0_INFO:
1349        case CB_COLOR1_INFO:
1350        case CB_COLOR2_INFO:
1351        case CB_COLOR3_INFO:
1352        case CB_COLOR4_INFO:
1353        case CB_COLOR5_INFO:
1354        case CB_COLOR6_INFO:
1355        case CB_COLOR7_INFO:
1356                tmp = (reg - CB_COLOR0_INFO) / 0x3c;
1357                track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
1358                if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1359                        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1360                        if (r) {
1361                                dev_warn(p->dev, "bad SET_CONTEXT_REG "
1362                                                "0x%04X\n", reg);
1363                                return -EINVAL;
1364                        }
1365                        ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1366                        track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1367                }
1368                track->cb_dirty = true;
1369                break;
1370        case CB_COLOR8_INFO:
1371        case CB_COLOR9_INFO:
1372        case CB_COLOR10_INFO:
1373        case CB_COLOR11_INFO:
1374                tmp = ((reg - CB_COLOR8_INFO) / 0x1c) + 8;
1375                track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
1376                if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1377                        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1378                        if (r) {
1379                                dev_warn(p->dev, "bad SET_CONTEXT_REG "
1380                                                "0x%04X\n", reg);
1381                                return -EINVAL;
1382                        }
1383                        ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1384                        track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1385                }
1386                track->cb_dirty = true;
1387                break;
1388        case CB_COLOR0_PITCH:
1389        case CB_COLOR1_PITCH:
1390        case CB_COLOR2_PITCH:
1391        case CB_COLOR3_PITCH:
1392        case CB_COLOR4_PITCH:
1393        case CB_COLOR5_PITCH:
1394        case CB_COLOR6_PITCH:
1395        case CB_COLOR7_PITCH:
1396                tmp = (reg - CB_COLOR0_PITCH) / 0x3c;
1397                track->cb_color_pitch[tmp] = radeon_get_ib_value(p, idx);
1398                track->cb_dirty = true;
1399                break;
1400        case CB_COLOR8_PITCH:
1401        case CB_COLOR9_PITCH:
1402        case CB_COLOR10_PITCH:
1403        case CB_COLOR11_PITCH:
1404                tmp = ((reg - CB_COLOR8_PITCH) / 0x1c) + 8;
1405                track->cb_color_pitch[tmp] = radeon_get_ib_value(p, idx);
1406                track->cb_dirty = true;
1407                break;
1408        case CB_COLOR0_SLICE:
1409        case CB_COLOR1_SLICE:
1410        case CB_COLOR2_SLICE:
1411        case CB_COLOR3_SLICE:
1412        case CB_COLOR4_SLICE:
1413        case CB_COLOR5_SLICE:
1414        case CB_COLOR6_SLICE:
1415        case CB_COLOR7_SLICE:
1416                tmp = (reg - CB_COLOR0_SLICE) / 0x3c;
1417                track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx);
1418                track->cb_color_slice_idx[tmp] = idx;
1419                track->cb_dirty = true;
1420                break;
1421        case CB_COLOR8_SLICE:
1422        case CB_COLOR9_SLICE:
1423        case CB_COLOR10_SLICE:
1424        case CB_COLOR11_SLICE:
1425                tmp = ((reg - CB_COLOR8_SLICE) / 0x1c) + 8;
1426                track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx);
1427                track->cb_color_slice_idx[tmp] = idx;
1428                track->cb_dirty = true;
1429                break;
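        /*
         * For macro-tiled color buffers the ATTRIB registers must carry the
         * bank width/height, macro tile aspect and tile split matching the
         * BO's tiling flags, so patch them in here unless userspace asked
         * (via RADEON_CS_KEEP_TILING_FLAGS) to keep its values untouched.
         */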
1430        case CB_COLOR0_ATTRIB:
1431        case CB_COLOR1_ATTRIB:
1432        case CB_COLOR2_ATTRIB:
1433        case CB_COLOR3_ATTRIB:
1434        case CB_COLOR4_ATTRIB:
1435        case CB_COLOR5_ATTRIB:
1436        case CB_COLOR6_ATTRIB:
1437        case CB_COLOR7_ATTRIB:
1438                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1439                if (r) {
1440                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1441                                        "0x%04X\n", reg);
1442                        return -EINVAL;
1443                }
1444                if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1445                        if (reloc->tiling_flags & RADEON_TILING_MACRO) {
1446                                unsigned bankw, bankh, mtaspect, tile_split;
1447
1448                                evergreen_tiling_fields(reloc->tiling_flags,
1449                                                        &bankw, &bankh, &mtaspect,
1450                                                        &tile_split);
1451                                ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
1452                                ib[idx] |= CB_TILE_SPLIT(tile_split) |
1453                                           CB_BANK_WIDTH(bankw) |
1454                                           CB_BANK_HEIGHT(bankh) |
1455                                           CB_MACRO_TILE_ASPECT(mtaspect);
1456                        }
1457                }
1458                tmp = ((reg - CB_COLOR0_ATTRIB) / 0x3c);
1459                track->cb_color_attrib[tmp] = ib[idx];
1460                track->cb_dirty = true;
1461                break;
1462        case CB_COLOR8_ATTRIB:
1463        case CB_COLOR9_ATTRIB:
1464        case CB_COLOR10_ATTRIB:
1465        case CB_COLOR11_ATTRIB:
1466                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1467                if (r) {
1468                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1469                                        "0x%04X\n", reg);
1470                        return -EINVAL;
1471                }
1472                if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1473                        if (reloc->tiling_flags & RADEON_TILING_MACRO) {
1474                                unsigned bankw, bankh, mtaspect, tile_split;
1475
1476                                evergreen_tiling_fields(reloc->tiling_flags,
1477                                                        &bankw, &bankh, &mtaspect,
1478                                                        &tile_split);
1479                                ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
1480                                ib[idx] |= CB_TILE_SPLIT(tile_split) |
1481                                           CB_BANK_WIDTH(bankw) |
1482                                           CB_BANK_HEIGHT(bankh) |
1483                                           CB_MACRO_TILE_ASPECT(mtaspect);
1484                        }
1485                }
1486                tmp = ((reg - CB_COLOR8_ATTRIB) / 0x1c) + 8;
1487                track->cb_color_attrib[tmp] = ib[idx];
1488                track->cb_dirty = true;
1489                break;
1490        case CB_COLOR0_FMASK:
1491        case CB_COLOR1_FMASK:
1492        case CB_COLOR2_FMASK:
1493        case CB_COLOR3_FMASK:
1494        case CB_COLOR4_FMASK:
1495        case CB_COLOR5_FMASK:
1496        case CB_COLOR6_FMASK:
1497        case CB_COLOR7_FMASK:
1498                tmp = (reg - CB_COLOR0_FMASK) / 0x3c;
1499                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1500                if (r) {
1501                        dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
1502                        return -EINVAL;
1503                }
1504                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1505                track->cb_color_fmask_bo[tmp] = reloc->robj;
1506                break;
1507        case CB_COLOR0_CMASK:
1508        case CB_COLOR1_CMASK:
1509        case CB_COLOR2_CMASK:
1510        case CB_COLOR3_CMASK:
1511        case CB_COLOR4_CMASK:
1512        case CB_COLOR5_CMASK:
1513        case CB_COLOR6_CMASK:
1514        case CB_COLOR7_CMASK:
1515                tmp = (reg - CB_COLOR0_CMASK) / 0x3c;
1516                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1517                if (r) {
1518                        dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
1519                        return -EINVAL;
1520                }
1521                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1522                track->cb_color_cmask_bo[tmp] = reloc->robj;
1523                break;
1524        case CB_COLOR0_FMASK_SLICE:
1525        case CB_COLOR1_FMASK_SLICE:
1526        case CB_COLOR2_FMASK_SLICE:
1527        case CB_COLOR3_FMASK_SLICE:
1528        case CB_COLOR4_FMASK_SLICE:
1529        case CB_COLOR5_FMASK_SLICE:
1530        case CB_COLOR6_FMASK_SLICE:
1531        case CB_COLOR7_FMASK_SLICE:
1532                tmp = (reg - CB_COLOR0_FMASK_SLICE) / 0x3c;
1533                track->cb_color_fmask_slice[tmp] = radeon_get_ib_value(p, idx);
1534                break;
1535        case CB_COLOR0_CMASK_SLICE:
1536        case CB_COLOR1_CMASK_SLICE:
1537        case CB_COLOR2_CMASK_SLICE:
1538        case CB_COLOR3_CMASK_SLICE:
1539        case CB_COLOR4_CMASK_SLICE:
1540        case CB_COLOR5_CMASK_SLICE:
1541        case CB_COLOR6_CMASK_SLICE:
1542        case CB_COLOR7_CMASK_SLICE:
1543                tmp = (reg - CB_COLOR0_CMASK_SLICE) / 0x3c;
1544                track->cb_color_cmask_slice[tmp] = radeon_get_ib_value(p, idx);
1545                break;
1546        case CB_COLOR0_BASE:
1547        case CB_COLOR1_BASE:
1548        case CB_COLOR2_BASE:
1549        case CB_COLOR3_BASE:
1550        case CB_COLOR4_BASE:
1551        case CB_COLOR5_BASE:
1552        case CB_COLOR6_BASE:
1553        case CB_COLOR7_BASE:
1554                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1555                if (r) {
1556                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1557                                        "0x%04X\n", reg);
1558                        return -EINVAL;
1559                }
1560                tmp = (reg - CB_COLOR0_BASE) / 0x3c;
1561                track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx);
1562                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1563                track->cb_color_bo[tmp] = reloc->robj;
1564                track->cb_dirty = true;
1565                break;
1566        case CB_COLOR8_BASE:
1567        case CB_COLOR9_BASE:
1568        case CB_COLOR10_BASE:
1569        case CB_COLOR11_BASE:
1570                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1571                if (r) {
1572                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1573                                        "0x%04X\n", reg);
1574                        return -EINVAL;
1575                }
1576                tmp = ((reg - CB_COLOR8_BASE) / 0x1c) + 8;
1577                track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx);
1578                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1579                track->cb_color_bo[tmp] = reloc->robj;
1580                track->cb_dirty = true;
1581                break;
1582        case DB_HTILE_DATA_BASE:
1583                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1584                if (r) {
1585                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1586                                        "0x%04X\n", reg);
1587                        return -EINVAL;
1588                }
1589                track->htile_offset = radeon_get_ib_value(p, idx);
1590                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1591                track->htile_bo = reloc->robj;
1592                track->db_dirty = true;
1593                break;
1594        case DB_HTILE_SURFACE:
1595                /* 8x8 only */
1596                track->htile_surface = radeon_get_ib_value(p, idx);
1597                /* force 8x8 htile width and height */
1598                ib[idx] |= 3;
1599                track->db_dirty = true;
1600                break;
1601        case CB_IMMED0_BASE:
1602        case CB_IMMED1_BASE:
1603        case CB_IMMED2_BASE:
1604        case CB_IMMED3_BASE:
1605        case CB_IMMED4_BASE:
1606        case CB_IMMED5_BASE:
1607        case CB_IMMED6_BASE:
1608        case CB_IMMED7_BASE:
1609        case CB_IMMED8_BASE:
1610        case CB_IMMED9_BASE:
1611        case CB_IMMED10_BASE:
1612        case CB_IMMED11_BASE:
1613        case SQ_PGM_START_FS:
1614        case SQ_PGM_START_ES:
1615        case SQ_PGM_START_VS:
1616        case SQ_PGM_START_GS:
1617        case SQ_PGM_START_PS:
1618        case SQ_PGM_START_HS:
1619        case SQ_PGM_START_LS:
1620        case SQ_CONST_MEM_BASE:
1621        case SQ_ALU_CONST_CACHE_GS_0:
1622        case SQ_ALU_CONST_CACHE_GS_1:
1623        case SQ_ALU_CONST_CACHE_GS_2:
1624        case SQ_ALU_CONST_CACHE_GS_3:
1625        case SQ_ALU_CONST_CACHE_GS_4:
1626        case SQ_ALU_CONST_CACHE_GS_5:
1627        case SQ_ALU_CONST_CACHE_GS_6:
1628        case SQ_ALU_CONST_CACHE_GS_7:
1629        case SQ_ALU_CONST_CACHE_GS_8:
1630        case SQ_ALU_CONST_CACHE_GS_9:
1631        case SQ_ALU_CONST_CACHE_GS_10:
1632        case SQ_ALU_CONST_CACHE_GS_11:
1633        case SQ_ALU_CONST_CACHE_GS_12:
1634        case SQ_ALU_CONST_CACHE_GS_13:
1635        case SQ_ALU_CONST_CACHE_GS_14:
1636        case SQ_ALU_CONST_CACHE_GS_15:
1637        case SQ_ALU_CONST_CACHE_PS_0:
1638        case SQ_ALU_CONST_CACHE_PS_1:
1639        case SQ_ALU_CONST_CACHE_PS_2:
1640        case SQ_ALU_CONST_CACHE_PS_3:
1641        case SQ_ALU_CONST_CACHE_PS_4:
1642        case SQ_ALU_CONST_CACHE_PS_5:
1643        case SQ_ALU_CONST_CACHE_PS_6:
1644        case SQ_ALU_CONST_CACHE_PS_7:
1645        case SQ_ALU_CONST_CACHE_PS_8:
1646        case SQ_ALU_CONST_CACHE_PS_9:
1647        case SQ_ALU_CONST_CACHE_PS_10:
1648        case SQ_ALU_CONST_CACHE_PS_11:
1649        case SQ_ALU_CONST_CACHE_PS_12:
1650        case SQ_ALU_CONST_CACHE_PS_13:
1651        case SQ_ALU_CONST_CACHE_PS_14:
1652        case SQ_ALU_CONST_CACHE_PS_15:
1653        case SQ_ALU_CONST_CACHE_VS_0:
1654        case SQ_ALU_CONST_CACHE_VS_1:
1655        case SQ_ALU_CONST_CACHE_VS_2:
1656        case SQ_ALU_CONST_CACHE_VS_3:
1657        case SQ_ALU_CONST_CACHE_VS_4:
1658        case SQ_ALU_CONST_CACHE_VS_5:
1659        case SQ_ALU_CONST_CACHE_VS_6:
1660        case SQ_ALU_CONST_CACHE_VS_7:
1661        case SQ_ALU_CONST_CACHE_VS_8:
1662        case SQ_ALU_CONST_CACHE_VS_9:
1663        case SQ_ALU_CONST_CACHE_VS_10:
1664        case SQ_ALU_CONST_CACHE_VS_11:
1665        case SQ_ALU_CONST_CACHE_VS_12:
1666        case SQ_ALU_CONST_CACHE_VS_13:
1667        case SQ_ALU_CONST_CACHE_VS_14:
1668        case SQ_ALU_CONST_CACHE_VS_15:
1669        case SQ_ALU_CONST_CACHE_HS_0:
1670        case SQ_ALU_CONST_CACHE_HS_1:
1671        case SQ_ALU_CONST_CACHE_HS_2:
1672        case SQ_ALU_CONST_CACHE_HS_3:
1673        case SQ_ALU_CONST_CACHE_HS_4:
1674        case SQ_ALU_CONST_CACHE_HS_5:
1675        case SQ_ALU_CONST_CACHE_HS_6:
1676        case SQ_ALU_CONST_CACHE_HS_7:
1677        case SQ_ALU_CONST_CACHE_HS_8:
1678        case SQ_ALU_CONST_CACHE_HS_9:
1679        case SQ_ALU_CONST_CACHE_HS_10:
1680        case SQ_ALU_CONST_CACHE_HS_11:
1681        case SQ_ALU_CONST_CACHE_HS_12:
1682        case SQ_ALU_CONST_CACHE_HS_13:
1683        case SQ_ALU_CONST_CACHE_HS_14:
1684        case SQ_ALU_CONST_CACHE_HS_15:
1685        case SQ_ALU_CONST_CACHE_LS_0:
1686        case SQ_ALU_CONST_CACHE_LS_1:
1687        case SQ_ALU_CONST_CACHE_LS_2:
1688        case SQ_ALU_CONST_CACHE_LS_3:
1689        case SQ_ALU_CONST_CACHE_LS_4:
1690        case SQ_ALU_CONST_CACHE_LS_5:
1691        case SQ_ALU_CONST_CACHE_LS_6:
1692        case SQ_ALU_CONST_CACHE_LS_7:
1693        case SQ_ALU_CONST_CACHE_LS_8:
1694        case SQ_ALU_CONST_CACHE_LS_9:
1695        case SQ_ALU_CONST_CACHE_LS_10:
1696        case SQ_ALU_CONST_CACHE_LS_11:
1697        case SQ_ALU_CONST_CACHE_LS_12:
1698        case SQ_ALU_CONST_CACHE_LS_13:
1699        case SQ_ALU_CONST_CACHE_LS_14:
1700        case SQ_ALU_CONST_CACHE_LS_15:
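                /* all of the above registers simply hold a buffer address in
                 * 256-byte units; applying the relocation is all the checking
                 * they need */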
1701                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1702                if (r) {
1703                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1704                                        "0x%04X\n", reg);
1705                        return -EINVAL;
1706                }
1707                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1708                break;
1709        case SX_MEMORY_EXPORT_BASE:
1710                if (p->rdev->family >= CHIP_CAYMAN) {
1711                        dev_warn(p->dev, "bad SET_CONFIG_REG "
1712                                 "0x%04X\n", reg);
1713                        return -EINVAL;
1714                }
1715                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1716                if (r) {
1717                        dev_warn(p->dev, "bad SET_CONFIG_REG "
1718                                        "0x%04X\n", reg);
1719                        return -EINVAL;
1720                }
1721                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1722                break;
1723        case CAYMAN_SX_SCATTER_EXPORT_BASE:
1724                if (p->rdev->family < CHIP_CAYMAN) {
1725                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1726                                 "0x%04X\n", reg);
1727                        return -EINVAL;
1728                }
1729                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1730                if (r) {
1731                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1732                                        "0x%04X\n", reg);
1733                        return -EINVAL;
1734                }
1735                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1736                break;
1737        case SX_MISC:
1738                track->sx_misc_kill_all_prims = (radeon_get_ib_value(p, idx) & 0x1) != 0;
1739                break;
1740        default:
1741                dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
1742                return -EINVAL;
1743        }
1744        return 0;
1745}
1746
1747/**
1748 * evergreen_is_safe_reg() - check if register is authorized or not
1749 * @p: parser structure holding parsing context
1750 * @reg: register we are testing
1751 *
1752 * This function tests the register against reg_safe_bm and returns true
1753 * if the register is safe (needs no special handling), false otherwise.
1754 */
1755static inline bool evergreen_is_safe_reg(struct radeon_cs_parser *p, u32 reg)
1756{
1757        struct evergreen_cs_track *track = p->track;
1758        u32 m, i;
1759
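        /*
         * reg_safe_bm keeps one bit per 4-byte register: bits 6:2 of the
         * register offset pick the bit within a 32-bit word, so each word
         * covers a 128-byte window (hence reg >> 7).  A cleared bit means
         * the register needs no special handling.  For example, reg 0x28350
         * maps to word 0x506, bit 20.
         */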
1760        i = (reg >> 7);
1761        if (unlikely(i >= REG_SAFE_BM_SIZE)) {
1762                return false;
1763        }
1764        m = 1 << ((reg >> 2) & 31);
1765        if (!(track->reg_safe_bm[i] & m))
1766                return true;
1767
1768        return false;
1769}
1770
1771static int evergreen_packet3_check(struct radeon_cs_parser *p,
1772                                   struct radeon_cs_packet *pkt)
1773{
1774        struct radeon_bo_list *reloc;
1775        struct evergreen_cs_track *track;
1776        uint32_t *ib;
1777        unsigned idx;
1778        unsigned i;
1779        unsigned start_reg, end_reg, reg;
1780        int r;
1781        u32 idx_value;
1782
1783        track = (struct evergreen_cs_track *)p->track;
1784        ib = p->ib.ptr;
1785        idx = pkt->idx + 1;
1786        idx_value = radeon_get_ib_value(p, idx);
1787
1788        switch (pkt->opcode) {
1789        case PACKET3_SET_PREDICATION:
1790        {
1791                int pred_op;
1792                int tmp;
1793                uint64_t offset;
1794
1795                if (pkt->count != 1) {
1796                        DRM_ERROR("bad SET PREDICATION\n");
1797                        return -EINVAL;
1798                }
1799
1800                tmp = radeon_get_ib_value(p, idx + 1);
1801                pred_op = (tmp >> 16) & 0x7;
1802
1803                /* for the clear predicate operation */
1804                if (pred_op == 0)
1805                        return 0;
1806
1807                if (pred_op > 2) {
1808                        DRM_ERROR("bad SET PREDICATION operation %d\n", pred_op);
1809                        return -EINVAL;
1810                }
1811
1812                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1813                if (r) {
1814                        DRM_ERROR("bad SET PREDICATION\n");
1815                        return -EINVAL;
1816                }
1817
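                /* the packet carries a 16-byte-aligned 40-bit GPU address:
                 * the first body dword holds the low bits, the low byte of
                 * the second the upper 8 bits */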
1818                offset = reloc->gpu_offset +
1819                         (idx_value & 0xfffffff0) +
1820                         ((u64)(tmp & 0xff) << 32);
1821
1822                ib[idx + 0] = offset;
1823                ib[idx + 1] = (tmp & 0xffffff00) | (upper_32_bits(offset) & 0xff);
1824        }
1825        break;
1826        case PACKET3_CONTEXT_CONTROL:
1827                if (pkt->count != 1) {
1828                        DRM_ERROR("bad CONTEXT_CONTROL\n");
1829                        return -EINVAL;
1830                }
1831                break;
1832        case PACKET3_INDEX_TYPE:
1833        case PACKET3_NUM_INSTANCES:
1834        case PACKET3_CLEAR_STATE:
1835                if (pkt->count) {
1836                        DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n");
1837                        return -EINVAL;
1838                }
1839                break;
1840        case CAYMAN_PACKET3_DEALLOC_STATE:
1841                if (p->rdev->family < CHIP_CAYMAN) {
1842                        DRM_ERROR("bad PACKET3_DEALLOC_STATE\n");
1843                        return -EINVAL;
1844                }
1845                if (pkt->count) {
1846                        DRM_ERROR("bad PACKET3_DEALLOC_STATE\n");
1847                        return -EINVAL;
1848                }
1849                break;
1850        case PACKET3_INDEX_BASE:
1851        {
1852                uint64_t offset;
1853
1854                if (pkt->count != 1) {
1855                        DRM_ERROR("bad INDEX_BASE\n");
1856                        return -EINVAL;
1857                }
1858                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1859                if (r) {
1860                        DRM_ERROR("bad INDEX_BASE\n");
1861                        return -EINVAL;
1862                }
1863
1864                offset = reloc->gpu_offset +
1865                         idx_value +
1866                         ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
1867
1868                ib[idx+0] = offset;
1869                ib[idx+1] = upper_32_bits(offset) & 0xff;
1870
1871                r = evergreen_cs_track_check(p);
1872                if (r) {
1873                        dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1874                        return r;
1875                }
1876                break;
1877        }
1878        case PACKET3_INDEX_BUFFER_SIZE:
1879        {
1880                if (pkt->count != 0) {
1881                        DRM_ERROR("bad INDEX_BUFFER_SIZE\n");
1882                        return -EINVAL;
1883                }
1884                break;
1885        }
1886        case PACKET3_DRAW_INDEX:
1887        {
1888                uint64_t offset;
1889                if (pkt->count != 3) {
1890                        DRM_ERROR("bad DRAW_INDEX\n");
1891                        return -EINVAL;
1892                }
1893                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1894                if (r) {
1895                        DRM_ERROR("bad DRAW_INDEX\n");
1896                        return -EINVAL;
1897                }
1898
1899                offset = reloc->gpu_offset +
1900                         idx_value +
1901                         ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
1902
1903                ib[idx+0] = offset;
1904                ib[idx+1] = upper_32_bits(offset) & 0xff;
1905
1906                r = evergreen_cs_track_check(p);
1907                if (r) {
1908                        dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1909                        return r;
1910                }
1911                break;
1912        }
1913        case PACKET3_DRAW_INDEX_2:
1914        {
1915                uint64_t offset;
1916
1917                if (pkt->count != 4) {
1918                        DRM_ERROR("bad DRAW_INDEX_2\n");
1919                        return -EINVAL;
1920                }
1921                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1922                if (r) {
1923                        DRM_ERROR("bad DRAW_INDEX_2\n");
1924                        return -EINVAL;
1925                }
1926
1927                offset = reloc->gpu_offset +
1928                         radeon_get_ib_value(p, idx+1) +
1929                         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
1930
1931                ib[idx+1] = offset;
1932                ib[idx+2] = upper_32_bits(offset) & 0xff;
1933
1934                r = evergreen_cs_track_check(p);
1935                if (r) {
1936                        dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1937                        return r;
1938                }
1939                break;
1940        }
1941        case PACKET3_DRAW_INDEX_AUTO:
1942                if (pkt->count != 1) {
1943                        DRM_ERROR("bad DRAW_INDEX_AUTO\n");
1944                        return -EINVAL;
1945                }
1946                r = evergreen_cs_track_check(p);
1947                if (r) {
1948                        dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
1949                        return r;
1950                }
1951                break;
1952        case PACKET3_DRAW_INDEX_MULTI_AUTO:
1953                if (pkt->count != 2) {
1954                        DRM_ERROR("bad DRAW_INDEX_MULTI_AUTO\n");
1955                        return -EINVAL;
1956                }
1957                r = evergreen_cs_track_check(p);
1958                if (r) {
1959                        dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
1960                        return r;
1961                }
1962                break;
1963        case PACKET3_DRAW_INDEX_IMMD:
1964                if (pkt->count < 2) {
1965                        DRM_ERROR("bad DRAW_INDEX_IMMD\n");
1966                        return -EINVAL;
1967                }
1968                r = evergreen_cs_track_check(p);
1969                if (r) {
1970                        dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1971                        return r;
1972                }
1973                break;
1974        case PACKET3_DRAW_INDEX_OFFSET:
1975                if (pkt->count != 2) {
1976                        DRM_ERROR("bad DRAW_INDEX_OFFSET\n");
1977                        return -EINVAL;
1978                }
1979                r = evergreen_cs_track_check(p);
1980                if (r) {
1981                        dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1982                        return r;
1983                }
1984                break;
1985        case PACKET3_DRAW_INDEX_OFFSET_2:
1986                if (pkt->count != 3) {
1987                        DRM_ERROR("bad DRAW_INDEX_OFFSET_2\n");
1988                        return -EINVAL;
1989                }
1990                r = evergreen_cs_track_check(p);
1991                if (r) {
1992                        dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1993                        return r;
1994                }
1995                break;
1996        case PACKET3_SET_BASE:
1997        {
1998                /*
1999                 * DW 1 HEADER      Header of the packet. Shader_Type in bit 1 of the header gives the shader type of the load; see Type-3 packet.
2000                 * DW 2 BASE_INDEX  Bits [3:0] - selects which base address is specified in the last two DWs.
2001                 *                  0001: DX11 Draw_Index_Indirect patch table base: base address for Draw_Index_Indirect data.
2002                 * DW 3 ADDRESS_LO  Bits [31:3] - lower bits of QWORD-aligned address. Bits [2:0] - reserved.
2003                 * DW 4 ADDRESS_HI  Bits [31:8] - reserved. Bits [7:0] - upper bits of address [47:32].
2004                 */
2005                if (pkt->count != 2) {
2006                        DRM_ERROR("bad SET_BASE\n");
2007                        return -EINVAL;
2008                }
2009
2010                /* only setting the indirect draw buffer base address is currently supported */
2011                if (idx_value != 1) {
2012                        DRM_ERROR("bad SET_BASE\n");
2013                        return -EINVAL;
2014                }
2015
2016                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2017                if (r) {
2018                        DRM_ERROR("bad SET_BASE\n");
2019                        return -EINVAL;
2020                }
2021
2022                track->indirect_draw_buffer_size = radeon_bo_size(reloc->robj);
2023
2024                ib[idx+1] = reloc->gpu_offset;
2025                ib[idx+2] = upper_32_bits(reloc->gpu_offset) & 0xff;
2026
2027                break;
2028        }
2029        case PACKET3_DRAW_INDIRECT:
2030        case PACKET3_DRAW_INDEX_INDIRECT:
2031        {
2032                u64 size = pkt->opcode == PACKET3_DRAW_INDIRECT ? 16 : 20;
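                /* the indirect draw structure is 16 bytes, plus one extra
                 * dword (20 bytes) for the indexed variant; the bounds check
                 * below guards exactly that many bytes */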
2033
2034                /*
2035                 * DW 1 HEADER
2036                 * DW 2 DATA_OFFSET     Bits [31:0] - byte-aligned offset where the required data structure starts. Bits [1:0] are zero.
2037                 * DW 3 DRAW_INITIATOR  Draw initiator register; written to the VGT_DRAW_INITIATOR register for the assigned context.
2038                 */
2039                if (pkt->count != 1) {
2040                        DRM_ERROR("bad DRAW_INDIRECT\n");
2041                        return -EINVAL;
2042                }
2043
2044                if (idx_value + size > track->indirect_draw_buffer_size) {
2045                        dev_warn(p->dev, "DRAW_INDIRECT buffer too small %u + %llu > %lu\n",
2046                                idx_value, size, track->indirect_draw_buffer_size);
2047                        return -EINVAL;
2048                }
2049
2050                r = evergreen_cs_track_check(p);
2051                if (r) {
2052                        dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2053                        return r;
2054                }
2055                break;
2056        }
2057        case PACKET3_DISPATCH_DIRECT:
2058                if (pkt->count != 3) {
2059                        DRM_ERROR("bad DISPATCH_DIRECT\n");
2060                        return -EINVAL;
2061                }
2062                r = evergreen_cs_track_check(p);
2063                if (r) {
2064                        dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
2065                        return r;
2066                }
2067                break;
2068        case PACKET3_DISPATCH_INDIRECT:
2069                if (pkt->count != 1) {
2070                        DRM_ERROR("bad DISPATCH_INDIRECT\n");
2071                        return -EINVAL;
2072                }
2073                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2074                if (r) {
2075                        DRM_ERROR("bad DISPATCH_INDIRECT\n");
2076                        return -EINVAL;
2077                }
2078                ib[idx+0] = idx_value + (u32)(reloc->gpu_offset & 0xffffffff);
2079                r = evergreen_cs_track_check(p);
2080                if (r) {
2081                        dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2082                        return r;
2083                }
2084                break;
2085        case PACKET3_WAIT_REG_MEM:
2086                if (pkt->count != 5) {
2087                        DRM_ERROR("bad WAIT_REG_MEM\n");
2088                        return -EINVAL;
2089                }
2090                /* bit 4 is reg (0) or mem (1) */
2091                if (idx_value & 0x10) {
2092                        uint64_t offset;
2093
2094                        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2095                        if (r) {
2096                                DRM_ERROR("bad WAIT_REG_MEM\n");
2097                                return -EINVAL;
2098                        }
2099
2100                        offset = reloc->gpu_offset +
2101                                 (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2102                                 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2103
2104                        ib[idx+1] = (ib[idx+1] & 0x3) | (offset & 0xfffffffc);
2105                        ib[idx+2] = upper_32_bits(offset) & 0xff;
2106                } else if (idx_value & 0x100) {
2107                        DRM_ERROR("cannot use PFP on REG wait\n");
2108                        return -EINVAL;
2109                }
2110                break;
2111        case PACKET3_CP_DMA:
2112        {
2113                u32 command, size, info;
2114                u64 offset, tmp;
2115                if (pkt->count != 4) {
2116                        DRM_ERROR("bad CP DMA\n");
2117                        return -EINVAL;
2118                }
2119                command = radeon_get_ib_value(p, idx+4);
2120                size = command & 0x1fffff;
2121                info = radeon_get_ib_value(p, idx+1);
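                /* info bits 30:29 select the source address space and bits
                 * 21:20 the destination: 0 = memory, 1 = GDS, 2 = DATA
                 * (source only); see the masks tested below */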
2122                if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
2123                    (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
2124                    ((((info & 0x00300000) >> 20) == 0) &&
2125                     (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
2126                    ((((info & 0x60000000) >> 29) == 0) &&
2127                     (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
2128                        /* non mem-to-mem copies require a dword-aligned count */
2129                        if (size % 4) {
2130                                DRM_ERROR("CP DMA command requires dw count alignment\n");
2131                                return -EINVAL;
2132                        }
2133                }
2134                if (command & PACKET3_CP_DMA_CMD_SAS) {
2135                        /* src address space is register */
2136                        /* GDS is ok */
2137                        if (((info & 0x60000000) >> 29) != 1) {
2138                                DRM_ERROR("CP DMA SAS not supported\n");
2139                                return -EINVAL;
2140                        }
2141                } else {
2142                        if (command & PACKET3_CP_DMA_CMD_SAIC) {
2143                                DRM_ERROR("CP DMA SAIC only supported for registers\n");
2144                                return -EINVAL;
2145                        }
2146                        /* src address space is memory */
2147                        if (((info & 0x60000000) >> 29) == 0) {
2148                                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2149                                if (r) {
2150                                        DRM_ERROR("bad CP DMA SRC\n");
2151                                        return -EINVAL;
2152                                }
2153
2154                                tmp = radeon_get_ib_value(p, idx) +
2155                                        ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
2156
2157                                offset = reloc->gpu_offset + tmp;
2158
2159                                if ((tmp + size) > radeon_bo_size(reloc->robj)) {
2160                                        dev_warn(p->dev, "CP DMA src buffer too small (%llu %lu)\n",
2161                                                 tmp + size, radeon_bo_size(reloc->robj));
2162                                        return -EINVAL;
2163                                }
2164
2165                                ib[idx] = offset;
2166                                ib[idx+1] = (ib[idx+1] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2167                        } else if (((info & 0x60000000) >> 29) != 2) {
2168                                DRM_ERROR("bad CP DMA SRC_SEL\n");
2169                                return -EINVAL;
2170                        }
2171                }
2172                if (command & PACKET3_CP_DMA_CMD_DAS) {
2173                        /* dst address space is register */
2174                        /* GDS is ok */
2175                        if (((info & 0x00300000) >> 20) != 1) {
2176                                DRM_ERROR("CP DMA DAS not supported\n");
2177                                return -EINVAL;
2178                        }
2179                } else {
2180                        /* dst address space is memory */
2181                        if (command & PACKET3_CP_DMA_CMD_DAIC) {
2182                                DRM_ERROR("CP DMA DAIC only supported for registers\n");
2183                                return -EINVAL;
2184                        }
2185                        if (((info & 0x00300000) >> 20) == 0) {
2186                                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2187                                if (r) {
2188                                        DRM_ERROR("bad CP DMA DST\n");
2189                                        return -EINVAL;
2190                                }
2191
2192                                tmp = radeon_get_ib_value(p, idx+2) +
2193                                        ((u64)(radeon_get_ib_value(p, idx+3) & 0xff) << 32);
2194
2195                                offset = reloc->gpu_offset + tmp;
2196
2197                                if ((tmp + size) > radeon_bo_size(reloc->robj)) {
2198                                        dev_warn(p->dev, "CP DMA dst buffer too small (%llu %lu)\n",
2199                                                 tmp + size, radeon_bo_size(reloc->robj));
2200                                        return -EINVAL;
2201                                }
2202
2203                                ib[idx+2] = offset;
2204                                ib[idx+3] = upper_32_bits(offset) & 0xff;
2205                        } else {
2206                                DRM_ERROR("bad CP DMA DST_SEL\n");
2207                                return -EINVAL;
2208                        }
2209                }
2210                break;
2211        }
2212        case PACKET3_SURFACE_SYNC:
2213                if (pkt->count != 3) {
2214                        DRM_ERROR("bad SURFACE_SYNC\n");
2215                        return -EINVAL;
2216                }
2217                /* a SIZE/BASE pair of 0xffffffff/0x0 means "flush all caches"; anything else references a buffer */
2218                if (radeon_get_ib_value(p, idx + 1) != 0xffffffff ||
2219                    radeon_get_ib_value(p, idx + 2) != 0) {
2220                        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2221                        if (r) {
2222                                DRM_ERROR("bad SURFACE_SYNC\n");
2223                                return -EINVAL;
2224                        }
2225                        ib[idx+2] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
2226                }
2227                break;
2228        case PACKET3_EVENT_WRITE:
2229                if (pkt->count != 2 && pkt->count != 0) {
2230                        DRM_ERROR("bad EVENT_WRITE\n");
2231                        return -EINVAL;
2232                }
2233                if (pkt->count) {
2234                        uint64_t offset;
2235
2236                        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2237                        if (r) {
2238                                DRM_ERROR("bad EVENT_WRITE\n");
2239                                return -EINVAL;
2240                        }
2241                        offset = reloc->gpu_offset +
2242                                 (radeon_get_ib_value(p, idx+1) & 0xfffffff8) +
2243                                 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2244
2245                        ib[idx+1] = offset & 0xfffffff8;
2246                        ib[idx+2] = upper_32_bits(offset) & 0xff;
2247                }
2248                break;
2249        case PACKET3_EVENT_WRITE_EOP:
2250        {
2251                uint64_t offset;
2252
2253                if (pkt->count != 4) {
2254                        DRM_ERROR("bad EVENT_WRITE_EOP\n");
2255                        return -EINVAL;
2256                }
2257                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2258                if (r) {
2259                        DRM_ERROR("bad EVENT_WRITE_EOP\n");
2260                        return -EINVAL;
2261                }
2262
2263                offset = reloc->gpu_offset +
2264                         (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2265                         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2266
2267                ib[idx+1] = offset & 0xfffffffc;
2268                ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2269                break;
2270        }
2271        case PACKET3_EVENT_WRITE_EOS:
2272        {
2273                uint64_t offset;
2274
2275                if (pkt->count != 3) {
2276                        DRM_ERROR("bad EVENT_WRITE_EOS\n");
2277                        return -EINVAL;
2278                }
2279                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2280                if (r) {
2281                        DRM_ERROR("bad EVENT_WRITE_EOS\n");
2282                        return -EINVAL;
2283                }
2284
2285                offset = reloc->gpu_offset +
2286                         (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2287                         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2288
2289                ib[idx+1] = offset & 0xfffffffc;
2290                ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2291                break;
2292        }
2293        case PACKET3_SET_CONFIG_REG:
2294                start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
2295                end_reg = 4 * pkt->count + start_reg - 4;
2296                if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
2297                    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
2298                    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
2299                        DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
2300                        return -EINVAL;
2301                }
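                /* walk every register in the packet: registers marked safe in
                 * reg_safe_bm pass through untouched, the rest go to
                 * evergreen_cs_handle_reg() for relocation/validation */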
2302                for (reg = start_reg, idx++; reg <= end_reg; reg += 4, idx++) {
2303                        if (evergreen_is_safe_reg(p, reg))
2304                                continue;
2305                        r = evergreen_cs_handle_reg(p, reg, idx);
2306                        if (r)
2307                                return r;
2308                }
2309                break;
2310        case PACKET3_SET_CONTEXT_REG:
2311                start_reg = (idx_value << 2) + PACKET3_SET_CONTEXT_REG_START;
2312                end_reg = 4 * pkt->count + start_reg - 4;
2313                if ((start_reg < PACKET3_SET_CONTEXT_REG_START) ||
2314                    (start_reg >= PACKET3_SET_CONTEXT_REG_END) ||
2315                    (end_reg >= PACKET3_SET_CONTEXT_REG_END)) {
2316                        DRM_ERROR("bad PACKET3_SET_CONTEXT_REG\n");
2317                        return -EINVAL;
2318                }
2319                for (reg = start_reg, idx++; reg <= end_reg; reg += 4, idx++) {
2320                        if (evergreen_is_safe_reg(p, reg))
2321                                continue;
2322                        r = evergreen_cs_handle_reg(p, reg, idx);
2323                        if (r)
2324                                return r;
2325                }
2326                break;
2327        case PACKET3_SET_RESOURCE:
2328                if (pkt->count % 8) {
2329                        DRM_ERROR("bad SET_RESOURCE\n");
2330                        return -EINVAL;
2331                }
2332                start_reg = (idx_value << 2) + PACKET3_SET_RESOURCE_START;
2333                end_reg = 4 * pkt->count + start_reg - 4;
2334                if ((start_reg < PACKET3_SET_RESOURCE_START) ||
2335                    (start_reg >= PACKET3_SET_RESOURCE_END) ||
2336                    (end_reg >= PACKET3_SET_RESOURCE_END)) {
2337                        DRM_ERROR("bad SET_RESOURCE\n");
2338                        return -EINVAL;
2339                }
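                /* each resource descriptor is 8 dwords; dword 7 carries the
                 * SQ constant type that tells textures from vertex buffers */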
2340                for (i = 0; i < (pkt->count / 8); i++) {
2341                        struct radeon_bo *texture, *mipmap;
2342                        u32 toffset, moffset;
2343                        u32 size, offset, mip_address, tex_dim;
2344
2345                        switch (G__SQ_CONSTANT_TYPE(radeon_get_ib_value(p, idx+1+(i*8)+7))) {
2346                        case SQ_TEX_VTX_VALID_TEXTURE:
2347                                /* tex base */
2348                                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2349                                if (r) {
2350                                        DRM_ERROR("bad SET_RESOURCE (tex)\n");
2351                                        return -EINVAL;
2352                                }
2353                                if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
2354                                        ib[idx+1+(i*8)+1] |=
2355                                                TEX_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
2356                                        if (reloc->tiling_flags & RADEON_TILING_MACRO) {
2357                                                unsigned bankw, bankh, mtaspect, tile_split;
2358
2359                                                evergreen_tiling_fields(reloc->tiling_flags,
2360                                                                        &bankw, &bankh, &mtaspect,
2361                                                                        &tile_split);
2362                                                ib[idx+1+(i*8)+6] |= TEX_TILE_SPLIT(tile_split);
2363                                                ib[idx+1+(i*8)+7] |=
2364                                                        TEX_BANK_WIDTH(bankw) |
2365                                                        TEX_BANK_HEIGHT(bankh) |
2366                                                        MACRO_TILE_ASPECT(mtaspect) |
2367                                                        TEX_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
2368                                        }
2369                                }
2370                                texture = reloc->robj;
2371                                toffset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
2372
2373                                /* tex mip base */
2374                                tex_dim = ib[idx+1+(i*8)+0] & 0x7;
2375                                mip_address = ib[idx+1+(i*8)+3];
2376
2377                                if ((tex_dim == SQ_TEX_DIM_2D_MSAA || tex_dim == SQ_TEX_DIM_2D_ARRAY_MSAA) &&
2378                                    !mip_address &&
2379                                    !radeon_cs_packet_next_is_pkt3_nop(p)) {
2380                                        /* MIP_ADDRESS should point to FMASK for an MSAA texture.
2381                                         * It should be 0 if FMASK is disabled. */
2382                                        moffset = 0;
2383                                        mipmap = NULL;
2384                                } else {
2385                                        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2386                                        if (r) {
2387                                                DRM_ERROR("bad SET_RESOURCE (tex)\n");
2388                                                return -EINVAL;
2389                                        }
2390                                        moffset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
2391                                        mipmap = reloc->robj;
2392                                }
2393
2394                                r = evergreen_cs_track_validate_texture(p, texture, mipmap, idx+1+(i*8));
2395                                if (r)
2396                                        return r;
2397                                ib[idx+1+(i*8)+2] += toffset;
2398                                ib[idx+1+(i*8)+3] += moffset;
2399                                break;
2400                        case SQ_TEX_VTX_VALID_BUFFER:
2401                        {
2402                                uint64_t offset64;
2403                                /* vtx base */
2404                                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2405                                if (r) {
2406                                        DRM_ERROR("bad SET_RESOURCE (vtx)\n");
2407                                        return -EINVAL;
2408                                }
2409                                offset = radeon_get_ib_value(p, idx+1+(i*8)+0);
2410                                size = radeon_get_ib_value(p, idx+1+(i*8)+1);
2411                                if (p->rdev && (size + offset) > radeon_bo_size(reloc->robj)) {
2412                                        /* force size to size of the buffer */
2413                                        dev_warn(p->dev, "vbo resource seems too big for the bo\n");
2414                                        ib[idx+1+(i*8)+1] = radeon_bo_size(reloc->robj) - offset;
2415                                }
2416
2417                                offset64 = reloc->gpu_offset + offset;
2418                                ib[idx+1+(i*8)+0] = offset64;
2419                                ib[idx+1+(i*8)+2] = (ib[idx+1+(i*8)+2] & 0xffffff00) |
2420                                                    (upper_32_bits(offset64) & 0xff);
2421                                break;
2422                        }
2423                        case SQ_TEX_VTX_INVALID_TEXTURE:
2424                        case SQ_TEX_VTX_INVALID_BUFFER:
2425                        default:
2426                                DRM_ERROR("bad SET_RESOURCE\n");
2427                                return -EINVAL;
2428                        }
2429                }
2430                break;
2431        case PACKET3_SET_ALU_CONST:
2432                /* XXX fix me ALU const buffers only */
2433                break;
2434        case PACKET3_SET_BOOL_CONST:
2435                start_reg = (idx_value << 2) + PACKET3_SET_BOOL_CONST_START;
2436                end_reg = 4 * pkt->count + start_reg - 4;
2437                if ((start_reg < PACKET3_SET_BOOL_CONST_START) ||
2438                    (start_reg >= PACKET3_SET_BOOL_CONST_END) ||
2439                    (end_reg >= PACKET3_SET_BOOL_CONST_END)) {
2440                        DRM_ERROR("bad SET_BOOL_CONST\n");
2441                        return -EINVAL;
2442                }
2443                break;
2444        case PACKET3_SET_LOOP_CONST:
2445                start_reg = (idx_value << 2) + PACKET3_SET_LOOP_CONST_START;
2446                end_reg = 4 * pkt->count + start_reg - 4;
2447                if ((start_reg < PACKET3_SET_LOOP_CONST_START) ||
2448                    (start_reg >= PACKET3_SET_LOOP_CONST_END) ||
2449                    (end_reg >= PACKET3_SET_LOOP_CONST_END)) {
2450                        DRM_ERROR("bad SET_LOOP_CONST\n");
2451                        return -EINVAL;
2452                }
2453                break;
2454        case PACKET3_SET_CTL_CONST:
2455                start_reg = (idx_value << 2) + PACKET3_SET_CTL_CONST_START;
2456                end_reg = 4 * pkt->count + start_reg - 4;
2457                if ((start_reg < PACKET3_SET_CTL_CONST_START) ||
2458                    (start_reg >= PACKET3_SET_CTL_CONST_END) ||
2459                    (end_reg >= PACKET3_SET_CTL_CONST_END)) {
2460                        DRM_ERROR("bad SET_CTL_CONST\n");
2461                        return -EINVAL;
2462                }
2463                break;
2464        case PACKET3_SET_SAMPLER:
2465                if (pkt->count % 3) {
2466                        DRM_ERROR("bad SET_SAMPLER\n");
2467                        return -EINVAL;
2468                }
2469                start_reg = (idx_value << 2) + PACKET3_SET_SAMPLER_START;
2470                end_reg = 4 * pkt->count + start_reg - 4;
2471                if ((start_reg < PACKET3_SET_SAMPLER_START) ||
2472                    (start_reg >= PACKET3_SET_SAMPLER_END) ||
2473                    (end_reg >= PACKET3_SET_SAMPLER_END)) {
2474                        DRM_ERROR("bad SET_SAMPLER\n");
2475                        return -EINVAL;
2476                }
2477                break;
2478        case PACKET3_STRMOUT_BUFFER_UPDATE:
2479                if (pkt->count != 4) {
2480                        DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (invalid count)\n");
2481                        return -EINVAL;
2482                }
2483                /* Updating memory at DST_ADDRESS. */
2484                if (idx_value & 0x1) {
2485                        u64 offset;
2486                        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2487                        if (r) {
2488                                DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing dst reloc)\n");
2489                                return -EINVAL;
2490                        }
2491                        offset = radeon_get_ib_value(p, idx+1);
2492                        offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2493                        if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2494                                DRM_ERROR("bad STRMOUT_BUFFER_UPDATE dst bo too small: 0x%llx, 0x%lx\n",
2495                                          offset + 4, radeon_bo_size(reloc->robj));
2496                                return -EINVAL;
2497                        }
2498                        offset += reloc->gpu_offset;
2499                        ib[idx+1] = offset;
2500                        ib[idx+2] = upper_32_bits(offset) & 0xff;
2501                }
2502                /* Reading data from SRC_ADDRESS. */
2503                if (((idx_value >> 1) & 0x3) == 2) {
2504                        u64 offset;
2505                        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2506                        if (r) {
2507                                DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing src reloc)\n");
2508                                return -EINVAL;
2509                        }
2510                        offset = radeon_get_ib_value(p, idx+3);
2511                        offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2512                        if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2513                                DRM_ERROR("bad STRMOUT_BUFFER_UPDATE src bo too small: 0x%llx, 0x%lx\n",
2514                                          offset + 4, radeon_bo_size(reloc->robj));
2515                                return -EINVAL;
2516                        }
2517                        offset += reloc->gpu_offset;
2518                        ib[idx+3] = offset;
2519                        ib[idx+4] = upper_32_bits(offset) & 0xff;
2520                }
2521                break;
2522        case PACKET3_MEM_WRITE:
2523        {
2524                u64 offset;
2525
2526                if (pkt->count != 3) {
2527                        DRM_ERROR("bad MEM_WRITE (invalid count)\n");
2528                        return -EINVAL;
2529                }
2530                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2531                if (r) {
2532                        DRM_ERROR("bad MEM_WRITE (missing reloc)\n");
2533                        return -EINVAL;
2534                }
2535                offset = radeon_get_ib_value(p, idx+0);
2536                offset += ((u64)(radeon_get_ib_value(p, idx+1) & 0xff)) << 32UL;
2537                if (offset & 0x7) {
2538                        DRM_ERROR("bad MEM_WRITE (address not qwords aligned)\n");
2539                        return -EINVAL;
2540                }
2541                if ((offset + 8) > radeon_bo_size(reloc->robj)) {
2542                        DRM_ERROR("bad MEM_WRITE bo too small: 0x%llx, 0x%lx\n",
2543                                  offset + 8, radeon_bo_size(reloc->robj));
2544                        return -EINVAL;
2545                }
2546                offset += reloc->gpu_offset;
2547                ib[idx+0] = offset;
2548                ib[idx+1] = upper_32_bits(offset) & 0xff;
2549                break;
2550        }
2551        case PACKET3_COPY_DW:
2552                if (pkt->count != 4) {
2553                        DRM_ERROR("bad COPY_DW (invalid count)\n");
2554                        return -EINVAL;
2555                }
2556                if (idx_value & 0x1) {
2557                        u64 offset;
2558                        /* SRC is memory. */
2559                        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2560                        if (r) {
2561                                DRM_ERROR("bad COPY_DW (missing src reloc)\n");
2562                                return -EINVAL;
2563                        }
2564                        offset = radeon_get_ib_value(p, idx+1);
2565                        offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2566                        if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2567                                DRM_ERROR("bad COPY_DW src bo too small: 0x%llx, 0x%lx\n",
2568                                          offset + 4, radeon_bo_size(reloc->robj));
2569                                return -EINVAL;
2570                        }
2571                        offset += reloc->gpu_offset;
2572                        ib[idx+1] = offset;
2573                        ib[idx+2] = upper_32_bits(offset) & 0xff;
2574                } else {
2575                        /* SRC is a reg. */
2576                        reg = radeon_get_ib_value(p, idx+1) << 2;
2577                        if (!evergreen_is_safe_reg(p, reg)) {
2578                                dev_warn(p->dev, "forbidden register 0x%08x at %d\n",
2579                                         reg, idx + 1);
2580                                return -EINVAL;
2581                        }
2582                }
2583                if (idx_value & 0x2) {
2584                        u64 offset;
2585                        /* DST is memory. */
2586                        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2587                        if (r) {
2588                                DRM_ERROR("bad COPY_DW (missing dst reloc)\n");
2589                                return -EINVAL;
2590                        }
2591                        offset = radeon_get_ib_value(p, idx+3);
2592                        offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2593                        if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2594                                DRM_ERROR("bad COPY_DW dst bo too small: 0x%llx, 0x%lx\n",
2595                                          offset + 4, radeon_bo_size(reloc->robj));
2596                                return -EINVAL;
2597                        }
2598                        offset += reloc->gpu_offset;
2599                        ib[idx+3] = offset;
2600                        ib[idx+4] = upper_32_bits(offset) & 0xff;
2601                } else {
2602                        /* DST is a reg. */
2603                        reg = radeon_get_ib_value(p, idx+3) << 2;
2604                        if (!evergreen_is_safe_reg(p, reg)) {
2605                                dev_warn(p->dev, "forbidden register 0x%08x at %d\n",
2606                                         reg, idx + 3);
2607                                return -EINVAL;
2608                        }
2609                }
2610                break;
2611        case PACKET3_NOP:
2612                break;
2613        default:
2614                DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
2615                return -EINVAL;
2616        }
2617        return 0;
2618}
2619
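/**
 * evergreen_cs_parse() - parse the GFX/compute IB
 * @p:          parser structure holding parsing context.
 *
 * Parses the IB from the CS ioctl, validates each packet against the
 * state tracker and patches in the relocated GPU addresses.
 * (Evergreen-Cayman)
 * Returns 0 for success and an error on failure.
 **/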
2620int evergreen_cs_parse(struct radeon_cs_parser *p)
2621{
2622        struct radeon_cs_packet pkt;
2623        struct evergreen_cs_track *track;
2624        u32 tmp;
2625        int r;
2626
2627        if (p->track == NULL) {
2628                /* initialize tracker, we are in kms */
2629                track = kzalloc(sizeof(*track), GFP_KERNEL);
2630                if (track == NULL)
2631                        return -ENOMEM;
2632                evergreen_cs_track_init(track);
2633                if (p->rdev->family >= CHIP_CAYMAN) {
2634                        tmp = p->rdev->config.cayman.tile_config;
2635                        track->reg_safe_bm = cayman_reg_safe_bm;
2636                } else {
2637                        tmp = p->rdev->config.evergreen.tile_config;
2638                        track->reg_safe_bm = evergreen_reg_safe_bm;
2639                }
2640                BUILD_BUG_ON(ARRAY_SIZE(cayman_reg_safe_bm) != REG_SAFE_BM_SIZE);
2641                BUILD_BUG_ON(ARRAY_SIZE(evergreen_reg_safe_bm) != REG_SAFE_BM_SIZE);
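                /* decode the tile config: pipes, banks, group size and row size */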
2642                switch (tmp & 0xf) {
2643                case 0:
2644                        track->npipes = 1;
2645                        break;
2646                case 1:
2647                default:
2648                        track->npipes = 2;
2649                        break;
2650                case 2:
2651                        track->npipes = 4;
2652                        break;
2653                case 3:
2654                        track->npipes = 8;
2655                        break;
2656                }
2657
2658                switch ((tmp & 0xf0) >> 4) {
2659                case 0:
2660                        track->nbanks = 4;
2661                        break;
2662                case 1:
2663                default:
2664                        track->nbanks = 8;
2665                        break;
2666                case 2:
2667                        track->nbanks = 16;
2668                        break;
2669                }
2670
2671                switch ((tmp & 0xf00) >> 8) {
2672                case 0:
2673                        track->group_size = 256;
2674                        break;
2675                case 1:
2676                default:
2677                        track->group_size = 512;
2678                        break;
2679                }
2680
2681                switch ((tmp & 0xf000) >> 12) {
2682                case 0:
2683                        track->row_size = 1;
2684                        break;
2685                case 1:
2686                default:
2687                        track->row_size = 2;
2688                        break;
2689                case 2:
2690                        track->row_size = 4;
2691                        break;
2692                }
2693
2694                p->track = track;
2695        }
2696        do {
2697                r = radeon_cs_packet_parse(p, &pkt, p->idx);
2698                if (r) {
2699                        kfree(p->track);
2700                        p->track = NULL;
2701                        return r;
2702                }
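                /* a packet spans pkt.count + 2 dwords including its header */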
2703                p->idx += pkt.count + 2;
2704                switch (pkt.type) {
2705                case RADEON_PACKET_TYPE0:
2706                        r = evergreen_cs_parse_packet0(p, &pkt);
2707                        break;
2708                case RADEON_PACKET_TYPE2:
2709                        break;
2710                case RADEON_PACKET_TYPE3:
2711                        r = evergreen_packet3_check(p, &pkt);
2712                        break;
2713                default:
2714                        DRM_ERROR("Unknown packet type %d !\n", pkt.type);
2715                        kfree(p->track);
2716                        p->track = NULL;
2717                        return -EINVAL;
2718                }
2719                if (r) {
2720                        kfree(p->track);
2721                        p->track = NULL;
2722                        return r;
2723                }
2724        } while (p->idx < p->chunk_ib->length_dw);
2725#if 0
2726        for (r = 0; r < p->ib.length_dw; r++) {
2727                printk(KERN_INFO "%05d  0x%08X\n", r, p->ib.ptr[r]);
2728                mdelay(1);
2729        }
2730#endif
2731        kfree(p->track);
2732        p->track = NULL;
2733        return 0;
2734}
2735
2736/**
2737 * evergreen_dma_cs_parse() - parse the DMA IB
2738 * @p:          parser structure holding parsing context.
2739 *
 * Parses the DMA IB from the CS ioctl, updates the GPU addresses
 * based on the reloc information and checks for errors. (Evergreen-Cayman)
2743 * Returns 0 for success and an error on failure.
2744 **/
2745int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
2746{
2747        struct radeon_cs_chunk *ib_chunk = p->chunk_ib;
2748        struct radeon_bo_list *src_reloc, *dst_reloc, *dst2_reloc;
2749        u32 header, cmd, count, sub_cmd;
2750        uint32_t *ib = p->ib.ptr;
2751        u32 idx;
2752        u64 src_offset, dst_offset, dst2_offset;
2753        int r;
2754
2755        do {
2756                if (p->idx >= ib_chunk->length_dw) {
                        DRM_ERROR("Cannot parse packet at %d after CS end %d !\n",
2758                                  p->idx, ib_chunk->length_dw);
2759                        return -EINVAL;
2760                }
2761                idx = p->idx;
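                /* the header dword encodes the command, sub command and dword count */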
2762                header = radeon_get_ib_value(p, idx);
2763                cmd = GET_DMA_CMD(header);
2764                count = GET_DMA_COUNT(header);
2765                sub_cmd = GET_DMA_SUB_CMD(header);
2766
2767                switch (cmd) {
2768                case DMA_PACKET_WRITE:
2769                        r = r600_dma_cs_next_reloc(p, &dst_reloc);
2770                        if (r) {
2771                                DRM_ERROR("bad DMA_PACKET_WRITE\n");
2772                                return -EINVAL;
2773                        }
2774                        switch (sub_cmd) {
2775                        /* tiled */
2776                        case 8:
2777                                dst_offset = radeon_get_ib_value(p, idx+1);
2778                                dst_offset <<= 8;
2779
2780                                ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
2781                                p->idx += count + 7;
2782                                break;
2783                        /* linear */
2784                        case 0:
2785                                dst_offset = radeon_get_ib_value(p, idx+1);
2786                                dst_offset |= ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2787
2788                                ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2789                                ib[idx+2] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2790                                p->idx += count + 3;
2791                                break;
2792                        default:
2793                                DRM_ERROR("bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", idx, header);
2794                                return -EINVAL;
2795                        }
2796                        if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2797                                dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n",
2798                                         dst_offset, radeon_bo_size(dst_reloc->robj));
2799                                return -EINVAL;
2800                        }
2801                        break;
2802                case DMA_PACKET_COPY:
2803                        r = r600_dma_cs_next_reloc(p, &src_reloc);
2804                        if (r) {
2805                                DRM_ERROR("bad DMA_PACKET_COPY\n");
2806                                return -EINVAL;
2807                        }
2808                        r = r600_dma_cs_next_reloc(p, &dst_reloc);
2809                        if (r) {
2810                                DRM_ERROR("bad DMA_PACKET_COPY\n");
2811                                return -EINVAL;
2812                        }
2813                        switch (sub_cmd) {
2814                        /* Copy L2L, DW aligned */
2815                        case 0x00:
2816                                /* L2L, dw */
2817                                src_offset = radeon_get_ib_value(p, idx+2);
2818                                src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2819                                dst_offset = radeon_get_ib_value(p, idx+1);
2820                                dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;
2821                                if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2822                                        dev_warn(p->dev, "DMA L2L, dw src buffer too small (%llu %lu)\n",
2823                                                        src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2824                                        return -EINVAL;
2825                                }
2826                                if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2827                                        dev_warn(p->dev, "DMA L2L, dw dst buffer too small (%llu %lu)\n",
2828                                                        dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2829                                        return -EINVAL;
2830                                }
2831                                ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2832                                ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
2833                                ib[idx+3] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2834                                ib[idx+4] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2835                                p->idx += 5;
2836                                break;
2837                        /* Copy L2T/T2L */
2838                        case 0x08:
2839                                /* detile bit */
2840                                if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
2841                                        /* tiled src, linear dst */
2842                                        src_offset = radeon_get_ib_value(p, idx+1);
2843                                        src_offset <<= 8;
2844                                        ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
2845
2846                                        dst_offset = radeon_get_ib_value(p, idx + 7);
2847                                        dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
2848                                        ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2849                                        ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2850                                } else {
2851                                        /* linear src, tiled dst */
2852                                        src_offset = radeon_get_ib_value(p, idx+7);
2853                                        src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
2854                                        ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
2855                                        ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2856
2857                                        dst_offset = radeon_get_ib_value(p, idx+1);
2858                                        dst_offset <<= 8;
2859                                        ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
2860                                }
2861                                if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2862                                        dev_warn(p->dev, "DMA L2T, src buffer too small (%llu %lu)\n",
2863                                                        src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2864                                        return -EINVAL;
2865                                }
2866                                if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2867                                        dev_warn(p->dev, "DMA L2T, dst buffer too small (%llu %lu)\n",
2868                                                        dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2869                                        return -EINVAL;
2870                                }
2871                                p->idx += 9;
2872                                break;
2873                        /* Copy L2L, byte aligned */
2874                        case 0x40:
2875                                /* L2L, byte */
2876                                src_offset = radeon_get_ib_value(p, idx+2);
2877                                src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2878                                dst_offset = radeon_get_ib_value(p, idx+1);
2879                                dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;
2880                                if ((src_offset + count) > radeon_bo_size(src_reloc->robj)) {
2881                                        dev_warn(p->dev, "DMA L2L, byte src buffer too small (%llu %lu)\n",
2882                                                        src_offset + count, radeon_bo_size(src_reloc->robj));
2883                                        return -EINVAL;
2884                                }
2885                                if ((dst_offset + count) > radeon_bo_size(dst_reloc->robj)) {
2886                                        dev_warn(p->dev, "DMA L2L, byte dst buffer too small (%llu %lu)\n",
2887                                                        dst_offset + count, radeon_bo_size(dst_reloc->robj));
2888                                        return -EINVAL;
2889                                }
2890                                ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xffffffff);
2891                                ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xffffffff);
2892                                ib[idx+3] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2893                                ib[idx+4] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2894                                p->idx += 5;
2895                                break;
2896                        /* Copy L2L, partial */
2897                        case 0x41:
2898                                /* L2L, partial */
2899                                if (p->family < CHIP_CAYMAN) {
2900                                        DRM_ERROR("L2L Partial is cayman only !\n");
2901                                        return -EINVAL;
2902                                }
2903                                ib[idx+1] += (u32)(src_reloc->gpu_offset & 0xffffffff);
2904                                ib[idx+2] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2905                                ib[idx+4] += (u32)(dst_reloc->gpu_offset & 0xffffffff);
2906                                ib[idx+5] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2907
2908                                p->idx += 9;
2909                                break;
2910                        /* Copy L2L, DW aligned, broadcast */
2911                        case 0x44:
2912                                /* L2L, dw, broadcast */
2913                                r = r600_dma_cs_next_reloc(p, &dst2_reloc);
2914                                if (r) {
2915                                        DRM_ERROR("bad L2L, dw, broadcast DMA_PACKET_COPY\n");
2916                                        return -EINVAL;
2917                                }
2918                                dst_offset = radeon_get_ib_value(p, idx+1);
2919                                dst_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2920                                dst2_offset = radeon_get_ib_value(p, idx+2);
2921                                dst2_offset |= ((u64)(radeon_get_ib_value(p, idx+5) & 0xff)) << 32;
2922                                src_offset = radeon_get_ib_value(p, idx+3);
2923                                src_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32;
2924                                if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2925                                        dev_warn(p->dev, "DMA L2L, dw, broadcast src buffer too small (%llu %lu)\n",
2926                                                        src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2927                                        return -EINVAL;
2928                                }
2929                                if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2930                                        dev_warn(p->dev, "DMA L2L, dw, broadcast dst buffer too small (%llu %lu)\n",
2931                                                        dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2932                                        return -EINVAL;
2933                                }
2934                                if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
2935                                        dev_warn(p->dev, "DMA L2L, dw, broadcast dst2 buffer too small (%llu %lu)\n",
2936                                                        dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
2937                                        return -EINVAL;
2938                                }
2939                                ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2940                                ib[idx+2] += (u32)(dst2_reloc->gpu_offset & 0xfffffffc);
2941                                ib[idx+3] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
2942                                ib[idx+4] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2943                                ib[idx+5] += upper_32_bits(dst2_reloc->gpu_offset) & 0xff;
2944                                ib[idx+6] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2945                                p->idx += 7;
2946                                break;
2947                        /* Copy L2T Frame to Field */
2948                        case 0x48:
2949                                if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
2950                                        DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
2951                                        return -EINVAL;
2952                                }
2953                                r = r600_dma_cs_next_reloc(p, &dst2_reloc);
2954                                if (r) {
2955                                        DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
2956                                        return -EINVAL;
2957                                }
2958                                dst_offset = radeon_get_ib_value(p, idx+1);
2959                                dst_offset <<= 8;
2960                                dst2_offset = radeon_get_ib_value(p, idx+2);
2961                                dst2_offset <<= 8;
2962                                src_offset = radeon_get_ib_value(p, idx+8);
2963                                src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
2964                                if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2965                                        dev_warn(p->dev, "DMA L2T, frame to fields src buffer too small (%llu %lu)\n",
2966                                                        src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2967                                        return -EINVAL;
2968                                }
2969                                if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
                                        dev_warn(p->dev, "DMA L2T, frame to fields dst buffer too small (%llu %lu)\n",
2971                                                        dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2972                                        return -EINVAL;
2973                                }
2974                                if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
                                        dev_warn(p->dev, "DMA L2T, frame to fields dst2 buffer too small (%llu %lu)\n",
2976                                                        dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
2977                                        return -EINVAL;
2978                                }
2979                                ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
2980                                ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8);
2981                                ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
2982                                ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2983                                p->idx += 10;
2984                                break;
2985                        /* Copy L2T/T2L, partial */
2986                        case 0x49:
2987                                /* L2T, T2L partial */
2988                                if (p->family < CHIP_CAYMAN) {
2989                                        DRM_ERROR("L2T, T2L Partial is cayman only !\n");
2990                                        return -EINVAL;
2991                                }
2992                                /* detile bit */
2993                                if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
2994                                        /* tiled src, linear dst */
2995                                        ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
2996
2997                                        ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2998                                        ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2999                                } else {
3000                                        /* linear src, tiled dst */
3001                                        ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3002                                        ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3003
3004                                        ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3005                                }
3006                                p->idx += 12;
3007                                break;
3008                        /* Copy L2T broadcast */
3009                        case 0x4b:
3010                                /* L2T, broadcast */
3011                                if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
3012                                        DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3013                                        return -EINVAL;
3014                                }
3015                                r = r600_dma_cs_next_reloc(p, &dst2_reloc);
3016                                if (r) {
3017                                        DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3018                                        return -EINVAL;
3019                                }
3020                                dst_offset = radeon_get_ib_value(p, idx+1);
3021                                dst_offset <<= 8;
3022                                dst2_offset = radeon_get_ib_value(p, idx+2);
3023                                dst2_offset <<= 8;
3024                                src_offset = radeon_get_ib_value(p, idx+8);
3025                                src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
3026                                if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3027                                        dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
3028                                                        src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3029                                        return -EINVAL;
3030                                }
3031                                if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3032                                        dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
3033                                                        dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3034                                        return -EINVAL;
3035                                }
3036                                if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3037                                        dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
3038                                                        dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3039                                        return -EINVAL;
3040                                }
3041                                ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3042                                ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8);
3043                                ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3044                                ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3045                                p->idx += 10;
3046                                break;
3047                        /* Copy L2T/T2L (tile units) */
3048                        case 0x4c:
3049                                /* L2T, T2L */
3050                                /* detile bit */
3051                                if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
3052                                        /* tiled src, linear dst */
3053                                        src_offset = radeon_get_ib_value(p, idx+1);
3054                                        src_offset <<= 8;
3055                                        ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
3056
3057                                        dst_offset = radeon_get_ib_value(p, idx+7);
3058                                        dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
3059                                        ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
3060                                        ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
3061                                } else {
3062                                        /* linear src, tiled dst */
3063                                        src_offset = radeon_get_ib_value(p, idx+7);
3064                                        src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
3065                                        ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3066                                        ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3067
3068                                        dst_offset = radeon_get_ib_value(p, idx+1);
3069                                        dst_offset <<= 8;
3070                                        ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3071                                }
3072                                if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3073                                        dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%llu %lu)\n",
3074                                                        src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3075                                        return -EINVAL;
3076                                }
3077                                if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3078                                        dev_warn(p->dev, "DMA L2T, T2L dst buffer too small (%llu %lu)\n",
3079                                                        dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3080                                        return -EINVAL;
3081                                }
3082                                p->idx += 9;
3083                                break;
3084                        /* Copy T2T, partial (tile units) */
3085                        case 0x4d:
3086                                /* T2T partial */
3087                                if (p->family < CHIP_CAYMAN) {
                                        DRM_ERROR("T2T Partial is cayman only !\n");
3089                                        return -EINVAL;
3090                                }
3091                                ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
3092                                ib[idx+4] += (u32)(dst_reloc->gpu_offset >> 8);
3093                                p->idx += 13;
3094                                break;
3095                        /* Copy L2T broadcast (tile units) */
3096                        case 0x4f:
3097                                /* L2T, broadcast */
3098                                if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
3099                                        DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3100                                        return -EINVAL;
3101                                }
3102                                r = r600_dma_cs_next_reloc(p, &dst2_reloc);
3103                                if (r) {
3104                                        DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3105                                        return -EINVAL;
3106                                }
3107                                dst_offset = radeon_get_ib_value(p, idx+1);
3108                                dst_offset <<= 8;
3109                                dst2_offset = radeon_get_ib_value(p, idx+2);
3110                                dst2_offset <<= 8;
3111                                src_offset = radeon_get_ib_value(p, idx+8);
3112                                src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
3113                                if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3114                                        dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
3115                                                        src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3116                                        return -EINVAL;
3117                                }
3118                                if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3119                                        dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
3120                                                        dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3121                                        return -EINVAL;
3122                                }
3123                                if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3124                                        dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
3125                                                        dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3126                                        return -EINVAL;
3127                                }
3128                                ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3129                                ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8);
3130                                ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3131                                ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3132                                p->idx += 10;
3133                                break;
3134                        default:
3135                                DRM_ERROR("bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, header);
3136                                return -EINVAL;
3137                        }
3138                        break;
3139                case DMA_PACKET_CONSTANT_FILL:
3140                        r = r600_dma_cs_next_reloc(p, &dst_reloc);
3141                        if (r) {
3142                                DRM_ERROR("bad DMA_PACKET_CONSTANT_FILL\n");
3143                                return -EINVAL;
3144                        }
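                        /* bits 32..39 of the fill address sit in bits 16..23 of dword 3 */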
3145                        dst_offset = radeon_get_ib_value(p, idx+1);
3146                        dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0x00ff0000)) << 16;
3147                        if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3148                                dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n",
                                         dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3150                                return -EINVAL;
3151                        }
3152                        ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
3153                        ib[idx+3] += (upper_32_bits(dst_reloc->gpu_offset) << 16) & 0x00ff0000;
3154                        p->idx += 4;
3155                        break;
3156                case DMA_PACKET_NOP:
3157                        p->idx += 1;
3158                        break;
3159                default:
3160                        DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
3161                        return -EINVAL;
3162                }
3163        } while (p->idx < p->chunk_ib->length_dw);
3164#if 0
        for (r = 0; r < p->ib.length_dw; r++) {
3166                printk(KERN_INFO "%05d  0x%08X\n", r, p->ib.ptr[r]);
3167                mdelay(1);
3168        }
3169#endif
3170        return 0;
3171}
3172
3173/* vm parser */
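/**
 * evergreen_vm_reg_valid() - check if a register is safe for a VM IB
 * @reg:        register offset in bytes
 *
 * Context registers and a whitelist of config registers may be written
 * by IBs that run with virtual memory; anything else is rejected.
 * Returns true if the register write is allowed, false otherwise.
 **/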
3174static bool evergreen_vm_reg_valid(u32 reg)
3175{
3176        /* context regs are fine */
3177        if (reg >= 0x28000)
3178                return true;
3179
3180        /* check config regs */
3181        switch (reg) {
3182        case WAIT_UNTIL:
3183        case GRBM_GFX_INDEX:
3184        case CP_STRMOUT_CNTL:
3185        case CP_COHER_CNTL:
3186        case CP_COHER_SIZE:
3187        case VGT_VTX_VECT_EJECT_REG:
3188        case VGT_CACHE_INVALIDATION:
3189        case VGT_GS_VERTEX_REUSE:
3190        case VGT_PRIMITIVE_TYPE:
3191        case VGT_INDEX_TYPE:
3192        case VGT_NUM_INDICES:
3193        case VGT_NUM_INSTANCES:
3194        case VGT_COMPUTE_DIM_X:
3195        case VGT_COMPUTE_DIM_Y:
3196        case VGT_COMPUTE_DIM_Z:
3197        case VGT_COMPUTE_START_X:
3198        case VGT_COMPUTE_START_Y:
3199        case VGT_COMPUTE_START_Z:
3200        case VGT_COMPUTE_INDEX:
3201        case VGT_COMPUTE_THREAD_GROUP_SIZE:
3202        case VGT_HS_OFFCHIP_PARAM:
3203        case PA_CL_ENHANCE:
3204        case PA_SU_LINE_STIPPLE_VALUE:
3205        case PA_SC_LINE_STIPPLE_STATE:
3206        case PA_SC_ENHANCE:
3207        case SQ_DYN_GPR_CNTL_PS_FLUSH_REQ:
3208        case SQ_DYN_GPR_SIMD_LOCK_EN:
3209        case SQ_CONFIG:
3210        case SQ_GPR_RESOURCE_MGMT_1:
3211        case SQ_GLOBAL_GPR_RESOURCE_MGMT_1:
3212        case SQ_GLOBAL_GPR_RESOURCE_MGMT_2:
3213        case SQ_CONST_MEM_BASE:
3214        case SQ_STATIC_THREAD_MGMT_1:
3215        case SQ_STATIC_THREAD_MGMT_2:
3216        case SQ_STATIC_THREAD_MGMT_3:
3217        case SPI_CONFIG_CNTL:
3218        case SPI_CONFIG_CNTL_1:
3219        case TA_CNTL_AUX:
3220        case DB_DEBUG:
3221        case DB_DEBUG2:
3222        case DB_DEBUG3:
3223        case DB_DEBUG4:
3224        case DB_WATERMARKS:
3225        case TD_PS_BORDER_COLOR_INDEX:
3226        case TD_PS_BORDER_COLOR_RED:
3227        case TD_PS_BORDER_COLOR_GREEN:
3228        case TD_PS_BORDER_COLOR_BLUE:
3229        case TD_PS_BORDER_COLOR_ALPHA:
3230        case TD_VS_BORDER_COLOR_INDEX:
3231        case TD_VS_BORDER_COLOR_RED:
3232        case TD_VS_BORDER_COLOR_GREEN:
3233        case TD_VS_BORDER_COLOR_BLUE:
3234        case TD_VS_BORDER_COLOR_ALPHA:
3235        case TD_GS_BORDER_COLOR_INDEX:
3236        case TD_GS_BORDER_COLOR_RED:
3237        case TD_GS_BORDER_COLOR_GREEN:
3238        case TD_GS_BORDER_COLOR_BLUE:
3239        case TD_GS_BORDER_COLOR_ALPHA:
3240        case TD_HS_BORDER_COLOR_INDEX:
3241        case TD_HS_BORDER_COLOR_RED:
3242        case TD_HS_BORDER_COLOR_GREEN:
3243        case TD_HS_BORDER_COLOR_BLUE:
3244        case TD_HS_BORDER_COLOR_ALPHA:
3245        case TD_LS_BORDER_COLOR_INDEX:
3246        case TD_LS_BORDER_COLOR_RED:
3247        case TD_LS_BORDER_COLOR_GREEN:
3248        case TD_LS_BORDER_COLOR_BLUE:
3249        case TD_LS_BORDER_COLOR_ALPHA:
3250        case TD_CS_BORDER_COLOR_INDEX:
3251        case TD_CS_BORDER_COLOR_RED:
3252        case TD_CS_BORDER_COLOR_GREEN:
3253        case TD_CS_BORDER_COLOR_BLUE:
3254        case TD_CS_BORDER_COLOR_ALPHA:
3255        case SQ_ESGS_RING_SIZE:
3256        case SQ_GSVS_RING_SIZE:
3257        case SQ_ESTMP_RING_SIZE:
3258        case SQ_GSTMP_RING_SIZE:
3259        case SQ_HSTMP_RING_SIZE:
3260        case SQ_LSTMP_RING_SIZE:
3261        case SQ_PSTMP_RING_SIZE:
3262        case SQ_VSTMP_RING_SIZE:
3263        case SQ_ESGS_RING_ITEMSIZE:
3264        case SQ_ESTMP_RING_ITEMSIZE:
3265        case SQ_GSTMP_RING_ITEMSIZE:
3266        case SQ_GSVS_RING_ITEMSIZE:
3267        case SQ_GS_VERT_ITEMSIZE:
3268        case SQ_GS_VERT_ITEMSIZE_1:
3269        case SQ_GS_VERT_ITEMSIZE_2:
3270        case SQ_GS_VERT_ITEMSIZE_3:
3271        case SQ_GSVS_RING_OFFSET_1:
3272        case SQ_GSVS_RING_OFFSET_2:
3273        case SQ_GSVS_RING_OFFSET_3:
3274        case SQ_HSTMP_RING_ITEMSIZE:
3275        case SQ_LSTMP_RING_ITEMSIZE:
3276        case SQ_PSTMP_RING_ITEMSIZE:
3277        case SQ_VSTMP_RING_ITEMSIZE:
3278        case VGT_TF_RING_SIZE:
3279        case SQ_ESGS_RING_BASE:
3280        case SQ_GSVS_RING_BASE:
3281        case SQ_ESTMP_RING_BASE:
3282        case SQ_GSTMP_RING_BASE:
3283        case SQ_HSTMP_RING_BASE:
3284        case SQ_LSTMP_RING_BASE:
3285        case SQ_PSTMP_RING_BASE:
3286        case SQ_VSTMP_RING_BASE:
3287        case CAYMAN_VGT_OFFCHIP_LDS_BASE:
3288        case CAYMAN_SQ_EX_ALLOC_TABLE_SLOTS:
3289                return true;
3290        default:
3291                DRM_ERROR("Invalid register 0x%x in CS\n", reg);
3292                return false;
3293        }
3294}
3295
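/**
 * evergreen_vm_packet3_check() - validate a type 3 packet for a VM IB
 * @rdev:       radeon_device pointer
 * @ib:         IB dword buffer
 * @pkt:        packet to check
 *
 * Checks that the packet opcode is allowed in a VM IB and that any
 * register writes it performs only touch valid registers.
 * Returns 0 for success and an error on failure.
 **/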
3296static int evergreen_vm_packet3_check(struct radeon_device *rdev,
3297                                      u32 *ib, struct radeon_cs_packet *pkt)
3298{
3299        u32 idx = pkt->idx + 1;
3300        u32 idx_value = ib[idx];
3301        u32 start_reg, end_reg, reg, i;
3302        u32 command, info;
3303
3304        switch (pkt->opcode) {
3305        case PACKET3_NOP:
3306                break;
3307        case PACKET3_SET_BASE:
3308                if (idx_value != 1) {
                        DRM_ERROR("bad SET_BASE\n");
3310                        return -EINVAL;
3311                }
3312                break;
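        /* these opcodes are allowed as-is, no further checking needed */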
3313        case PACKET3_CLEAR_STATE:
3314        case PACKET3_INDEX_BUFFER_SIZE:
3315        case PACKET3_DISPATCH_DIRECT:
3316        case PACKET3_DISPATCH_INDIRECT:
3317        case PACKET3_MODE_CONTROL:
3318        case PACKET3_SET_PREDICATION:
3319        case PACKET3_COND_EXEC:
3320        case PACKET3_PRED_EXEC:
3321        case PACKET3_DRAW_INDIRECT:
3322        case PACKET3_DRAW_INDEX_INDIRECT:
3323        case PACKET3_INDEX_BASE:
3324        case PACKET3_DRAW_INDEX_2:
3325        case PACKET3_CONTEXT_CONTROL:
3326        case PACKET3_DRAW_INDEX_OFFSET:
3327        case PACKET3_INDEX_TYPE:
3328        case PACKET3_DRAW_INDEX:
3329        case PACKET3_DRAW_INDEX_AUTO:
3330        case PACKET3_DRAW_INDEX_IMMD:
3331        case PACKET3_NUM_INSTANCES:
3332        case PACKET3_DRAW_INDEX_MULTI_AUTO:
3333        case PACKET3_STRMOUT_BUFFER_UPDATE:
3334        case PACKET3_DRAW_INDEX_OFFSET_2:
3335        case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
3336        case PACKET3_MPEG_INDEX:
3337        case PACKET3_WAIT_REG_MEM:
3338        case PACKET3_MEM_WRITE:
3339        case PACKET3_SURFACE_SYNC:
3340        case PACKET3_EVENT_WRITE:
3341        case PACKET3_EVENT_WRITE_EOP:
3342        case PACKET3_EVENT_WRITE_EOS:
3343        case PACKET3_SET_CONTEXT_REG:
3344        case PACKET3_SET_BOOL_CONST:
3345        case PACKET3_SET_LOOP_CONST:
3346        case PACKET3_SET_RESOURCE:
3347        case PACKET3_SET_SAMPLER:
3348        case PACKET3_SET_CTL_CONST:
3349        case PACKET3_SET_RESOURCE_OFFSET:
3350        case PACKET3_SET_CONTEXT_REG_INDIRECT:
3351        case PACKET3_SET_RESOURCE_INDIRECT:
3352        case CAYMAN_PACKET3_DEALLOC_STATE:
3353                break;
3354        case PACKET3_COND_WRITE:
3355                if (idx_value & 0x100) {
3356                        reg = ib[idx + 5] * 4;
3357                        if (!evergreen_vm_reg_valid(reg))
3358                                return -EINVAL;
3359                }
3360                break;
3361        case PACKET3_COPY_DW:
3362                if (idx_value & 0x2) {
3363                        reg = ib[idx + 3] * 4;
3364                        if (!evergreen_vm_reg_valid(reg))
3365                                return -EINVAL;
3366                }
3367                break;
3368        case PACKET3_SET_CONFIG_REG:
3369                start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
3370                end_reg = 4 * pkt->count + start_reg - 4;
3371                if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
3372                    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
3373                    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
3374                        DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
3375                        return -EINVAL;
3376                }
3377                for (i = 0; i < pkt->count; i++) {
3378                        reg = start_reg + (4 * i);
3379                        if (!evergreen_vm_reg_valid(reg))
3380                                return -EINVAL;
3381                }
3382                break;
3383        case PACKET3_CP_DMA:
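                /* payload dword 1 selects the src/dst address spaces,
                 * payload dword 4 holds the command flags and byte count
                 */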
3384                command = ib[idx + 4];
3385                info = ib[idx + 1];
3386                if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
3387                    (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
3388                    ((((info & 0x00300000) >> 20) == 0) &&
3389                     (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
3390                    ((((info & 0x60000000) >> 29) == 0) &&
3391                     (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
                        /* non mem-to-mem copies require a dw aligned count */
3393                        if ((command & 0x1fffff) % 4) {
3394                                DRM_ERROR("CP DMA command requires dw count alignment\n");
3395                                return -EINVAL;
3396                        }
3397                }
3398                if (command & PACKET3_CP_DMA_CMD_SAS) {
3399                        /* src address space is register */
3400                        if (((info & 0x60000000) >> 29) == 0) {
3401                                start_reg = idx_value << 2;
3402                                if (command & PACKET3_CP_DMA_CMD_SAIC) {
3403                                        reg = start_reg;
3404                                        if (!evergreen_vm_reg_valid(reg)) {
3405                                                DRM_ERROR("CP DMA Bad SRC register\n");
3406                                                return -EINVAL;
3407                                        }
3408                                } else {
3409                                        for (i = 0; i < (command & 0x1fffff); i++) {
3410                                                reg = start_reg + (4 * i);
3411                                                if (!evergreen_vm_reg_valid(reg)) {
3412                                                        DRM_ERROR("CP DMA Bad SRC register\n");
3413                                                        return -EINVAL;
3414                                                }
3415                                        }
3416                                }
3417                        }
3418                }
3419                if (command & PACKET3_CP_DMA_CMD_DAS) {
3420                        /* dst address space is register */
3421                        if (((info & 0x00300000) >> 20) == 0) {
3422                                start_reg = ib[idx + 2];
3423                                if (command & PACKET3_CP_DMA_CMD_DAIC) {
3424                                        reg = start_reg;
3425                                        if (!evergreen_vm_reg_valid(reg)) {
3426                                                DRM_ERROR("CP DMA Bad DST register\n");
3427                                                return -EINVAL;
3428                                        }
3429                                } else {
3430                                        for (i = 0; i < (command & 0x1fffff); i++) {
3431                                                reg = start_reg + (4 * i);
3432                                                if (!evergreen_vm_reg_valid(reg)) {
3433                                                        DRM_ERROR("CP DMA Bad DST register\n");
3434                                                        return -EINVAL;
3435                                                }
3436                                        }
3437                                }
3438                        }
3439                }
3440                break;
3441        default:
3442                return -EINVAL;
3443        }
3444        return 0;
3445}
3446
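/**
 * evergreen_ib_parse() - parse the GFX IB for VM submissions
 * @rdev:       radeon_device pointer
 * @ib:         radeon_ib pointer
 *
 * Parses the GFX IB from the VM CS ioctl and checks for errors;
 * only type 2 and type 3 packets are allowed. (Evergreen-Cayman)
 * Returns 0 for success and an error on failure.
 **/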
3447int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3448{
3449        int ret = 0;
3450        u32 idx = 0;
3451        struct radeon_cs_packet pkt;
3452
3453        do {
3454                pkt.idx = idx;
3455                pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
3456                pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
3457                pkt.one_reg_wr = 0;
3458                switch (pkt.type) {
3459                case RADEON_PACKET_TYPE0:
3460                        dev_err(rdev->dev, "Packet0 not allowed!\n");
3461                        ret = -EINVAL;
3462                        break;
3463                case RADEON_PACKET_TYPE2:
3464                        idx += 1;
3465                        break;
3466                case RADEON_PACKET_TYPE3:
3467                        pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
3468                        ret = evergreen_vm_packet3_check(rdev, ib->ptr, &pkt);
3469                        idx += pkt.count + 2;
3470                        break;
3471                default:
3472                        dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
3473                        ret = -EINVAL;
3474                        break;
3475                }
3476                if (ret)
3477                        break;
3478        } while (idx < ib->length_dw);
3479
3480        return ret;
3481}
3482
3483/**
3484 * evergreen_dma_ib_parse() - parse the DMA IB for VM
3485 * @rdev: radeon_device pointer
3486 * @ib: radeon_ib pointer
3487 *
 * Parses the DMA IB from the VM CS ioctl and
 * checks for errors. (Cayman-SI)
3490 * Returns 0 for success and an error on failure.
3491 **/
3492int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3493{
3494        u32 idx = 0;
3495        u32 header, cmd, count, sub_cmd;
3496
3497        do {
3498                header = ib->ptr[idx];
3499                cmd = GET_DMA_CMD(header);
3500                count = GET_DMA_COUNT(header);
3501                sub_cmd = GET_DMA_SUB_CMD(header);
3502
3503                switch (cmd) {
3504                case DMA_PACKET_WRITE:
3505                        switch (sub_cmd) {
3506                        /* tiled */
3507                        case 8:
3508                                idx += count + 7;
3509                                break;
3510                        /* linear */
3511                        case 0:
3512                                idx += count + 3;
3513                                break;
3514                        default:
3515                                DRM_ERROR("bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", idx, ib->ptr[idx]);
3516                                return -EINVAL;
3517                        }
3518                        break;
3519                case DMA_PACKET_COPY:
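                        /*
                         * Each COPY sub command has a fixed dword
                         * footprint; the increments below skip the
                         * header plus its payload.
                         */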
                        switch (sub_cmd) {
                        /* Copy L2L, DW aligned */
                        case 0x00:
                                idx += 5;
                                break;
                        /* Copy L2T/T2L */
                        case 0x08:
                                idx += 9;
                                break;
                        /* Copy L2L, byte aligned */
                        case 0x40:
                                idx += 5;
                                break;
                        /* Copy L2L, partial */
                        case 0x41:
                                idx += 9;
                                break;
                        /* Copy L2L, DW aligned, broadcast */
                        case 0x44:
                                idx += 7;
                                break;
                        /* Copy L2T Frame to Field */
                        case 0x48:
                                idx += 10;
                                break;
                        /* Copy L2T/T2L, partial */
                        case 0x49:
                                idx += 12;
                                break;
                        /* Copy L2T broadcast */
                        case 0x4b:
                                idx += 10;
                                break;
                        /* Copy L2T/T2L (tile units) */
                        case 0x4c:
                                idx += 9;
                                break;
                        /* Copy T2T, partial (tile units) */
                        case 0x4d:
                                idx += 13;
                                break;
                        /* Copy L2T broadcast (tile units) */
                        case 0x4f:
                                idx += 10;
                                break;
                        default:
                                DRM_ERROR("bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, ib->ptr[idx]);
                                return -EINVAL;
                        }
                        break;
                case DMA_PACKET_CONSTANT_FILL:
                        idx += 4;
                        break;
                case DMA_PACKET_NOP:
                        idx += 1;
                        break;
                default:
                        DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
                        return -EINVAL;
                }
        } while (idx < ib->length_dw);

        return 0;
}

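/*
 * Illustrative sketch, not part of the driver: the DMA packet header
 * decode that evergreen_dma_ib_parse() relies on.  Assuming the
 * evergreend.h layout (cmd in bits 31:28, sub_cmd in bits 27:20 and the
 * dword count in bits 19:0), the GET_DMA_* macros reduce to the
 * hypothetical helpers below.
 */
static inline u32 example_dma_cmd(u32 header)
{
        return (header & 0xf0000000) >> 28;
}

static inline u32 example_dma_sub_cmd(u32 header)
{
        return (header & 0x0ff00000) >> 20;
}

static inline u32 example_dma_count(u32 header)
{
        return header & 0x000fffff;
}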