linux/drivers/gpu/drm/radeon/evergreen_cs.c
/*
 * Copyright 2010 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <drm/drmP.h>
#include "radeon.h"
#include "evergreend.h"
#include "evergreen_reg_safe.h"
#include "cayman_reg_safe.h"

#define MAX(a,b)                   (((a)>(b))?(a):(b))
#define MIN(a,b)                   (((a)<(b))?(a):(b))

int r600_dma_cs_next_reloc(struct radeon_cs_parser *p,
                           struct radeon_cs_reloc **cs_reloc);
struct evergreen_cs_track {
        u32                     group_size;
        u32                     nbanks;
        u32                     npipes;
        u32                     row_size;
        /* values we track */
        u32                     nsamples;               /* unused */
        struct radeon_bo        *cb_color_bo[12];
        u32                     cb_color_bo_offset[12];
        struct radeon_bo        *cb_color_fmask_bo[8];  /* unused */
        struct radeon_bo        *cb_color_cmask_bo[8];  /* unused */
        u32                     cb_color_info[12];
        u32                     cb_color_view[12];
        u32                     cb_color_pitch[12];
        u32                     cb_color_slice[12];
        u32                     cb_color_slice_idx[12];
        u32                     cb_color_attrib[12];
        u32                     cb_color_cmask_slice[8];/* unused */
        u32                     cb_color_fmask_slice[8];/* unused */
        u32                     cb_target_mask;
        u32                     cb_shader_mask; /* unused */
        u32                     vgt_strmout_config;
        u32                     vgt_strmout_buffer_config;
        struct radeon_bo        *vgt_strmout_bo[4];
        u32                     vgt_strmout_bo_offset[4];
        u32                     vgt_strmout_size[4];
        u32                     db_depth_control;
        u32                     db_depth_view;
        u32                     db_depth_slice;
        u32                     db_depth_size;
        u32                     db_z_info;
        u32                     db_z_read_offset;
        u32                     db_z_write_offset;
        struct radeon_bo        *db_z_read_bo;
        struct radeon_bo        *db_z_write_bo;
        u32                     db_s_info;
        u32                     db_s_read_offset;
        u32                     db_s_write_offset;
        struct radeon_bo        *db_s_read_bo;
        struct radeon_bo        *db_s_write_bo;
        bool                    sx_misc_kill_all_prims;
        bool                    cb_dirty;
        bool                    db_dirty;
        bool                    streamout_dirty;
        u32                     htile_offset;
        u32                     htile_surface;
        struct radeon_bo        *htile_bo;
};

static u32 evergreen_cs_get_array_mode(u32 tiling_flags)
{
        if (tiling_flags & RADEON_TILING_MACRO)
                return ARRAY_2D_TILED_THIN1;
        else if (tiling_flags & RADEON_TILING_MICRO)
                return ARRAY_1D_TILED_THIN1;
        else
                return ARRAY_LINEAR_GENERAL;
}

static u32 evergreen_cs_get_num_banks(u32 nbanks)
{
        switch (nbanks) {
        case 2:
                return ADDR_SURF_2_BANK;
        case 4:
                return ADDR_SURF_4_BANK;
        case 8:
        default:
                return ADDR_SURF_8_BANK;
        case 16:
                return ADDR_SURF_16_BANK;
        }
}
 112
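/*
 * Editorial note (a reading of the code, not an original comment): the
 * initializer below seeds most tracked state with all-ones sentinels
 * (0xFFFFFFFF and friends) and NULL bo pointers, so a command stream that
 * draws without ever programming a register should fail the validate
 * steps later on rather than slip through with a zeroed value.
 */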
static void evergreen_cs_track_init(struct evergreen_cs_track *track)
{
        int i;

        for (i = 0; i < 8; i++) {
                track->cb_color_fmask_bo[i] = NULL;
                track->cb_color_cmask_bo[i] = NULL;
                track->cb_color_cmask_slice[i] = 0;
                track->cb_color_fmask_slice[i] = 0;
        }

        for (i = 0; i < 12; i++) {
                track->cb_color_bo[i] = NULL;
                track->cb_color_bo_offset[i] = 0xFFFFFFFF;
                track->cb_color_info[i] = 0;
                track->cb_color_view[i] = 0xFFFFFFFF;
                track->cb_color_pitch[i] = 0;
                track->cb_color_slice[i] = 0xfffffff;
                track->cb_color_slice_idx[i] = 0;
        }
        track->cb_target_mask = 0xFFFFFFFF;
        track->cb_shader_mask = 0xFFFFFFFF;
        track->cb_dirty = true;

        track->db_depth_slice = 0xffffffff;
        track->db_depth_view = 0xFFFFC000;
        track->db_depth_size = 0xFFFFFFFF;
        track->db_depth_control = 0xFFFFFFFF;
        track->db_z_info = 0xFFFFFFFF;
        track->db_z_read_offset = 0xFFFFFFFF;
        track->db_z_write_offset = 0xFFFFFFFF;
        track->db_z_read_bo = NULL;
        track->db_z_write_bo = NULL;
        track->db_s_info = 0xFFFFFFFF;
        track->db_s_read_offset = 0xFFFFFFFF;
        track->db_s_write_offset = 0xFFFFFFFF;
        track->db_s_read_bo = NULL;
        track->db_s_write_bo = NULL;
        track->db_dirty = true;
        track->htile_bo = NULL;
        track->htile_offset = 0xFFFFFFFF;
        track->htile_surface = 0;

        for (i = 0; i < 4; i++) {
                track->vgt_strmout_size[i] = 0;
                track->vgt_strmout_bo[i] = NULL;
                track->vgt_strmout_bo_offset[i] = 0xFFFFFFFF;
        }
        track->streamout_dirty = true;
        track->sx_misc_kill_all_prims = false;
}

struct eg_surface {
        /* values gathered from cs */
        unsigned        nbx;
        unsigned        nby;
        unsigned        format;
        unsigned        mode;
        unsigned        nbanks;
        unsigned        bankw;
        unsigned        bankh;
        unsigned        tsplit;
        unsigned        mtilea;
        unsigned        nsamples;
        /* output values */
        unsigned        bpe;
        unsigned        layer_size;
        unsigned        palign;
        unsigned        halign;
        unsigned long   base_align;
};

static int evergreen_surface_check_linear(struct radeon_cs_parser *p,
                                          struct eg_surface *surf,
                                          const char *prefix)
{
        surf->layer_size = surf->nbx * surf->nby * surf->bpe * surf->nsamples;
        surf->base_align = surf->bpe;
        surf->palign = 1;
        surf->halign = 1;
        return 0;
}
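/*
 * Editorial worked example (assumed numbers, not from the original source):
 * for a linear general surface of 640x480 elements at 4 bytes per element
 * and 1 sample, layer_size = 640 * 480 * 4 * 1 = 1228800 bytes, and the
 * base address only has to be aligned to bpe (4 bytes); no pitch or height
 * padding applies in this mode.
 */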

static int evergreen_surface_check_linear_aligned(struct radeon_cs_parser *p,
                                                  struct eg_surface *surf,
                                                  const char *prefix)
{
        struct evergreen_cs_track *track = p->track;
        unsigned palign;

        palign = MAX(64, track->group_size / surf->bpe);
        surf->layer_size = surf->nbx * surf->nby * surf->bpe * surf->nsamples;
        surf->base_align = track->group_size;
        surf->palign = palign;
        surf->halign = 1;
        if (surf->nbx & (palign - 1)) {
                if (prefix) {
                        dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d\n",
                                 __func__, __LINE__, prefix, surf->nbx, palign);
                }
                return -EINVAL;
        }
        return 0;
}
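/*
 * Editorial worked example (assumed numbers, not from the original source):
 * with group_size = 256 and bpe = 4, palign = MAX(64, 256 / 4) = 64, so
 * the pitch in elements must be a multiple of 64 and the base address a
 * multiple of the 256-byte group size. The bitmask test above relies on
 * palign being a power of two.
 */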

static int evergreen_surface_check_1d(struct radeon_cs_parser *p,
                                      struct eg_surface *surf,
                                      const char *prefix)
{
        struct evergreen_cs_track *track = p->track;
        unsigned palign;

        palign = track->group_size / (8 * surf->bpe * surf->nsamples);
        palign = MAX(8, palign);
        surf->layer_size = surf->nbx * surf->nby * surf->bpe;
        surf->base_align = track->group_size;
        surf->palign = palign;
        surf->halign = 8;
        if ((surf->nbx & (palign - 1))) {
                if (prefix) {
                        dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d (%d %d %d)\n",
                                 __func__, __LINE__, prefix, surf->nbx, palign,
                                 track->group_size, surf->bpe, surf->nsamples);
                }
                return -EINVAL;
        }
        if ((surf->nby & (8 - 1))) {
                if (prefix) {
                        dev_warn(p->dev, "%s:%d %s height %d invalid must be aligned with 8\n",
                                 __func__, __LINE__, prefix, surf->nby);
                }
                return -EINVAL;
        }
        return 0;
}
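/*
 * Editorial worked example (assumed numbers): a 1D micro tile is 8x8
 * elements, so one tile row of the group contains 8 * bpe * nsamples
 * bytes per element column. With group_size = 256, bpe = 4 and
 * nsamples = 1 this gives palign = MAX(8, 256 / 32) = 8, i.e. the pitch
 * must cover whole tiles, and the height must be a multiple of the
 * 8-element tile side enforced just above.
 */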

static int evergreen_surface_check_2d(struct radeon_cs_parser *p,
                                      struct eg_surface *surf,
                                      const char *prefix)
{
        struct evergreen_cs_track *track = p->track;
        unsigned palign, halign, tileb, slice_pt;
        unsigned mtile_pr, mtile_ps, mtileb;

        tileb = 64 * surf->bpe * surf->nsamples;
        slice_pt = 1;
        if (tileb > surf->tsplit) {
                slice_pt = tileb / surf->tsplit;
        }
        tileb = tileb / slice_pt;
        /* macro tile width & height */
        palign = (8 * surf->bankw * track->npipes) * surf->mtilea;
        halign = (8 * surf->bankh * surf->nbanks) / surf->mtilea;
        mtileb = (palign / 8) * (halign / 8) * tileb;
        mtile_pr = surf->nbx / palign;
        mtile_ps = (mtile_pr * surf->nby) / halign;
        surf->layer_size = mtile_ps * mtileb * slice_pt;
        surf->base_align = (palign / 8) * (halign / 8) * tileb;
        surf->palign = palign;
        surf->halign = halign;

        if ((surf->nbx & (palign - 1))) {
                if (prefix) {
                        dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d\n",
                                 __func__, __LINE__, prefix, surf->nbx, palign);
                }
                return -EINVAL;
        }
        if ((surf->nby & (halign - 1))) {
                if (prefix) {
                        dev_warn(p->dev, "%s:%d %s height %d invalid must be aligned with %d\n",
                                 __func__, __LINE__, prefix, surf->nby, halign);
                }
                return -EINVAL;
        }

        return 0;
}
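/*
 * Editorial worked example (assumed numbers): take npipes = 4, nbanks = 8,
 * bankw = bankh = 1, mtilea = 2, bpe = 4, nsamples = 1, tsplit = 1024.
 * An 8x8 tile is tileb = 64 * 4 = 256 bytes, below the 1024-byte tile
 * split, so slice_pt stays 1. The macro tile is then
 * palign = 8 * 1 * 4 * 2 = 64 elements wide and
 * halign = 8 * 1 * 8 / 2 = 32 elements tall, and one macro tile is
 * mtileb = (64 / 8) * (32 / 8) * 256 = 8192 bytes, which is also the
 * required base alignment.
 */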

static int evergreen_surface_check(struct radeon_cs_parser *p,
                                   struct eg_surface *surf,
                                   const char *prefix)
{
        /* some common values computed here */
        surf->bpe = r600_fmt_get_blocksize(surf->format);

        switch (surf->mode) {
        case ARRAY_LINEAR_GENERAL:
                return evergreen_surface_check_linear(p, surf, prefix);
        case ARRAY_LINEAR_ALIGNED:
                return evergreen_surface_check_linear_aligned(p, surf, prefix);
        case ARRAY_1D_TILED_THIN1:
                return evergreen_surface_check_1d(p, surf, prefix);
        case ARRAY_2D_TILED_THIN1:
                return evergreen_surface_check_2d(p, surf, prefix);
        default:
                dev_warn(p->dev, "%s:%d %s invalid array mode %d\n",
                                __func__, __LINE__, prefix, surf->mode);
                return -EINVAL;
        }
        return -EINVAL;
}

static int evergreen_surface_value_conv_check(struct radeon_cs_parser *p,
                                              struct eg_surface *surf,
                                              const char *prefix)
{
        switch (surf->mode) {
        case ARRAY_2D_TILED_THIN1:
                break;
        case ARRAY_LINEAR_GENERAL:
        case ARRAY_LINEAR_ALIGNED:
        case ARRAY_1D_TILED_THIN1:
                return 0;
        default:
                dev_warn(p->dev, "%s:%d %s invalid array mode %d\n",
                                __func__, __LINE__, prefix, surf->mode);
                return -EINVAL;
        }

        switch (surf->nbanks) {
        case 0: surf->nbanks = 2; break;
        case 1: surf->nbanks = 4; break;
        case 2: surf->nbanks = 8; break;
        case 3: surf->nbanks = 16; break;
        default:
                dev_warn(p->dev, "%s:%d %s invalid number of banks %d\n",
                         __func__, __LINE__, prefix, surf->nbanks);
                return -EINVAL;
        }
        switch (surf->bankw) {
        case 0: surf->bankw = 1; break;
        case 1: surf->bankw = 2; break;
        case 2: surf->bankw = 4; break;
        case 3: surf->bankw = 8; break;
        default:
                dev_warn(p->dev, "%s:%d %s invalid bankw %d\n",
                         __func__, __LINE__, prefix, surf->bankw);
                return -EINVAL;
        }
        switch (surf->bankh) {
        case 0: surf->bankh = 1; break;
        case 1: surf->bankh = 2; break;
        case 2: surf->bankh = 4; break;
        case 3: surf->bankh = 8; break;
        default:
                dev_warn(p->dev, "%s:%d %s invalid bankh %d\n",
                         __func__, __LINE__, prefix, surf->bankh);
                return -EINVAL;
        }
        switch (surf->mtilea) {
        case 0: surf->mtilea = 1; break;
        case 1: surf->mtilea = 2; break;
        case 2: surf->mtilea = 4; break;
        case 3: surf->mtilea = 8; break;
        default:
                dev_warn(p->dev, "%s:%d %s invalid macro tile aspect %d\n",
                         __func__, __LINE__, prefix, surf->mtilea);
                return -EINVAL;
        }
        switch (surf->tsplit) {
        case 0: surf->tsplit = 64; break;
        case 1: surf->tsplit = 128; break;
        case 2: surf->tsplit = 256; break;
        case 3: surf->tsplit = 512; break;
        case 4: surf->tsplit = 1024; break;
        case 5: surf->tsplit = 2048; break;
        case 6: surf->tsplit = 4096; break;
        default:
                dev_warn(p->dev, "%s:%d %s invalid tile split %d\n",
                         __func__, __LINE__, prefix, surf->tsplit);
                return -EINVAL;
        }
        return 0;
}
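/*
 * Editorial worked example (not from the original source): the raw
 * register fields are small log2-style encodings that this helper expands
 * in place. For a 2D tiled surface with NUM_BANKS = 2, BANK_WIDTH = 0,
 * BANK_HEIGHT = 1, MACRO_TILE_ASPECT = 1 and TILE_SPLIT = 4, the decoded
 * values become nbanks = 8, bankw = 1, bankh = 2, mtilea = 2 and
 * tsplit = 1024 bytes, which is what the 2D alignment math above expects.
 */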

static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned id)
{
        struct evergreen_cs_track *track = p->track;
        struct eg_surface surf;
        unsigned pitch, slice, mslice;
        unsigned long offset;
        int r;

        mslice = G_028C6C_SLICE_MAX(track->cb_color_view[id]) + 1;
        pitch = track->cb_color_pitch[id];
        slice = track->cb_color_slice[id];
        surf.nbx = (pitch + 1) * 8;
        surf.nby = ((slice + 1) * 64) / surf.nbx;
        surf.mode = G_028C70_ARRAY_MODE(track->cb_color_info[id]);
        surf.format = G_028C70_FORMAT(track->cb_color_info[id]);
        surf.tsplit = G_028C74_TILE_SPLIT(track->cb_color_attrib[id]);
        surf.nbanks = G_028C74_NUM_BANKS(track->cb_color_attrib[id]);
        surf.bankw = G_028C74_BANK_WIDTH(track->cb_color_attrib[id]);
        surf.bankh = G_028C74_BANK_HEIGHT(track->cb_color_attrib[id]);
        surf.mtilea = G_028C74_MACRO_TILE_ASPECT(track->cb_color_attrib[id]);
        surf.nsamples = 1;

        if (!r600_fmt_is_valid_color(surf.format)) {
                dev_warn(p->dev, "%s:%d cb invalid format %d for %d (0x%08x)\n",
                         __func__, __LINE__, surf.format,
                        id, track->cb_color_info[id]);
                return -EINVAL;
        }

        r = evergreen_surface_value_conv_check(p, &surf, "cb");
        if (r) {
                return r;
        }

        r = evergreen_surface_check(p, &surf, "cb");
        if (r) {
                dev_warn(p->dev, "%s:%d cb[%d] invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
                         __func__, __LINE__, id, track->cb_color_pitch[id],
                         track->cb_color_slice[id], track->cb_color_attrib[id],
                         track->cb_color_info[id]);
                return r;
        }

        offset = track->cb_color_bo_offset[id] << 8;
        if (offset & (surf.base_align - 1)) {
                dev_warn(p->dev, "%s:%d cb[%d] bo base %ld not aligned with %ld\n",
                         __func__, __LINE__, id, offset, surf.base_align);
                return -EINVAL;
        }

        offset += surf.layer_size * mslice;
        if (offset > radeon_bo_size(track->cb_color_bo[id])) {
                /* old ddx drivers are broken: they allocate the bo with
                 * w*h*bpp but program the slice with ALIGN(h, 8); catch
                 * this and patch the command stream.
                 */
                if (!surf.mode) {
                        volatile u32 *ib = p->ib.ptr;
                        unsigned long tmp, nby, bsize, size, min = 0;

                        /* find the height the ddx wants */
                        if (surf.nby > 8) {
                                min = surf.nby - 8;
                        }
                        bsize = radeon_bo_size(track->cb_color_bo[id]);
                        tmp = track->cb_color_bo_offset[id] << 8;
                        for (nby = surf.nby; nby > min; nby--) {
                                size = nby * surf.nbx * surf.bpe * surf.nsamples;
                                if ((tmp + size * mslice) <= bsize) {
                                        break;
                                }
                        }
                        if (nby > min) {
                                surf.nby = nby;
                                slice = ((nby * surf.nbx) / 64) - 1;
                                if (!evergreen_surface_check(p, &surf, "cb")) {
                                        /* check if this one works */
                                        tmp += surf.layer_size * mslice;
                                        if (tmp <= bsize) {
                                                ib[track->cb_color_slice_idx[id]] = slice;
                                                goto old_ddx_ok;
                                        }
                                }
                        }
                }
                dev_warn(p->dev, "%s:%d cb[%d] bo too small (layer size %d, "
                         "offset %d, max layer %d, bo size %ld, slice %d)\n",
                         __func__, __LINE__, id, surf.layer_size,
                        track->cb_color_bo_offset[id] << 8, mslice,
                        radeon_bo_size(track->cb_color_bo[id]), slice);
                dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n",
                         __func__, __LINE__, surf.nbx, surf.nby,
                        surf.mode, surf.bpe, surf.nsamples,
                        surf.bankw, surf.bankh,
                        surf.tsplit, surf.mtilea);
                return -EINVAL;
        }
old_ddx_ok:

        return 0;
}
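/*
 * Editorial worked example of the old-ddx fixup above (assumed numbers):
 * suppose a linear 256x100 surface with bpe = 4 and mslice = 1 lives in a
 * bo of 256 * 100 * 4 = 102400 bytes, but CB_COLOR_SLICE was programmed
 * for ALIGN(100, 8) = 104 rows. The loop walks nby back from 104 toward
 * min = 96 until 256 * nby * 4 fits the bo (nby = 100), then rewrites the
 * slice dword in the IB as (100 * 256) / 64 - 1 = 399 instead of
 * rejecting the whole command stream.
 */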

static int evergreen_cs_track_validate_htile(struct radeon_cs_parser *p,
                                                unsigned nbx, unsigned nby)
{
        struct evergreen_cs_track *track = p->track;
        unsigned long size;

        if (track->htile_bo == NULL) {
                dev_warn(p->dev, "%s:%d htile enabled without htile surface 0x%08x\n",
                                __func__, __LINE__, track->db_z_info);
                return -EINVAL;
        }

        if (G_028ABC_LINEAR(track->htile_surface)) {
                /* pitch must be 16 htiles aligned == 16 * 8 pixel aligned */
                nbx = round_up(nbx, 16 * 8);
                /* height is npipes htiles aligned == npipes * 8 pixel aligned */
                nby = round_up(nby, track->npipes * 8);
        } else {
                /* always assume 8x8 htile */
                /* alignment is htile align * 8; the htile alignment varies
                 * with the number of pipes and the tile width and height
                 */
                switch (track->npipes) {
                case 8:
                        /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8 */
                        nbx = round_up(nbx, 64 * 8);
                        nby = round_up(nby, 64 * 8);
                        break;
                case 4:
                        /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8 */
                        nbx = round_up(nbx, 64 * 8);
                        nby = round_up(nby, 32 * 8);
                        break;
                case 2:
                        /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8 */
                        nbx = round_up(nbx, 32 * 8);
                        nby = round_up(nby, 32 * 8);
                        break;
                case 1:
                        /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8 */
                        nbx = round_up(nbx, 32 * 8);
                        nby = round_up(nby, 16 * 8);
                        break;
                default:
                        dev_warn(p->dev, "%s:%d invalid num pipes %d\n",
                                        __func__, __LINE__, track->npipes);
                        return -EINVAL;
                }
        }
        /* compute number of htiles */
        nbx = nbx >> 3;
        nby = nby >> 3;
        /* size must be aligned on npipes * 2K boundary */
        size = roundup(nbx * nby * 4, track->npipes * (2 << 10));
        size += track->htile_offset;

        if (size > radeon_bo_size(track->htile_bo)) {
                dev_warn(p->dev, "%s:%d htile surface too small %ld for %ld (%d %d)\n",
                                __func__, __LINE__, radeon_bo_size(track->htile_bo),
                                size, nbx, nby);
                return -EINVAL;
        }
        return 0;
}
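/*
 * Editorial worked example (assumed numbers): each htile covers an 8x8
 * pixel block and costs 4 bytes. For a 1024x768 linear htile surface on a
 * 2-pipe chip, nbx rounds up to a multiple of 16 * 8 = 128 (1024 stays)
 * and nby to a multiple of 2 * 8 = 16 (768 stays), giving
 * (1024 >> 3) * (768 >> 3) * 4 = 128 * 96 * 4 = 49152 bytes, already a
 * multiple of the npipes * 2K = 4096-byte boundary.
 */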

static int evergreen_cs_track_validate_stencil(struct radeon_cs_parser *p)
{
        struct evergreen_cs_track *track = p->track;
        struct eg_surface surf;
        unsigned pitch, slice, mslice;
        unsigned long offset;
        int r;

        mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1;
        pitch = G_028058_PITCH_TILE_MAX(track->db_depth_size);
        slice = track->db_depth_slice;
        surf.nbx = (pitch + 1) * 8;
        surf.nby = ((slice + 1) * 64) / surf.nbx;
        surf.mode = G_028040_ARRAY_MODE(track->db_z_info);
        surf.format = G_028044_FORMAT(track->db_s_info);
        surf.tsplit = G_028044_TILE_SPLIT(track->db_s_info);
        surf.nbanks = G_028040_NUM_BANKS(track->db_z_info);
        surf.bankw = G_028040_BANK_WIDTH(track->db_z_info);
        surf.bankh = G_028040_BANK_HEIGHT(track->db_z_info);
        surf.mtilea = G_028040_MACRO_TILE_ASPECT(track->db_z_info);
        surf.nsamples = 1;

        if (surf.format != 1) {
                dev_warn(p->dev, "%s:%d stencil invalid format %d\n",
                         __func__, __LINE__, surf.format);
                return -EINVAL;
        }
        /* replace by color format so we can use same code */
        surf.format = V_028C70_COLOR_8;

        r = evergreen_surface_value_conv_check(p, &surf, "stencil");
        if (r) {
                return r;
        }

        r = evergreen_surface_check(p, &surf, NULL);
        if (r) {
                /* old userspace doesn't compute proper depth/stencil
                 * alignment; check that alignment against a larger bytes
                 * per element and only report an error if that alignment
                 * is wrong too.
                 */
                surf.format = V_028C70_COLOR_8_8_8_8;
                r = evergreen_surface_check(p, &surf, "stencil");
                if (r) {
                        dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
                                 __func__, __LINE__, track->db_depth_size,
                                 track->db_depth_slice, track->db_s_info, track->db_z_info);
                }
                return r;
        }

        offset = track->db_s_read_offset << 8;
        if (offset & (surf.base_align - 1)) {
                dev_warn(p->dev, "%s:%d stencil read bo base %ld not aligned with %ld\n",
                         __func__, __LINE__, offset, surf.base_align);
                return -EINVAL;
        }
        offset += surf.layer_size * mslice;
        if (offset > radeon_bo_size(track->db_s_read_bo)) {
                dev_warn(p->dev, "%s:%d stencil read bo too small (layer size %d, "
                         "offset %ld, max layer %d, bo size %ld)\n",
                         __func__, __LINE__, surf.layer_size,
                        (unsigned long)track->db_s_read_offset << 8, mslice,
                        radeon_bo_size(track->db_s_read_bo));
                dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
                         __func__, __LINE__, track->db_depth_size,
                         track->db_depth_slice, track->db_s_info, track->db_z_info);
                return -EINVAL;
        }

        offset = track->db_s_write_offset << 8;
        if (offset & (surf.base_align - 1)) {
                dev_warn(p->dev, "%s:%d stencil write bo base %ld not aligned with %ld\n",
                         __func__, __LINE__, offset, surf.base_align);
                return -EINVAL;
        }
        offset += surf.layer_size * mslice;
        if (offset > radeon_bo_size(track->db_s_write_bo)) {
                dev_warn(p->dev, "%s:%d stencil write bo too small (layer size %d, "
                         "offset %ld, max layer %d, bo size %ld)\n",
                         __func__, __LINE__, surf.layer_size,
                        (unsigned long)track->db_s_write_offset << 8, mslice,
                        radeon_bo_size(track->db_s_write_bo));
                return -EINVAL;
        }

        /* hyperz */
        if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) {
                r = evergreen_cs_track_validate_htile(p, surf.nbx, surf.nby);
                if (r) {
                        return r;
                }
        }

        return 0;
}

static int evergreen_cs_track_validate_depth(struct radeon_cs_parser *p)
{
        struct evergreen_cs_track *track = p->track;
        struct eg_surface surf;
        unsigned pitch, slice, mslice;
        unsigned long offset;
        int r;

        mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1;
        pitch = G_028058_PITCH_TILE_MAX(track->db_depth_size);
        slice = track->db_depth_slice;
        surf.nbx = (pitch + 1) * 8;
        surf.nby = ((slice + 1) * 64) / surf.nbx;
        surf.mode = G_028040_ARRAY_MODE(track->db_z_info);
        surf.format = G_028040_FORMAT(track->db_z_info);
        surf.tsplit = G_028040_TILE_SPLIT(track->db_z_info);
        surf.nbanks = G_028040_NUM_BANKS(track->db_z_info);
        surf.bankw = G_028040_BANK_WIDTH(track->db_z_info);
        surf.bankh = G_028040_BANK_HEIGHT(track->db_z_info);
        surf.mtilea = G_028040_MACRO_TILE_ASPECT(track->db_z_info);
        surf.nsamples = 1;

        switch (surf.format) {
        case V_028040_Z_16:
                surf.format = V_028C70_COLOR_16;
                break;
        case V_028040_Z_24:
        case V_028040_Z_32_FLOAT:
                surf.format = V_028C70_COLOR_8_8_8_8;
                break;
        default:
                dev_warn(p->dev, "%s:%d depth invalid format %d\n",
                         __func__, __LINE__, surf.format);
                return -EINVAL;
        }

        r = evergreen_surface_value_conv_check(p, &surf, "depth");
        if (r) {
                dev_warn(p->dev, "%s:%d depth invalid (0x%08x 0x%08x 0x%08x)\n",
                         __func__, __LINE__, track->db_depth_size,
                         track->db_depth_slice, track->db_z_info);
                return r;
        }

        r = evergreen_surface_check(p, &surf, "depth");
        if (r) {
                dev_warn(p->dev, "%s:%d depth invalid (0x%08x 0x%08x 0x%08x)\n",
                         __func__, __LINE__, track->db_depth_size,
                         track->db_depth_slice, track->db_z_info);
                return r;
        }

        offset = track->db_z_read_offset << 8;
        if (offset & (surf.base_align - 1)) {
                dev_warn(p->dev, "%s:%d depth read bo base %ld not aligned with %ld\n",
                         __func__, __LINE__, offset, surf.base_align);
                return -EINVAL;
        }
        offset += surf.layer_size * mslice;
        if (offset > radeon_bo_size(track->db_z_read_bo)) {
                dev_warn(p->dev, "%s:%d depth read bo too small (layer size %d, "
                         "offset %ld, max layer %d, bo size %ld)\n",
                         __func__, __LINE__, surf.layer_size,
                        (unsigned long)track->db_z_read_offset << 8, mslice,
                        radeon_bo_size(track->db_z_read_bo));
                return -EINVAL;
        }

        offset = track->db_z_write_offset << 8;
        if (offset & (surf.base_align - 1)) {
                dev_warn(p->dev, "%s:%d depth write bo base %ld not aligned with %ld\n",
                         __func__, __LINE__, offset, surf.base_align);
                return -EINVAL;
        }
        offset += surf.layer_size * mslice;
        if (offset > radeon_bo_size(track->db_z_write_bo)) {
                dev_warn(p->dev, "%s:%d depth write bo too small (layer size %d, "
                         "offset %ld, max layer %d, bo size %ld)\n",
                         __func__, __LINE__, surf.layer_size,
                        (unsigned long)track->db_z_write_offset << 8, mslice,
                        radeon_bo_size(track->db_z_write_bo));
                return -EINVAL;
        }

        /* hyperz */
        if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) {
                r = evergreen_cs_track_validate_htile(p, surf.nbx, surf.nby);
                if (r) {
                        return r;
                }
        }

        return 0;
}

static int evergreen_cs_track_validate_texture(struct radeon_cs_parser *p,
                                               struct radeon_bo *texture,
                                               struct radeon_bo *mipmap,
                                               unsigned idx)
{
        struct eg_surface surf;
        unsigned long toffset, moffset;
        unsigned dim, llevel, mslice, width, height, depth, i;
        u32 texdw[8];
        int r;

        texdw[0] = radeon_get_ib_value(p, idx + 0);
        texdw[1] = radeon_get_ib_value(p, idx + 1);
        texdw[2] = radeon_get_ib_value(p, idx + 2);
        texdw[3] = radeon_get_ib_value(p, idx + 3);
        texdw[4] = radeon_get_ib_value(p, idx + 4);
        texdw[5] = radeon_get_ib_value(p, idx + 5);
        texdw[6] = radeon_get_ib_value(p, idx + 6);
        texdw[7] = radeon_get_ib_value(p, idx + 7);
        dim = G_030000_DIM(texdw[0]);
        llevel = G_030014_LAST_LEVEL(texdw[5]);
        mslice = G_030014_LAST_ARRAY(texdw[5]) + 1;
        width = G_030000_TEX_WIDTH(texdw[0]) + 1;
        height = G_030004_TEX_HEIGHT(texdw[1]) + 1;
        depth = G_030004_TEX_DEPTH(texdw[1]) + 1;
        surf.format = G_03001C_DATA_FORMAT(texdw[7]);
        surf.nbx = (G_030000_PITCH(texdw[0]) + 1) * 8;
        surf.nbx = r600_fmt_get_nblocksx(surf.format, surf.nbx);
        surf.nby = r600_fmt_get_nblocksy(surf.format, height);
        surf.mode = G_030004_ARRAY_MODE(texdw[1]);
        surf.tsplit = G_030018_TILE_SPLIT(texdw[6]);
        surf.nbanks = G_03001C_NUM_BANKS(texdw[7]);
        surf.bankw = G_03001C_BANK_WIDTH(texdw[7]);
        surf.bankh = G_03001C_BANK_HEIGHT(texdw[7]);
        surf.mtilea = G_03001C_MACRO_TILE_ASPECT(texdw[7]);
        surf.nsamples = 1;
        toffset = texdw[2] << 8;
        moffset = texdw[3] << 8;

        if (!r600_fmt_is_valid_texture(surf.format, p->family)) {
                dev_warn(p->dev, "%s:%d texture invalid format %d\n",
                         __func__, __LINE__, surf.format);
                return -EINVAL;
        }
        switch (dim) {
        case V_030000_SQ_TEX_DIM_1D:
        case V_030000_SQ_TEX_DIM_2D:
        case V_030000_SQ_TEX_DIM_CUBEMAP:
        case V_030000_SQ_TEX_DIM_1D_ARRAY:
        case V_030000_SQ_TEX_DIM_2D_ARRAY:
                depth = 1;
                break;
        case V_030000_SQ_TEX_DIM_2D_MSAA:
        case V_030000_SQ_TEX_DIM_2D_ARRAY_MSAA:
                surf.nsamples = 1 << llevel;
                llevel = 0;
                depth = 1;
                break;
        case V_030000_SQ_TEX_DIM_3D:
                break;
        default:
                dev_warn(p->dev, "%s:%d texture invalid dimension %d\n",
                         __func__, __LINE__, dim);
                return -EINVAL;
        }

        r = evergreen_surface_value_conv_check(p, &surf, "texture");
        if (r) {
                return r;
        }

        /* align height */
        evergreen_surface_check(p, &surf, NULL);
        surf.nby = ALIGN(surf.nby, surf.halign);

        r = evergreen_surface_check(p, &surf, "texture");
        if (r) {
                dev_warn(p->dev, "%s:%d texture invalid 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n",
                         __func__, __LINE__, texdw[0], texdw[1], texdw[4],
                         texdw[5], texdw[6], texdw[7]);
                return r;
        }

        /* check texture size */
        if (toffset & (surf.base_align - 1)) {
                dev_warn(p->dev, "%s:%d texture bo base %ld not aligned with %ld\n",
                         __func__, __LINE__, toffset, surf.base_align);
                return -EINVAL;
        }
        if (surf.nsamples <= 1 && moffset & (surf.base_align - 1)) {
                dev_warn(p->dev, "%s:%d mipmap bo base %ld not aligned with %ld\n",
                         __func__, __LINE__, moffset, surf.base_align);
                return -EINVAL;
        }
        if (dim == SQ_TEX_DIM_3D) {
                toffset += surf.layer_size * depth;
        } else {
                toffset += surf.layer_size * mslice;
        }
        if (toffset > radeon_bo_size(texture)) {
                dev_warn(p->dev, "%s:%d texture bo too small (layer size %d, "
                         "offset %ld, max layer %d, depth %d, bo size %ld) (%d %d)\n",
                         __func__, __LINE__, surf.layer_size,
                        (unsigned long)texdw[2] << 8, mslice,
                        depth, radeon_bo_size(texture),
                        surf.nbx, surf.nby);
                return -EINVAL;
        }

        if (!mipmap) {
                if (llevel) {
                        dev_warn(p->dev, "%s:%i got NULL MIP_ADDRESS relocation\n",
                                 __func__, __LINE__);
                        return -EINVAL;
                } else {
                        return 0; /* everything's ok */
                }
        }

        /* check mipmap size */
        for (i = 1; i <= llevel; i++) {
                unsigned w, h, d;

                w = r600_mip_minify(width, i);
                h = r600_mip_minify(height, i);
                d = r600_mip_minify(depth, i);
                surf.nbx = r600_fmt_get_nblocksx(surf.format, w);
                surf.nby = r600_fmt_get_nblocksy(surf.format, h);

                switch (surf.mode) {
                case ARRAY_2D_TILED_THIN1:
                        if (surf.nbx < surf.palign || surf.nby < surf.halign) {
                                surf.mode = ARRAY_1D_TILED_THIN1;
                        }
                        /* recompute alignment */
                        evergreen_surface_check(p, &surf, NULL);
                        break;
                case ARRAY_LINEAR_GENERAL:
                case ARRAY_LINEAR_ALIGNED:
                case ARRAY_1D_TILED_THIN1:
                        break;
                default:
                        dev_warn(p->dev, "%s:%d invalid array mode %d\n",
                                 __func__, __LINE__, surf.mode);
                        return -EINVAL;
                }
                surf.nbx = ALIGN(surf.nbx, surf.palign);
                surf.nby = ALIGN(surf.nby, surf.halign);

                r = evergreen_surface_check(p, &surf, "mipmap");
                if (r) {
                        return r;
                }

                if (dim == SQ_TEX_DIM_3D) {
                        moffset += surf.layer_size * d;
                } else {
                        moffset += surf.layer_size * mslice;
                }
                if (moffset > radeon_bo_size(mipmap)) {
                        dev_warn(p->dev, "%s:%d mipmap [%d] bo too small (layer size %d, "
                                        "offset %ld, coffset %ld, max layer %d, depth %d, "
                                        "bo size %ld) level0 (%d %d %d)\n",
                                        __func__, __LINE__, i, surf.layer_size,
                                        (unsigned long)texdw[3] << 8, moffset, mslice,
                                        d, radeon_bo_size(mipmap),
                                        width, height, depth);
                        dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n",
                                 __func__, __LINE__, surf.nbx, surf.nby,
                                surf.mode, surf.bpe, surf.nsamples,
                                surf.bankw, surf.bankh,
                                surf.tsplit, surf.mtilea);
                        return -EINVAL;
                }
        }

        return 0;
}
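/*
 * Editorial note on the mip walk above (an assumption about a helper
 * defined elsewhere, not taken from this file): r600_mip_minify()
 * presumably halves a dimension per level with a floor of 1, e.g. a
 * 256x64 level 0 yields 128x32 at level 1 and 64x16 at level 2; each
 * level is then re-aligned to palign/halign before its layer size is
 * accumulated into moffset and checked against the mipmap bo size.
 */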

static int evergreen_cs_track_check(struct radeon_cs_parser *p)
{
        struct evergreen_cs_track *track = p->track;
        unsigned tmp, i;
        int r;
        unsigned buffer_mask = 0;

        /* check streamout */
        if (track->streamout_dirty && track->vgt_strmout_config) {
                for (i = 0; i < 4; i++) {
                        if (track->vgt_strmout_config & (1 << i)) {
                                buffer_mask |= (track->vgt_strmout_buffer_config >> (i * 4)) & 0xf;
                        }
                }

                for (i = 0; i < 4; i++) {
                        if (buffer_mask & (1 << i)) {
                                if (track->vgt_strmout_bo[i]) {
                                        u64 offset = (u64)track->vgt_strmout_bo_offset[i] +
                                                        (u64)track->vgt_strmout_size[i];
                                        if (offset > radeon_bo_size(track->vgt_strmout_bo[i])) {
                                                DRM_ERROR("streamout %d bo too small: 0x%llx, 0x%lx\n",
                                                          i, offset,
                                                          radeon_bo_size(track->vgt_strmout_bo[i]));
                                                return -EINVAL;
                                        }
                                } else {
                                        dev_warn(p->dev, "No buffer for streamout %d\n", i);
                                        return -EINVAL;
                                }
                        }
                }
                track->streamout_dirty = false;
        }

        if (track->sx_misc_kill_all_prims)
                return 0;

        /* check that we have a cb for each enabled target */
        if (track->cb_dirty) {
                tmp = track->cb_target_mask;
                for (i = 0; i < 8; i++) {
                        u32 format = G_028C70_FORMAT(track->cb_color_info[i]);

                        if (format != V_028C70_COLOR_INVALID &&
                            (tmp >> (i * 4)) & 0xF) {
                                /* at least one component is enabled */
                                if (track->cb_color_bo[i] == NULL) {
                                        dev_warn(p->dev, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n",
                                                __func__, __LINE__, track->cb_target_mask, track->cb_shader_mask, i);
                                        return -EINVAL;
                                }
                                /* check cb */
                                r = evergreen_cs_track_validate_cb(p, i);
                                if (r) {
                                        return r;
                                }
                        }
                }
                track->cb_dirty = false;
        }

        if (track->db_dirty) {
                /* Check stencil buffer */
                if (G_028044_FORMAT(track->db_s_info) != V_028044_STENCIL_INVALID &&
                    G_028800_STENCIL_ENABLE(track->db_depth_control)) {
                        r = evergreen_cs_track_validate_stencil(p);
                        if (r)
                                return r;
                }
                /* Check depth buffer */
                if (G_028040_FORMAT(track->db_z_info) != V_028040_Z_INVALID &&
                    G_028800_Z_ENABLE(track->db_depth_control)) {
                        r = evergreen_cs_track_validate_depth(p);
                        if (r)
                                return r;
                }
                track->db_dirty = false;
        }

        return 0;
}
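/*
 * Editorial worked example of the streamout check above (assumed numbers):
 * with vgt_strmout_config = 0x3 (streams 0 and 1 enabled) and
 * vgt_strmout_buffer_config = 0x21 (stream 0's nibble is 0x1, stream 1's
 * nibble is 0x2), buffer_mask becomes 0x1 | 0x2 = 0x3, so buffers 0 and 1
 * must each have a bo large enough to hold offset + size.
 */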

/**
 * evergreen_cs_packet_parse_vline() - parse userspace VLINE packet
 * @p: parser structure holding parsing context.
 *
 * This is an Evergreen(+)-specific function for parsing VLINE packets.
 * The real work is done by the r600_cs_common_vline_parse() function.
 * Here we just set up the ASIC-specific register tables and call
 * the common implementation function.
 */
static int evergreen_cs_packet_parse_vline(struct radeon_cs_parser *p)
{

        static uint32_t vline_start_end[6] = {
                EVERGREEN_VLINE_START_END + EVERGREEN_CRTC0_REGISTER_OFFSET,
                EVERGREEN_VLINE_START_END + EVERGREEN_CRTC1_REGISTER_OFFSET,
                EVERGREEN_VLINE_START_END + EVERGREEN_CRTC2_REGISTER_OFFSET,
                EVERGREEN_VLINE_START_END + EVERGREEN_CRTC3_REGISTER_OFFSET,
                EVERGREEN_VLINE_START_END + EVERGREEN_CRTC4_REGISTER_OFFSET,
                EVERGREEN_VLINE_START_END + EVERGREEN_CRTC5_REGISTER_OFFSET
        };
        static uint32_t vline_status[6] = {
                EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
                EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
                EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
                EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
                EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
                EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET
        };

        return r600_cs_common_vline_parse(p, vline_start_end, vline_status);
}

static int evergreen_packet0_check(struct radeon_cs_parser *p,
                                   struct radeon_cs_packet *pkt,
                                   unsigned idx, unsigned reg)
{
        int r;

        switch (reg) {
        case EVERGREEN_VLINE_START_END:
                r = evergreen_cs_packet_parse_vline(p);
                if (r) {
                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
                                        idx, reg);
                        return r;
                }
                break;
        default:
                printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
                       reg, idx);
                return -EINVAL;
        }
        return 0;
}

static int evergreen_cs_parse_packet0(struct radeon_cs_parser *p,
                                      struct radeon_cs_packet *pkt)
{
        unsigned reg, i;
        unsigned idx;
        int r;

        idx = pkt->idx + 1;
        reg = pkt->reg;
        for (i = 0; i <= pkt->count; i++, idx++, reg += 4) {
                r = evergreen_packet0_check(p, pkt, idx, reg);
                if (r) {
                        return r;
                }
        }
        return 0;
}

/**
 * evergreen_cs_check_reg() - check if register is authorized or not
 * @p: parser structure holding parsing context
 * @reg: register we are testing
 * @idx: index into the cs buffer
 *
 * This function will test against evergreen_reg_safe_bm and return 0
 * if the register is safe. If the register is not flagged as safe, this
 * function will test it against a list of registers needing special
 * handling.
 */
static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
{
        struct evergreen_cs_track *track = (struct evergreen_cs_track *)p->track;
        struct radeon_cs_reloc *reloc;
        u32 last_reg;
        u32 m, i, tmp, *ib;
        int r;

        if (p->rdev->family >= CHIP_CAYMAN)
                last_reg = ARRAY_SIZE(cayman_reg_safe_bm);
        else
                last_reg = ARRAY_SIZE(evergreen_reg_safe_bm);
1106
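        /*
         * Editorial note (a reading of the code, not an original comment):
         * each u32 in the safe bitmap covers 32 consecutive registers, i.e.
         * 128 bytes of register space, so reg >> 7 selects the word and
         * (reg >> 2) & 31 the bit within it. For example reg = 0x28C70
         * maps to word 0x28C70 >> 7 = 1304, bit (0x28C70 >> 2) & 31 = 28.
         * A clear bit means the register is unconditionally safe.
         */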
        i = (reg >> 7);
        if (i >= last_reg) {
                dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
                return -EINVAL;
        }
        m = 1 << ((reg >> 2) & 31);
        if (p->rdev->family >= CHIP_CAYMAN) {
                if (!(cayman_reg_safe_bm[i] & m))
                        return 0;
        } else {
                if (!(evergreen_reg_safe_bm[i] & m))
                        return 0;
        }
        ib = p->ib.ptr;
        switch (reg) {
        /* force the following regs to 0 in an attempt to disable the out
         * buffer; we would need to understand better how it works to
         * perform a proper security check on it (Jerome)
         */
        case SQ_ESGS_RING_SIZE:
        case SQ_GSVS_RING_SIZE:
        case SQ_ESTMP_RING_SIZE:
        case SQ_GSTMP_RING_SIZE:
        case SQ_HSTMP_RING_SIZE:
        case SQ_LSTMP_RING_SIZE:
        case SQ_PSTMP_RING_SIZE:
        case SQ_VSTMP_RING_SIZE:
        case SQ_ESGS_RING_ITEMSIZE:
        case SQ_ESTMP_RING_ITEMSIZE:
        case SQ_GSTMP_RING_ITEMSIZE:
        case SQ_GSVS_RING_ITEMSIZE:
        case SQ_GS_VERT_ITEMSIZE:
        case SQ_GS_VERT_ITEMSIZE_1:
        case SQ_GS_VERT_ITEMSIZE_2:
        case SQ_GS_VERT_ITEMSIZE_3:
        case SQ_GSVS_RING_OFFSET_1:
        case SQ_GSVS_RING_OFFSET_2:
        case SQ_GSVS_RING_OFFSET_3:
        case SQ_HSTMP_RING_ITEMSIZE:
        case SQ_LSTMP_RING_ITEMSIZE:
        case SQ_PSTMP_RING_ITEMSIZE:
        case SQ_VSTMP_RING_ITEMSIZE:
        case VGT_TF_RING_SIZE:
                /* get value to populate the IB, don't remove */
                /*tmp =radeon_get_ib_value(p, idx);
                  ib[idx] = 0;*/
                break;
        case SQ_ESGS_RING_BASE:
        case SQ_GSVS_RING_BASE:
        case SQ_ESTMP_RING_BASE:
        case SQ_GSTMP_RING_BASE:
        case SQ_HSTMP_RING_BASE:
        case SQ_LSTMP_RING_BASE:
        case SQ_PSTMP_RING_BASE:
        case SQ_VSTMP_RING_BASE:
                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
                if (r) {
                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
                                        "0x%04X\n", reg);
                        return -EINVAL;
                }
                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
                break;
        case DB_DEPTH_CONTROL:
                track->db_depth_control = radeon_get_ib_value(p, idx);
                track->db_dirty = true;
                break;
        case CAYMAN_DB_EQAA:
                if (p->rdev->family < CHIP_CAYMAN) {
                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
                                 "0x%04X\n", reg);
                        return -EINVAL;
                }
                break;
        case CAYMAN_DB_DEPTH_INFO:
                if (p->rdev->family < CHIP_CAYMAN) {
                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
                                 "0x%04X\n", reg);
                        return -EINVAL;
                }
                break;
        case DB_Z_INFO:
                track->db_z_info = radeon_get_ib_value(p, idx);
                if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
                        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
                        if (r) {
                                dev_warn(p->dev, "bad SET_CONTEXT_REG "
                                                "0x%04X\n", reg);
                                return -EINVAL;
                        }
                        ib[idx] &= ~Z_ARRAY_MODE(0xf);
                        track->db_z_info &= ~Z_ARRAY_MODE(0xf);
                        ib[idx] |= Z_ARRAY_MODE(evergreen_cs_get_array_mode(reloc->tiling_flags));
                        track->db_z_info |= Z_ARRAY_MODE(evergreen_cs_get_array_mode(reloc->tiling_flags));
1201                        if (reloc->tiling_flags & RADEON_TILING_MACRO) {
1202                                unsigned bankw, bankh, mtaspect, tile_split;
1203
1204                                evergreen_tiling_fields(reloc->tiling_flags,
1205                                                        &bankw, &bankh, &mtaspect,
1206                                                        &tile_split);
1207                                ib[idx] |= DB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
1208                                ib[idx] |= DB_TILE_SPLIT(tile_split) |
1209                                                DB_BANK_WIDTH(bankw) |
1210                                                DB_BANK_HEIGHT(bankh) |
1211                                                DB_MACRO_TILE_ASPECT(mtaspect);
1212                        }
1213                }
1214                track->db_dirty = true;
1215                break;
1216        case DB_STENCIL_INFO:
1217                track->db_s_info = radeon_get_ib_value(p, idx);
1218                track->db_dirty = true;
1219                break;
1220        case DB_DEPTH_VIEW:
1221                track->db_depth_view = radeon_get_ib_value(p, idx);
1222                track->db_dirty = true;
1223                break;
1224        case DB_DEPTH_SIZE:
1225                track->db_depth_size = radeon_get_ib_value(p, idx);
1226                track->db_dirty = true;
1227                break;
1228        case R_02805C_DB_DEPTH_SLICE:
1229                track->db_depth_slice = radeon_get_ib_value(p, idx);
1230                track->db_dirty = true;
1231                break;
1232        case DB_Z_READ_BASE:
1233                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1234                if (r) {
1235                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1236                                        "0x%04X\n", reg);
1237                        return -EINVAL;
1238                }
1239                track->db_z_read_offset = radeon_get_ib_value(p, idx);
1240                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1241                track->db_z_read_bo = reloc->robj;
1242                track->db_dirty = true;
1243                break;
1244        case DB_Z_WRITE_BASE:
1245                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1246                if (r) {
1247                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1248                                        "0x%04X\n", reg);
1249                        return -EINVAL;
1250                }
1251                track->db_z_write_offset = radeon_get_ib_value(p, idx);
1252                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1253                track->db_z_write_bo = reloc->robj;
1254                track->db_dirty = true;
1255                break;
1256        case DB_STENCIL_READ_BASE:
1257                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1258                if (r) {
1259                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1260                                        "0x%04X\n", reg);
1261                        return -EINVAL;
1262                }
1263                track->db_s_read_offset = radeon_get_ib_value(p, idx);
1264                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1265                track->db_s_read_bo = reloc->robj;
1266                track->db_dirty = true;
1267                break;
1268        case DB_STENCIL_WRITE_BASE:
1269                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1270                if (r) {
1271                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1272                                        "0x%04X\n", reg);
1273                        return -EINVAL;
1274                }
1275                track->db_s_write_offset = radeon_get_ib_value(p, idx);
1276                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1277                track->db_s_write_bo = reloc->robj;
1278                track->db_dirty = true;
1279                break;
1280        case VGT_STRMOUT_CONFIG:
1281                track->vgt_strmout_config = radeon_get_ib_value(p, idx);
1282                track->streamout_dirty = true;
1283                break;
1284        case VGT_STRMOUT_BUFFER_CONFIG:
1285                track->vgt_strmout_buffer_config = radeon_get_ib_value(p, idx);
1286                track->streamout_dirty = true;
1287                break;
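            /*
             * The four streamout buffer register sets are spaced 16 bytes
             * apart, so the buffer index falls out of the register offset,
             * e.g. (VGT_STRMOUT_BUFFER_BASE_2 - VGT_STRMOUT_BUFFER_BASE_0) / 16 == 2.
             */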
1288        case VGT_STRMOUT_BUFFER_BASE_0:
1289        case VGT_STRMOUT_BUFFER_BASE_1:
1290        case VGT_STRMOUT_BUFFER_BASE_2:
1291        case VGT_STRMOUT_BUFFER_BASE_3:
1292                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1293                if (r) {
1294                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1295                                        "0x%04X\n", reg);
1296                        return -EINVAL;
1297                }
1298                tmp = (reg - VGT_STRMOUT_BUFFER_BASE_0) / 16;
1299                track->vgt_strmout_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8;
1300                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1301                track->vgt_strmout_bo[tmp] = reloc->robj;
1302                track->streamout_dirty = true;
1303                break;
1304        case VGT_STRMOUT_BUFFER_SIZE_0:
1305        case VGT_STRMOUT_BUFFER_SIZE_1:
1306        case VGT_STRMOUT_BUFFER_SIZE_2:
1307        case VGT_STRMOUT_BUFFER_SIZE_3:
1308                tmp = (reg - VGT_STRMOUT_BUFFER_SIZE_0) / 16;
1309                /* size in register is DWs, convert to bytes */
1310                track->vgt_strmout_size[tmp] = radeon_get_ib_value(p, idx) * 4;
1311                track->streamout_dirty = true;
1312                break;
1313        case CP_COHER_BASE:
1314                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1315                if (r) {
1316                        dev_warn(p->dev, "missing reloc for CP_COHER_BASE "
1317                                        "0x%04X\n", reg);
1318                        return -EINVAL;
1319                }
1320                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
                    break;
1321        case CB_TARGET_MASK:
1322                track->cb_target_mask = radeon_get_ib_value(p, idx);
1323                track->cb_dirty = true;
1324                break;
1325        case CB_SHADER_MASK:
1326                track->cb_shader_mask = radeon_get_ib_value(p, idx);
1327                track->cb_dirty = true;
1328                break;
1329        case PA_SC_AA_CONFIG:
1330                if (p->rdev->family >= CHIP_CAYMAN) {
1331                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1332                                 "0x%04X\n", reg);
1333                        return -EINVAL;
1334                }
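                    /* the NUM_SAMPLES field holds log2(samples), e.g. 3 -> 8x MSAA */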
1335                tmp = radeon_get_ib_value(p, idx) & MSAA_NUM_SAMPLES_MASK;
1336                track->nsamples = 1 << tmp;
1337                break;
1338        case CAYMAN_PA_SC_AA_CONFIG:
1339                if (p->rdev->family < CHIP_CAYMAN) {
1340                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1341                                 "0x%04X\n", reg);
1342                        return -EINVAL;
1343                }
1344                tmp = radeon_get_ib_value(p, idx) & CAYMAN_MSAA_NUM_SAMPLES_MASK;
1345                track->nsamples = 1 << tmp;
1346                break;
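            /*
             * CB_COLOR0..7 register blocks are 0x3c bytes apart while
             * CB_COLOR8..11 use a compact 0x1c stride, hence the two index
             * formulas used below: CB_COLOR9_VIEW, for example, decodes as
             * ((reg - CB_COLOR8_VIEW) / 0x1c) + 8 == 9.
             */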
1347        case CB_COLOR0_VIEW:
1348        case CB_COLOR1_VIEW:
1349        case CB_COLOR2_VIEW:
1350        case CB_COLOR3_VIEW:
1351        case CB_COLOR4_VIEW:
1352        case CB_COLOR5_VIEW:
1353        case CB_COLOR6_VIEW:
1354        case CB_COLOR7_VIEW:
1355                tmp = (reg - CB_COLOR0_VIEW) / 0x3c;
1356                track->cb_color_view[tmp] = radeon_get_ib_value(p, idx);
1357                track->cb_dirty = true;
1358                break;
1359        case CB_COLOR8_VIEW:
1360        case CB_COLOR9_VIEW:
1361        case CB_COLOR10_VIEW:
1362        case CB_COLOR11_VIEW:
1363                tmp = ((reg - CB_COLOR8_VIEW) / 0x1c) + 8;
1364                track->cb_color_view[tmp] = radeon_get_ib_value(p, idx);
1365                track->cb_dirty = true;
1366                break;
1367        case CB_COLOR0_INFO:
1368        case CB_COLOR1_INFO:
1369        case CB_COLOR2_INFO:
1370        case CB_COLOR3_INFO:
1371        case CB_COLOR4_INFO:
1372        case CB_COLOR5_INFO:
1373        case CB_COLOR6_INFO:
1374        case CB_COLOR7_INFO:
1375                tmp = (reg - CB_COLOR0_INFO) / 0x3c;
1376                track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
1377                if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1378                        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1379                        if (r) {
1380                                dev_warn(p->dev, "bad SET_CONTEXT_REG "
1381                                                "0x%04X\n", reg);
1382                                return -EINVAL;
1383                        }
1384                        ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1385                        track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1386                }
1387                track->cb_dirty = true;
1388                break;
1389        case CB_COLOR8_INFO:
1390        case CB_COLOR9_INFO:
1391        case CB_COLOR10_INFO:
1392        case CB_COLOR11_INFO:
1393                tmp = ((reg - CB_COLOR8_INFO) / 0x1c) + 8;
1394                track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
1395                if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1396                        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1397                        if (r) {
1398                                dev_warn(p->dev, "bad SET_CONTEXT_REG "
1399                                                "0x%04X\n", reg);
1400                                return -EINVAL;
1401                        }
1402                        ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1403                        track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1404                }
1405                track->cb_dirty = true;
1406                break;
1407        case CB_COLOR0_PITCH:
1408        case CB_COLOR1_PITCH:
1409        case CB_COLOR2_PITCH:
1410        case CB_COLOR3_PITCH:
1411        case CB_COLOR4_PITCH:
1412        case CB_COLOR5_PITCH:
1413        case CB_COLOR6_PITCH:
1414        case CB_COLOR7_PITCH:
1415                tmp = (reg - CB_COLOR0_PITCH) / 0x3c;
1416                track->cb_color_pitch[tmp] = radeon_get_ib_value(p, idx);
1417                track->cb_dirty = true;
1418                break;
1419        case CB_COLOR8_PITCH:
1420        case CB_COLOR9_PITCH:
1421        case CB_COLOR10_PITCH:
1422        case CB_COLOR11_PITCH:
1423                tmp = ((reg - CB_COLOR8_PITCH) / 0x1c) + 8;
1424                track->cb_color_pitch[tmp] = radeon_get_ib_value(p, idx);
1425                track->cb_dirty = true;
1426                break;
1427        case CB_COLOR0_SLICE:
1428        case CB_COLOR1_SLICE:
1429        case CB_COLOR2_SLICE:
1430        case CB_COLOR3_SLICE:
1431        case CB_COLOR4_SLICE:
1432        case CB_COLOR5_SLICE:
1433        case CB_COLOR6_SLICE:
1434        case CB_COLOR7_SLICE:
1435                tmp = (reg - CB_COLOR0_SLICE) / 0x3c;
1436                track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx);
1437                track->cb_color_slice_idx[tmp] = idx;
1438                track->cb_dirty = true;
1439                break;
1440        case CB_COLOR8_SLICE:
1441        case CB_COLOR9_SLICE:
1442        case CB_COLOR10_SLICE:
1443        case CB_COLOR11_SLICE:
1444                tmp = ((reg - CB_COLOR8_SLICE) / 0x1c) + 8;
1445                track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx);
1446                track->cb_color_slice_idx[tmp] = idx;
1447                track->cb_dirty = true;
1448                break;
1449        case CB_COLOR0_ATTRIB:
1450        case CB_COLOR1_ATTRIB:
1451        case CB_COLOR2_ATTRIB:
1452        case CB_COLOR3_ATTRIB:
1453        case CB_COLOR4_ATTRIB:
1454        case CB_COLOR5_ATTRIB:
1455        case CB_COLOR6_ATTRIB:
1456        case CB_COLOR7_ATTRIB:
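                    /*
                     * The reloc is required even when tiling flags are kept,
                     * since the (possibly patched) IB word is what ends up
                     * in track->cb_color_attrib[] for the surface checks.
                     */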
1457                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1458                if (r) {
1459                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1460                                        "0x%04X\n", reg);
1461                        return -EINVAL;
1462                }
1463                if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1464                        if (reloc->tiling_flags & RADEON_TILING_MACRO) {
1465                                unsigned bankw, bankh, mtaspect, tile_split;
1466
1467                                evergreen_tiling_fields(reloc->tiling_flags,
1468                                                        &bankw, &bankh, &mtaspect,
1469                                                        &tile_split);
1470                                ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
1471                                ib[idx] |= CB_TILE_SPLIT(tile_split) |
1472                                           CB_BANK_WIDTH(bankw) |
1473                                           CB_BANK_HEIGHT(bankh) |
1474                                           CB_MACRO_TILE_ASPECT(mtaspect);
1475                        }
1476                }
1477                tmp = ((reg - CB_COLOR0_ATTRIB) / 0x3c);
1478                track->cb_color_attrib[tmp] = ib[idx];
1479                track->cb_dirty = true;
1480                break;
1481        case CB_COLOR8_ATTRIB:
1482        case CB_COLOR9_ATTRIB:
1483        case CB_COLOR10_ATTRIB:
1484        case CB_COLOR11_ATTRIB:
1485                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1486                if (r) {
1487                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1488                                        "0x%04X\n", reg);
1489                        return -EINVAL;
1490                }
1491                if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1492                        if (reloc->tiling_flags & RADEON_TILING_MACRO) {
1493                                unsigned bankw, bankh, mtaspect, tile_split;
1494
1495                                evergreen_tiling_fields(reloc->tiling_flags,
1496                                                        &bankw, &bankh, &mtaspect,
1497                                                        &tile_split);
1498                                ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
1499                                ib[idx] |= CB_TILE_SPLIT(tile_split) |
1500                                           CB_BANK_WIDTH(bankw) |
1501                                           CB_BANK_HEIGHT(bankh) |
1502                                           CB_MACRO_TILE_ASPECT(mtaspect);
1503                        }
1504                }
1505                tmp = ((reg - CB_COLOR8_ATTRIB) / 0x1c) + 8;
1506                track->cb_color_attrib[tmp] = ib[idx];
1507                track->cb_dirty = true;
1508                break;
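            /*
             * FMASK/CMASK bases are relocated like the other 256-byte
             * aligned addresses; only the BOs are recorded here, nothing is
             * size-checked against them.
             */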
1509        case CB_COLOR0_FMASK:
1510        case CB_COLOR1_FMASK:
1511        case CB_COLOR2_FMASK:
1512        case CB_COLOR3_FMASK:
1513        case CB_COLOR4_FMASK:
1514        case CB_COLOR5_FMASK:
1515        case CB_COLOR6_FMASK:
1516        case CB_COLOR7_FMASK:
1517                tmp = (reg - CB_COLOR0_FMASK) / 0x3c;
1518                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1519                if (r) {
1520                        dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
1521                        return -EINVAL;
1522                }
1523                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1524                track->cb_color_fmask_bo[tmp] = reloc->robj;
1525                break;
1526        case CB_COLOR0_CMASK:
1527        case CB_COLOR1_CMASK:
1528        case CB_COLOR2_CMASK:
1529        case CB_COLOR3_CMASK:
1530        case CB_COLOR4_CMASK:
1531        case CB_COLOR5_CMASK:
1532        case CB_COLOR6_CMASK:
1533        case CB_COLOR7_CMASK:
1534                tmp = (reg - CB_COLOR0_CMASK) / 0x3c;
1535                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1536                if (r) {
1537                        dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
1538                        return -EINVAL;
1539                }
1540                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1541                track->cb_color_cmask_bo[tmp] = reloc->robj;
1542                break;
1543        case CB_COLOR0_FMASK_SLICE:
1544        case CB_COLOR1_FMASK_SLICE:
1545        case CB_COLOR2_FMASK_SLICE:
1546        case CB_COLOR3_FMASK_SLICE:
1547        case CB_COLOR4_FMASK_SLICE:
1548        case CB_COLOR5_FMASK_SLICE:
1549        case CB_COLOR6_FMASK_SLICE:
1550        case CB_COLOR7_FMASK_SLICE:
1551                tmp = (reg - CB_COLOR0_FMASK_SLICE) / 0x3c;
1552                track->cb_color_fmask_slice[tmp] = radeon_get_ib_value(p, idx);
1553                break;
1554        case CB_COLOR0_CMASK_SLICE:
1555        case CB_COLOR1_CMASK_SLICE:
1556        case CB_COLOR2_CMASK_SLICE:
1557        case CB_COLOR3_CMASK_SLICE:
1558        case CB_COLOR4_CMASK_SLICE:
1559        case CB_COLOR5_CMASK_SLICE:
1560        case CB_COLOR6_CMASK_SLICE:
1561        case CB_COLOR7_CMASK_SLICE:
1562                tmp = (reg - CB_COLOR0_CMASK_SLICE) / 0x3c;
1563                track->cb_color_cmask_slice[tmp] = radeon_get_ib_value(p, idx);
1564                break;
1565        case CB_COLOR0_BASE:
1566        case CB_COLOR1_BASE:
1567        case CB_COLOR2_BASE:
1568        case CB_COLOR3_BASE:
1569        case CB_COLOR4_BASE:
1570        case CB_COLOR5_BASE:
1571        case CB_COLOR6_BASE:
1572        case CB_COLOR7_BASE:
1573                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1574                if (r) {
1575                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1576                                        "0x%04X\n", reg);
1577                        return -EINVAL;
1578                }
1579                tmp = (reg - CB_COLOR0_BASE) / 0x3c;
1580                track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx);
1581                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1582                track->cb_color_bo[tmp] = reloc->robj;
1583                track->cb_dirty = true;
1584                break;
1585        case CB_COLOR8_BASE:
1586        case CB_COLOR9_BASE:
1587        case CB_COLOR10_BASE:
1588        case CB_COLOR11_BASE:
1589                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1590                if (r) {
1591                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1592                                        "0x%04X\n", reg);
1593                        return -EINVAL;
1594                }
1595                tmp = ((reg - CB_COLOR8_BASE) / 0x1c) + 8;
1596                track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx);
1597                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1598                track->cb_color_bo[tmp] = reloc->robj;
1599                track->cb_dirty = true;
1600                break;
1601        case DB_HTILE_DATA_BASE:
1602                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1603                if (r) {
1604                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1605                                        "0x%04X\n", reg);
1606                        return -EINVAL;
1607                }
1608                track->htile_offset = radeon_get_ib_value(p, idx);
1609                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1610                track->htile_bo = reloc->robj;
1611                track->db_dirty = true;
1612                break;
1613        case DB_HTILE_SURFACE:
1614                /* only 8x8 htile surfaces are supported */
1615                track->htile_surface = radeon_get_ib_value(p, idx);
1616                /* force 8x8 htile width and height */
1617                ib[idx] |= 3;
1618                track->db_dirty = true;
1619                break;
1620        case CB_IMMED0_BASE:
1621        case CB_IMMED1_BASE:
1622        case CB_IMMED2_BASE:
1623        case CB_IMMED3_BASE:
1624        case CB_IMMED4_BASE:
1625        case CB_IMMED5_BASE:
1626        case CB_IMMED6_BASE:
1627        case CB_IMMED7_BASE:
1628        case CB_IMMED8_BASE:
1629        case CB_IMMED9_BASE:
1630        case CB_IMMED10_BASE:
1631        case CB_IMMED11_BASE:
1632        case SQ_PGM_START_FS:
1633        case SQ_PGM_START_ES:
1634        case SQ_PGM_START_VS:
1635        case SQ_PGM_START_GS:
1636        case SQ_PGM_START_PS:
1637        case SQ_PGM_START_HS:
1638        case SQ_PGM_START_LS:
1639        case SQ_CONST_MEM_BASE:
1640        case SQ_ALU_CONST_CACHE_GS_0:
1641        case SQ_ALU_CONST_CACHE_GS_1:
1642        case SQ_ALU_CONST_CACHE_GS_2:
1643        case SQ_ALU_CONST_CACHE_GS_3:
1644        case SQ_ALU_CONST_CACHE_GS_4:
1645        case SQ_ALU_CONST_CACHE_GS_5:
1646        case SQ_ALU_CONST_CACHE_GS_6:
1647        case SQ_ALU_CONST_CACHE_GS_7:
1648        case SQ_ALU_CONST_CACHE_GS_8:
1649        case SQ_ALU_CONST_CACHE_GS_9:
1650        case SQ_ALU_CONST_CACHE_GS_10:
1651        case SQ_ALU_CONST_CACHE_GS_11:
1652        case SQ_ALU_CONST_CACHE_GS_12:
1653        case SQ_ALU_CONST_CACHE_GS_13:
1654        case SQ_ALU_CONST_CACHE_GS_14:
1655        case SQ_ALU_CONST_CACHE_GS_15:
1656        case SQ_ALU_CONST_CACHE_PS_0:
1657        case SQ_ALU_CONST_CACHE_PS_1:
1658        case SQ_ALU_CONST_CACHE_PS_2:
1659        case SQ_ALU_CONST_CACHE_PS_3:
1660        case SQ_ALU_CONST_CACHE_PS_4:
1661        case SQ_ALU_CONST_CACHE_PS_5:
1662        case SQ_ALU_CONST_CACHE_PS_6:
1663        case SQ_ALU_CONST_CACHE_PS_7:
1664        case SQ_ALU_CONST_CACHE_PS_8:
1665        case SQ_ALU_CONST_CACHE_PS_9:
1666        case SQ_ALU_CONST_CACHE_PS_10:
1667        case SQ_ALU_CONST_CACHE_PS_11:
1668        case SQ_ALU_CONST_CACHE_PS_12:
1669        case SQ_ALU_CONST_CACHE_PS_13:
1670        case SQ_ALU_CONST_CACHE_PS_14:
1671        case SQ_ALU_CONST_CACHE_PS_15:
1672        case SQ_ALU_CONST_CACHE_VS_0:
1673        case SQ_ALU_CONST_CACHE_VS_1:
1674        case SQ_ALU_CONST_CACHE_VS_2:
1675        case SQ_ALU_CONST_CACHE_VS_3:
1676        case SQ_ALU_CONST_CACHE_VS_4:
1677        case SQ_ALU_CONST_CACHE_VS_5:
1678        case SQ_ALU_CONST_CACHE_VS_6:
1679        case SQ_ALU_CONST_CACHE_VS_7:
1680        case SQ_ALU_CONST_CACHE_VS_8:
1681        case SQ_ALU_CONST_CACHE_VS_9:
1682        case SQ_ALU_CONST_CACHE_VS_10:
1683        case SQ_ALU_CONST_CACHE_VS_11:
1684        case SQ_ALU_CONST_CACHE_VS_12:
1685        case SQ_ALU_CONST_CACHE_VS_13:
1686        case SQ_ALU_CONST_CACHE_VS_14:
1687        case SQ_ALU_CONST_CACHE_VS_15:
1688        case SQ_ALU_CONST_CACHE_HS_0:
1689        case SQ_ALU_CONST_CACHE_HS_1:
1690        case SQ_ALU_CONST_CACHE_HS_2:
1691        case SQ_ALU_CONST_CACHE_HS_3:
1692        case SQ_ALU_CONST_CACHE_HS_4:
1693        case SQ_ALU_CONST_CACHE_HS_5:
1694        case SQ_ALU_CONST_CACHE_HS_6:
1695        case SQ_ALU_CONST_CACHE_HS_7:
1696        case SQ_ALU_CONST_CACHE_HS_8:
1697        case SQ_ALU_CONST_CACHE_HS_9:
1698        case SQ_ALU_CONST_CACHE_HS_10:
1699        case SQ_ALU_CONST_CACHE_HS_11:
1700        case SQ_ALU_CONST_CACHE_HS_12:
1701        case SQ_ALU_CONST_CACHE_HS_13:
1702        case SQ_ALU_CONST_CACHE_HS_14:
1703        case SQ_ALU_CONST_CACHE_HS_15:
1704        case SQ_ALU_CONST_CACHE_LS_0:
1705        case SQ_ALU_CONST_CACHE_LS_1:
1706        case SQ_ALU_CONST_CACHE_LS_2:
1707        case SQ_ALU_CONST_CACHE_LS_3:
1708        case SQ_ALU_CONST_CACHE_LS_4:
1709        case SQ_ALU_CONST_CACHE_LS_5:
1710        case SQ_ALU_CONST_CACHE_LS_6:
1711        case SQ_ALU_CONST_CACHE_LS_7:
1712        case SQ_ALU_CONST_CACHE_LS_8:
1713        case SQ_ALU_CONST_CACHE_LS_9:
1714        case SQ_ALU_CONST_CACHE_LS_10:
1715        case SQ_ALU_CONST_CACHE_LS_11:
1716        case SQ_ALU_CONST_CACHE_LS_12:
1717        case SQ_ALU_CONST_CACHE_LS_13:
1718        case SQ_ALU_CONST_CACHE_LS_14:
1719        case SQ_ALU_CONST_CACHE_LS_15:
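                    /*
                     * All of the bases above take a 256-byte aligned
                     * address, so a single shared reloc-add serves the
                     * whole group.
                     */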
1720                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1721                if (r) {
1722                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1723                                        "0x%04X\n", reg);
1724                        return -EINVAL;
1725                }
1726                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1727                break;
1728        case SX_MEMORY_EXPORT_BASE:
1729                if (p->rdev->family >= CHIP_CAYMAN) {
1730                        dev_warn(p->dev, "bad SET_CONFIG_REG "
1731                                 "0x%04X\n", reg);
1732                        return -EINVAL;
1733                }
1734                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1735                if (r) {
1736                        dev_warn(p->dev, "bad SET_CONFIG_REG "
1737                                        "0x%04X\n", reg);
1738                        return -EINVAL;
1739                }
1740                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1741                break;
1742        case CAYMAN_SX_SCATTER_EXPORT_BASE:
1743                if (p->rdev->family < CHIP_CAYMAN) {
1744                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1745                                 "0x%04X\n", reg);
1746                        return -EINVAL;
1747                }
1748                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1749                if (r) {
1750                        dev_warn(p->dev, "bad SET_CONTEXT_REG "
1751                                        "0x%04X\n", reg);
1752                        return -EINVAL;
1753                }
1754                ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1755                break;
1756        case SX_MISC:
1757                track->sx_misc_kill_all_prims = (radeon_get_ib_value(p, idx) & 0x1) != 0;
1758                break;
1759        default:
1760                dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
1761                return -EINVAL;
1762        }
1763        return 0;
1764}
1765
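    /*
     * The generated *_reg_safe_bm tables pack one bit per dword register,
     * 32 registers (128 bytes of register space) per bitmap word:
     *
     *   word index = reg >> 7          (reg / 128)
     *   bit  index = (reg >> 2) & 31   ((reg / 4) % 32)
     *
     * A clear bit marks the register as safe to write straight from the
     * command stream; a set bit makes it forbidden here.
     */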
1766static bool evergreen_is_safe_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
1767{
1768        u32 last_reg, m, i;
1769
1770        if (p->rdev->family >= CHIP_CAYMAN)
1771                last_reg = ARRAY_SIZE(cayman_reg_safe_bm);
1772        else
1773                last_reg = ARRAY_SIZE(evergreen_reg_safe_bm);
1774
1775        i = (reg >> 7);
1776        if (i >= last_reg) {
1777                dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
1778                return false;
1779        }
1780        m = 1 << ((reg >> 2) & 31);
1781        if (p->rdev->family >= CHIP_CAYMAN) {
1782                if (!(cayman_reg_safe_bm[i] & m))
1783                        return true;
1784        } else {
1785                if (!(evergreen_reg_safe_bm[i] & m))
1786                        return true;
1787        }
1788        dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
1789        return false;
1790}
1791
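    /*
     * pkt->idx points at the PACKET3 header; payload parsing starts at
     * idx = pkt->idx + 1. The pkt->count checks below follow the usual PM4
     * convention of count = payload dwords - 1.
     */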
1792static int evergreen_packet3_check(struct radeon_cs_parser *p,
1793                                   struct radeon_cs_packet *pkt)
1794{
1795        struct radeon_cs_reloc *reloc;
1796        struct evergreen_cs_track *track;
1797        volatile u32 *ib;
1798        unsigned idx;
1799        unsigned i;
1800        unsigned start_reg, end_reg, reg;
1801        int r;
1802        u32 idx_value;
1803
1804        track = (struct evergreen_cs_track *)p->track;
1805        ib = p->ib.ptr;
1806        idx = pkt->idx + 1;
1807        idx_value = radeon_get_ib_value(p, idx);
1808
1809        switch (pkt->opcode) {
1810        case PACKET3_SET_PREDICATION:
1811        {
1812                int pred_op;
1813                int tmp;
1814                uint64_t offset;
1815
1816                if (pkt->count != 1) {
1817                        DRM_ERROR("bad SET PREDICATION\n");
1818                        return -EINVAL;
1819                }
1820
1821                tmp = radeon_get_ib_value(p, idx + 1);
1822                pred_op = (tmp >> 16) & 0x7;
1823
1824                /* for the clear predicate operation */
1825                if (pred_op == 0)
1826                        return 0;
1827
1828                if (pred_op > 2) {
1829                        DRM_ERROR("bad SET PREDICATION operation %d\n", pred_op);
1830                        return -EINVAL;
1831                }
1832
1833                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1834                if (r) {
1835                        DRM_ERROR("bad SET PREDICATION\n");
1836                        return -EINVAL;
1837                }
1838
1839                offset = reloc->gpu_offset +
1840                         (idx_value & 0xfffffff0) +
1841                         ((u64)(tmp & 0xff) << 32);
1842
1843                ib[idx + 0] = offset;
1844                ib[idx + 1] = (tmp & 0xffffff00) | (upper_32_bits(offset) & 0xff);
1845        }
1846        break;
1847        case PACKET3_CONTEXT_CONTROL:
1848                if (pkt->count != 1) {
1849                        DRM_ERROR("bad CONTEXT_CONTROL\n");
1850                        return -EINVAL;
1851                }
1852                break;
1853        case PACKET3_INDEX_TYPE:
1854        case PACKET3_NUM_INSTANCES:
1855        case PACKET3_CLEAR_STATE:
1856                if (pkt->count) {
1857                        DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n");
1858                        return -EINVAL;
1859                }
1860                break;
1861        case CAYMAN_PACKET3_DEALLOC_STATE:
1862                if (p->rdev->family < CHIP_CAYMAN) {
1863                        DRM_ERROR("bad PACKET3_DEALLOC_STATE\n");
1864                        return -EINVAL;
1865                }
1866                if (pkt->count) {
1867                        DRM_ERROR("bad DEALLOC_STATE\n");
1868                        return -EINVAL;
1869                }
1870                break;
1871        case PACKET3_INDEX_BASE:
1872        {
1873                uint64_t offset;
1874
1875                if (pkt->count != 1) {
1876                        DRM_ERROR("bad INDEX_BASE\n");
1877                        return -EINVAL;
1878                }
1879                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1880                if (r) {
1881                        DRM_ERROR("bad INDEX_BASE\n");
1882                        return -EINVAL;
1883                }
1884
1885                offset = reloc->gpu_offset +
1886                         idx_value +
1887                         ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
1888
1889                ib[idx+0] = offset;
1890                ib[idx+1] = upper_32_bits(offset) & 0xff;
1891
1892                r = evergreen_cs_track_check(p);
1893                if (r) {
1894                        dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1895                        return r;
1896                }
1897                break;
1898        }
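            /*
             * Like INDEX_BASE above, every draw/dispatch packet finishes
             * with evergreen_cs_track_check(), which is where the
             * accumulated CB/DB/streamout state gets validated against the
             * bound BOs.
             */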
1899        case PACKET3_DRAW_INDEX:
1900        {
1901                uint64_t offset;
1902                if (pkt->count != 3) {
1903                        DRM_ERROR("bad DRAW_INDEX\n");
1904                        return -EINVAL;
1905                }
1906                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1907                if (r) {
1908                        DRM_ERROR("bad DRAW_INDEX\n");
1909                        return -EINVAL;
1910                }
1911
1912                offset = reloc->gpu_offset +
1913                         idx_value +
1914                         ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
1915
1916                ib[idx+0] = offset;
1917                ib[idx+1] = upper_32_bits(offset) & 0xff;
1918
1919                r = evergreen_cs_track_check(p);
1920                if (r) {
1921                        dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1922                        return r;
1923                }
1924                break;
1925        }
1926        case PACKET3_DRAW_INDEX_2:
1927        {
1928                uint64_t offset;
1929
1930                if (pkt->count != 4) {
1931                        DRM_ERROR("bad DRAW_INDEX_2\n");
1932                        return -EINVAL;
1933                }
1934                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1935                if (r) {
1936                        DRM_ERROR("bad DRAW_INDEX_2\n");
1937                        return -EINVAL;
1938                }
1939
1940                offset = reloc->gpu_offset +
1941                         radeon_get_ib_value(p, idx+1) +
1942                         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
1943
1944                ib[idx+1] = offset;
1945                ib[idx+2] = upper_32_bits(offset) & 0xff;
1946
1947                r = evergreen_cs_track_check(p);
1948                if (r) {
1949                        dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1950                        return r;
1951                }
1952                break;
1953        }
1954        case PACKET3_DRAW_INDEX_AUTO:
1955                if (pkt->count != 1) {
1956                        DRM_ERROR("bad DRAW_INDEX_AUTO\n");
1957                        return -EINVAL;
1958                }
1959                r = evergreen_cs_track_check(p);
1960                if (r) {
1961                        dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
1962                        return r;
1963                }
1964                break;
1965        case PACKET3_DRAW_INDEX_MULTI_AUTO:
1966                if (pkt->count != 2) {
1967                        DRM_ERROR("bad DRAW_INDEX_MULTI_AUTO\n");
1968                        return -EINVAL;
1969                }
1970                r = evergreen_cs_track_check(p);
1971                if (r) {
1972                        dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
1973                        return r;
1974                }
1975                break;
1976        case PACKET3_DRAW_INDEX_IMMD:
1977                if (pkt->count < 2) {
1978                        DRM_ERROR("bad DRAW_INDEX_IMMD\n");
1979                        return -EINVAL;
1980                }
1981                r = evergreen_cs_track_check(p);
1982                if (r) {
1983                        dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1984                        return r;
1985                }
1986                break;
1987        case PACKET3_DRAW_INDEX_OFFSET:
1988                if (pkt->count != 2) {
1989                        DRM_ERROR("bad DRAW_INDEX_OFFSET\n");
1990                        return -EINVAL;
1991                }
1992                r = evergreen_cs_track_check(p);
1993                if (r) {
1994                        dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1995                        return r;
1996                }
1997                break;
1998        case PACKET3_DRAW_INDEX_OFFSET_2:
1999                if (pkt->count != 3) {
2000                        DRM_ERROR("bad DRAW_INDEX_OFFSET_2\n");
2001                        return -EINVAL;
2002                }
2003                r = evergreen_cs_track_check(p);
2004                if (r) {
2005                        dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2006                        return r;
2007                }
2008                break;
2009        case PACKET3_DISPATCH_DIRECT:
2010                if (pkt->count != 3) {
2011                        DRM_ERROR("bad DISPATCH_DIRECT\n");
2012                        return -EINVAL;
2013                }
2014                r = evergreen_cs_track_check(p);
2015                if (r) {
2016                        dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
2017                        return r;
2018                }
2019                break;
2020        case PACKET3_DISPATCH_INDIRECT:
2021                if (pkt->count != 1) {
2022                        DRM_ERROR("bad DISPATCH_INDIRECT\n");
2023                        return -EINVAL;
2024                }
2025                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2026                if (r) {
2027                        DRM_ERROR("bad DISPATCH_INDIRECT\n");
2028                        return -EINVAL;
2029                }
2030                ib[idx+0] = idx_value + (u32)(reloc->gpu_offset & 0xffffffff);
2031                r = evergreen_cs_track_check(p);
2032                if (r) {
2033                        dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2034                        return r;
2035                }
2036                break;
2037        case PACKET3_WAIT_REG_MEM:
2038                if (pkt->count != 5) {
2039                        DRM_ERROR("bad WAIT_REG_MEM\n");
2040                        return -EINVAL;
2041                }
2042                /* bit 4 is reg (0) or mem (1) */
2043                if (idx_value & 0x10) {
2044                        uint64_t offset;
2045
2046                        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2047                        if (r) {
2048                                DRM_ERROR("bad WAIT_REG_MEM\n");
2049                                return -EINVAL;
2050                        }
2051
2052                        offset = reloc->gpu_offset +
2053                                 (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2054                                 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2055
2056                        ib[idx+1] = (ib[idx+1] & 0x3) | (offset & 0xfffffffc);
2057                        ib[idx+2] = upper_32_bits(offset) & 0xff;
2058                } else if (idx_value & 0x100) {
2059                        DRM_ERROR("cannot use PFP on REG wait\n");
2060                        return -EINVAL;
2061                }
2062                break;
2063        case PACKET3_CP_DMA:
2064        {
2065                u32 command, size, info;
2066                u64 offset, tmp;
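                    /*
                     * Field decode used below: info bits [30:29] select the
                     * source address space and bits [21:20] the destination
                     * (0 = memory and needs a reloc, 1 = GDS, src 2 =
                     * inline data); the low 21 bits of the command dword
                     * are the byte count.
                     */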
2067                if (pkt->count != 4) {
2068                        DRM_ERROR("bad CP DMA\n");
2069                        return -EINVAL;
2070                }
2071                command = radeon_get_ib_value(p, idx+4);
2072                size = command & 0x1fffff;
2073                info = radeon_get_ib_value(p, idx+1);
2074                if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
2075                    (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
2076                    ((((info & 0x00300000) >> 20) == 0) &&
2077                     (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
2078                    ((((info & 0x60000000) >> 29) == 0) &&
2079                     (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
2080                        /* non mem-to-mem copies require a dword-aligned byte count */
2081                        if (size % 4) {
2082                                DRM_ERROR("CP DMA command requires dw count alignment\n");
2083                                return -EINVAL;
2084                        }
2085                }
2086                if (command & PACKET3_CP_DMA_CMD_SAS) {
2087                        /* src address space is register */
2088                        /* GDS is ok */
2089                        if (((info & 0x60000000) >> 29) != 1) {
2090                                DRM_ERROR("CP DMA SAS not supported\n");
2091                                return -EINVAL;
2092                        }
2093                } else {
2094                        if (command & PACKET3_CP_DMA_CMD_SAIC) {
2095                                DRM_ERROR("CP DMA SAIC only supported for registers\n");
2096                                return -EINVAL;
2097                        }
2098                        /* src address space is memory */
2099                        if (((info & 0x60000000) >> 29) == 0) {
2100                                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2101                                if (r) {
2102                                        DRM_ERROR("bad CP DMA SRC\n");
2103                                        return -EINVAL;
2104                                }
2105
2106                                tmp = radeon_get_ib_value(p, idx) +
2107                                        ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
2108
2109                                offset = reloc->gpu_offset + tmp;
2110
2111                                if ((tmp + size) > radeon_bo_size(reloc->robj)) {
2112                                        dev_warn(p->dev, "CP DMA src buffer too small (%llu %lu)\n",
2113                                                 tmp + size, radeon_bo_size(reloc->robj));
2114                                        return -EINVAL;
2115                                }
2116
2117                                ib[idx] = offset;
2118                                ib[idx+1] = (ib[idx+1] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2119                        } else if (((info & 0x60000000) >> 29) != 2) {
2120                                DRM_ERROR("bad CP DMA SRC_SEL\n");
2121                                return -EINVAL;
2122                        }
2123                }
2124                if (command & PACKET3_CP_DMA_CMD_DAS) {
2125                        /* dst address space is register */
2126                        /* GDS is ok */
2127                        if (((info & 0x00300000) >> 20) != 1) {
2128                                DRM_ERROR("CP DMA DAS not supported\n");
2129                                return -EINVAL;
2130                        }
2131                } else {
2132                        /* dst address space is memory */
2133                        if (command & PACKET3_CP_DMA_CMD_DAIC) {
2134                                DRM_ERROR("CP DMA DAIC only supported for registers\n");
2135                                return -EINVAL;
2136                        }
2137                        if (((info & 0x00300000) >> 20) == 0) {
2138                                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2139                                if (r) {
2140                                        DRM_ERROR("bad CP DMA DST\n");
2141                                        return -EINVAL;
2142                                }
2143
2144                                tmp = radeon_get_ib_value(p, idx+2) +
2145                                        ((u64)(radeon_get_ib_value(p, idx+3) & 0xff) << 32);
2146
2147                                offset = reloc->gpu_offset + tmp;
2148
2149                                if ((tmp + size) > radeon_bo_size(reloc->robj)) {
2150                                        dev_warn(p->dev, "CP DMA dst buffer too small (%llu %lu)\n",
2151                                                 tmp + size, radeon_bo_size(reloc->robj));
2152                                        return -EINVAL;
2153                                }
2154
2155                                ib[idx+2] = offset;
2156                                ib[idx+3] = upper_32_bits(offset) & 0xff;
2157                        } else {
2158                                DRM_ERROR("bad CP DMA DST_SEL\n");
2159                                return -EINVAL;
2160                        }
2161                }
2162                break;
2163        }
2164        case PACKET3_SURFACE_SYNC:
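                    /*
                     * The payload appears to be CP_COHER_CNTL, _SIZE,
                     * _BASE, POLL_INTERVAL; only a sync that isn't "flush
                     * all caches" carries a real base address to relocate.
                     */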
2165                if (pkt->count != 3) {
2166                        DRM_ERROR("bad SURFACE_SYNC\n");
2167                        return -EINVAL;
2168                }
2169                /* a size of 0xffffffff with base 0x0 means "flush all caches" and needs no reloc */
2170                if (radeon_get_ib_value(p, idx + 1) != 0xffffffff ||
2171                    radeon_get_ib_value(p, idx + 2) != 0) {
2172                        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2173                        if (r) {
2174                                DRM_ERROR("bad SURFACE_SYNC\n");
2175                                return -EINVAL;
2176                        }
2177                        ib[idx+2] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
2178                }
2179                break;
2180        case PACKET3_EVENT_WRITE:
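                    /*
                     * count == 0 is a pure event; with a payload, dword 1
                     * holds an 8-byte aligned address (hence the 0xfffffff8
                     * masks) and dword 2 the upper 8 address bits.
                     */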
2181                if (pkt->count != 2 && pkt->count != 0) {
2182                        DRM_ERROR("bad EVENT_WRITE\n");
2183                        return -EINVAL;
2184                }
2185                if (pkt->count) {
2186                        uint64_t offset;
2187
2188                        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2189                        if (r) {
2190                                DRM_ERROR("bad EVENT_WRITE\n");
2191                                return -EINVAL;
2192                        }
2193                        offset = reloc->gpu_offset +
2194                                 (radeon_get_ib_value(p, idx+1) & 0xfffffff8) +
2195                                 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2196
2197                        ib[idx+1] = offset & 0xfffffff8;
2198                        ib[idx+2] = upper_32_bits(offset) & 0xff;
2199                }
2200                break;
2201        case PACKET3_EVENT_WRITE_EOP:
2202        {
2203                uint64_t offset;
2204
2205                if (pkt->count != 4) {
2206                        DRM_ERROR("bad EVENT_WRITE_EOP\n");
2207                        return -EINVAL;
2208                }
2209                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2210                if (r) {
2211                        DRM_ERROR("bad EVENT_WRITE_EOP\n");
2212                        return -EINVAL;
2213                }
2214
2215                offset = reloc->gpu_offset +
2216                         (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2217                         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2218
2219                ib[idx+1] = offset & 0xfffffffc;
2220                ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2221                break;
2222        }
2223        case PACKET3_EVENT_WRITE_EOS:
2224        {
2225                uint64_t offset;
2226
2227                if (pkt->count != 3) {
2228                        DRM_ERROR("bad EVENT_WRITE_EOS\n");
2229                        return -EINVAL;
2230                }
2231                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2232                if (r) {
2233                        DRM_ERROR("bad EVENT_WRITE_EOS\n");
2234                        return -EINVAL;
2235                }
2236
2237                offset = reloc->gpu_offset +
2238                         (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2239                         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2240
2241                ib[idx+1] = offset & 0xfffffffc;
2242                ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2243                break;
2244        }
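            /*
             * The SET_*_REG packets below share one pattern: the first
             * payload dword is a dword offset O from the block's START, so
             * a write of N registers must satisfy
             *   START + 4*O + 4*(N-1) < END
             * with config/context registers additionally vetted one by one
             * by evergreen_cs_check_reg().
             */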
2245        case PACKET3_SET_CONFIG_REG:
2246                start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
2247                end_reg = 4 * pkt->count + start_reg - 4;
2248                if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
2249                    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
2250                    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
2251                        DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
2252                        return -EINVAL;
2253                }
2254                for (i = 0; i < pkt->count; i++) {
2255                        reg = start_reg + (4 * i);
2256                        r = evergreen_cs_check_reg(p, reg, idx+1+i);
2257                        if (r)
2258                                return r;
2259                }
2260                break;
2261        case PACKET3_SET_CONTEXT_REG:
2262                start_reg = (idx_value << 2) + PACKET3_SET_CONTEXT_REG_START;
2263                end_reg = 4 * pkt->count + start_reg - 4;
2264                if ((start_reg < PACKET3_SET_CONTEXT_REG_START) ||
2265                    (start_reg >= PACKET3_SET_CONTEXT_REG_END) ||
2266                    (end_reg >= PACKET3_SET_CONTEXT_REG_END)) {
2267                        DRM_ERROR("bad PACKET3_SET_CONTEXT_REG\n");
2268                        return -EINVAL;
2269                }
2270                for (i = 0; i < pkt->count; i++) {
2271                        reg = start_reg + (4 * i);
2272                        r = evergreen_cs_check_reg(p, reg, idx+1+i);
2273                        if (r)
2274                                return r;
2275                }
2276                break;
2277        case PACKET3_SET_RESOURCE:
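                    /*
                     * Each resource descriptor is 8 dwords. Textures get
                     * their tiling fields patched and both base and
                     * mip/FMASK addresses relocated; vertex buffers get a
                     * 40-bit GPU address split across dwords 0 and 2.
                     */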
2278                if (pkt->count % 8) {
2279                        DRM_ERROR("bad SET_RESOURCE\n");
2280                        return -EINVAL;
2281                }
2282                start_reg = (idx_value << 2) + PACKET3_SET_RESOURCE_START;
2283                end_reg = 4 * pkt->count + start_reg - 4;
2284                if ((start_reg < PACKET3_SET_RESOURCE_START) ||
2285                    (start_reg >= PACKET3_SET_RESOURCE_END) ||
2286                    (end_reg >= PACKET3_SET_RESOURCE_END)) {
2287                        DRM_ERROR("bad SET_RESOURCE\n");
2288                        return -EINVAL;
2289                }
2290                for (i = 0; i < (pkt->count / 8); i++) {
2291                        struct radeon_bo *texture, *mipmap;
2292                        u32 toffset, moffset;
2293                        u32 size, offset, mip_address, tex_dim;
2294
2295                        switch (G__SQ_CONSTANT_TYPE(radeon_get_ib_value(p, idx+1+(i*8)+7))) {
2296                        case SQ_TEX_VTX_VALID_TEXTURE:
2297                                /* tex base */
2298                                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2299                                if (r) {
2300                                        DRM_ERROR("bad SET_RESOURCE (tex)\n");
2301                                        return -EINVAL;
2302                                }
2303                                if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
2304                                        ib[idx+1+(i*8)+1] |=
2305                                                TEX_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
2306                                        if (reloc->tiling_flags & RADEON_TILING_MACRO) {
2307                                                unsigned bankw, bankh, mtaspect, tile_split;
2308
2309                                                evergreen_tiling_fields(reloc->tiling_flags,
2310                                                                        &bankw, &bankh, &mtaspect,
2311                                                                        &tile_split);
2312                                                ib[idx+1+(i*8)+6] |= TEX_TILE_SPLIT(tile_split);
2313                                                ib[idx+1+(i*8)+7] |=
2314                                                        TEX_BANK_WIDTH(bankw) |
2315                                                        TEX_BANK_HEIGHT(bankh) |
2316                                                        MACRO_TILE_ASPECT(mtaspect) |
2317                                                        TEX_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
2318                                        }
2319                                }
2320                                texture = reloc->robj;
2321                                toffset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
2322
2323                                /* tex mip base */
2324                                tex_dim = ib[idx+1+(i*8)+0] & 0x7;
2325                                mip_address = ib[idx+1+(i*8)+3];
2326
2327                                if ((tex_dim == SQ_TEX_DIM_2D_MSAA || tex_dim == SQ_TEX_DIM_2D_ARRAY_MSAA) &&
2328                                    !mip_address &&
2329                                    !radeon_cs_packet_next_is_pkt3_nop(p)) {
2330                                        /* MIP_ADDRESS should point to FMASK for an MSAA texture.
2331                                         * It should be 0 if FMASK is disabled. */
2332                                        moffset = 0;
2333                                        mipmap = NULL;
2334                                } else {
2335                                        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2336                                        if (r) {
2337                                                DRM_ERROR("bad SET_RESOURCE (tex)\n");
2338                                                return -EINVAL;
2339                                        }
2340                                        moffset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
2341                                        mipmap = reloc->robj;
2342                                }
2343
2344                                r = evergreen_cs_track_validate_texture(p, texture, mipmap, idx+1+(i*8));
2345                                if (r)
2346                                        return r;
2347                                ib[idx+1+(i*8)+2] += toffset;
2348                                ib[idx+1+(i*8)+3] += moffset;
2349                                break;
2350                        case SQ_TEX_VTX_VALID_BUFFER:
2351                        {
2352                                uint64_t offset64;
2353                                /* vtx base */
2354                                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2355                                if (r) {
2356                                        DRM_ERROR("bad SET_RESOURCE (vtx)\n");
2357                                        return -EINVAL;
2358                                }
2359                                offset = radeon_get_ib_value(p, idx+1+(i*8)+0);
2360                                size = radeon_get_ib_value(p, idx+1+(i*8)+1);
2361                                if (p->rdev && (size + offset) > radeon_bo_size(reloc->robj)) {
2362                                        /* force size to size of the buffer */
2363                                        dev_warn(p->dev, "vbo resource too big for the bo, clamping size\n");
2364                                        ib[idx+1+(i*8)+1] = radeon_bo_size(reloc->robj) - offset;
2365                                }
2366
2367                                offset64 = reloc->gpu_offset + offset;
2368                                ib[idx+1+(i*8)+0] = offset64;
2369                                ib[idx+1+(i*8)+2] = (ib[idx+1+(i*8)+2] & 0xffffff00) |
2370                                                    (upper_32_bits(offset64) & 0xff);
2371                                break;
2372                        }
2373                        case SQ_TEX_VTX_INVALID_TEXTURE:
2374                        case SQ_TEX_VTX_INVALID_BUFFER:
2375                        default:
2376                                DRM_ERROR("bad SET_RESOURCE\n");
2377                                return -EINVAL;
2378                        }
2379                }
2380                break;
2381        case PACKET3_SET_ALU_CONST:
2382                /* XXX fix me ALU const buffers only */
2383                break;
2384        case PACKET3_SET_BOOL_CONST:
2385                start_reg = (idx_value << 2) + PACKET3_SET_BOOL_CONST_START;
2386                end_reg = 4 * pkt->count + start_reg - 4;
2387                if ((start_reg < PACKET3_SET_BOOL_CONST_START) ||
2388                    (start_reg >= PACKET3_SET_BOOL_CONST_END) ||
2389                    (end_reg >= PACKET3_SET_BOOL_CONST_END)) {
2390                        DRM_ERROR("bad SET_BOOL_CONST\n");
2391                        return -EINVAL;
2392                }
2393                break;
2394        case PACKET3_SET_LOOP_CONST:
2395                start_reg = (idx_value << 2) + PACKET3_SET_LOOP_CONST_START;
2396                end_reg = 4 * pkt->count + start_reg - 4;
2397                if ((start_reg < PACKET3_SET_LOOP_CONST_START) ||
2398                    (start_reg >= PACKET3_SET_LOOP_CONST_END) ||
2399                    (end_reg >= PACKET3_SET_LOOP_CONST_END)) {
2400                        DRM_ERROR("bad SET_LOOP_CONST\n");
2401                        return -EINVAL;
2402                }
2403                break;
2404        case PACKET3_SET_CTL_CONST:
2405                start_reg = (idx_value << 2) + PACKET3_SET_CTL_CONST_START;
2406                end_reg = 4 * pkt->count + start_reg - 4;
2407                if ((start_reg < PACKET3_SET_CTL_CONST_START) ||
2408                    (start_reg >= PACKET3_SET_CTL_CONST_END) ||
2409                    (end_reg >= PACKET3_SET_CTL_CONST_END)) {
2410                        DRM_ERROR("bad SET_CTL_CONST\n");
2411                        return -EINVAL;
2412                }
2413                break;
2414        case PACKET3_SET_SAMPLER:
2415                if (pkt->count % 3) {
2416                        DRM_ERROR("bad SET_SAMPLER\n");
2417                        return -EINVAL;
2418                }
2419                start_reg = (idx_value << 2) + PACKET3_SET_SAMPLER_START;
2420                end_reg = 4 * pkt->count + start_reg - 4;
2421                if ((start_reg < PACKET3_SET_SAMPLER_START) ||
2422                    (start_reg >= PACKET3_SET_SAMPLER_END) ||
2423                    (end_reg >= PACKET3_SET_SAMPLER_END)) {
2424                        DRM_ERROR("bad SET_SAMPLER\n");
2425                        return -EINVAL;
2426                }
2427                break;
2428        case PACKET3_STRMOUT_BUFFER_UPDATE:
2429                if (pkt->count != 4) {
2430                        DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (invalid count)\n");
2431                        return -EINVAL;
2432                }
2433                /* Updating memory at DST_ADDRESS. */
2434                if (idx_value & 0x1) {
2435                        u64 offset;
2436                        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2437                        if (r) {
2438                                DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing dst reloc)\n");
2439                                return -EINVAL;
2440                        }
2441                        offset = radeon_get_ib_value(p, idx+1);
2442                        offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2443                        if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2444                                DRM_ERROR("bad STRMOUT_BUFFER_UPDATE dst bo too small: 0x%llx, 0x%lx\n",
2445                                          offset + 4, radeon_bo_size(reloc->robj));
2446                                return -EINVAL;
2447                        }
2448                        offset += reloc->gpu_offset;
2449                        ib[idx+1] = offset;
2450                        ib[idx+2] = upper_32_bits(offset) & 0xff;
2451                }
2452                /* Reading data from SRC_ADDRESS. */
2453                if (((idx_value >> 1) & 0x3) == 2) {
2454                        u64 offset;
2455                        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2456                        if (r) {
2457                                DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing src reloc)\n");
2458                                return -EINVAL;
2459                        }
2460                        offset = radeon_get_ib_value(p, idx+3);
2461                        offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2462                        if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2463                                DRM_ERROR("bad STRMOUT_BUFFER_UPDATE src bo too small: 0x%llx, 0x%lx\n",
2464                                          offset + 4, radeon_bo_size(reloc->robj));
2465                                return -EINVAL;
2466                        }
2467                        offset += reloc->gpu_offset;
2468                        ib[idx+3] = offset;
2469                        ib[idx+4] = upper_32_bits(offset) & 0xff;
2470                }
2471                break;
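            /*
             * MEM_WRITE, per the checks below: the first dword is
             * ADDR_LO and the low 8 bits of the second are ADDR_HI
             * (a 40-bit address), followed by two data dwords; the
             * address must be qword aligned because 8 bytes are
             * written.
             */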
2472        case PACKET3_MEM_WRITE:
2473        {
2474                u64 offset;
2475
2476                if (pkt->count != 3) {
2477                        DRM_ERROR("bad MEM_WRITE (invalid count)\n");
2478                        return -EINVAL;
2479                }
2480                r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2481                if (r) {
2482                        DRM_ERROR("bad MEM_WRITE (missing reloc)\n");
2483                        return -EINVAL;
2484                }
2485                offset = radeon_get_ib_value(p, idx+0);
2486                offset += ((u64)(radeon_get_ib_value(p, idx+1) & 0xff)) << 32UL;
2487                if (offset & 0x7) {
2488                        DRM_ERROR("bad MEM_WRITE (address not qwords aligned)\n");
2489                        return -EINVAL;
2490                }
2491                if ((offset + 8) > radeon_bo_size(reloc->robj)) {
2492                        DRM_ERROR("bad MEM_WRITE bo too small: 0x%llx, 0x%lx\n",
2493                                  offset + 8, radeon_bo_size(reloc->robj));
2494                        return -EINVAL;
2495                }
2496                offset += reloc->gpu_offset;
2497                ib[idx+0] = offset;
2498                ib[idx+1] = upper_32_bits(offset) & 0xff;
2499                break;
2500        }
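            /*
             * COPY_DW copies a single dword; bit 0 of the control
             * dword selects a memory source (reloc required) over a
             * source register, and bit 1 does the same for the
             * destination. Register operands must pass the safe-reg
             * check.
             */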
2501        case PACKET3_COPY_DW:
2502                if (pkt->count != 4) {
2503                        DRM_ERROR("bad COPY_DW (invalid count)\n");
2504                        return -EINVAL;
2505                }
2506                if (idx_value & 0x1) {
2507                        u64 offset;
2508                        /* SRC is memory. */
2509                        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2510                        if (r) {
2511                                DRM_ERROR("bad COPY_DW (missing src reloc)\n");
2512                                return -EINVAL;
2513                        }
2514                        offset = radeon_get_ib_value(p, idx+1);
2515                        offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2516                        if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2517                                DRM_ERROR("bad COPY_DW src bo too small: 0x%llx, 0x%lx\n",
2518                                          offset + 4, radeon_bo_size(reloc->robj));
2519                                return -EINVAL;
2520                        }
2521                        offset += reloc->gpu_offset;
2522                        ib[idx+1] = offset;
2523                        ib[idx+2] = upper_32_bits(offset) & 0xff;
2524                } else {
2525                        /* SRC is a reg. */
2526                        reg = radeon_get_ib_value(p, idx+1) << 2;
2527                        if (!evergreen_is_safe_reg(p, reg, idx+1))
2528                                return -EINVAL;
2529                }
2530                if (idx_value & 0x2) {
2531                        u64 offset;
2532                        /* DST is memory. */
2533                        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2534                        if (r) {
2535                                DRM_ERROR("bad COPY_DW (missing dst reloc)\n");
2536                                return -EINVAL;
2537                        }
2538                        offset = radeon_get_ib_value(p, idx+3);
2539                        offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2540                        if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2541                                DRM_ERROR("bad COPY_DW dst bo too small: 0x%llx, 0x%lx\n",
2542                                          offset + 4, radeon_bo_size(reloc->robj));
2543                                return -EINVAL;
2544                        }
2545                        offset += reloc->gpu_offset;
2546                        ib[idx+3] = offset;
2547                        ib[idx+4] = upper_32_bits(offset) & 0xff;
2548                } else {
2549                        /* DST is a reg. */
2550                        reg = radeon_get_ib_value(p, idx+3) << 2;
2551                        if (!evergreen_is_safe_reg(p, reg, idx+3))
2552                                return -EINVAL;
2553                }
2554                break;
2555        case PACKET3_NOP:
2556                break;
2557        default:
2558                DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
2559                return -EINVAL;
2560        }
2561        return 0;
2562}
2563
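    /**
     * evergreen_cs_parse() - parse the CP IB
     * @p:          parser structure holding parsing context.
     *
     * Parses the command stream from the CS ioctl, validates each
     * packet against the state tracker and patches in GPU addresses
     * from the reloc information. (Evergreen-Cayman)
     * Returns 0 for success and an error on failure.
     **/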
2564int evergreen_cs_parse(struct radeon_cs_parser *p)
2565{
2566        struct radeon_cs_packet pkt;
2567        struct evergreen_cs_track *track;
2568        u32 tmp;
2569        int r;
2570
2571        if (p->track == NULL) {
2572                /* initialize the state tracker; we are in KMS mode */
2573                track = kzalloc(sizeof(*track), GFP_KERNEL);
2574                if (track == NULL)
2575                        return -ENOMEM;
2576                evergreen_cs_track_init(track);
2577                if (p->rdev->family >= CHIP_CAYMAN)
2578                        tmp = p->rdev->config.cayman.tile_config;
2579                else
2580                        tmp = p->rdev->config.evergreen.tile_config;
2581
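                    /*
                     * tile_config nibbles, as decoded below: bits [3:0]
                     * select the pipe count, [7:4] the bank count,
                     * [11:8] the group size (256 or 512 bytes) and
                     * [15:12] the row size.
                     */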
2582                switch (tmp & 0xf) {
2583                case 0:
2584                        track->npipes = 1;
2585                        break;
2586                case 1:
2587                default:
2588                        track->npipes = 2;
2589                        break;
2590                case 2:
2591                        track->npipes = 4;
2592                        break;
2593                case 3:
2594                        track->npipes = 8;
2595                        break;
2596                }
2597
2598                switch ((tmp & 0xf0) >> 4) {
2599                case 0:
2600                        track->nbanks = 4;
2601                        break;
2602                case 1:
2603                default:
2604                        track->nbanks = 8;
2605                        break;
2606                case 2:
2607                        track->nbanks = 16;
2608                        break;
2609                }
2610
2611                switch ((tmp & 0xf00) >> 8) {
2612                case 0:
2613                        track->group_size = 256;
2614                        break;
2615                case 1:
2616                default:
2617                        track->group_size = 512;
2618                        break;
2619                }
2620
2621                switch ((tmp & 0xf000) >> 12) {
2622                case 0:
2623                        track->row_size = 1;
2624                        break;
2625                case 1:
2626                default:
2627                        track->row_size = 2;
2628                        break;
2629                case 2:
2630                        track->row_size = 4;
2631                        break;
2632                }
2633
2634                p->track = track;
2635        }
2636        do {
2637                r = radeon_cs_packet_parse(p, &pkt, p->idx);
2638                if (r) {
2639                        kfree(p->track);
2640                        p->track = NULL;
2641                        return r;
2642                }
2643                p->idx += pkt.count + 2;
2644                switch (pkt.type) {
2645                case RADEON_PACKET_TYPE0:
2646                        r = evergreen_cs_parse_packet0(p, &pkt);
2647                        break;
2648                case RADEON_PACKET_TYPE2:
2649                        break;
2650                case RADEON_PACKET_TYPE3:
2651                        r = evergreen_packet3_check(p, &pkt);
2652                        break;
2653                default:
2654                        DRM_ERROR("Unknown packet type %d!\n", pkt.type);
2655                        kfree(p->track);
2656                        p->track = NULL;
2657                        return -EINVAL;
2658                }
2659                if (r) {
2660                        kfree(p->track);
2661                        p->track = NULL;
2662                        return r;
2663                }
2664        } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
2665#if 0
2666        for (r = 0; r < p->ib.length_dw; r++) {
2667                printk(KERN_INFO "%05d  0x%08X\n", r, p->ib.ptr[r]);
2668                mdelay(1);
2669        }
2670#endif
2671        kfree(p->track);
2672        p->track = NULL;
2673        return 0;
2674}
2675
2676/**
2677 * evergreen_dma_cs_parse() - parse the DMA IB
2678 * @p:          parser structure holding parsing context.
2679 *
2680 * Parses the DMA IB from the CS ioctl, updates
2681 * the GPU addresses based on the reloc information and
2682 * checks for errors. (Evergreen-Cayman)
2683 * Returns 0 for success and an error on failure.
2684 **/
2685int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
2686{
2687        struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
2688        struct radeon_cs_reloc *src_reloc, *dst_reloc, *dst2_reloc;
2689        u32 header, cmd, count, sub_cmd;
2690        volatile u32 *ib = p->ib.ptr;
2691        u32 idx;
2692        u64 src_offset, dst_offset, dst2_offset;
2693        int r;
2694
2695        do {
2696                if (p->idx >= ib_chunk->length_dw) {
2697                        DRM_ERROR("Cannot parse packet at %d after CS end %d!\n",
2698                                  p->idx, ib_chunk->length_dw);
2699                        return -EINVAL;
2700                }
2701                idx = p->idx;
2702                header = radeon_get_ib_value(p, idx);
2703                cmd = GET_DMA_CMD(header);
2704                count = GET_DMA_COUNT(header);
2705                sub_cmd = GET_DMA_SUB_CMD(header);
2706
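                    /*
                     * The 32-bit DMA header packs the command opcode,
                     * sub command and dword count; the GET_DMA_CMD/
                     * GET_DMA_SUB_CMD/GET_DMA_COUNT macros (see
                     * evergreend.h) extract the fields decoded above.
                     */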
2707                switch (cmd) {
2708                case DMA_PACKET_WRITE:
2709                        r = r600_dma_cs_next_reloc(p, &dst_reloc);
2710                        if (r) {
2711                                DRM_ERROR("bad DMA_PACKET_WRITE\n");
2712                                return -EINVAL;
2713                        }
2714                        switch (sub_cmd) {
2715                        /* tiled */
2716                        case 8:
2717                                dst_offset = radeon_get_ib_value(p, idx+1);
2718                                dst_offset <<= 8;
2719
2720                                ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
2721                                p->idx += count + 7;
2722                                break;
2723                        /* linear */
2724                        case 0:
2725                                dst_offset = radeon_get_ib_value(p, idx+1);
2726                                dst_offset |= ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2727
2728                                ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2729                                ib[idx+2] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2730                                p->idx += count + 3;
2731                                break;
2732                        default:
2733                                DRM_ERROR("bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", idx, header);
2734                                return -EINVAL;
2735                        }
2736                        if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2737                                dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n",
2738                                         dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2739                                return -EINVAL;
2740                        }
2741                        break;
2742                case DMA_PACKET_COPY:
2743                        r = r600_dma_cs_next_reloc(p, &src_reloc);
2744                        if (r) {
2745                                DRM_ERROR("bad DMA_PACKET_COPY\n");
2746                                return -EINVAL;
2747                        }
2748                        r = r600_dma_cs_next_reloc(p, &dst_reloc);
2749                        if (r) {
2750                                DRM_ERROR("bad DMA_PACKET_COPY\n");
2751                                return -EINVAL;
2752                        }
2753                        switch (sub_cmd) {
2754                        /* Copy L2L, DW aligned */
2755                        case 0x00:
2756                                /* L2L, dw */
2757                                src_offset = radeon_get_ib_value(p, idx+2);
2758                                src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2759                                dst_offset = radeon_get_ib_value(p, idx+1);
2760                                dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;
2761                                if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2762                                        dev_warn(p->dev, "DMA L2L, dw src buffer too small (%llu %lu)\n",
2763                                                        src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2764                                        return -EINVAL;
2765                                }
2766                                if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2767                                        dev_warn(p->dev, "DMA L2L, dw dst buffer too small (%llu %lu)\n",
2768                                                        dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2769                                        return -EINVAL;
2770                                }
2771                                ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2772                                ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
2773                                ib[idx+3] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2774                                ib[idx+4] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2775                                p->idx += 5;
2776                                break;
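                            /*
                             * For the L2T/T2L sub commands, bit 31 of
                             * dword 2 is the "detile" bit: set means
                             * tiled source / linear destination, clear
                             * means the reverse. Tiled addresses are in
                             * 256-byte units (hence the << 8 / >> 8);
                             * linear addresses are 40-bit byte addresses
                             * split across two dwords.
                             */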
2777                        /* Copy L2T/T2L */
2778                        case 0x08:
2779                                /* detile bit */
2780                                if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
2781                                        /* tiled src, linear dst */
2782                                        src_offset = radeon_get_ib_value(p, idx+1);
2783                                        src_offset <<= 8;
2784                                        ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
2785
2786                                        dst_offset = radeon_get_ib_value(p, idx + 7);
2787                                        dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
2788                                        ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2789                                        ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2790                                } else {
2791                                        /* linear src, tiled dst */
2792                                        src_offset = radeon_get_ib_value(p, idx+7);
2793                                        src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
2794                                        ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
2795                                        ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2796
2797                                        dst_offset = radeon_get_ib_value(p, idx+1);
2798                                        dst_offset <<= 8;
2799                                        ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
2800                                }
2801                                if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2802                                        dev_warn(p->dev, "DMA L2T, src buffer too small (%llu %lu)\n",
2803                                                        src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2804                                        return -EINVAL;
2805                                }
2806                                if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2807                                        dev_warn(p->dev, "DMA L2T, dst buffer too small (%llu %lu)\n",
2808                                                        dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2809                                        return -EINVAL;
2810                                }
2811                                p->idx += 9;
2812                                break;
2813                        /* Copy L2L, byte aligned */
2814                        case 0x40:
2815                                /* L2L, byte */
2816                                src_offset = radeon_get_ib_value(p, idx+2);
2817                                src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2818                                dst_offset = radeon_get_ib_value(p, idx+1);
2819                                dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;
2820                                if ((src_offset + count) > radeon_bo_size(src_reloc->robj)) {
2821                                        dev_warn(p->dev, "DMA L2L, byte src buffer too small (%llu %lu)\n",
2822                                                        src_offset + count, radeon_bo_size(src_reloc->robj));
2823                                        return -EINVAL;
2824                                }
2825                                if ((dst_offset + count) > radeon_bo_size(dst_reloc->robj)) {
2826                                        dev_warn(p->dev, "DMA L2L, byte dst buffer too small (%llu %lu)\n",
2827                                                        dst_offset + count, radeon_bo_size(dst_reloc->robj));
2828                                        return -EINVAL;
2829                                }
2830                                ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xffffffff);
2831                                ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xffffffff);
2832                                ib[idx+3] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2833                                ib[idx+4] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2834                                p->idx += 5;
2835                                break;
2836                        /* Copy L2L, partial */
2837                        case 0x41:
2838                                /* L2L, partial */
2839                                if (p->family < CHIP_CAYMAN) {
2840                                        DRM_ERROR("L2L Partial is cayman only!\n");
2841                                        return -EINVAL;
2842                                }
2843                                ib[idx+1] += (u32)(src_reloc->gpu_offset & 0xffffffff);
2844                                ib[idx+2] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2845                                ib[idx+4] += (u32)(dst_reloc->gpu_offset & 0xffffffff);
2846                                ib[idx+5] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2847
2848                                p->idx += 9;
2849                                break;
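                            /*
                             * Broadcast copies write the same source
                             * data to two destinations, so a second dst
                             * reloc is consumed and both destination
                             * buffers are bounds-checked.
                             */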
2850                        /* Copy L2L, DW aligned, broadcast */
2851                        case 0x44:
2852                                /* L2L, dw, broadcast */
2853                                r = r600_dma_cs_next_reloc(p, &dst2_reloc);
2854                                if (r) {
2855                                        DRM_ERROR("bad L2L, dw, broadcast DMA_PACKET_COPY\n");
2856                                        return -EINVAL;
2857                                }
2858                                dst_offset = radeon_get_ib_value(p, idx+1);
2859                                dst_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2860                                dst2_offset = radeon_get_ib_value(p, idx+2);
2861                                dst2_offset |= ((u64)(radeon_get_ib_value(p, idx+5) & 0xff)) << 32;
2862                                src_offset = radeon_get_ib_value(p, idx+3);
2863                                src_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32;
2864                                if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2865                                        dev_warn(p->dev, "DMA L2L, dw, broadcast src buffer too small (%llu %lu)\n",
2866                                                        src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2867                                        return -EINVAL;
2868                                }
2869                                if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2870                                        dev_warn(p->dev, "DMA L2L, dw, broadcast dst buffer too small (%llu %lu)\n",
2871                                                        dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2872                                        return -EINVAL;
2873                                }
2874                                if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
2875                                        dev_warn(p->dev, "DMA L2L, dw, broadcast dst2 buffer too small (%llu %lu)\n",
2876                                                        dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
2877                                        return -EINVAL;
2878                                }
2879                                ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2880                                ib[idx+2] += (u32)(dst2_reloc->gpu_offset & 0xfffffffc);
2881                                ib[idx+3] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
2882                                ib[idx+4] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2883                                ib[idx+5] += upper_32_bits(dst2_reloc->gpu_offset) & 0xff;
2884                                ib[idx+6] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2885                                p->idx += 7;
2886                                break;
2887                        /* Copy L2T Frame to Field */
2888                        case 0x48:
2889                                if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
2890                                        DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
2891                                        return -EINVAL;
2892                                }
2893                                r = r600_dma_cs_next_reloc(p, &dst2_reloc);
2894                                if (r) {
2895                                        DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
2896                                        return -EINVAL;
2897                                }
2898                                dst_offset = radeon_get_ib_value(p, idx+1);
2899                                dst_offset <<= 8;
2900                                dst2_offset = radeon_get_ib_value(p, idx+2);
2901                                dst2_offset <<= 8;
2902                                src_offset = radeon_get_ib_value(p, idx+8);
2903                                src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
2904                                if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2905                                        dev_warn(p->dev, "DMA L2T, frame to fields src buffer too small (%llu %lu)\n",
2906                                                        src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2907                                        return -EINVAL;
2908                                }
2909                                if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2910                                        dev_warn(p->dev, "DMA L2T, frame to fields dst buffer too small (%llu %lu)\n",
2911                                                        dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2912                                        return -EINVAL;
2913                                }
2914                                if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
2915                                        dev_warn(p->dev, "DMA L2T, frame to fields dst2 buffer too small (%llu %lu)\n",
2916                                                        dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
2917                                        return -EINVAL;
2918                                }
2919                                ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
2920                                ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8);
2921                                ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
2922                                ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2923                                p->idx += 10;
2924                                break;
2925                        /* Copy L2T/T2L, partial */
2926                        case 0x49:
2927                                /* L2T, T2L partial */
2928                                if (p->family < CHIP_CAYMAN) {
2929                                        DRM_ERROR("L2T, T2L Partial is cayman only!\n");
2930                                        return -EINVAL;
2931                                }
2932                                /* detile bit */
2933                                if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
2934                                        /* tiled src, linear dst */
2935                                        ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
2936
2937                                        ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2938                                        ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2939                                } else {
2940                                        /* linear src, tiled dst */
2941                                        ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
2942                                        ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2943
2944                                        ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
2945                                }
2946                                p->idx += 12;
2947                                break;
2948                        /* Copy L2T broadcast */
2949                        case 0x4b:
2950                                /* L2T, broadcast */
2951                                if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
2952                                        DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
2953                                        return -EINVAL;
2954                                }
2955                                r = r600_dma_cs_next_reloc(p, &dst2_reloc);
2956                                if (r) {
2957                                        DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
2958                                        return -EINVAL;
2959                                }
2960                                dst_offset = radeon_get_ib_value(p, idx+1);
2961                                dst_offset <<= 8;
2962                                dst2_offset = radeon_get_ib_value(p, idx+2);
2963                                dst2_offset <<= 8;
2964                                src_offset = radeon_get_ib_value(p, idx+8);
2965                                src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
2966                                if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2967                                        dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
2968                                                        src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2969                                        return -EINVAL;
2970                                }
2971                                if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2972                                        dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
2973                                                        dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2974                                        return -EINVAL;
2975                                }
2976                                if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
2977                                        dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
2978                                                        dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
2979                                        return -EINVAL;
2980                                }
2981                                ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
2982                                ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8);
2983                                ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
2984                                ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2985                                p->idx += 10;
2986                                break;
2987                        /* Copy L2T/T2L (tile units) */
2988                        case 0x4c:
2989                                /* L2T, T2L */
2990                                /* detile bit */
2991                                if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
2992                                        /* tiled src, linear dst */
2993                                        src_offset = radeon_get_ib_value(p, idx+1);
2994                                        src_offset <<= 8;
2995                                        ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
2996
2997                                        dst_offset = radeon_get_ib_value(p, idx+7);
2998                                        dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
2999                                        ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
3000                                        ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
3001                                } else {
3002                                        /* linear src, tiled dst */
3003                                        src_offset = radeon_get_ib_value(p, idx+7);
3004                                        src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
3005                                        ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3006                                        ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3007
3008                                        dst_offset = radeon_get_ib_value(p, idx+1);
3009                                        dst_offset <<= 8;
3010                                        ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3011                                }
3012                                if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3013                                        dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%llu %lu)\n",
3014                                                        src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3015                                        return -EINVAL;
3016                                }
3017                                if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3018                                        dev_warn(p->dev, "DMA L2T, T2L dst buffer too small (%llu %lu)\n",
3019                                                        dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3020                                        return -EINVAL;
3021                                }
3022                                p->idx += 9;
3023                                break;
3024                        /* Copy T2T, partial (tile units) */
3025                        case 0x4d:
3026                                /* T2T partial */
3027                                if (p->family < CHIP_CAYMAN) {
3028                                        DRM_ERROR("T2T Partial is cayman only!\n");
3029                                        return -EINVAL;
3030                                }
3031                                ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
3032                                ib[idx+4] += (u32)(dst_reloc->gpu_offset >> 8);
3033                                p->idx += 13;
3034                                break;
3035                        /* Copy L2T broadcast (tile units) */
3036                        case 0x4f:
3037                                /* L2T, broadcast */
3038                                if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
3039                                        DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3040                                        return -EINVAL;
3041                                }
3042                                r = r600_dma_cs_next_reloc(p, &dst2_reloc);
3043                                if (r) {
3044                                        DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3045                                        return -EINVAL;
3046                                }
3047                                dst_offset = radeon_get_ib_value(p, idx+1);
3048                                dst_offset <<= 8;
3049                                dst2_offset = radeon_get_ib_value(p, idx+2);
3050                                dst2_offset <<= 8;
3051                                src_offset = radeon_get_ib_value(p, idx+8);
3052                                src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
3053                                if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3054                                        dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
3055                                                        src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3056                                        return -EINVAL;
3057                                }
3058                                if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3059                                        dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
3060                                                        dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3061                                        return -EINVAL;
3062                                }
3063                                if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3064                                        dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
3065                                                        dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3066                                        return -EINVAL;
3067                                }
3068                                ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3069                                ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8);
3070                                ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3071                                ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3072                                p->idx += 10;
3073                                break;
3074                        default:
3075                                DRM_ERROR("bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, header);
3076                                return -EINVAL;
3077                        }
3078                        break;
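                    /*
                     * CONSTANT_FILL: dword 1 holds address bits [31:0]
                     * and bits [23:16] of dword 3 hold address bits
                     * [39:32] (hence the << 16 when patching); the fill
                     * value (dword 2) is left untouched.
                     */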
3079                case DMA_PACKET_CONSTANT_FILL:
3080                        r = r600_dma_cs_next_reloc(p, &dst_reloc);
3081                        if (r) {
3082                                DRM_ERROR("bad DMA_PACKET_CONSTANT_FILL\n");
3083                                return -EINVAL;
3084                        }
3085                        dst_offset = radeon_get_ib_value(p, idx+1);
3086                        dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0x00ff0000)) << 16;
3087                        if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3088                                dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n",
3089                                         dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3090                                return -EINVAL;
3091                        }
3092                        ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
3093                        ib[idx+3] += (upper_32_bits(dst_reloc->gpu_offset) << 16) & 0x00ff0000;
3094                        p->idx += 4;
3095                        break;
3096                case DMA_PACKET_NOP:
3097                        p->idx += 1;
3098                        break;
3099                default:
3100                        DRM_ERROR("Unknown packet type %d at %d!\n", cmd, idx);
3101                        return -EINVAL;
3102                }
3103        } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
3104#if 0
3105        for (r = 0; r < p->ib.length_dw; r++) {
3106                printk(KERN_INFO "%05d  0x%08X\n", r, p->ib.ptr[r]);
3107                mdelay(1);
3108        }
3109#endif
3110        return 0;
3111}
3112
3113/* vm parser */
3114static bool evergreen_vm_reg_valid(u32 reg)
3115{
3116        /* context regs are fine */
3117        if (reg >= 0x28000)
3118                return true;
3119
3120        /* check config regs */
3121        switch (reg) {
3122        case WAIT_UNTIL:
3123        case GRBM_GFX_INDEX:
3124        case CP_STRMOUT_CNTL:
3125        case CP_COHER_CNTL:
3126        case CP_COHER_SIZE:
3127        case VGT_VTX_VECT_EJECT_REG:
3128        case VGT_CACHE_INVALIDATION:
3129        case VGT_GS_VERTEX_REUSE:
3130        case VGT_PRIMITIVE_TYPE:
3131        case VGT_INDEX_TYPE:
3132        case VGT_NUM_INDICES:
3133        case VGT_NUM_INSTANCES:
3134        case VGT_COMPUTE_DIM_X:
3135        case VGT_COMPUTE_DIM_Y:
3136        case VGT_COMPUTE_DIM_Z:
3137        case VGT_COMPUTE_START_X:
3138        case VGT_COMPUTE_START_Y:
3139        case VGT_COMPUTE_START_Z:
3140        case VGT_COMPUTE_INDEX:
3141        case VGT_COMPUTE_THREAD_GROUP_SIZE:
3142        case VGT_HS_OFFCHIP_PARAM:
3143        case PA_CL_ENHANCE:
3144        case PA_SU_LINE_STIPPLE_VALUE:
3145        case PA_SC_LINE_STIPPLE_STATE:
3146        case PA_SC_ENHANCE:
3147        case SQ_DYN_GPR_CNTL_PS_FLUSH_REQ:
3148        case SQ_DYN_GPR_SIMD_LOCK_EN:
3149        case SQ_CONFIG:
3150        case SQ_GPR_RESOURCE_MGMT_1:
3151        case SQ_GLOBAL_GPR_RESOURCE_MGMT_1:
3152        case SQ_GLOBAL_GPR_RESOURCE_MGMT_2:
3153        case SQ_CONST_MEM_BASE:
3154        case SQ_STATIC_THREAD_MGMT_1:
3155        case SQ_STATIC_THREAD_MGMT_2:
3156        case SQ_STATIC_THREAD_MGMT_3:
3157        case SPI_CONFIG_CNTL:
3158        case SPI_CONFIG_CNTL_1:
3159        case TA_CNTL_AUX:
3160        case DB_DEBUG:
3161        case DB_DEBUG2:
3162        case DB_DEBUG3:
3163        case DB_DEBUG4:
3164        case DB_WATERMARKS:
3165        case TD_PS_BORDER_COLOR_INDEX:
3166        case TD_PS_BORDER_COLOR_RED:
3167        case TD_PS_BORDER_COLOR_GREEN:
3168        case TD_PS_BORDER_COLOR_BLUE:
3169        case TD_PS_BORDER_COLOR_ALPHA:
3170        case TD_VS_BORDER_COLOR_INDEX:
3171        case TD_VS_BORDER_COLOR_RED:
3172        case TD_VS_BORDER_COLOR_GREEN:
3173        case TD_VS_BORDER_COLOR_BLUE:
3174        case TD_VS_BORDER_COLOR_ALPHA:
3175        case TD_GS_BORDER_COLOR_INDEX:
3176        case TD_GS_BORDER_COLOR_RED:
3177        case TD_GS_BORDER_COLOR_GREEN:
3178        case TD_GS_BORDER_COLOR_BLUE:
3179        case TD_GS_BORDER_COLOR_ALPHA:
3180        case TD_HS_BORDER_COLOR_INDEX:
3181        case TD_HS_BORDER_COLOR_RED:
3182        case TD_HS_BORDER_COLOR_GREEN:
3183        case TD_HS_BORDER_COLOR_BLUE:
3184        case TD_HS_BORDER_COLOR_ALPHA:
3185        case TD_LS_BORDER_COLOR_INDEX:
3186        case TD_LS_BORDER_COLOR_RED:
3187        case TD_LS_BORDER_COLOR_GREEN:
3188        case TD_LS_BORDER_COLOR_BLUE:
3189        case TD_LS_BORDER_COLOR_ALPHA:
3190        case TD_CS_BORDER_COLOR_INDEX:
3191        case TD_CS_BORDER_COLOR_RED:
3192        case TD_CS_BORDER_COLOR_GREEN:
3193        case TD_CS_BORDER_COLOR_BLUE:
3194        case TD_CS_BORDER_COLOR_ALPHA:
3195        case SQ_ESGS_RING_SIZE:
3196        case SQ_GSVS_RING_SIZE:
3197        case SQ_ESTMP_RING_SIZE:
3198        case SQ_GSTMP_RING_SIZE:
3199        case SQ_HSTMP_RING_SIZE:
3200        case SQ_LSTMP_RING_SIZE:
3201        case SQ_PSTMP_RING_SIZE:
3202        case SQ_VSTMP_RING_SIZE:
3203        case SQ_ESGS_RING_ITEMSIZE:
3204        case SQ_ESTMP_RING_ITEMSIZE:
3205        case SQ_GSTMP_RING_ITEMSIZE:
3206        case SQ_GSVS_RING_ITEMSIZE:
3207        case SQ_GS_VERT_ITEMSIZE:
3208        case SQ_GS_VERT_ITEMSIZE_1:
3209        case SQ_GS_VERT_ITEMSIZE_2:
3210        case SQ_GS_VERT_ITEMSIZE_3:
3211        case SQ_GSVS_RING_OFFSET_1:
3212        case SQ_GSVS_RING_OFFSET_2:
3213        case SQ_GSVS_RING_OFFSET_3:
3214        case SQ_HSTMP_RING_ITEMSIZE:
3215        case SQ_LSTMP_RING_ITEMSIZE:
3216        case SQ_PSTMP_RING_ITEMSIZE:
3217        case SQ_VSTMP_RING_ITEMSIZE:
3218        case VGT_TF_RING_SIZE:
3219        case SQ_ESGS_RING_BASE:
3220        case SQ_GSVS_RING_BASE:
3221        case SQ_ESTMP_RING_BASE:
3222        case SQ_GSTMP_RING_BASE:
3223        case SQ_HSTMP_RING_BASE:
3224        case SQ_LSTMP_RING_BASE:
3225        case SQ_PSTMP_RING_BASE:
3226        case SQ_VSTMP_RING_BASE:
3227        case CAYMAN_VGT_OFFCHIP_LDS_BASE:
3228        case CAYMAN_SQ_EX_ALLOC_TABLE_SLOTS:
3229                return true;
3230        default:
3231                DRM_ERROR("Invalid register 0x%x in CS\n", reg);
3232                return false;
3233        }
3234}
3235
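    /*
     * Validate a PACKET3 from a VM IB. With per-VM address spaces
     * there are no relocs to patch, so only register writes outside
     * the whitelist need to be rejected; most draw/dispatch packets
     * pass through unchanged.
     */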
3236static int evergreen_vm_packet3_check(struct radeon_device *rdev,
3237                                      u32 *ib, struct radeon_cs_packet *pkt)
3238{
3239        u32 idx = pkt->idx + 1;
3240        u32 idx_value = ib[idx];
3241        u32 start_reg, end_reg, reg, i;
3242        u32 command, info;
3243
3244        switch (pkt->opcode) {
3245        case PACKET3_NOP:
3246        case PACKET3_SET_BASE:
3247        case PACKET3_CLEAR_STATE:
3248        case PACKET3_INDEX_BUFFER_SIZE:
3249        case PACKET3_DISPATCH_DIRECT:
3250        case PACKET3_DISPATCH_INDIRECT:
3251        case PACKET3_MODE_CONTROL:
3252        case PACKET3_SET_PREDICATION:
3253        case PACKET3_COND_EXEC:
3254        case PACKET3_PRED_EXEC:
3255        case PACKET3_DRAW_INDIRECT:
3256        case PACKET3_DRAW_INDEX_INDIRECT:
3257        case PACKET3_INDEX_BASE:
3258        case PACKET3_DRAW_INDEX_2:
3259        case PACKET3_CONTEXT_CONTROL:
3260        case PACKET3_DRAW_INDEX_OFFSET:
3261        case PACKET3_INDEX_TYPE:
3262        case PACKET3_DRAW_INDEX:
3263        case PACKET3_DRAW_INDEX_AUTO:
3264        case PACKET3_DRAW_INDEX_IMMD:
3265        case PACKET3_NUM_INSTANCES:
3266        case PACKET3_DRAW_INDEX_MULTI_AUTO:
3267        case PACKET3_STRMOUT_BUFFER_UPDATE:
3268        case PACKET3_DRAW_INDEX_OFFSET_2:
3269        case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
3270        case PACKET3_MPEG_INDEX:
3271        case PACKET3_WAIT_REG_MEM:
3272        case PACKET3_MEM_WRITE:
3273        case PACKET3_SURFACE_SYNC:
3274        case PACKET3_EVENT_WRITE:
3275        case PACKET3_EVENT_WRITE_EOP:
3276        case PACKET3_EVENT_WRITE_EOS:
3277        case PACKET3_SET_CONTEXT_REG:
3278        case PACKET3_SET_BOOL_CONST:
3279        case PACKET3_SET_LOOP_CONST:
3280        case PACKET3_SET_RESOURCE:
3281        case PACKET3_SET_SAMPLER:
3282        case PACKET3_SET_CTL_CONST:
3283        case PACKET3_SET_RESOURCE_OFFSET:
3284        case PACKET3_SET_CONTEXT_REG_INDIRECT:
3285        case PACKET3_SET_RESOURCE_INDIRECT:
3286        case CAYMAN_PACKET3_DEALLOC_STATE:
3287                break;
3288        case PACKET3_COND_WRITE:
3289                if (idx_value & 0x100) {
3290                        reg = ib[idx + 5] * 4;
3291                        if (!evergreen_vm_reg_valid(reg))
3292                                return -EINVAL;
3293                }
3294                break;
3295        case PACKET3_COPY_DW:
3296                if (idx_value & 0x2) {
3297                        reg = ib[idx + 3] * 4;
3298                        if (!evergreen_vm_reg_valid(reg))
3299                                return -EINVAL;
3300                }
3301                break;
3302        case PACKET3_SET_CONFIG_REG:
3303                start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
3304                end_reg = 4 * pkt->count + start_reg - 4;
3305                if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
3306                    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
3307                    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
3308                        DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
3309                        return -EINVAL;
3310                }
3311                for (i = 0; i < pkt->count; i++) {
3312                        reg = start_reg + (4 * i);
3313                        if (!evergreen_vm_reg_valid(reg))
3314                                return -EINVAL;
3315                }
3316                break;
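            /*
             * CP_DMA, per the masks below: bits [30:29] of the INFO
             * dword (dword 1) select the source address space and bits
             * [21:20] the destination, where 0 means memory unless the
             * SAS/DAS command bits redirect it to registers; the low
             * 21 bits of the COMMAND dword (dword 4) are the byte
             * count. Register transfers must be dword aligned and each
             * register touched must be on the whitelist.
             */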
3317        case PACKET3_CP_DMA:
3318                command = ib[idx + 4];
3319                info = ib[idx + 1];
3320                if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
3321                    (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
3322                    ((((info & 0x00300000) >> 20) == 0) &&
3323                     (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
3324                    ((((info & 0x60000000) >> 29) == 0) &&
3325                     (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
3326                        /* non mem-to-mem copies require a dword-aligned count */
3327                        if ((command & 0x1fffff) % 4) {
3328                                DRM_ERROR("CP DMA command requires dw count alignment\n");
3329                                return -EINVAL;
3330                        }
3331                }
3332                if (command & PACKET3_CP_DMA_CMD_SAS) {
3333                        /* src address space is register */
3334                        if (((info & 0x60000000) >> 29) == 0) {
3335                                start_reg = idx_value << 2;
3336                                if (command & PACKET3_CP_DMA_CMD_SAIC) {
3337                                        reg = start_reg;
3338                                        if (!evergreen_vm_reg_valid(reg)) {
3339                                                DRM_ERROR("CP DMA Bad SRC register\n");
3340                                                return -EINVAL;
3341                                        }
3342                                } else {
3343                                        for (i = 0; i < (command & 0x1fffff); i++) {
3344                                                reg = start_reg + (4 * i);
3345                                                if (!evergreen_vm_reg_valid(reg)) {
3346                                                        DRM_ERROR("CP DMA Bad SRC register\n");
3347                                                        return -EINVAL;
3348                                                }
3349                                        }
3350                                }
3351                        }
3352                }
3353                if (command & PACKET3_CP_DMA_CMD_DAS) {
3354                        /* dst address space is register */
3355                        if (((info & 0x00300000) >> 20) == 0) {
3356                                start_reg = ib[idx + 2];
3357                                if (command & PACKET3_CP_DMA_CMD_DAIC) {
3358                                        reg = start_reg;
3359                                        if (!evergreen_vm_reg_valid(reg)) {
3360                                                DRM_ERROR("CP DMA Bad DST register\n");
3361                                                return -EINVAL;
3362                                        }
3363                                } else {
3364                                        for (i = 0; i < (command & 0x1fffff); i++) {
3365                                                reg = start_reg + (4 * i);
3366                                                if (!evergreen_vm_reg_valid(reg)) {
3367                                                        DRM_ERROR("CP DMA Bad DST register\n");
3368                                                        return -EINVAL;
3369                                                }
3370                                        }
3371                                }
3372                        }
3373                }
3374                break;
3375        default:
3376                return -EINVAL;
3377        }
3378        return 0;
3379}
3380
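    /**
     * evergreen_ib_parse() - parse the CP IB for VM
     * @rdev: radeon_device pointer
     * @ib: radeon_ib pointer
     *
     * Walks the IB packet by packet and validates each PACKET3 with
     * evergreen_vm_packet3_check(); type 0 packets are rejected
     * outright.
     * Returns 0 for success and an error on failure.
     **/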
3381int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3382{
3383        int ret = 0;
3384        u32 idx = 0;
3385        struct radeon_cs_packet pkt;
3386
3387        do {
3388                pkt.idx = idx;
3389                pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
3390                pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
3391                pkt.one_reg_wr = 0;
3392                switch (pkt.type) {
3393                case RADEON_PACKET_TYPE0:
3394                        dev_err(rdev->dev, "Packet0 not allowed!\n");
3395                        ret = -EINVAL;
3396                        break;
3397                case RADEON_PACKET_TYPE2:
3398                        idx += 1;
3399                        break;
3400                case RADEON_PACKET_TYPE3:
3401                        pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
3402                        ret = evergreen_vm_packet3_check(rdev, ib->ptr, &pkt);
3403                        idx += pkt.count + 2;
3404                        break;
3405                default:
3406                        dev_err(rdev->dev, "Unknown packet type %d!\n", pkt.type);
3407                        ret = -EINVAL;
3408                        break;
3409                }
3410                if (ret)
3411                        break;
3412        } while (idx < ib->length_dw);
3413
3414        return ret;
3415}
3416
3417/**
3418 * evergreen_dma_ib_parse() - parse the DMA IB for VM
3419 * @rdev: radeon_device pointer
3420 * @ib: radeon_ib pointer
3421 *
3422 * Parses the DMA IB from the VM CS ioctl and
3423 * checks for errors. (Cayman-SI)
3424 * Returns 0 for success and an error on failure.
3425 **/
3426int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3427{
3428        u32 idx = 0;
3429        u32 header, cmd, count, sub_cmd;
3430
3431        do {
3432                header = ib->ptr[idx];
3433                cmd = GET_DMA_CMD(header);
3434                count = GET_DMA_COUNT(header);
3435                sub_cmd = GET_DMA_SUB_CMD(header);
3436
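                    /*
                     * With VM there is nothing to relocate, so this
                     * pass only needs each packet's size to step
                     * through the IB and to reject unknown commands;
                     * the per-sub-command strides mirror
                     * evergreen_dma_cs_parse() above.
                     */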
3437                switch (cmd) {
3438                case DMA_PACKET_WRITE:
3439                        switch (sub_cmd) {
3440                        /* tiled */
3441                        case 8:
3442                                idx += count + 7;
3443                                break;
3444                        /* linear */
3445                        case 0:
3446                                idx += count + 3;
3447                                break;
3448                        default:
3449                                DRM_ERROR("bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", idx, ib->ptr[idx]);
3450                                return -EINVAL;
3451                        }
3452                        break;
3453                case DMA_PACKET_COPY:
3454                        switch (sub_cmd) {
3455                        /* Copy L2L, DW aligned */
3456                        case 0x00:
3457                                idx += 5;
3458                                break;
3459                        /* Copy L2T/T2L */
3460                        case 0x08:
3461                                idx += 9;
3462                                break;
3463                        /* Copy L2L, byte aligned */
3464                        case 0x40:
3465                                idx += 5;
3466                                break;
3467                        /* Copy L2L, partial */
3468                        case 0x41:
3469                                idx += 9;
3470                                break;
3471                        /* Copy L2L, DW aligned, broadcast */
3472                        case 0x44:
3473                                idx += 7;
3474                                break;
3475                        /* Copy L2T Frame to Field */
3476                        case 0x48:
3477                                idx += 10;
3478                                break;
3479                        /* Copy L2T/T2L, partial */
3480                        case 0x49:
3481                                idx += 12;
3482                                break;
3483                        /* Copy L2T broadcast */
3484                        case 0x4b:
3485                                idx += 10;
3486                                break;
3487                        /* Copy L2T/T2L (tile units) */
3488                        case 0x4c:
3489                                idx += 9;
3490                                break;
3491                        /* Copy T2T, partial (tile units) */
3492                        case 0x4d:
3493                                idx += 13;
3494                                break;
3495                        /* Copy L2T broadcast (tile units) */
3496                        case 0x4f:
3497                                idx += 10;
3498                                break;
3499                        default:
3500                                DRM_ERROR("bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, ib->ptr[idx]);
3501                                return -EINVAL;
3502                        }
3503                        break;
3504                case DMA_PACKET_CONSTANT_FILL:
3505                        idx += 4;
3506                        break;
3507                case DMA_PACKET_NOP:
3508                        idx += 1;
3509                        break;
3510                default:
3511                        DRM_ERROR("Unknown packet type %d at %d!\n", cmd, idx);
3512                        return -EINVAL;
3513                }
3514        } while (idx < ib->length_dw);
3515
3516        return 0;
3517}
3518