linux/drivers/gpu/drm/radeon/radeon_cs.c
/*
 * Copyright 2008 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 */
#include <linux/list_sort.h>
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon_reg.h"
#include "radeon.h"
#include "radeon_trace.h"

#define RADEON_CS_MAX_PRIORITY          32u
#define RADEON_CS_NUM_BUCKETS           (RADEON_CS_MAX_PRIORITY + 1)

/* This is based on the bucket sort with O(n) time complexity.
 * An item with priority "i" is added to bucket[i]. The lists are then
 * concatenated in descending order.
 */
struct radeon_cs_buckets {
        struct list_head bucket[RADEON_CS_NUM_BUCKETS];
};
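
/* A small worked example of the scheme above: relocs submitted with
 * priorities 3, 17 and 3 land in bucket[3], bucket[17] and bucket[3].
 * After radeon_cs_buckets_get_list() the output list holds the priority-17
 * buffer first, then the two priority-3 buffers in their original submission
 * order, i.e. descending priority and stable within a priority.
 */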

static void radeon_cs_buckets_init(struct radeon_cs_buckets *b)
{
        unsigned i;

        for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++)
                INIT_LIST_HEAD(&b->bucket[i]);
}

static void radeon_cs_buckets_add(struct radeon_cs_buckets *b,
                                  struct list_head *item, unsigned priority)
{
        /* Since buffers which appear sooner in the relocation list are
         * likely to be used more often than buffers which appear later
         * in the list, the sort mustn't change the ordering of buffers
         * with the same priority, i.e. it must be stable.
         */
        list_add_tail(item, &b->bucket[min(priority, RADEON_CS_MAX_PRIORITY)]);
}

static void radeon_cs_buckets_get_list(struct radeon_cs_buckets *b,
                                       struct list_head *out_list)
{
        unsigned i;

        /* Connect the sorted buckets in the output list. */
        for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) {
                list_splice(&b->bucket[i], out_list);
        }
}

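/**
 * radeon_cs_parser_relocs() - build and validate the reloc buffer list
 * @p:          parser structure holding parsing context.
 *
 * Looks up the GEM object for every entry in the relocation chunk, picks
 * the allowed/preferred placement domains and a priority for each buffer,
 * sorts the buffers by priority into the p->validated list and validates
 * their placement. Returns 0 on success, negative error code on failure.
 **/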
static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
{
        struct drm_device *ddev = p->rdev->ddev;
        struct radeon_cs_chunk *chunk;
        struct radeon_cs_buckets buckets;
        unsigned i;
        bool need_mmap_lock = false;
        int r;

        if (p->chunk_relocs == NULL) {
                return 0;
        }
        chunk = p->chunk_relocs;
        p->dma_reloc_idx = 0;
        /* FIXME: we assume that each reloc uses 4 dwords */
        p->nrelocs = chunk->length_dw / 4;
        p->relocs = kcalloc(p->nrelocs, sizeof(struct radeon_bo_list), GFP_KERNEL);
        if (p->relocs == NULL) {
                return -ENOMEM;
        }

        radeon_cs_buckets_init(&buckets);

        for (i = 0; i < p->nrelocs; i++) {
                struct drm_radeon_cs_reloc *r;
                struct drm_gem_object *gobj;
                unsigned priority;

                r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
                gobj = drm_gem_object_lookup(ddev, p->filp, r->handle);
                if (gobj == NULL) {
                        DRM_ERROR("gem object lookup failed 0x%x\n",
                                  r->handle);
                        return -ENOENT;
                }
                p->relocs[i].robj = gem_to_radeon_bo(gobj);

                /* The userspace buffer priorities are from 0 to 15. A higher
                 * number means the buffer is more important.
                 * Also, the buffers used for write have a higher priority than
                 * the buffers used for read only, which doubles the range
                 * to 0 to 31. 32 is reserved for the kernel driver.
                 */
                priority = (r->flags & RADEON_RELOC_PRIO_MASK) * 2
                           + !!r->write_domain;
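                /* e.g. a userspace priority of 5 yields 11 for a written
                 * buffer (5 * 2 + 1) and 10 for a read-only one (5 * 2 + 0).
                 */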

                /* The first reloc of an UVD job is the msg and that must be
                   in VRAM; also put everything into VRAM on AGP cards and
                   older IGP chips to avoid image corruption. */
                if (p->ring == R600_RING_TYPE_UVD_INDEX &&
                    (i == 0 || drm_pci_device_is_agp(p->rdev->ddev) ||
                     p->rdev->family == CHIP_RS780 ||
                     p->rdev->family == CHIP_RS880)) {

                        /* TODO: is this still needed for NI+ ? */
                        p->relocs[i].prefered_domains =
                                RADEON_GEM_DOMAIN_VRAM;

                        p->relocs[i].allowed_domains =
                                RADEON_GEM_DOMAIN_VRAM;

                        /* prioritize this over any other relocation */
                        priority = RADEON_CS_MAX_PRIORITY;
                } else {
                        uint32_t domain = r->write_domain ?
                                r->write_domain : r->read_domains;

                        if (domain & RADEON_GEM_DOMAIN_CPU) {
                                DRM_ERROR("RADEON_GEM_DOMAIN_CPU is not valid "
                                          "for command submission\n");
                                return -EINVAL;
                        }

                        p->relocs[i].prefered_domains = domain;
                        if (domain == RADEON_GEM_DOMAIN_VRAM)
                                domain |= RADEON_GEM_DOMAIN_GTT;
                        p->relocs[i].allowed_domains = domain;
                }

                if (radeon_ttm_tt_has_userptr(p->relocs[i].robj->tbo.ttm)) {
                        uint32_t domain = p->relocs[i].prefered_domains;
                        if (!(domain & RADEON_GEM_DOMAIN_GTT)) {
                                DRM_ERROR("Only RADEON_GEM_DOMAIN_GTT is "
                                          "allowed for userptr BOs\n");
                                return -EINVAL;
                        }
                        need_mmap_lock = true;
                        domain = RADEON_GEM_DOMAIN_GTT;
                        p->relocs[i].prefered_domains = domain;
                        p->relocs[i].allowed_domains = domain;
                }

                p->relocs[i].tv.bo = &p->relocs[i].robj->tbo;
                p->relocs[i].tv.shared = !r->write_domain;

                radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head,
                                      priority);
        }

        radeon_cs_buckets_get_list(&buckets, &p->validated);

        if (p->cs_flags & RADEON_CS_USE_VM)
                p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm,
                                              &p->validated);
        if (need_mmap_lock)
                down_read(&current->mm->mmap_sem);

        r = radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring);

        if (need_mmap_lock)
                up_read(&current->mm->mmap_sem);

        return r;
}

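/**
 * radeon_cs_get_ring() - map a userspace ring id and priority to a HW ring
 * @p:          parser structure holding parsing context.
 * @ring:       ring id requested by userspace (RADEON_CS_RING_*).
 * @priority:   requested submission priority.
 *
 * Selects the hardware ring index for the submission, taking the ASIC
 * family into account. Returns 0 on success, -EINVAL for unknown or
 * unsupported rings.
 **/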
static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
{
        p->priority = priority;

        switch (ring) {
        default:
                DRM_ERROR("unknown ring id: %d\n", ring);
                return -EINVAL;
        case RADEON_CS_RING_GFX:
                p->ring = RADEON_RING_TYPE_GFX_INDEX;
                break;
        case RADEON_CS_RING_COMPUTE:
                if (p->rdev->family >= CHIP_TAHITI) {
                        if (p->priority > 0)
                                p->ring = CAYMAN_RING_TYPE_CP1_INDEX;
                        else
                                p->ring = CAYMAN_RING_TYPE_CP2_INDEX;
                } else
                        p->ring = RADEON_RING_TYPE_GFX_INDEX;
                break;
        case RADEON_CS_RING_DMA:
                if (p->rdev->family >= CHIP_CAYMAN) {
                        if (p->priority > 0)
                                p->ring = R600_RING_TYPE_DMA_INDEX;
                        else
                                p->ring = CAYMAN_RING_TYPE_DMA1_INDEX;
                } else if (p->rdev->family >= CHIP_RV770) {
                        p->ring = R600_RING_TYPE_DMA_INDEX;
                } else {
                        return -EINVAL;
                }
                break;
        case RADEON_CS_RING_UVD:
                p->ring = R600_RING_TYPE_UVD_INDEX;
                break;
        case RADEON_CS_RING_VCE:
                /* TODO: only use the low priority ring for now */
                p->ring = TN_RING_TYPE_VCE1_INDEX;
                break;
        }
        return 0;
}

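/**
 * radeon_cs_sync_rings() - sync the IB against all validated BOs
 * @p:          parser structure holding parsing context.
 *
 * Adds the fences of every reservation object in the validated list to the
 * IB's sync object, so the command stream waits for prior users of the
 * buffers. Returns 0 on success, negative error code on failure.
 **/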
static int radeon_cs_sync_rings(struct radeon_cs_parser *p)
{
        struct radeon_bo_list *reloc;
        int r;

        list_for_each_entry(reloc, &p->validated, tv.head) {
                struct reservation_object *resv;

                resv = reloc->robj->tbo.resv;
                r = radeon_sync_resv(p->rdev, &p->ib.sync, resv,
                                     reloc->tv.shared);
                if (r)
                        return r;
        }
        return 0;
}

/* XXX: note that this is called from the legacy UMS CS ioctl as well */
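/**
 * radeon_cs_parser_init() - copy and validate the CS chunks from userspace
 * @p:          parser structure holding parsing context.
 * @data:       the drm_radeon_cs ioctl argument.
 *
 * Copies the chunk array and the individual chunks into kernel memory,
 * remembers the IB, reloc, flags and const IB chunks, and picks the target
 * ring and priority from the flags chunk. Returns 0 on success, negative
 * error code on failure.
 **/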
int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
{
        struct drm_radeon_cs *cs = data;
        uint64_t *chunk_array_ptr;
        unsigned size, i;
        u32 ring = RADEON_CS_RING_GFX;
        s32 priority = 0;

        if (!cs->num_chunks) {
                return 0;
        }
        /* get chunks */
        INIT_LIST_HEAD(&p->validated);
        p->idx = 0;
        p->ib.sa_bo = NULL;
        p->const_ib.sa_bo = NULL;
        p->chunk_ib = NULL;
        p->chunk_relocs = NULL;
        p->chunk_flags = NULL;
        p->chunk_const_ib = NULL;
        p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL);
        if (p->chunks_array == NULL) {
                return -ENOMEM;
        }
        chunk_array_ptr = (uint64_t *)(unsigned long)(cs->chunks);
        if (copy_from_user(p->chunks_array, chunk_array_ptr,
                               sizeof(uint64_t)*cs->num_chunks)) {
                return -EFAULT;
        }
        p->cs_flags = 0;
        p->nchunks = cs->num_chunks;
        p->chunks = kcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), GFP_KERNEL);
        if (p->chunks == NULL) {
                return -ENOMEM;
        }
        for (i = 0; i < p->nchunks; i++) {
                struct drm_radeon_cs_chunk __user **chunk_ptr = NULL;
                struct drm_radeon_cs_chunk user_chunk;
                uint32_t __user *cdata;

                chunk_ptr = (void __user*)(unsigned long)p->chunks_array[i];
                if (copy_from_user(&user_chunk, chunk_ptr,
                                       sizeof(struct drm_radeon_cs_chunk))) {
                        return -EFAULT;
                }
                p->chunks[i].length_dw = user_chunk.length_dw;
                if (user_chunk.chunk_id == RADEON_CHUNK_ID_RELOCS) {
                        p->chunk_relocs = &p->chunks[i];
                }
                if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
                        p->chunk_ib = &p->chunks[i];
                        /* zero length IB isn't useful */
                        if (p->chunks[i].length_dw == 0)
                                return -EINVAL;
                }
                if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB) {
                        p->chunk_const_ib = &p->chunks[i];
                        /* zero length CONST IB isn't useful */
                        if (p->chunks[i].length_dw == 0)
                                return -EINVAL;
                }
                if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) {
                        p->chunk_flags = &p->chunks[i];
                        /* zero length flags aren't useful */
                        if (p->chunks[i].length_dw == 0)
                                return -EINVAL;
                }

                size = p->chunks[i].length_dw;
                cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
                p->chunks[i].user_ptr = cdata;
                if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB)
                        continue;

                if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
                        if (!p->rdev || !(p->rdev->flags & RADEON_IS_AGP))
                                continue;
                }

                p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
                size *= sizeof(uint32_t);
                if (p->chunks[i].kdata == NULL) {
                        return -ENOMEM;
                }
                if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
                        return -EFAULT;
                }
                if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) {
                        p->cs_flags = p->chunks[i].kdata[0];
                        if (p->chunks[i].length_dw > 1)
                                ring = p->chunks[i].kdata[1];
                        if (p->chunks[i].length_dw > 2)
                                priority = (s32)p->chunks[i].kdata[2];
                }
        }

        /* these are KMS only */
        if (p->rdev) {
                if ((p->cs_flags & RADEON_CS_USE_VM) &&
                    !p->rdev->vm_manager.enabled) {
                        DRM_ERROR("VM not active on asic!\n");
                        return -EINVAL;
                }

                if (radeon_cs_get_ring(p, ring, priority))
                        return -EINVAL;

                /* we only support VM on some SI+ rings */
                if ((p->cs_flags & RADEON_CS_USE_VM) == 0) {
                        if (p->rdev->asic->ring[p->ring]->cs_parse == NULL) {
                                DRM_ERROR("Ring %d requires VM!\n", p->ring);
                                return -EINVAL;
                        }
                } else {
                        if (p->rdev->asic->ring[p->ring]->ib_parse == NULL) {
                                DRM_ERROR("VM not supported on ring %d!\n",
                                          p->ring);
                                return -EINVAL;
                        }
                }
        }

        return 0;
}

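/**
 * cmp_size_smaller_first() - list_sort() comparator, smallest BO first
 * @priv:       unused private data pointer passed through by list_sort().
 * @a:          first list entry (a radeon_bo_list, by tv.head).
 * @b:          second list entry.
 *
 * Returns a negative value if the BO behind @a has fewer pages than the one
 * behind @b, so sorting puts the smallest buffers at the front of the list.
 **/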
static int cmp_size_smaller_first(void *priv, struct list_head *a,
                                  struct list_head *b)
{
        struct radeon_bo_list *la = list_entry(a, struct radeon_bo_list, tv.head);
        struct radeon_bo_list *lb = list_entry(b, struct radeon_bo_list, tv.head);

        /* Sort A before B if A is smaller. */
        return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
}

/**
 * radeon_cs_parser_fini() - clean parser states
 * @parser:     parser structure holding parsing context.
 * @error:      error number
 * @backoff:    whether to back off the reservation ticket on error
 *
 * If error is set, unvalidate the buffers, otherwise just free the memory
 * used by the parsing context.
 **/
static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bool backoff)
{
        unsigned i;

        if (!error) {
                /* Sort the buffer list from the smallest to largest buffer,
                 * which affects the order of buffers in the LRU list.
                 * This assures that the smallest buffers are added first
                 * to the LRU list, so they are likely to be later evicted
                 * first, instead of large buffers whose eviction is more
                 * expensive.
                 *
                 * This slightly lowers the number of bytes moved by TTM
                 * per frame under memory pressure.
                 */
                list_sort(NULL, &parser->validated, cmp_size_smaller_first);

                ttm_eu_fence_buffer_objects(&parser->ticket,
                                            &parser->validated,
                                            &parser->ib.fence->base);
        } else if (backoff) {
                ttm_eu_backoff_reservation(&parser->ticket,
                                           &parser->validated);
        }

        if (parser->relocs != NULL) {
                for (i = 0; i < parser->nrelocs; i++) {
                        struct radeon_bo *bo = parser->relocs[i].robj;
                        if (bo == NULL)
                                continue;

                        drm_gem_object_unreference_unlocked(&bo->gem_base);
                }
        }
        kfree(parser->track);
        kfree(parser->relocs);
        drm_free_large(parser->vm_bos);
        for (i = 0; i < parser->nchunks; i++)
                drm_free_large(parser->chunks[i].kdata);
        kfree(parser->chunks);
        kfree(parser->chunks_array);
        radeon_ib_free(parser->rdev, &parser->ib);
        radeon_ib_free(parser->rdev, &parser->const_ib);
}

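/**
 * radeon_cs_ib_chunk() - parse and submit a non-VM IB
 * @rdev:       radeon device the CS was submitted on.
 * @parser:     parser structure holding parsing context.
 *
 * For submissions without a VM, runs the per-ring command stream checker,
 * syncs against the validated buffers, notes UVD/VCE usage and schedules
 * the IB. Returns 0 on success, negative error code on failure.
 **/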
static int radeon_cs_ib_chunk(struct radeon_device *rdev,
                              struct radeon_cs_parser *parser)
{
        int r;

        if (parser->chunk_ib == NULL)
                return 0;

        if (parser->cs_flags & RADEON_CS_USE_VM)
                return 0;

        r = radeon_cs_parse(rdev, parser->ring, parser);
        if (r || parser->parser_error) {
                DRM_ERROR("Invalid command stream !\n");
                return r;
        }

        r = radeon_cs_sync_rings(parser);
        if (r) {
                if (r != -ERESTARTSYS)
                        DRM_ERROR("Failed to sync rings: %i\n", r);
                return r;
        }

        if (parser->ring == R600_RING_TYPE_UVD_INDEX)
                radeon_uvd_note_usage(rdev);
        else if ((parser->ring == TN_RING_TYPE_VCE1_INDEX) ||
                 (parser->ring == TN_RING_TYPE_VCE2_INDEX))
                radeon_vce_note_usage(rdev);

        r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
        if (r) {
                DRM_ERROR("Failed to schedule IB !\n");
        }
        return r;
}

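/**
 * radeon_bo_vm_update_pte() - update the VM page tables for all CS buffers
 * @p:          parser structure holding parsing context.
 * @vm:         the VM the command stream will execute in.
 *
 * Updates the page directory, clears freed mappings, updates the mapping of
 * the ring temporary BO and of every relocated BO, and adds the resulting
 * page table fences to the IB sync object. Returns 0 on success, negative
 * error code on failure.
 **/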
static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p,
                                   struct radeon_vm *vm)
{
        struct radeon_device *rdev = p->rdev;
        struct radeon_bo_va *bo_va;
        int i, r;

        r = radeon_vm_update_page_directory(rdev, vm);
        if (r)
                return r;

        r = radeon_vm_clear_freed(rdev, vm);
        if (r)
                return r;

        if (vm->ib_bo_va == NULL) {
                DRM_ERROR("Tmp BO not in VM!\n");
                return -EINVAL;
        }

        r = radeon_vm_bo_update(rdev, vm->ib_bo_va,
                                &rdev->ring_tmp_bo.bo->tbo.mem);
        if (r)
                return r;

        for (i = 0; i < p->nrelocs; i++) {
                struct radeon_bo *bo;

                bo = p->relocs[i].robj;
                bo_va = radeon_vm_bo_find(vm, bo);
                if (bo_va == NULL) {
                        dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
                        return -EINVAL;
                }

                r = radeon_vm_bo_update(rdev, bo_va, &bo->tbo.mem);
                if (r)
                        return r;

                radeon_sync_fence(&p->ib.sync, bo_va->last_pt_update);
        }

        return radeon_vm_clear_invalids(rdev, vm);
}

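/**
 * radeon_cs_ib_vm_chunk() - parse and submit an IB that executes in a VM
 * @rdev:       radeon device the CS was submitted on.
 * @parser:     parser structure holding parsing context.
 *
 * For VM submissions, runs the per-ring IB checker on the const IB and the
 * main IB, updates the VM page tables, syncs against the validated buffers
 * and schedules the IB(s) under the VM mutex. Returns 0 on success,
 * negative error code on failure.
 **/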
static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
                                 struct radeon_cs_parser *parser)
{
        struct radeon_fpriv *fpriv = parser->filp->driver_priv;
        struct radeon_vm *vm = &fpriv->vm;
        int r;

        if (parser->chunk_ib == NULL)
                return 0;
        if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
                return 0;

        if (parser->const_ib.length_dw) {
                r = radeon_ring_ib_parse(rdev, parser->ring, &parser->const_ib);
                if (r) {
                        return r;
                }
        }

        r = radeon_ring_ib_parse(rdev, parser->ring, &parser->ib);
        if (r) {
                return r;
        }

        if (parser->ring == R600_RING_TYPE_UVD_INDEX)
                radeon_uvd_note_usage(rdev);

        mutex_lock(&vm->mutex);
        r = radeon_bo_vm_update_pte(parser, vm);
        if (r) {
                goto out;
        }

        r = radeon_cs_sync_rings(parser);
        if (r) {
                if (r != -ERESTARTSYS)
                        DRM_ERROR("Failed to sync rings: %i\n", r);
                goto out;
        }

        if ((rdev->family >= CHIP_TAHITI) &&
            (parser->chunk_const_ib != NULL)) {
                r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib, true);
        } else {
                r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
        }

out:
        mutex_unlock(&vm->mutex);
        return r;
}

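/**
 * radeon_cs_handle_lockup() - translate a GPU lockup into a reset + retry
 * @rdev:       radeon device the CS was submitted on.
 * @r:          error code returned so far.
 *
 * If the submission failed with -EDEADLK the GPU is considered locked up;
 * try a GPU reset and, on success, return -EAGAIN so userspace resubmits
 * the command stream. Other error codes are passed through unchanged.
 **/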
static int radeon_cs_handle_lockup(struct radeon_device *rdev, int r)
{
        if (r == -EDEADLK) {
                r = radeon_gpu_reset(rdev);
                if (!r)
                        r = -EAGAIN;
        }
        return r;
}

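/**
 * radeon_cs_ib_fill() - allocate the IB(s) and copy their contents
 * @rdev:       radeon device the CS was submitted on.
 * @parser:     parser structure holding parsing context.
 *
 * Allocates the main IB (and, on SI+ with a const IB chunk, the const IB),
 * checks the chunk sizes against RADEON_IB_VM_MAX_SIZE for VM submissions
 * and copies the command words from the chunk data into the IBs.
 * Returns 0 on success, negative error code on failure.
 **/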
static int radeon_cs_ib_fill(struct radeon_device *rdev, struct radeon_cs_parser *parser)
{
        struct radeon_cs_chunk *ib_chunk;
        struct radeon_vm *vm = NULL;
        int r;

        if (parser->chunk_ib == NULL)
                return 0;

        if (parser->cs_flags & RADEON_CS_USE_VM) {
                struct radeon_fpriv *fpriv = parser->filp->driver_priv;
                vm = &fpriv->vm;

                if ((rdev->family >= CHIP_TAHITI) &&
                    (parser->chunk_const_ib != NULL)) {
                        ib_chunk = parser->chunk_const_ib;
                        if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
                                DRM_ERROR("cs IB CONST too big: %d\n", ib_chunk->length_dw);
                                return -EINVAL;
                        }
                        r =  radeon_ib_get(rdev, parser->ring, &parser->const_ib,
                                           vm, ib_chunk->length_dw * 4);
                        if (r) {
                                DRM_ERROR("Failed to get const ib !\n");
                                return r;
                        }
                        parser->const_ib.is_const_ib = true;
                        parser->const_ib.length_dw = ib_chunk->length_dw;
                        if (copy_from_user(parser->const_ib.ptr,
                                               ib_chunk->user_ptr,
                                               ib_chunk->length_dw * 4))
                                return -EFAULT;
                }

                ib_chunk = parser->chunk_ib;
                if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
                        DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw);
                        return -EINVAL;
                }
        }
        ib_chunk = parser->chunk_ib;

        r =  radeon_ib_get(rdev, parser->ring, &parser->ib,
                           vm, ib_chunk->length_dw * 4);
        if (r) {
                DRM_ERROR("Failed to get ib !\n");
                return r;
        }
        parser->ib.length_dw = ib_chunk->length_dw;
        if (ib_chunk->kdata)
                memcpy(parser->ib.ptr, ib_chunk->kdata, ib_chunk->length_dw * 4);
        else if (copy_from_user(parser->ib.ptr, ib_chunk->user_ptr, ib_chunk->length_dw * 4))
                return -EFAULT;
        return 0;
}

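/**
 * radeon_cs_ioctl() - the DRM_RADEON_CS ioctl handler
 * @dev:        drm device.
 * @data:       pointer to the drm_radeon_cs ioctl argument.
 * @filp:       drm file of the submitting client.
 *
 * Main entry point for command submission on KMS: initializes the parser,
 * fills and validates the IB(s), submits them to the selected ring and
 * cleans up. A detected GPU lockup is turned into a reset and -EAGAIN.
 **/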
int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
        struct radeon_device *rdev = dev->dev_private;
        struct radeon_cs_parser parser;
        int r;

        down_read(&rdev->exclusive_lock);
        if (!rdev->accel_working) {
                up_read(&rdev->exclusive_lock);
                return -EBUSY;
        }
        if (rdev->in_reset) {
                up_read(&rdev->exclusive_lock);
                r = radeon_gpu_reset(rdev);
                if (!r)
                        r = -EAGAIN;
                return r;
        }
        /* initialize parser */
        memset(&parser, 0, sizeof(struct radeon_cs_parser));
        parser.filp = filp;
        parser.rdev = rdev;
        parser.dev = rdev->dev;
        parser.family = rdev->family;
        r = radeon_cs_parser_init(&parser, data);
        if (r) {
                DRM_ERROR("Failed to initialize parser !\n");
                radeon_cs_parser_fini(&parser, r, false);
                up_read(&rdev->exclusive_lock);
                r = radeon_cs_handle_lockup(rdev, r);
                return r;
        }

        r = radeon_cs_ib_fill(rdev, &parser);
        if (!r) {
                r = radeon_cs_parser_relocs(&parser);
                if (r && r != -ERESTARTSYS)
                        DRM_ERROR("Failed to parse relocation %d!\n", r);
        }

        if (r) {
                radeon_cs_parser_fini(&parser, r, false);
                up_read(&rdev->exclusive_lock);
                r = radeon_cs_handle_lockup(rdev, r);
                return r;
        }

        trace_radeon_cs(&parser);

        r = radeon_cs_ib_chunk(rdev, &parser);
        if (r) {
                goto out;
        }
        r = radeon_cs_ib_vm_chunk(rdev, &parser);
        if (r) {
                goto out;
        }
out:
        radeon_cs_parser_fini(&parser, r, true);
        up_read(&rdev->exclusive_lock);
        r = radeon_cs_handle_lockup(rdev, r);
        return r;
}

/**
 * radeon_cs_packet_parse() - parse a CP packet and point the ib index to the next packet
 * @p:          parser structure holding parsing context.
 * @pkt:        where to store packet information.
 * @idx:        ib index at which the packet starts.
 *
 * Assumes that the IB chunk is properly set. Returns -EINVAL if the packet
 * is bigger than the remaining ib size or if the packet type is unknown.
 **/
int radeon_cs_packet_parse(struct radeon_cs_parser *p,
                           struct radeon_cs_packet *pkt,
                           unsigned idx)
{
        struct radeon_cs_chunk *ib_chunk = p->chunk_ib;
        struct radeon_device *rdev = p->rdev;
        uint32_t header;

        if (idx >= ib_chunk->length_dw) {
                DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
                          idx, ib_chunk->length_dw);
                return -EINVAL;
        }
        header = radeon_get_ib_value(p, idx);
        pkt->idx = idx;
        pkt->type = RADEON_CP_PACKET_GET_TYPE(header);
        pkt->count = RADEON_CP_PACKET_GET_COUNT(header);
        pkt->one_reg_wr = 0;
        switch (pkt->type) {
        case RADEON_PACKET_TYPE0:
                if (rdev->family < CHIP_R600) {
                        pkt->reg = R100_CP_PACKET0_GET_REG(header);
                        pkt->one_reg_wr =
                                RADEON_CP_PACKET0_GET_ONE_REG_WR(header);
                } else
                        pkt->reg = R600_CP_PACKET0_GET_REG(header);
                break;
        case RADEON_PACKET_TYPE3:
                pkt->opcode = RADEON_CP_PACKET3_GET_OPCODE(header);
                break;
        case RADEON_PACKET_TYPE2:
                pkt->count = -1;
                break;
        default:
                DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
                return -EINVAL;
        }
        if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
                DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
                          pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
                return -EINVAL;
        }
        return 0;
}

/**
 * radeon_cs_packet_next_is_pkt3_nop() - test if the next packet is P3 NOP
 * @p:          structure holding the parser context.
 *
 * Check if the next packet is NOP relocation packet3.
 **/
bool radeon_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
{
        struct radeon_cs_packet p3reloc;
        int r;

        r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
        if (r)
                return false;
        if (p3reloc.type != RADEON_PACKET_TYPE3)
                return false;
        if (p3reloc.opcode != RADEON_PACKET3_NOP)
                return false;
        return true;
}

/**
 * radeon_cs_dump_packet() - dump raw packet context
 * @p:          structure holding the parser context.
 * @pkt:        structure holding the packet.
 *
 * Used mostly for debugging and error reporting.
 **/
void radeon_cs_dump_packet(struct radeon_cs_parser *p,
                           struct radeon_cs_packet *pkt)
{
        volatile uint32_t *ib;
        unsigned i;
        unsigned idx;

        ib = p->ib.ptr;
        idx = pkt->idx;
        for (i = 0; i <= (pkt->count + 1); i++, idx++)
                DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
}

/**
 * radeon_cs_packet_next_reloc() - parse next (should be reloc) packet
 * @p:          parser structure holding parsing context.
 * @cs_reloc:   where to store the pointer to the reloc BO list entry.
 * @nomm:       no memory management (UMS) path; read the GPU offset
 *              straight from the relocation chunk.
 *
 * Check that the next packet is a relocation packet3 and look up the
 * relocation entry it references.
 **/
int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
                                struct radeon_bo_list **cs_reloc,
                                int nomm)
{
        struct radeon_cs_chunk *relocs_chunk;
        struct radeon_cs_packet p3reloc;
        unsigned idx;
        int r;

        if (p->chunk_relocs == NULL) {
                DRM_ERROR("No relocation chunk !\n");
                return -EINVAL;
        }
        *cs_reloc = NULL;
        relocs_chunk = p->chunk_relocs;
        r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
        if (r)
                return r;
        p->idx += p3reloc.count + 2;
        if (p3reloc.type != RADEON_PACKET_TYPE3 ||
            p3reloc.opcode != RADEON_PACKET3_NOP) {
                DRM_ERROR("No packet3 for relocation for packet at %d.\n",
                          p3reloc.idx);
                radeon_cs_dump_packet(p, &p3reloc);
                return -EINVAL;
        }
        idx = radeon_get_ib_value(p, p3reloc.idx + 1);
        if (idx >= relocs_chunk->length_dw) {
                DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
                          idx, relocs_chunk->length_dw);
                radeon_cs_dump_packet(p, &p3reloc);
                return -EINVAL;
        }
        /* FIXME: we assume reloc size is 4 dwords */
        if (nomm) {
                *cs_reloc = p->relocs;
                (*cs_reloc)->gpu_offset =
                        (u64)relocs_chunk->kdata[idx + 3] << 32;
                (*cs_reloc)->gpu_offset |= relocs_chunk->kdata[idx + 0];
        } else
                *cs_reloc = &p->relocs[(idx / 4)];
        return 0;
}
