linux/drivers/gpu/drm/radeon/radeon_cs.c
/*
 * Copyright 2008 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 */
#include <linux/list_sort.h>
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon_reg.h"
#include "radeon.h"
#include "radeon_trace.h"

#define RADEON_CS_MAX_PRIORITY          32u
#define RADEON_CS_NUM_BUCKETS           (RADEON_CS_MAX_PRIORITY + 1)

/* This is based on bucket sort with O(n) time complexity.
 * An item with priority "i" is added to bucket[i]. The lists are then
 * concatenated in descending order.
 */
struct radeon_cs_buckets {
        struct list_head bucket[RADEON_CS_NUM_BUCKETS];
};

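/* Initialize every priority bucket to an empty list. */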
static void radeon_cs_buckets_init(struct radeon_cs_buckets *b)
{
        unsigned i;

        for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++)
                INIT_LIST_HEAD(&b->bucket[i]);
}

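/* Queue @item on the bucket matching @priority; priorities above
 * RADEON_CS_MAX_PRIORITY are clamped into the kernel-reserved bucket.
 */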
static void radeon_cs_buckets_add(struct radeon_cs_buckets *b,
                                  struct list_head *item, unsigned priority)
{
        /* Since buffers which appear sooner in the relocation list are
         * likely to be used more often than buffers which appear later
         * in the list, the sort mustn't change the ordering of buffers
         * with the same priority, i.e. it must be stable.
         */
        list_add_tail(item, &b->bucket[min(priority, RADEON_CS_MAX_PRIORITY)]);
}

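/* Splice all buckets into @out_list. Each bucket is spliced at the list
 * head, so walking the buckets from low to high priority leaves the
 * highest priority buffers at the front of the resulting list.
 */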
static void radeon_cs_buckets_get_list(struct radeon_cs_buckets *b,
                                       struct list_head *out_list)
{
        unsigned i;

        /* Concatenate the sorted buckets into the output list. */
        for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) {
                list_splice(&b->bucket[i], out_list);
        }
}

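/**
 * radeon_cs_parser_relocs() - build and validate the buffer list for a CS
 * @p:          parser structure holding parsing context.
 *
 * Decode the relocation chunk: look up the GEM object behind each reloc,
 * pick the preferred and allowed placement domains, sort the buffers into
 * p->validated by descending priority and validate the whole list.
 * Userptr BOs are restricted to GTT and validated with mmap_sem held.
 **/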
static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
{
        struct drm_device *ddev = p->rdev->ddev;
        struct radeon_cs_chunk *chunk;
        struct radeon_cs_buckets buckets;
        unsigned i;
        bool need_mmap_lock = false;
        int r;

        if (p->chunk_relocs == NULL) {
                return 0;
        }
        chunk = p->chunk_relocs;
        p->dma_reloc_idx = 0;
        /* FIXME: we assume that each reloc uses 4 dwords */
        p->nrelocs = chunk->length_dw / 4;
        p->relocs = drm_calloc_large(p->nrelocs, sizeof(struct radeon_bo_list));
        if (p->relocs == NULL) {
                return -ENOMEM;
        }

        radeon_cs_buckets_init(&buckets);

        for (i = 0; i < p->nrelocs; i++) {
                struct drm_radeon_cs_reloc *r;
                struct drm_gem_object *gobj;
                unsigned priority;

                r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
                gobj = drm_gem_object_lookup(ddev, p->filp, r->handle);
                if (gobj == NULL) {
                        DRM_ERROR("gem object lookup failed 0x%x\n",
                                  r->handle);
                        return -ENOENT;
                }
                p->relocs[i].robj = gem_to_radeon_bo(gobj);

                /* The userspace buffer priorities are from 0 to 15. A higher
                 * number means the buffer is more important.
                 * Also, the buffers used for write have a higher priority than
                 * the buffers used for read only, which doubles the range
                 * to 0 to 31. 32 is reserved for the kernel driver.
                 */
                priority = (r->flags & RADEON_RELOC_PRIO_MASK) * 2
                           + !!r->write_domain;

                /* The first reloc of a UVD job is the msg and that must be in
                   VRAM. Also put everything into VRAM on AGP cards and older
                   IGP chips to avoid image corruption. */
                if (p->ring == R600_RING_TYPE_UVD_INDEX &&
                    (i == 0 || drm_pci_device_is_agp(p->rdev->ddev) ||
                     p->rdev->family == CHIP_RS780 ||
                     p->rdev->family == CHIP_RS880)) {

                        /* TODO: is this still needed for NI+ ? */
                        p->relocs[i].prefered_domains =
                                RADEON_GEM_DOMAIN_VRAM;

                        p->relocs[i].allowed_domains =
                                RADEON_GEM_DOMAIN_VRAM;

                        /* prioritize this over any other relocation */
                        priority = RADEON_CS_MAX_PRIORITY;
                } else {
                        uint32_t domain = r->write_domain ?
                                r->write_domain : r->read_domains;

                        if (domain & RADEON_GEM_DOMAIN_CPU) {
                                DRM_ERROR("RADEON_GEM_DOMAIN_CPU is not valid "
                                          "for command submission\n");
                                return -EINVAL;
                        }

                        p->relocs[i].prefered_domains = domain;
                        if (domain == RADEON_GEM_DOMAIN_VRAM)
                                domain |= RADEON_GEM_DOMAIN_GTT;
                        p->relocs[i].allowed_domains = domain;
                }

                if (radeon_ttm_tt_has_userptr(p->relocs[i].robj->tbo.ttm)) {
                        uint32_t domain = p->relocs[i].prefered_domains;
                        if (!(domain & RADEON_GEM_DOMAIN_GTT)) {
                                DRM_ERROR("Only RADEON_GEM_DOMAIN_GTT is "
                                          "allowed for userptr BOs\n");
                                return -EINVAL;
                        }
                        need_mmap_lock = true;
                        domain = RADEON_GEM_DOMAIN_GTT;
                        p->relocs[i].prefered_domains = domain;
                        p->relocs[i].allowed_domains = domain;
                }

                p->relocs[i].tv.bo = &p->relocs[i].robj->tbo;
                p->relocs[i].tv.shared = !r->write_domain;

                radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head,
                                      priority);
        }

        radeon_cs_buckets_get_list(&buckets, &p->validated);

        if (p->cs_flags & RADEON_CS_USE_VM)
                p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm,
                                              &p->validated);
        if (need_mmap_lock)
                down_read(&current->mm->mmap_sem);

        r = radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring);

        if (need_mmap_lock)
                up_read(&current->mm->mmap_sem);

        return r;
}

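/**
 * radeon_cs_get_ring() - map a userspace ring id to a hardware ring index
 * @p:          parser structure holding parsing context.
 * @ring:       RADEON_CS_RING_* id requested by userspace.
 * @priority:   requested ring priority.
 *
 * Select the hardware ring for this submission; on asics with more than
 * one compute or DMA ring the priority picks between them. Returns
 * -EINVAL for ring ids the asic does not support.
 **/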
static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
{
        p->priority = priority;

        switch (ring) {
        default:
                DRM_ERROR("unknown ring id: %d\n", ring);
                return -EINVAL;
        case RADEON_CS_RING_GFX:
                p->ring = RADEON_RING_TYPE_GFX_INDEX;
                break;
        case RADEON_CS_RING_COMPUTE:
                if (p->rdev->family >= CHIP_TAHITI) {
                        if (p->priority > 0)
                                p->ring = CAYMAN_RING_TYPE_CP1_INDEX;
                        else
                                p->ring = CAYMAN_RING_TYPE_CP2_INDEX;
                } else
                        p->ring = RADEON_RING_TYPE_GFX_INDEX;
                break;
        case RADEON_CS_RING_DMA:
                if (p->rdev->family >= CHIP_CAYMAN) {
                        if (p->priority > 0)
                                p->ring = R600_RING_TYPE_DMA_INDEX;
                        else
                                p->ring = CAYMAN_RING_TYPE_DMA1_INDEX;
                } else if (p->rdev->family >= CHIP_RV770) {
                        p->ring = R600_RING_TYPE_DMA_INDEX;
                } else {
                        return -EINVAL;
                }
                break;
        case RADEON_CS_RING_UVD:
                p->ring = R600_RING_TYPE_UVD_INDEX;
                break;
        case RADEON_CS_RING_VCE:
                /* TODO: only use the low priority ring for now */
                p->ring = TN_RING_TYPE_VCE1_INDEX;
                break;
        }
        return 0;
}

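/**
 * radeon_cs_sync_rings() - sync the IB against all validated buffers
 * @p:          parser structure holding parsing context.
 *
 * Make the IB wait for the fences in the reservation object of every
 * validated buffer, honoring shared vs. exclusive access.
 **/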
static int radeon_cs_sync_rings(struct radeon_cs_parser *p)
{
        struct radeon_bo_list *reloc;
        int r;

        list_for_each_entry(reloc, &p->validated, tv.head) {
                struct reservation_object *resv;

                resv = reloc->robj->tbo.resv;
                r = radeon_sync_resv(p->rdev, &p->ib.sync, resv,
                                     reloc->tv.shared);
                if (r)
                        return r;
        }
        return 0;
}

/* XXX: note that this is called from the legacy UMS CS ioctl as well */
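/**
 * radeon_cs_parser_init() - copy in and validate the CS chunks
 * @p:          parser structure holding parsing context.
 * @data:       the drm_radeon_cs ioctl argument from userspace.
 *
 * Copy the chunk array from userspace, remember where the IB, relocation,
 * flags and const IB chunks live, pull the chunk data we need into kernel
 * memory, and check the requested ring/VM combination against the asic.
 **/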
int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
{
        struct drm_radeon_cs *cs = data;
        uint64_t *chunk_array_ptr;
        unsigned size, i;
        u32 ring = RADEON_CS_RING_GFX;
        s32 priority = 0;

        INIT_LIST_HEAD(&p->validated);

        if (!cs->num_chunks) {
                return 0;
        }

        /* get chunks */
        p->idx = 0;
        p->ib.sa_bo = NULL;
        p->const_ib.sa_bo = NULL;
        p->chunk_ib = NULL;
        p->chunk_relocs = NULL;
        p->chunk_flags = NULL;
        p->chunk_const_ib = NULL;
        p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL);
        if (p->chunks_array == NULL) {
                return -ENOMEM;
        }
        chunk_array_ptr = (uint64_t *)(unsigned long)(cs->chunks);
        if (copy_from_user(p->chunks_array, chunk_array_ptr,
                               sizeof(uint64_t)*cs->num_chunks)) {
                return -EFAULT;
        }
        p->cs_flags = 0;
        p->nchunks = cs->num_chunks;
        p->chunks = kcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), GFP_KERNEL);
        if (p->chunks == NULL) {
                return -ENOMEM;
        }
        for (i = 0; i < p->nchunks; i++) {
                struct drm_radeon_cs_chunk __user **chunk_ptr = NULL;
                struct drm_radeon_cs_chunk user_chunk;
                uint32_t __user *cdata;

                chunk_ptr = (void __user*)(unsigned long)p->chunks_array[i];
                if (copy_from_user(&user_chunk, chunk_ptr,
                                       sizeof(struct drm_radeon_cs_chunk))) {
                        return -EFAULT;
                }
                p->chunks[i].length_dw = user_chunk.length_dw;
                if (user_chunk.chunk_id == RADEON_CHUNK_ID_RELOCS) {
                        p->chunk_relocs = &p->chunks[i];
                }
                if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
                        p->chunk_ib = &p->chunks[i];
                        /* zero length IB isn't useful */
                        if (p->chunks[i].length_dw == 0)
                                return -EINVAL;
                }
                if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB) {
                        p->chunk_const_ib = &p->chunks[i];
                        /* zero length CONST IB isn't useful */
                        if (p->chunks[i].length_dw == 0)
                                return -EINVAL;
                }
                if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) {
                        p->chunk_flags = &p->chunks[i];
                        /* zero length flags aren't useful */
                        if (p->chunks[i].length_dw == 0)
                                return -EINVAL;
                }

                size = p->chunks[i].length_dw;
                cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
                p->chunks[i].user_ptr = cdata;
                if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB)
                        continue;

                if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
                        if (!p->rdev || !(p->rdev->flags & RADEON_IS_AGP))
                                continue;
                }

                p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
                size *= sizeof(uint32_t);
                if (p->chunks[i].kdata == NULL) {
                        return -ENOMEM;
                }
                if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
                        return -EFAULT;
                }
                if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) {
                        p->cs_flags = p->chunks[i].kdata[0];
                        if (p->chunks[i].length_dw > 1)
                                ring = p->chunks[i].kdata[1];
                        if (p->chunks[i].length_dw > 2)
                                priority = (s32)p->chunks[i].kdata[2];
                }
        }

        /* these are KMS only */
        if (p->rdev) {
                if ((p->cs_flags & RADEON_CS_USE_VM) &&
                    !p->rdev->vm_manager.enabled) {
                        DRM_ERROR("VM not active on asic!\n");
                        return -EINVAL;
                }

                if (radeon_cs_get_ring(p, ring, priority))
                        return -EINVAL;

                /* we only support VM on some SI+ rings */
                if ((p->cs_flags & RADEON_CS_USE_VM) == 0) {
                        if (p->rdev->asic->ring[p->ring]->cs_parse == NULL) {
                                DRM_ERROR("Ring %d requires VM!\n", p->ring);
                                return -EINVAL;
                        }
                } else {
                        if (p->rdev->asic->ring[p->ring]->ib_parse == NULL) {
                                DRM_ERROR("VM not supported on ring %d!\n",
                                          p->ring);
                                return -EINVAL;
                        }
                }
        }

        return 0;
}

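/* list_sort() comparator: returns a negative value when A is the smaller
 * buffer, so the list ends up ordered smallest first.
 */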
static int cmp_size_smaller_first(void *priv, struct list_head *a,
                                  struct list_head *b)
{
        struct radeon_bo_list *la = list_entry(a, struct radeon_bo_list, tv.head);
        struct radeon_bo_list *lb = list_entry(b, struct radeon_bo_list, tv.head);

        /* Sort A before B if A is smaller. */
        return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
}

/**
 * radeon_cs_parser_fini() - clean parser states
 * @parser:     parser structure holding parsing context.
 * @error:      error number
 * @backoff:    indicator to backoff the reservation
 *
 * If error is set, unvalidate the buffers; otherwise just free the memory
 * used by the parsing context.
 **/
static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bool backoff)
{
        unsigned i;

        if (!error) {
                /* Sort the buffer list from the smallest to largest buffer,
                 * which affects the order of buffers in the LRU list.
                 * This assures that the smallest buffers are added first
                 * to the LRU list, so they are likely to be later evicted
                 * first, instead of large buffers whose eviction is more
                 * expensive.
                 *
                 * This slightly lowers the number of bytes moved by TTM
                 * per frame under memory pressure.
                 */
                list_sort(NULL, &parser->validated, cmp_size_smaller_first);

                ttm_eu_fence_buffer_objects(&parser->ticket,
                                            &parser->validated,
                                            &parser->ib.fence->base);
        } else if (backoff) {
                ttm_eu_backoff_reservation(&parser->ticket,
                                           &parser->validated);
        }

        if (parser->relocs != NULL) {
                for (i = 0; i < parser->nrelocs; i++) {
                        struct radeon_bo *bo = parser->relocs[i].robj;
                        if (bo == NULL)
                                continue;

                        drm_gem_object_unreference_unlocked(&bo->gem_base);
                }
        }
        kfree(parser->track);
        drm_free_large(parser->relocs);
        drm_free_large(parser->vm_bos);
        for (i = 0; i < parser->nchunks; i++)
                drm_free_large(parser->chunks[i].kdata);
        kfree(parser->chunks);
        kfree(parser->chunks_array);
        radeon_ib_free(parser->rdev, &parser->ib);
        radeon_ib_free(parser->rdev, &parser->const_ib);
}

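/**
 * radeon_cs_ib_chunk() - check and submit a CS without a VM
 * @rdev:       radeon device the CS comes from.
 * @parser:     parser structure holding parsing context.
 *
 * For submissions that don't use a VM, run the ring's CS checker over the
 * IB, sync against the validated buffers, note UVD/VCE usage and schedule
 * the IB. No-op when there is no IB chunk or when the CS uses a VM.
 **/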
static int radeon_cs_ib_chunk(struct radeon_device *rdev,
                              struct radeon_cs_parser *parser)
{
        int r;

        if (parser->chunk_ib == NULL)
                return 0;

        if (parser->cs_flags & RADEON_CS_USE_VM)
                return 0;

        r = radeon_cs_parse(rdev, parser->ring, parser);
        if (r || parser->parser_error) {
                DRM_ERROR("Invalid command stream !\n");
                return r;
        }

        r = radeon_cs_sync_rings(parser);
        if (r) {
                if (r != -ERESTARTSYS)
                        DRM_ERROR("Failed to sync rings: %i\n", r);
                return r;
        }

        if (parser->ring == R600_RING_TYPE_UVD_INDEX)
                radeon_uvd_note_usage(rdev);
        else if ((parser->ring == TN_RING_TYPE_VCE1_INDEX) ||
                 (parser->ring == TN_RING_TYPE_VCE2_INDEX))
                radeon_vce_note_usage(rdev);

        r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
        if (r) {
                DRM_ERROR("Failed to schedule IB !\n");
        }
        return r;
}

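/**
 * radeon_bo_vm_update_pte() - update the VM page tables for this CS
 * @p:          parser structure holding parsing context.
 * @vm:         VM the command submission executes in.
 *
 * Update the page directory, flush out freed mappings, then refresh the
 * mapping of the temporary ring BO and of every relocated buffer, making
 * the IB wait for the page table updates to finish, and finally clear
 * invalidated page table entries.
 **/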
static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p,
                                   struct radeon_vm *vm)
{
        struct radeon_device *rdev = p->rdev;
        struct radeon_bo_va *bo_va;
        int i, r;

        r = radeon_vm_update_page_directory(rdev, vm);
        if (r)
                return r;

        r = radeon_vm_clear_freed(rdev, vm);
        if (r)
                return r;

        if (vm->ib_bo_va == NULL) {
                DRM_ERROR("Tmp BO not in VM!\n");
                return -EINVAL;
        }

        r = radeon_vm_bo_update(rdev, vm->ib_bo_va,
                                &rdev->ring_tmp_bo.bo->tbo.mem);
        if (r)
                return r;

        for (i = 0; i < p->nrelocs; i++) {
                struct radeon_bo *bo;

                bo = p->relocs[i].robj;
                bo_va = radeon_vm_bo_find(vm, bo);
                if (bo_va == NULL) {
                        dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
                        return -EINVAL;
                }

                r = radeon_vm_bo_update(rdev, bo_va, &bo->tbo.mem);
                if (r)
                        return r;

                radeon_sync_fence(&p->ib.sync, bo_va->last_pt_update);
        }

        return radeon_vm_clear_invalids(rdev, vm);
}

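/**
 * radeon_cs_ib_vm_chunk() - check and submit a CS using a VM
 * @rdev:       radeon device the CS comes from.
 * @parser:     parser structure holding parsing context.
 *
 * For VM submissions, run the ring's IB checker over the IB (and const IB
 * if present), update the page tables under the VM mutex, sync the rings
 * and schedule the IB, together with the const IB on SI and newer.
 **/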
static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
                                 struct radeon_cs_parser *parser)
{
        struct radeon_fpriv *fpriv = parser->filp->driver_priv;
        struct radeon_vm *vm = &fpriv->vm;
        int r;

        if (parser->chunk_ib == NULL)
                return 0;
        if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
                return 0;

        if (parser->const_ib.length_dw) {
                r = radeon_ring_ib_parse(rdev, parser->ring, &parser->const_ib);
                if (r) {
                        return r;
                }
        }

        r = radeon_ring_ib_parse(rdev, parser->ring, &parser->ib);
        if (r) {
                return r;
        }

        if (parser->ring == R600_RING_TYPE_UVD_INDEX)
                radeon_uvd_note_usage(rdev);

        mutex_lock(&vm->mutex);
        r = radeon_bo_vm_update_pte(parser, vm);
        if (r) {
                goto out;
        }

        r = radeon_cs_sync_rings(parser);
        if (r) {
                if (r != -ERESTARTSYS)
                        DRM_ERROR("Failed to sync rings: %i\n", r);
                goto out;
        }

        if ((rdev->family >= CHIP_TAHITI) &&
            (parser->chunk_const_ib != NULL)) {
                r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib, true);
        } else {
                r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
        }

out:
        mutex_unlock(&vm->mutex);
        return r;
}

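/* If the CS failed because of a GPU lockup (-EDEADLK), reset the GPU and
 * ask userspace to resubmit by returning -EAGAIN.
 */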
static int radeon_cs_handle_lockup(struct radeon_device *rdev, int r)
{
        if (r == -EDEADLK) {
                r = radeon_gpu_reset(rdev);
                if (!r)
                        r = -EAGAIN;
        }
        return r;
}

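/**
 * radeon_cs_ib_fill() - allocate the IBs and copy in the command words
 * @rdev:       radeon device the CS comes from.
 * @parser:     parser structure holding parsing context.
 *
 * Allocate the IB, and for VM submissions on SI and newer also the const
 * IB, checking VM IBs against RADEON_IB_VM_MAX_SIZE, then copy the command
 * words from the kernel copy or directly from userspace.
 **/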
static int radeon_cs_ib_fill(struct radeon_device *rdev, struct radeon_cs_parser *parser)
{
        struct radeon_cs_chunk *ib_chunk;
        struct radeon_vm *vm = NULL;
        int r;

        if (parser->chunk_ib == NULL)
                return 0;

        if (parser->cs_flags & RADEON_CS_USE_VM) {
                struct radeon_fpriv *fpriv = parser->filp->driver_priv;
                vm = &fpriv->vm;

                if ((rdev->family >= CHIP_TAHITI) &&
                    (parser->chunk_const_ib != NULL)) {
                        ib_chunk = parser->chunk_const_ib;
                        if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
                                DRM_ERROR("cs IB CONST too big: %d\n", ib_chunk->length_dw);
                                return -EINVAL;
                        }
                        r = radeon_ib_get(rdev, parser->ring, &parser->const_ib,
                                          vm, ib_chunk->length_dw * 4);
                        if (r) {
                                DRM_ERROR("Failed to get const ib !\n");
                                return r;
                        }
                        parser->const_ib.is_const_ib = true;
                        parser->const_ib.length_dw = ib_chunk->length_dw;
                        if (copy_from_user(parser->const_ib.ptr,
                                               ib_chunk->user_ptr,
                                               ib_chunk->length_dw * 4))
                                return -EFAULT;
                }

                ib_chunk = parser->chunk_ib;
                if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
                        DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw);
                        return -EINVAL;
                }
        }
        ib_chunk = parser->chunk_ib;

        r = radeon_ib_get(rdev, parser->ring, &parser->ib,
                          vm, ib_chunk->length_dw * 4);
        if (r) {
                DRM_ERROR("Failed to get ib !\n");
                return r;
        }
        parser->ib.length_dw = ib_chunk->length_dw;
        if (ib_chunk->kdata)
                memcpy(parser->ib.ptr, ib_chunk->kdata, ib_chunk->length_dw * 4);
        else if (copy_from_user(parser->ib.ptr, ib_chunk->user_ptr, ib_chunk->length_dw * 4))
                return -EFAULT;
        return 0;
}

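/**
 * radeon_cs_ioctl() - the main entry point for command submission
 * @dev:        DRM device the CS comes from.
 * @data:       the drm_radeon_cs ioctl argument.
 * @filp:       DRM file the CS comes from.
 *
 * Take the exclusive lock, recover from a pending GPU reset if necessary,
 * then run the stages above in order: parser init, IB fill, relocations,
 * submission without and with a VM, and finally clean up the parser while
 * handling GPU lockups.
 **/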
int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
        struct radeon_device *rdev = dev->dev_private;
        struct radeon_cs_parser parser;
        int r;

        down_read(&rdev->exclusive_lock);
        if (!rdev->accel_working) {
                up_read(&rdev->exclusive_lock);
                return -EBUSY;
        }
        if (rdev->in_reset) {
                up_read(&rdev->exclusive_lock);
                r = radeon_gpu_reset(rdev);
                if (!r)
                        r = -EAGAIN;
                return r;
        }
        /* initialize parser */
        memset(&parser, 0, sizeof(struct radeon_cs_parser));
        parser.filp = filp;
        parser.rdev = rdev;
        parser.dev = rdev->dev;
        parser.family = rdev->family;
        r = radeon_cs_parser_init(&parser, data);
        if (r) {
                DRM_ERROR("Failed to initialize parser !\n");
                radeon_cs_parser_fini(&parser, r, false);
                up_read(&rdev->exclusive_lock);
                r = radeon_cs_handle_lockup(rdev, r);
                return r;
        }

        r = radeon_cs_ib_fill(rdev, &parser);
        if (!r) {
                r = radeon_cs_parser_relocs(&parser);
                if (r && r != -ERESTARTSYS)
                        DRM_ERROR("Failed to parse relocation %d!\n", r);
        }

        if (r) {
                radeon_cs_parser_fini(&parser, r, false);
                up_read(&rdev->exclusive_lock);
                r = radeon_cs_handle_lockup(rdev, r);
                return r;
        }

        trace_radeon_cs(&parser);

        r = radeon_cs_ib_chunk(rdev, &parser);
        if (r) {
                goto out;
        }
        r = radeon_cs_ib_vm_chunk(rdev, &parser);
        if (r) {
                goto out;
        }
out:
        radeon_cs_parser_fini(&parser, r, true);
        up_read(&rdev->exclusive_lock);
        r = radeon_cs_handle_lockup(rdev, r);
        return r;
}

/**
 * radeon_cs_packet_parse() - parse cp packet and point ib index to next packet
 * @p:          parser structure holding parsing context.
 * @pkt:        where to store packet information
 * @idx:        index of the packet header in the ib
 *
 * Assume that chunk_ib_index is properly set. Returns -EINVAL if the packet
 * is bigger than the remaining ib size or if the packet type is unknown.
 **/
int radeon_cs_packet_parse(struct radeon_cs_parser *p,
                           struct radeon_cs_packet *pkt,
                           unsigned idx)
{
        struct radeon_cs_chunk *ib_chunk = p->chunk_ib;
        struct radeon_device *rdev = p->rdev;
        uint32_t header;
        int ret = 0, i;

        if (idx >= ib_chunk->length_dw) {
                DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
                          idx, ib_chunk->length_dw);
                return -EINVAL;
        }
        header = radeon_get_ib_value(p, idx);
        pkt->idx = idx;
        pkt->type = RADEON_CP_PACKET_GET_TYPE(header);
        pkt->count = RADEON_CP_PACKET_GET_COUNT(header);
        pkt->one_reg_wr = 0;
        switch (pkt->type) {
        case RADEON_PACKET_TYPE0:
                if (rdev->family < CHIP_R600) {
                        pkt->reg = R100_CP_PACKET0_GET_REG(header);
                        pkt->one_reg_wr =
                                RADEON_CP_PACKET0_GET_ONE_REG_WR(header);
                } else
                        pkt->reg = R600_CP_PACKET0_GET_REG(header);
                break;
        case RADEON_PACKET_TYPE3:
                pkt->opcode = RADEON_CP_PACKET3_GET_OPCODE(header);
                break;
        case RADEON_PACKET_TYPE2:
                pkt->count = -1;
                break;
        default:
                DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
                ret = -EINVAL;
                goto dump_ib;
        }
        if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
                DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
                          pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
                ret = -EINVAL;
                goto dump_ib;
        }
        return 0;

dump_ib:
        for (i = 0; i < ib_chunk->length_dw; i++) {
                if (i == idx)
                        printk("\t0x%08x <---\n", radeon_get_ib_value(p, i));
                else
                        printk("\t0x%08x\n", radeon_get_ib_value(p, i));
        }
        return ret;
}

/**
 * radeon_cs_packet_next_is_pkt3_nop() - test if the next packet is a P3 NOP
 * @p:          structure holding the parser context.
 *
 * Check if the next packet is a relocation packet3 NOP.
 **/
bool radeon_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
{
        struct radeon_cs_packet p3reloc;
        int r;

        r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
        if (r)
                return false;
        if (p3reloc.type != RADEON_PACKET_TYPE3)
                return false;
        if (p3reloc.opcode != RADEON_PACKET3_NOP)
                return false;
        return true;
}

/**
 * radeon_cs_dump_packet() - dump raw packet context
 * @p:          structure holding the parser context.
 * @pkt:        structure holding the packet.
 *
 * Used mostly for debugging and error reporting.
 **/
void radeon_cs_dump_packet(struct radeon_cs_parser *p,
                           struct radeon_cs_packet *pkt)
{
        volatile uint32_t *ib;
        unsigned i;
        unsigned idx;

        ib = p->ib.ptr;
        idx = pkt->idx;
        for (i = 0; i <= (pkt->count + 1); i++, idx++)
                DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
}

/**
 * radeon_cs_packet_next_reloc() - parse next (should be reloc) packet
 * @p:          parser structure holding parsing context.
 * @cs_reloc:   where to store the resolved relocation entry
 * @nomm:       non-zero for the legacy (no memory manager) path, where the
 *              GPU offset is decoded directly from the relocation chunk
 *
 * Check if the next packet is a relocation packet3 and return the
 * corresponding buffer entry from the relocation list.
 **/
int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
                                struct radeon_bo_list **cs_reloc,
                                int nomm)
{
        struct radeon_cs_chunk *relocs_chunk;
        struct radeon_cs_packet p3reloc;
        unsigned idx;
        int r;

        if (p->chunk_relocs == NULL) {
                DRM_ERROR("No relocation chunk !\n");
                return -EINVAL;
        }
        *cs_reloc = NULL;
        relocs_chunk = p->chunk_relocs;
        r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
        if (r)
                return r;
        p->idx += p3reloc.count + 2;
        if (p3reloc.type != RADEON_PACKET_TYPE3 ||
            p3reloc.opcode != RADEON_PACKET3_NOP) {
                DRM_ERROR("No packet3 for relocation for packet at %d.\n",
                          p3reloc.idx);
                radeon_cs_dump_packet(p, &p3reloc);
                return -EINVAL;
        }
        idx = radeon_get_ib_value(p, p3reloc.idx + 1);
        if (idx >= relocs_chunk->length_dw) {
                DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
                          idx, relocs_chunk->length_dw);
                radeon_cs_dump_packet(p, &p3reloc);
                return -EINVAL;
        }
        /* FIXME: we assume reloc size is 4 dwords */
        if (nomm) {
                *cs_reloc = p->relocs;
                (*cs_reloc)->gpu_offset =
                        (u64)relocs_chunk->kdata[idx + 3] << 32;
                (*cs_reloc)->gpu_offset |= relocs_chunk->kdata[idx + 0];
        } else
                *cs_reloc = &p->relocs[(idx / 4)];
        return 0;
}
