linux/drivers/gpu/drm/radeon/radeon_cs.c
/*
 * Copyright 2008 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 */
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon_reg.h"
#include "radeon.h"

static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
{
        struct drm_device *ddev = p->rdev->ddev;
        struct radeon_cs_chunk *chunk;
        unsigned i, j;
        bool duplicate;

        if (p->chunk_relocs_idx == -1) {
                return 0;
        }
        chunk = &p->chunks[p->chunk_relocs_idx];
        p->dma_reloc_idx = 0;
        /* FIXME: we assume that each reloc uses 4 dwords */
        p->nrelocs = chunk->length_dw / 4;
        p->relocs_ptr = kcalloc(p->nrelocs, sizeof(void *), GFP_KERNEL);
        if (p->relocs_ptr == NULL) {
                return -ENOMEM;
        }
        p->relocs = kcalloc(p->nrelocs, sizeof(struct radeon_cs_reloc), GFP_KERNEL);
        if (p->relocs == NULL) {
                return -ENOMEM;
        }
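        /*
         * Walk the relocation chunk. Each entry is assumed to be the 4 dwords
         * of struct drm_radeon_cs_reloc (handle, read_domains, write_domain,
         * flags); duplicate GEM handles are collapsed onto their first
         * occurrence so every buffer object lands on the validation list only
         * once.
         */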
        for (i = 0; i < p->nrelocs; i++) {
                struct drm_radeon_cs_reloc *r;

                duplicate = false;
                r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
                for (j = 0; j < i; j++) {
                        if (r->handle == p->relocs[j].handle) {
                                p->relocs_ptr[i] = &p->relocs[j];
                                duplicate = true;
                                break;
                        }
                }
                if (duplicate) {
                        p->relocs[i].handle = 0;
                        continue;
                }

                p->relocs[i].gobj = drm_gem_object_lookup(ddev, p->filp,
                                                          r->handle);
                if (p->relocs[i].gobj == NULL) {
                        DRM_ERROR("gem object lookup failed 0x%x\n",
                                  r->handle);
                        return -ENOENT;
                }
                p->relocs_ptr[i] = &p->relocs[i];
                p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj);
                p->relocs[i].lobj.bo = p->relocs[i].robj;
                p->relocs[i].lobj.written = !!r->write_domain;

                /* the first reloc of an UVD job is the
                   msg and that must be in VRAM */
                if (p->ring == R600_RING_TYPE_UVD_INDEX && i == 0) {
                        /* TODO: is this still needed for NI+ ? */
                        p->relocs[i].lobj.domain =
                                RADEON_GEM_DOMAIN_VRAM;

                        p->relocs[i].lobj.alt_domain =
                                RADEON_GEM_DOMAIN_VRAM;

                } else {
                        uint32_t domain = r->write_domain ?
                                r->write_domain : r->read_domains;

                        p->relocs[i].lobj.domain = domain;
                        if (domain == RADEON_GEM_DOMAIN_VRAM)
                                domain |= RADEON_GEM_DOMAIN_GTT;
                        p->relocs[i].lobj.alt_domain = domain;
                }

                p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo;
                p->relocs[i].handle = r->handle;

                radeon_bo_list_add_object(&p->relocs[i].lobj,
                                          &p->validated);
        }
        return radeon_bo_list_validate(&p->ticket, &p->validated, p->ring);
}

static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
{
        p->priority = priority;

        switch (ring) {
        default:
                DRM_ERROR("unknown ring id: %d\n", ring);
                return -EINVAL;
        case RADEON_CS_RING_GFX:
                p->ring = RADEON_RING_TYPE_GFX_INDEX;
                break;
        case RADEON_CS_RING_COMPUTE:
                if (p->rdev->family >= CHIP_TAHITI) {
                        if (p->priority > 0)
                                p->ring = CAYMAN_RING_TYPE_CP1_INDEX;
                        else
                                p->ring = CAYMAN_RING_TYPE_CP2_INDEX;
                } else
                        p->ring = RADEON_RING_TYPE_GFX_INDEX;
                break;
        case RADEON_CS_RING_DMA:
                if (p->rdev->family >= CHIP_CAYMAN) {
                        if (p->priority > 0)
                                p->ring = R600_RING_TYPE_DMA_INDEX;
                        else
                                p->ring = CAYMAN_RING_TYPE_DMA1_INDEX;
                } else if (p->rdev->family >= CHIP_R600) {
                        p->ring = R600_RING_TYPE_DMA_INDEX;
                } else {
                        return -EINVAL;
                }
                break;
        case RADEON_CS_RING_UVD:
                p->ring = R600_RING_TYPE_UVD_INDEX;
                break;
        }
        return 0;
}

static void radeon_cs_sync_rings(struct radeon_cs_parser *p)
{
        int i;

        for (i = 0; i < p->nrelocs; i++) {
                if (!p->relocs[i].robj)
                        continue;

                radeon_ib_sync_to(&p->ib, p->relocs[i].robj->tbo.sync_obj);
        }
}

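/*
 * Rough shape of the CS ioctl payload consumed below (a sketch, based only on
 * how the chunks are decoded in this file): struct drm_radeon_cs carries an
 * array of user pointers to drm_radeon_cs_chunk structures, each with a
 * chunk_id, a length_dw and a chunk_data user pointer.  Recognised ids are
 * RADEON_CHUNK_ID_IB, _RELOCS, _CONST_IB and _FLAGS; for the FLAGS chunk
 * dword 0 is the cs_flags, dword 1 (if present) the requested ring and
 * dword 2 (if present) the priority.
 */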
/* XXX: note that this is called from the legacy UMS CS ioctl as well */
int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
{
        struct drm_radeon_cs *cs = data;
        uint64_t *chunk_array_ptr;
        unsigned size, i;
        u32 ring = RADEON_CS_RING_GFX;
        s32 priority = 0;

        if (!cs->num_chunks) {
                return 0;
        }
        /* get chunks */
        INIT_LIST_HEAD(&p->validated);
        p->idx = 0;
        p->ib.sa_bo = NULL;
        p->ib.semaphore = NULL;
        p->const_ib.sa_bo = NULL;
        p->const_ib.semaphore = NULL;
        p->chunk_ib_idx = -1;
        p->chunk_relocs_idx = -1;
        p->chunk_flags_idx = -1;
        p->chunk_const_ib_idx = -1;
        p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL);
        if (p->chunks_array == NULL) {
                return -ENOMEM;
        }
        chunk_array_ptr = (uint64_t *)(unsigned long)(cs->chunks);
        if (DRM_COPY_FROM_USER(p->chunks_array, chunk_array_ptr,
                               sizeof(uint64_t)*cs->num_chunks)) {
                return -EFAULT;
        }
        p->cs_flags = 0;
        p->nchunks = cs->num_chunks;
        p->chunks = kcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), GFP_KERNEL);
        if (p->chunks == NULL) {
                return -ENOMEM;
        }
        for (i = 0; i < p->nchunks; i++) {
                struct drm_radeon_cs_chunk __user **chunk_ptr = NULL;
                struct drm_radeon_cs_chunk user_chunk;
                uint32_t __user *cdata;

                chunk_ptr = (void __user*)(unsigned long)p->chunks_array[i];
                if (DRM_COPY_FROM_USER(&user_chunk, chunk_ptr,
                                       sizeof(struct drm_radeon_cs_chunk))) {
                        return -EFAULT;
                }
                p->chunks[i].length_dw = user_chunk.length_dw;
                p->chunks[i].kdata = NULL;
                p->chunks[i].chunk_id = user_chunk.chunk_id;
                p->chunks[i].user_ptr = (void __user *)(unsigned long)user_chunk.chunk_data;
                if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_RELOCS) {
                        p->chunk_relocs_idx = i;
                }
                if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_IB) {
                        p->chunk_ib_idx = i;
                        /* zero length IB isn't useful */
                        if (p->chunks[i].length_dw == 0)
                                return -EINVAL;
                }
                if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_CONST_IB) {
                        p->chunk_const_ib_idx = i;
                        /* zero length CONST IB isn't useful */
                        if (p->chunks[i].length_dw == 0)
                                return -EINVAL;
                }
                if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) {
                        p->chunk_flags_idx = i;
                        /* zero length flags aren't useful */
                        if (p->chunks[i].length_dw == 0)
                                return -EINVAL;
                }

                cdata = (uint32_t *)(unsigned long)user_chunk.chunk_data;
                if ((p->chunks[i].chunk_id == RADEON_CHUNK_ID_RELOCS) ||
                    (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS)) {
                        size = p->chunks[i].length_dw * sizeof(uint32_t);
                        p->chunks[i].kdata = kmalloc(size, GFP_KERNEL);
                        if (p->chunks[i].kdata == NULL) {
                                return -ENOMEM;
                        }
                        if (DRM_COPY_FROM_USER(p->chunks[i].kdata,
                                               p->chunks[i].user_ptr, size)) {
                                return -EFAULT;
                        }
                        if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) {
                                p->cs_flags = p->chunks[i].kdata[0];
                                if (p->chunks[i].length_dw > 1)
                                        ring = p->chunks[i].kdata[1];
                                if (p->chunks[i].length_dw > 2)
                                        priority = (s32)p->chunks[i].kdata[2];
                        }
                }
        }

        /* these are KMS only */
        if (p->rdev) {
                if ((p->cs_flags & RADEON_CS_USE_VM) &&
                    !p->rdev->vm_manager.enabled) {
                        DRM_ERROR("VM not active on asic!\n");
                        return -EINVAL;
                }

                if (radeon_cs_get_ring(p, ring, priority))
                        return -EINVAL;

                /* we only support VM on some SI+ rings */
                if ((p->rdev->asic->ring[p->ring].cs_parse == NULL) &&
                   ((p->cs_flags & RADEON_CS_USE_VM) == 0)) {
                        DRM_ERROR("Ring %d requires VM!\n", p->ring);
                        return -EINVAL;
                }
        }

        /* deal with non-vm */
        if ((p->chunk_ib_idx != -1) &&
            ((p->cs_flags & RADEON_CS_USE_VM) == 0) &&
            (p->chunks[p->chunk_ib_idx].chunk_id == RADEON_CHUNK_ID_IB)) {
                if (p->chunks[p->chunk_ib_idx].length_dw > (16 * 1024)) {
                        DRM_ERROR("cs IB too big: %d\n",
                                  p->chunks[p->chunk_ib_idx].length_dw);
                        return -EINVAL;
                }
                if (p->rdev && (p->rdev->flags & RADEON_IS_AGP)) {
                        p->chunks[p->chunk_ib_idx].kpage[0] = kmalloc(PAGE_SIZE, GFP_KERNEL);
                        p->chunks[p->chunk_ib_idx].kpage[1] = kmalloc(PAGE_SIZE, GFP_KERNEL);
                        if (p->chunks[p->chunk_ib_idx].kpage[0] == NULL ||
                            p->chunks[p->chunk_ib_idx].kpage[1] == NULL) {
                                kfree(p->chunks[p->chunk_ib_idx].kpage[0]);
                                kfree(p->chunks[p->chunk_ib_idx].kpage[1]);
                                p->chunks[p->chunk_ib_idx].kpage[0] = NULL;
                                p->chunks[p->chunk_ib_idx].kpage[1] = NULL;
                                return -ENOMEM;
                        }
                }
                p->chunks[p->chunk_ib_idx].kpage_idx[0] = -1;
                p->chunks[p->chunk_ib_idx].kpage_idx[1] = -1;
                p->chunks[p->chunk_ib_idx].last_copied_page = -1;
                p->chunks[p->chunk_ib_idx].last_page_index =
                        ((p->chunks[p->chunk_ib_idx].length_dw * 4) - 1) / PAGE_SIZE;
        }

        return 0;
}

/**
 * radeon_cs_parser_fini() - clean parser states
 * @parser:     parser structure holding parsing context.
 * @error:      error number
 * @backoff:    indicator to backoff the reservation
 *
 * If error is set, then unvalidate the buffers, otherwise just free the
 * memory used by the parsing context.
 **/
static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bool backoff)
{
        unsigned i;

        if (!error) {
                ttm_eu_fence_buffer_objects(&parser->ticket,
                                            &parser->validated,
                                            parser->ib.fence);
        } else if (backoff) {
                ttm_eu_backoff_reservation(&parser->ticket,
                                           &parser->validated);
        }

        if (parser->relocs != NULL) {
                for (i = 0; i < parser->nrelocs; i++) {
                        if (parser->relocs[i].gobj)
                                drm_gem_object_unreference_unlocked(parser->relocs[i].gobj);
                }
        }
        kfree(parser->track);
        kfree(parser->relocs);
        kfree(parser->relocs_ptr);
        for (i = 0; i < parser->nchunks; i++) {
                kfree(parser->chunks[i].kdata);
                if ((parser->rdev->flags & RADEON_IS_AGP)) {
                        kfree(parser->chunks[i].kpage[0]);
                        kfree(parser->chunks[i].kpage[1]);
                }
        }
        kfree(parser->chunks);
        kfree(parser->chunks_array);
        radeon_ib_free(parser->rdev, &parser->ib);
        radeon_ib_free(parser->rdev, &parser->const_ib);
}

static int radeon_cs_ib_chunk(struct radeon_device *rdev,
                              struct radeon_cs_parser *parser)
{
        struct radeon_cs_chunk *ib_chunk;
        int r;

        if (parser->chunk_ib_idx == -1)
                return 0;

        if (parser->cs_flags & RADEON_CS_USE_VM)
                return 0;

        ib_chunk = &parser->chunks[parser->chunk_ib_idx];
        /* Copy the packet into the IB, the parser will read from the
         * input memory (cached) and write to the IB (which can be
         * uncached).
         */
        r =  radeon_ib_get(rdev, parser->ring, &parser->ib,
                           NULL, ib_chunk->length_dw * 4);
        if (r) {
                DRM_ERROR("Failed to get ib !\n");
                return r;
        }
        parser->ib.length_dw = ib_chunk->length_dw;
        r = radeon_cs_parse(rdev, parser->ring, parser);
        if (r || parser->parser_error) {
                DRM_ERROR("Invalid command stream !\n");
                return r;
        }
        r = radeon_cs_finish_pages(parser);
        if (r) {
                DRM_ERROR("Invalid command stream !\n");
                return r;
        }
        radeon_cs_sync_rings(parser);
        r = radeon_ib_schedule(rdev, &parser->ib, NULL);
        if (r) {
                DRM_ERROR("Failed to schedule IB !\n");
        }
        return r;
}

static int radeon_bo_vm_update_pte(struct radeon_cs_parser *parser,
                                   struct radeon_vm *vm)
{
        struct radeon_device *rdev = parser->rdev;
        struct radeon_bo_list *lobj;
        struct radeon_bo *bo;
        int r;

        r = radeon_vm_bo_update_pte(rdev, vm, rdev->ring_tmp_bo.bo, &rdev->ring_tmp_bo.bo->tbo.mem);
        if (r) {
                return r;
        }
        list_for_each_entry(lobj, &parser->validated, tv.head) {
                bo = lobj->bo;
                r = radeon_vm_bo_update_pte(parser->rdev, vm, bo, &bo->tbo.mem);
                if (r) {
                        return r;
                }
        }
        return 0;
}

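/*
 * VM flavour of the IB submission path.  Sketch of what the function below
 * does: copy the optional CONST_IB chunk (SI and newer) and the main IB from
 * user space, run the ring-specific parser on each, then, under the VM
 * manager and per-VM locks, allocate page tables, update PTEs for every
 * validated BO, synchronize with the relocated BOs' fences and schedule the
 * IB(s); on success the VM is fenced against the new IB.
 */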
static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
                                 struct radeon_cs_parser *parser)
{
        struct radeon_cs_chunk *ib_chunk;
        struct radeon_fpriv *fpriv = parser->filp->driver_priv;
        struct radeon_vm *vm = &fpriv->vm;
        int r;

        if (parser->chunk_ib_idx == -1)
                return 0;
        if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
                return 0;

        if ((rdev->family >= CHIP_TAHITI) &&
            (parser->chunk_const_ib_idx != -1)) {
                ib_chunk = &parser->chunks[parser->chunk_const_ib_idx];
                if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
                        DRM_ERROR("cs IB CONST too big: %d\n", ib_chunk->length_dw);
                        return -EINVAL;
                }
                r =  radeon_ib_get(rdev, parser->ring, &parser->const_ib,
                                   vm, ib_chunk->length_dw * 4);
                if (r) {
                        DRM_ERROR("Failed to get const ib !\n");
                        return r;
                }
                parser->const_ib.is_const_ib = true;
                parser->const_ib.length_dw = ib_chunk->length_dw;
                /* Copy the packet into the IB */
                if (DRM_COPY_FROM_USER(parser->const_ib.ptr, ib_chunk->user_ptr,
                                       ib_chunk->length_dw * 4)) {
                        return -EFAULT;
                }
                r = radeon_ring_ib_parse(rdev, parser->ring, &parser->const_ib);
                if (r) {
                        return r;
                }
        }

        ib_chunk = &parser->chunks[parser->chunk_ib_idx];
        if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
                DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw);
                return -EINVAL;
        }
        r =  radeon_ib_get(rdev, parser->ring, &parser->ib,
                           vm, ib_chunk->length_dw * 4);
        if (r) {
                DRM_ERROR("Failed to get ib !\n");
                return r;
        }
        parser->ib.length_dw = ib_chunk->length_dw;
        /* Copy the packet into the IB */
        if (DRM_COPY_FROM_USER(parser->ib.ptr, ib_chunk->user_ptr,
                               ib_chunk->length_dw * 4)) {
                return -EFAULT;
        }
        r = radeon_ring_ib_parse(rdev, parser->ring, &parser->ib);
        if (r) {
                return r;
        }

        mutex_lock(&rdev->vm_manager.lock);
        mutex_lock(&vm->mutex);
        r = radeon_vm_alloc_pt(rdev, vm);
        if (r) {
                goto out;
        }
        r = radeon_bo_vm_update_pte(parser, vm);
        if (r) {
                goto out;
        }
        radeon_cs_sync_rings(parser);
        radeon_ib_sync_to(&parser->ib, vm->fence);
        radeon_ib_sync_to(&parser->ib, radeon_vm_grab_id(
                rdev, vm, parser->ring));

        if ((rdev->family >= CHIP_TAHITI) &&
            (parser->chunk_const_ib_idx != -1)) {
                r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib);
        } else {
                r = radeon_ib_schedule(rdev, &parser->ib, NULL);
        }

        if (!r) {
                radeon_vm_fence(rdev, vm, parser->ib.fence);
        }

out:
        radeon_vm_add_to_lru(rdev, vm);
        mutex_unlock(&vm->mutex);
        mutex_unlock(&rdev->vm_manager.lock);
        return r;
}

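/*
 * -EDEADLK is treated here as a sign that a GPU lockup was detected deeper in
 * the CS path: try a full GPU reset and, if it succeeds, return -EAGAIN so
 * that user space resubmits the command stream.
 */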
static int radeon_cs_handle_lockup(struct radeon_device *rdev, int r)
{
        if (r == -EDEADLK) {
                r = radeon_gpu_reset(rdev);
                if (!r)
                        r = -EAGAIN;
        }
        return r;
}

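/*
 * Top level CS ioctl.  Overall flow (see the helpers above): take the
 * exclusive lock, initialize the parser from the user chunks, resolve and
 * validate relocations, run the non-VM and/or VM IB submission paths, then
 * tear the parser down (fencing the validated buffers on success, backing
 * off the reservation on failure) and translate detected lockups into
 * -EAGAIN.
 */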
int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
        struct radeon_device *rdev = dev->dev_private;
        struct radeon_cs_parser parser;
        int r;

        down_read(&rdev->exclusive_lock);
        if (!rdev->accel_working) {
                up_read(&rdev->exclusive_lock);
                return -EBUSY;
        }
        /* initialize parser */
        memset(&parser, 0, sizeof(struct radeon_cs_parser));
        parser.filp = filp;
        parser.rdev = rdev;
        parser.dev = rdev->dev;
        parser.family = rdev->family;
        r = radeon_cs_parser_init(&parser, data);
        if (r) {
                DRM_ERROR("Failed to initialize parser !\n");
                radeon_cs_parser_fini(&parser, r, false);
                up_read(&rdev->exclusive_lock);
                r = radeon_cs_handle_lockup(rdev, r);
                return r;
        }
        r = radeon_cs_parser_relocs(&parser);
        if (r) {
                if (r != -ERESTARTSYS)
                        DRM_ERROR("Failed to parse relocation %d!\n", r);
                radeon_cs_parser_fini(&parser, r, false);
                up_read(&rdev->exclusive_lock);
                r = radeon_cs_handle_lockup(rdev, r);
                return r;
        }

        /* XXX pick SD/HD/MVC */
        if (parser.ring == R600_RING_TYPE_UVD_INDEX)
                radeon_uvd_note_usage(rdev);

        r = radeon_cs_ib_chunk(rdev, &parser);
        if (r) {
                goto out;
        }
        r = radeon_cs_ib_vm_chunk(rdev, &parser);
        if (r) {
                goto out;
        }
out:
        radeon_cs_parser_fini(&parser, r, true);
        up_read(&rdev->exclusive_lock);
        r = radeon_cs_handle_lockup(rdev, r);
        return r;
}

int radeon_cs_finish_pages(struct radeon_cs_parser *p)
{
        struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx];
        int i;
        int size = PAGE_SIZE;

        for (i = ibc->last_copied_page + 1; i <= ibc->last_page_index; i++) {
                if (i == ibc->last_page_index) {
                        size = (ibc->length_dw * 4) % PAGE_SIZE;
                        if (size == 0)
                                size = PAGE_SIZE;
                }

                if (DRM_COPY_FROM_USER(p->ib.ptr + (i * (PAGE_SIZE/4)),
                                       ibc->user_ptr + (i * PAGE_SIZE),
                                       size))
                        return -EFAULT;
        }
        return 0;
}

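/*
 * Demand paging of the IB chunk for the non-VM path.  On AGP boards the user
 * data is bounced through the two kmalloc'ed pages kpage[0]/kpage[1]
 * (allocated in radeon_cs_parser_init()) and then memcpy'ed into the IB;
 * kpage_idx[] remembers which source page each buffer currently holds and
 * the older of the two is recycled on a miss.  On non-AGP boards the "pages"
 * simply point into ib.ptr and the copy_from_user() lands directly in the IB.
 */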
static int radeon_cs_update_pages(struct radeon_cs_parser *p, int pg_idx)
{
        int new_page;
        struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx];
        int i;
        int size = PAGE_SIZE;
        bool copy1 = (p->rdev && (p->rdev->flags & RADEON_IS_AGP)) ?
                false : true;

        for (i = ibc->last_copied_page + 1; i < pg_idx; i++) {
                if (DRM_COPY_FROM_USER(p->ib.ptr + (i * (PAGE_SIZE/4)),
                                       ibc->user_ptr + (i * PAGE_SIZE),
                                       PAGE_SIZE)) {
                        p->parser_error = -EFAULT;
                        return 0;
                }
        }

        if (pg_idx == ibc->last_page_index) {
                size = (ibc->length_dw * 4) % PAGE_SIZE;
                if (size == 0)
                        size = PAGE_SIZE;
        }

        new_page = ibc->kpage_idx[0] < ibc->kpage_idx[1] ? 0 : 1;
        if (copy1)
                ibc->kpage[new_page] = p->ib.ptr + (pg_idx * (PAGE_SIZE / 4));

        if (DRM_COPY_FROM_USER(ibc->kpage[new_page],
                               ibc->user_ptr + (pg_idx * PAGE_SIZE),
                               size)) {
                p->parser_error = -EFAULT;
                return 0;
        }

        /* for the AGP case, copy from the bounce page into the IB */
        if (!copy1)
                memcpy((void *)(p->ib.ptr+(pg_idx*(PAGE_SIZE/4))), ibc->kpage[new_page], size);

        ibc->last_copied_page = pg_idx;
        ibc->kpage_idx[new_page] = pg_idx;

        return new_page;
}

u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx)
{
        struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx];
        u32 pg_idx, pg_offset;
        u32 idx_value = 0;
        int new_page;

        pg_idx = (idx * 4) / PAGE_SIZE;
        pg_offset = (idx * 4) % PAGE_SIZE;

        if (ibc->kpage_idx[0] == pg_idx)
                return ibc->kpage[0][pg_offset/4];
        if (ibc->kpage_idx[1] == pg_idx)
                return ibc->kpage[1][pg_offset/4];

        new_page = radeon_cs_update_pages(p, pg_idx);
        if (new_page < 0) {
                p->parser_error = new_page;
                return 0;
        }

        idx_value = ibc->kpage[new_page][pg_offset/4];
        return idx_value;
}

/**
 * radeon_cs_packet_parse() - parse cp packet and point ib index to next packet
 * @p:          parser structure holding parsing context.
 * @pkt:        where to store packet information
 * @idx:        index of the packet's header dword in the IB
 *
 * Assumes that chunk_ib_index is properly set.  Returns -EINVAL if the packet
 * is bigger than the remaining IB size or if the packet type is unknown.
 **/
int radeon_cs_packet_parse(struct radeon_cs_parser *p,
                           struct radeon_cs_packet *pkt,
                           unsigned idx)
{
        struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
        struct radeon_device *rdev = p->rdev;
        uint32_t header;

        if (idx >= ib_chunk->length_dw) {
                DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
                          idx, ib_chunk->length_dw);
                return -EINVAL;
        }
        header = radeon_get_ib_value(p, idx);
        pkt->idx = idx;
        pkt->type = RADEON_CP_PACKET_GET_TYPE(header);
        pkt->count = RADEON_CP_PACKET_GET_COUNT(header);
        pkt->one_reg_wr = 0;
        switch (pkt->type) {
        case RADEON_PACKET_TYPE0:
                if (rdev->family < CHIP_R600) {
                        pkt->reg = R100_CP_PACKET0_GET_REG(header);
                        pkt->one_reg_wr =
                                RADEON_CP_PACKET0_GET_ONE_REG_WR(header);
                } else
                        pkt->reg = R600_CP_PACKET0_GET_REG(header);
                break;
        case RADEON_PACKET_TYPE3:
                pkt->opcode = RADEON_CP_PACKET3_GET_OPCODE(header);
                break;
        case RADEON_PACKET_TYPE2:
                pkt->count = -1;
                break;
        default:
                DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
                return -EINVAL;
        }
        if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
                DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
                          pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
                return -EINVAL;
        }
        return 0;
}
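
/*
 * A minimal sketch of how a chip-specific parser is expected to walk an IB
 * with radeon_cs_packet_parse() (the real loops live in the per-family
 * *_cs_parse() implementations, e.g. r100.c and r600_cs.c, and handle many
 * more packet types and register checks):
 *
 *	do {
 *		struct radeon_cs_packet pkt;
 *		int r = radeon_cs_packet_parse(p, &pkt, p->idx);
 *		if (r)
 *			return r;
 *		p->idx += pkt.count + 2;
 *		switch (pkt.type) {
 *		case RADEON_PACKET_TYPE0: ... check the registers written ...
 *		case RADEON_PACKET_TYPE3: ... check the opcode ...
 *		case RADEON_PACKET_TYPE2: break;
 *		}
 *	} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
 */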

/**
 * radeon_cs_packet_next_is_pkt3_nop() - test if the next packet is P3 NOP
 * @p:          structure holding the parser context.
 *
 * Check if the next packet is a relocation packet3 NOP.
 **/
bool radeon_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
{
        struct radeon_cs_packet p3reloc;
        int r;

        r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
        if (r)
                return false;
        if (p3reloc.type != RADEON_PACKET_TYPE3)
                return false;
        if (p3reloc.opcode != RADEON_PACKET3_NOP)
                return false;
        return true;
}

/**
 * radeon_cs_dump_packet() - dump raw packet context
 * @p:          structure holding the parser context.
 * @pkt:        structure holding the packet.
 *
 * Used mostly for debugging and error reporting.
 **/
void radeon_cs_dump_packet(struct radeon_cs_parser *p,
                           struct radeon_cs_packet *pkt)
{
        volatile uint32_t *ib;
        unsigned i;
        unsigned idx;

        ib = p->ib.ptr;
        idx = pkt->idx;
        for (i = 0; i <= (pkt->count + 1); i++, idx++)
                DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
}

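/*
 * Relocation references are encoded in the command stream as PACKET3 NOP
 * packets: the dword following the NOP header is an offset (in dwords) into
 * the relocation chunk.  radeon_cs_packet_next_reloc() below decodes that
 * offset and either returns the matching entry built by
 * radeon_cs_parser_relocs() or, for the legacy non-MM path, reassembles the
 * 64 bit GPU offset straight from the relocation chunk data.
 */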
/**
 * radeon_cs_packet_next_reloc() - parse next (should be reloc) packet
 * @p:                  parser structure holding parsing context.
 * @cs_reloc:           where to store the resolved relocation pointer
 * @nomm:               non-zero for the legacy no-memory-management (UMS) path
 *
 * Check if the next packet is a relocation packet3, do bo validation and
 * compute GPU offset using the provided start.
 **/
int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
                                struct radeon_cs_reloc **cs_reloc,
                                int nomm)
{
        struct radeon_cs_chunk *relocs_chunk;
        struct radeon_cs_packet p3reloc;
        unsigned idx;
        int r;

        if (p->chunk_relocs_idx == -1) {
                DRM_ERROR("No relocation chunk !\n");
                return -EINVAL;
        }
        *cs_reloc = NULL;
        relocs_chunk = &p->chunks[p->chunk_relocs_idx];
        r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
        if (r)
                return r;
        p->idx += p3reloc.count + 2;
        if (p3reloc.type != RADEON_PACKET_TYPE3 ||
            p3reloc.opcode != RADEON_PACKET3_NOP) {
                DRM_ERROR("No packet3 for relocation for packet at %d.\n",
                          p3reloc.idx);
                radeon_cs_dump_packet(p, &p3reloc);
                return -EINVAL;
        }
        idx = radeon_get_ib_value(p, p3reloc.idx + 1);
        if (idx >= relocs_chunk->length_dw) {
                DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
                          idx, relocs_chunk->length_dw);
                radeon_cs_dump_packet(p, &p3reloc);
                return -EINVAL;
        }
        /* FIXME: we assume reloc size is 4 dwords */
        if (nomm) {
                *cs_reloc = p->relocs;
                (*cs_reloc)->lobj.gpu_offset =
                        (u64)relocs_chunk->kdata[idx + 3] << 32;
                (*cs_reloc)->lobj.gpu_offset |= relocs_chunk->kdata[idx + 0];
        } else
                *cs_reloc = p->relocs_ptr[(idx / 4)];
        return 0;
}