linux/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
   1/*
   2 * Copyright 2008 Jerome Glisse.
   3 * All Rights Reserved.
   4 *
   5 * Permission is hereby granted, free of charge, to any person obtaining a
   6 * copy of this software and associated documentation files (the "Software"),
   7 * to deal in the Software without restriction, including without limitation
   8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   9 * and/or sell copies of the Software, and to permit persons to whom the
  10 * Software is furnished to do so, subject to the following conditions:
  11 *
  12 * The above copyright notice and this permission notice (including the next
  13 * paragraph) shall be included in all copies or substantial portions of the
  14 * Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  22 * DEALINGS IN THE SOFTWARE.
  23 *
  24 * Authors:
  25 *    Jerome Glisse <glisse@freedesktop.org>
  26 */
  27
  28#include <linux/file.h>
  29#include <linux/pagemap.h>
  30#include <linux/sync_file.h>
  31#include <linux/dma-buf.h>
  32
  33#include <drm/amdgpu_drm.h>
  34#include <drm/drm_syncobj.h>
  35#include "amdgpu.h"
  36#include "amdgpu_trace.h"
  37#include "amdgpu_gmc.h"
  38#include "amdgpu_gem.h"
  39#include "amdgpu_ras.h"
  40
  41static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
  42                                      struct drm_amdgpu_cs_chunk_fence *data,
  43                                      uint32_t *offset)
  44{
  45        struct drm_gem_object *gobj;
  46        struct amdgpu_bo *bo;
  47        unsigned long size;
  48        int r;
  49
  50        gobj = drm_gem_object_lookup(p->filp, data->handle);
  51        if (gobj == NULL)
  52                return -EINVAL;
  53
  54        bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
  55        p->uf_entry.priority = 0;
  56        p->uf_entry.tv.bo = &bo->tbo;
  57        /* One for TTM and one for the CS job */
  58        p->uf_entry.tv.num_shared = 2;
  59
  60        drm_gem_object_put(gobj);
  61
  62        size = amdgpu_bo_size(bo);
  63        if (size != PAGE_SIZE || (data->offset + 8) > size) {
  64                r = -EINVAL;
  65                goto error_unref;
  66        }
  67
  68        if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
  69                r = -EINVAL;
  70                goto error_unref;
  71        }
  72
  73        *offset = data->offset;
  74
  75        return 0;
  76
  77error_unref:
  78        amdgpu_bo_unref(&bo);
  79        return r;
  80}
  81
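/*
 * Illustrative sketch (the userspace variable names below are made-up
 * assumptions; the struct is the drm_amdgpu_cs_chunk_fence UAPI handled
 * above): the user fence chunk names a page-sized GEM BO and a byte offset
 * inside it where the 64-bit fence value for this submission is written,
 * which is why the helper rejects BOs that are not exactly PAGE_SIZE and
 * offsets where offset + 8 would run past the end of the page.
 *
 *	struct drm_amdgpu_cs_chunk_fence fence_data = {
 *		.handle = user_fence_bo,   GEM handle of a one-page BO
 *		.offset = 0,               byte offset of the u64 fence slot
 *	};
 */
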
  82static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
  83                                      struct drm_amdgpu_bo_list_in *data)
  84{
  85        int r;
  86        struct drm_amdgpu_bo_list_entry *info = NULL;
  87
  88        r = amdgpu_bo_create_list_entry_array(data, &info);
  89        if (r)
  90                return r;
  91
  92        r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number,
  93                                  &p->bo_list);
  94        if (r)
  95                goto error_free;
  96
  97        kvfree(info);
  98        return 0;
  99
  100error_free:
  101        kvfree(info);
 103
 104        return r;
 105}
 106
 107static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs)
 108{
 109        struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 110        struct amdgpu_vm *vm = &fpriv->vm;
 111        uint64_t *chunk_array_user;
 112        uint64_t *chunk_array;
 113        unsigned size, num_ibs = 0;
 114        uint32_t uf_offset = 0;
 115        int i;
 116        int ret;
 117
 118        if (cs->in.num_chunks == 0)
 119                return 0;
 120
 121        chunk_array = kmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
 122        if (!chunk_array)
 123                return -ENOMEM;
 124
 125        p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
 126        if (!p->ctx) {
 127                ret = -EINVAL;
 128                goto free_chunk;
 129        }
 130
 131        mutex_lock(&p->ctx->lock);
 132
 133        /* skip guilty context job */
 134        if (atomic_read(&p->ctx->guilty) == 1) {
 135                ret = -ECANCELED;
 136                goto free_chunk;
 137        }
 138
 139        /* get chunks */
 140        chunk_array_user = u64_to_user_ptr(cs->in.chunks);
 141        if (copy_from_user(chunk_array, chunk_array_user,
 142                           sizeof(uint64_t)*cs->in.num_chunks)) {
 143                ret = -EFAULT;
 144                goto free_chunk;
 145        }
 146
 147        p->nchunks = cs->in.num_chunks;
 148        p->chunks = kmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),
 149                            GFP_KERNEL);
 150        if (!p->chunks) {
 151                ret = -ENOMEM;
 152                goto free_chunk;
 153        }
 154
 155        for (i = 0; i < p->nchunks; i++) {
 156                struct drm_amdgpu_cs_chunk __user **chunk_ptr = NULL;
 157                struct drm_amdgpu_cs_chunk user_chunk;
 158                uint32_t __user *cdata;
 159
 160                chunk_ptr = u64_to_user_ptr(chunk_array[i]);
 161                if (copy_from_user(&user_chunk, chunk_ptr,
 162                                       sizeof(struct drm_amdgpu_cs_chunk))) {
 163                        ret = -EFAULT;
 164                        i--;
 165                        goto free_partial_kdata;
 166                }
 167                p->chunks[i].chunk_id = user_chunk.chunk_id;
 168                p->chunks[i].length_dw = user_chunk.length_dw;
 169
 170                size = p->chunks[i].length_dw;
 171                cdata = u64_to_user_ptr(user_chunk.chunk_data);
 172
 173                p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
 174                if (p->chunks[i].kdata == NULL) {
 175                        ret = -ENOMEM;
 176                        i--;
 177                        goto free_partial_kdata;
 178                }
 179                size *= sizeof(uint32_t);
 180                if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
 181                        ret = -EFAULT;
 182                        goto free_partial_kdata;
 183                }
 184
 185                switch (p->chunks[i].chunk_id) {
 186                case AMDGPU_CHUNK_ID_IB:
 187                        ++num_ibs;
 188                        break;
 189
 190                case AMDGPU_CHUNK_ID_FENCE:
 191                        size = sizeof(struct drm_amdgpu_cs_chunk_fence);
 192                        if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
 193                                ret = -EINVAL;
 194                                goto free_partial_kdata;
 195                        }
 196
 197                        ret = amdgpu_cs_user_fence_chunk(p, p->chunks[i].kdata,
 198                                                         &uf_offset);
 199                        if (ret)
 200                                goto free_partial_kdata;
 201
 202                        break;
 203
 204                case AMDGPU_CHUNK_ID_BO_HANDLES:
 205                        size = sizeof(struct drm_amdgpu_bo_list_in);
 206                        if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
 207                                ret = -EINVAL;
 208                                goto free_partial_kdata;
 209                        }
 210
 211                        ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata);
 212                        if (ret)
 213                                goto free_partial_kdata;
 214
 215                        break;
 216
 217                case AMDGPU_CHUNK_ID_DEPENDENCIES:
 218                case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
 219                case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
 220                case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
 221                case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
 222                case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
 223                        break;
 224
 225                default:
 226                        ret = -EINVAL;
 227                        goto free_partial_kdata;
 228                }
 229        }
 230
 231        ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job, vm);
 232        if (ret)
 233                goto free_all_kdata;
 234
 235        if (p->ctx->vram_lost_counter != p->job->vram_lost_counter) {
 236                ret = -ECANCELED;
 237                goto free_all_kdata;
 238        }
 239
 240        if (p->uf_entry.tv.bo)
 241                p->job->uf_addr = uf_offset;
 242        kfree(chunk_array);
 243
 244        /* Use this opportunity to fill in task info for the vm */
 245        amdgpu_vm_set_task_info(vm);
 246
 247        return 0;
 248
 249free_all_kdata:
 250        i = p->nchunks - 1;
 251free_partial_kdata:
 252        for (; i >= 0; i--)
 253                kvfree(p->chunks[i].kdata);
 254        kfree(p->chunks);
 255        p->chunks = NULL;
 256        p->nchunks = 0;
 257free_chunk:
 258        kfree(chunk_array);
 259
 260        return ret;
 261}
 262
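/*
 * Illustrative sketch of the submission layout parsed above (the counts and
 * user-side names are made-up assumptions; the chunk structs are the UAPI
 * ones referenced in the code): cs->in.chunks holds a user pointer to an
 * array of num_chunks 64-bit addresses, each pointing to a
 * struct drm_amdgpu_cs_chunk whose chunk_data in turn points to the payload.
 *
 *	__u64 chunk_ptrs[2] = { (__u64)&ib_chunk, (__u64)&fence_chunk };
 *
 *	cs.in.ctx_id     = ctx_id;
 *	cs.in.num_chunks = 2;
 *	cs.in.chunks     = (__u64)chunk_ptrs;
 *
 *	ib_chunk    = { AMDGPU_CHUNK_ID_IB,
 *			sizeof(struct drm_amdgpu_cs_chunk_ib) / 4,
 *			(__u64)&ib_info };
 *	fence_chunk = { AMDGPU_CHUNK_ID_FENCE,
 *			sizeof(struct drm_amdgpu_cs_chunk_fence) / 4,
 *			(__u64)&fence_info };
 *
 * This double indirection is why the function above does one copy_from_user()
 * for the pointer array, then one per chunk header and one per chunk payload.
 */
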
 263/* Convert microseconds to bytes. */
 264static u64 us_to_bytes(struct amdgpu_device *adev, s64 us)
 265{
 266        if (us <= 0 || !adev->mm_stats.log2_max_MBps)
 267                return 0;
 268
  269        /* accum_us counts microseconds and 1 MB/s is one byte per microsecond,
  270         * so shifting by log2_max_MBps converts the accumulated time to bytes.
  271         */
 272        return us << adev->mm_stats.log2_max_MBps;
 273}
 274
 275static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)
 276{
 277        if (!adev->mm_stats.log2_max_MBps)
 278                return 0;
 279
 280        return bytes >> adev->mm_stats.log2_max_MBps;
 281}
 282
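/*
 * Worked example for the two conversions above (the log2_max_MBps value is a
 * made-up assumption): with log2_max_MBps == 6 the budget rate is 2^6 == 64
 * MB/s, i.e. 64 bytes per microsecond, so
 *
 *	us_to_bytes(adev, 100000)  == 100000 << 6 == 6400000 bytes (6.4 MB)
 *	bytes_to_us(adev, 6400000) == 6400000 >> 6 == 100000 us (100 ms)
 *
 * which is why accum_us, although counted in microseconds, can be used
 * directly as a byte budget once shifted.
 */
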
 283/* Returns how many bytes TTM can move right now. If no bytes can be moved,
 284 * it returns 0. If it returns non-zero, it's OK to move at least one buffer,
 285 * which means it can go over the threshold once. If that happens, the driver
 286 * will be in debt and no other buffer migrations can be done until that debt
 287 * is repaid.
 288 *
 289 * This approach allows moving a buffer of any size (it's important to allow
 290 * that).
 291 *
 292 * The currency is simply time in microseconds and it increases as the clock
 293 * ticks. The accumulated microseconds (us) are converted to bytes and
 294 * returned.
 295 */
 296static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
 297                                              u64 *max_bytes,
 298                                              u64 *max_vis_bytes)
 299{
 300        s64 time_us, increment_us;
 301        u64 free_vram, total_vram, used_vram;
 302        struct ttm_resource_manager *vram_man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
 303        /* Allow a maximum of 200 accumulated ms. This is basically per-IB
 304         * throttling.
 305         *
 306         * It means that in order to get full max MBps, at least 5 IBs per
 307         * second must be submitted and not more than 200ms apart from each
 308         * other.
 309         */
 310        const s64 us_upper_bound = 200000;
 311
 312        if (!adev->mm_stats.log2_max_MBps) {
 313                *max_bytes = 0;
 314                *max_vis_bytes = 0;
 315                return;
 316        }
 317
 318        total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);
 319        used_vram = amdgpu_vram_mgr_usage(vram_man);
 320        free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
 321
 322        spin_lock(&adev->mm_stats.lock);
 323
 324        /* Increase the amount of accumulated us. */
 325        time_us = ktime_to_us(ktime_get());
 326        increment_us = time_us - adev->mm_stats.last_update_us;
 327        adev->mm_stats.last_update_us = time_us;
 328        adev->mm_stats.accum_us = min(adev->mm_stats.accum_us + increment_us,
 329                                      us_upper_bound);
 330
 331        /* This prevents the short period of low performance when the VRAM
 332         * usage is low and the driver is in debt or doesn't have enough
 333         * accumulated us to fill VRAM quickly.
 334         *
 335         * The situation can occur in these cases:
 336         * - a lot of VRAM is freed by userspace
 337         * - the presence of a big buffer causes a lot of evictions
 338         *   (solution: split buffers into smaller ones)
 339         *
 340         * If 128 MB or 1/8th of VRAM is free, start filling it now by setting
 341         * accum_us to a positive number.
 342         */
 343        if (free_vram >= 128 * 1024 * 1024 || free_vram >= total_vram / 8) {
 344                s64 min_us;
 345
  346                /* Be more aggressive on dGPUs. Try to fill a portion of free
 347                 * VRAM now.
 348                 */
 349                if (!(adev->flags & AMD_IS_APU))
 350                        min_us = bytes_to_us(adev, free_vram / 4);
 351                else
 352                        min_us = 0; /* Reset accum_us on APUs. */
 353
 354                adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us);
 355        }
 356
  357        /* This evaluates to 0 if the driver is in debt (accum_us <= 0), which
  358         * disallows the optional buffer moves.
  359         */
 360        *max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
 361
 362        /* Do the same for visible VRAM if half of it is free */
 363        if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
 364                u64 total_vis_vram = adev->gmc.visible_vram_size;
 365                u64 used_vis_vram =
 366                  amdgpu_vram_mgr_vis_usage(vram_man);
 367
 368                if (used_vis_vram < total_vis_vram) {
 369                        u64 free_vis_vram = total_vis_vram - used_vis_vram;
 370                        adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis +
 371                                                          increment_us, us_upper_bound);
 372
 373                        if (free_vis_vram >= total_vis_vram / 2)
 374                                adev->mm_stats.accum_us_vis =
 375                                        max(bytes_to_us(adev, free_vis_vram / 2),
 376                                            adev->mm_stats.accum_us_vis);
 377                }
 378
 379                *max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis);
 380        } else {
 381                *max_vis_bytes = 0;
 382        }
 383
 384        spin_unlock(&adev->mm_stats.lock);
 385}
 386
 387/* Report how many bytes have really been moved for the last command
 388 * submission. This can result in a debt that can stop buffer migrations
 389 * temporarily.
 390 */
 391void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
 392                                  u64 num_vis_bytes)
 393{
 394        spin_lock(&adev->mm_stats.lock);
 395        adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
 396        adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes);
 397        spin_unlock(&adev->mm_stats.lock);
 398}
 399
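/*
 * Hedged sketch of how the two mm_stats helpers above cooperate during one
 * submission (the byte counts are made up): amdgpu_cs_parser_bos() first asks
 * for the budget, validation consumes it, and the real cost is charged back
 * afterwards:
 *
 *	amdgpu_cs_get_threshold_for_moves(adev, &max_bytes, &max_vis_bytes);
 *	... ttm_bo_validate() ends up moving ~8 MB while max_bytes was 6 MB ...
 *	amdgpu_cs_report_moved_bytes(adev, 8 << 20, 0);
 *
 * Moving more than the accumulated budget drives accum_us negative ("debt"),
 * so following submissions see a zero threshold until enough time has passed
 * to pay the debt back.
 */
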
 400static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 401                                 struct amdgpu_bo *bo)
 402{
 403        struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 404        struct ttm_operation_ctx ctx = {
 405                .interruptible = true,
 406                .no_wait_gpu = false,
 407                .resv = bo->tbo.base.resv,
 408                .flags = 0
 409        };
 410        uint32_t domain;
 411        int r;
 412
 413        if (bo->pin_count)
 414                return 0;
 415
 416        /* Don't move this buffer if we have depleted our allowance
 417         * to move it. Don't move anything if the threshold is zero.
 418         */
 419        if (p->bytes_moved < p->bytes_moved_threshold &&
 420            (!bo->tbo.base.dma_buf ||
 421            list_empty(&bo->tbo.base.dma_buf->attachments))) {
 422                if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
 423                    (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
 424                        /* And don't move a CPU_ACCESS_REQUIRED BO to limited
 425                         * visible VRAM if we've depleted our allowance to do
 426                         * that.
 427                         */
 428                        if (p->bytes_moved_vis < p->bytes_moved_vis_threshold)
 429                                domain = bo->preferred_domains;
 430                        else
 431                                domain = bo->allowed_domains;
 432                } else {
 433                        domain = bo->preferred_domains;
 434                }
 435        } else {
 436                domain = bo->allowed_domains;
 437        }
 438
 439retry:
 440        amdgpu_bo_placement_from_domain(bo, domain);
 441        r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 442
 443        p->bytes_moved += ctx.bytes_moved;
 444        if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
 445            amdgpu_bo_in_cpu_visible_vram(bo))
 446                p->bytes_moved_vis += ctx.bytes_moved;
 447
 448        if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
 449                domain = bo->allowed_domains;
 450                goto retry;
 451        }
 452
 453        return r;
 454}
 455
 456static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo)
 457{
 458        struct amdgpu_cs_parser *p = param;
 459        int r;
 460
 461        r = amdgpu_cs_bo_validate(p, bo);
 462        if (r)
 463                return r;
 464
 465        if (bo->shadow)
 466                r = amdgpu_cs_bo_validate(p, bo->shadow);
 467
 468        return r;
 469}
 470
 471static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 472                            struct list_head *validated)
 473{
 474        struct ttm_operation_ctx ctx = { true, false };
 475        struct amdgpu_bo_list_entry *lobj;
 476        int r;
 477
 478        list_for_each_entry(lobj, validated, tv.head) {
 479                struct amdgpu_bo *bo = ttm_to_amdgpu_bo(lobj->tv.bo);
 480                struct mm_struct *usermm;
 481
 482                usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm);
 483                if (usermm && usermm != current->mm)
 484                        return -EPERM;
 485
 486                if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) &&
 487                    lobj->user_invalidated && lobj->user_pages) {
 488                        amdgpu_bo_placement_from_domain(bo,
 489                                                        AMDGPU_GEM_DOMAIN_CPU);
 490                        r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 491                        if (r)
 492                                return r;
 493
 494                        amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
 495                                                     lobj->user_pages);
 496                }
 497
 498                r = amdgpu_cs_validate(p, bo);
 499                if (r)
 500                        return r;
 501
 502                kvfree(lobj->user_pages);
 503                lobj->user_pages = NULL;
 504        }
 505        return 0;
 506}
 507
 508static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 509                                union drm_amdgpu_cs *cs)
 510{
 511        struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 512        struct amdgpu_vm *vm = &fpriv->vm;
 513        struct amdgpu_bo_list_entry *e;
 514        struct list_head duplicates;
 515        struct amdgpu_bo *gds;
 516        struct amdgpu_bo *gws;
 517        struct amdgpu_bo *oa;
 518        int r;
 519
 520        INIT_LIST_HEAD(&p->validated);
 521
 522        /* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */
 523        if (cs->in.bo_list_handle) {
 524                if (p->bo_list)
 525                        return -EINVAL;
 526
 527                r = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle,
 528                                       &p->bo_list);
 529                if (r)
 530                        return r;
 531        } else if (!p->bo_list) {
  532                /* Create an empty bo_list when no handle is provided */
 533                r = amdgpu_bo_list_create(p->adev, p->filp, NULL, 0,
 534                                          &p->bo_list);
 535                if (r)
 536                        return r;
 537        }
 538
 539        /* One for TTM and one for the CS job */
 540        amdgpu_bo_list_for_each_entry(e, p->bo_list)
 541                e->tv.num_shared = 2;
 542
 543        amdgpu_bo_list_get_list(p->bo_list, &p->validated);
 544
 545        INIT_LIST_HEAD(&duplicates);
 546        amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
 547
 548        if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
 549                list_add(&p->uf_entry.tv.head, &p->validated);
 550
  551        /* Get the userptr backing pages. If the pages were updated after being
  552         * registered in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate()
  553         * triggers amdgpu_ttm_backend_bind() to flush and invalidate the new pages.
  554         */
 555        amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
 556                struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 557                bool userpage_invalidated = false;
 558                int i;
 559
 560                e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
 561                                        sizeof(struct page *),
 562                                        GFP_KERNEL | __GFP_ZERO);
 563                if (!e->user_pages) {
  564                        DRM_ERROR("kvmalloc_array failure\n");
 565                        return -ENOMEM;
 566                }
 567
 568                r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages);
 569                if (r) {
 570                        kvfree(e->user_pages);
 571                        e->user_pages = NULL;
 572                        return r;
 573                }
 574
 575                for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
 576                        if (bo->tbo.ttm->pages[i] != e->user_pages[i]) {
 577                                userpage_invalidated = true;
 578                                break;
 579                        }
 580                }
 581                e->user_invalidated = userpage_invalidated;
 582        }
 583
 584        r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
 585                                   &duplicates);
 586        if (unlikely(r != 0)) {
 587                if (r != -ERESTARTSYS)
 588                        DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
 589                goto out;
 590        }
 591
 592        amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
 593                                          &p->bytes_moved_vis_threshold);
 594        p->bytes_moved = 0;
 595        p->bytes_moved_vis = 0;
 596
 597        r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm,
 598                                      amdgpu_cs_validate, p);
 599        if (r) {
 600                DRM_ERROR("amdgpu_vm_validate_pt_bos() failed.\n");
 601                goto error_validate;
 602        }
 603
 604        r = amdgpu_cs_list_validate(p, &duplicates);
 605        if (r)
 606                goto error_validate;
 607
 608        r = amdgpu_cs_list_validate(p, &p->validated);
 609        if (r)
 610                goto error_validate;
 611
 612        amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
 613                                     p->bytes_moved_vis);
 614
 615        gds = p->bo_list->gds_obj;
 616        gws = p->bo_list->gws_obj;
 617        oa = p->bo_list->oa_obj;
 618
 619        amdgpu_bo_list_for_each_entry(e, p->bo_list) {
 620                struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 621
 622                /* Make sure we use the exclusive slot for shared BOs */
 623                if (bo->prime_shared_count)
 624                        e->tv.num_shared = 0;
 625                e->bo_va = amdgpu_vm_bo_find(vm, bo);
 626        }
 627
 628        if (gds) {
 629                p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
 630                p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
 631        }
 632        if (gws) {
 633                p->job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT;
 634                p->job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT;
 635        }
 636        if (oa) {
 637                p->job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT;
 638                p->job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT;
 639        }
 640
 641        if (!r && p->uf_entry.tv.bo) {
 642                struct amdgpu_bo *uf = ttm_to_amdgpu_bo(p->uf_entry.tv.bo);
 643
 644                r = amdgpu_ttm_alloc_gart(&uf->tbo);
 645                p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
 646        }
 647
 648error_validate:
 649        if (r)
 650                ttm_eu_backoff_reservation(&p->ticket, &p->validated);
 651out:
 652        return r;
 653}
 654
 655static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
 656{
 657        struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 658        struct amdgpu_bo_list_entry *e;
 659        int r;
 660
 661        list_for_each_entry(e, &p->validated, tv.head) {
 662                struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 663                struct dma_resv *resv = bo->tbo.base.resv;
 664                enum amdgpu_sync_mode sync_mode;
 665
 666                sync_mode = amdgpu_bo_explicit_sync(bo) ?
 667                        AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER;
 668                r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, sync_mode,
 669                                     &fpriv->vm);
 670                if (r)
 671                        return r;
 672        }
 673        return 0;
 674}
 675
  676/**
  677 * amdgpu_cs_parser_fini() - clean up the parser state
  678 * @parser:     parser structure holding the parsing context
  679 * @error:      error number
  680 * @backoff:    whether to back off the buffer reservations
  681 *
  682 * If error is set, unvalidate the buffers; otherwise just free the parser memory.
  683 **/
 684static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
 685                                  bool backoff)
 686{
 687        unsigned i;
 688
 689        if (error && backoff)
 690                ttm_eu_backoff_reservation(&parser->ticket,
 691                                           &parser->validated);
 692
 693        for (i = 0; i < parser->num_post_deps; i++) {
 694                drm_syncobj_put(parser->post_deps[i].syncobj);
 695                kfree(parser->post_deps[i].chain);
 696        }
 697        kfree(parser->post_deps);
 698
 699        dma_fence_put(parser->fence);
 700
 701        if (parser->ctx) {
 702                mutex_unlock(&parser->ctx->lock);
 703                amdgpu_ctx_put(parser->ctx);
 704        }
 705        if (parser->bo_list)
 706                amdgpu_bo_list_put(parser->bo_list);
 707
 708        for (i = 0; i < parser->nchunks; i++)
 709                kvfree(parser->chunks[i].kdata);
 710        kfree(parser->chunks);
 711        if (parser->job)
 712                amdgpu_job_free(parser->job);
 713        if (parser->uf_entry.tv.bo) {
 714                struct amdgpu_bo *uf = ttm_to_amdgpu_bo(parser->uf_entry.tv.bo);
 715
 716                amdgpu_bo_unref(&uf);
 717        }
 718}
 719
 720static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 721{
 722        struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
 723        struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 724        struct amdgpu_device *adev = p->adev;
 725        struct amdgpu_vm *vm = &fpriv->vm;
 726        struct amdgpu_bo_list_entry *e;
 727        struct amdgpu_bo_va *bo_va;
 728        struct amdgpu_bo *bo;
 729        int r;
 730
 731        /* Only for UVD/VCE VM emulation */
 732        if (ring->funcs->parse_cs || ring->funcs->patch_cs_in_place) {
 733                unsigned i, j;
 734
 735                for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
 736                        struct drm_amdgpu_cs_chunk_ib *chunk_ib;
 737                        struct amdgpu_bo_va_mapping *m;
 738                        struct amdgpu_bo *aobj = NULL;
 739                        struct amdgpu_cs_chunk *chunk;
 740                        uint64_t offset, va_start;
 741                        struct amdgpu_ib *ib;
 742                        uint8_t *kptr;
 743
 744                        chunk = &p->chunks[i];
 745                        ib = &p->job->ibs[j];
 746                        chunk_ib = chunk->kdata;
 747
 748                        if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
 749                                continue;
 750
 751                        va_start = chunk_ib->va_start & AMDGPU_GMC_HOLE_MASK;
 752                        r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
 753                        if (r) {
 754                                DRM_ERROR("IB va_start is invalid\n");
 755                                return r;
 756                        }
 757
 758                        if ((va_start + chunk_ib->ib_bytes) >
 759                            (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
 760                                DRM_ERROR("IB va_start+ib_bytes is invalid\n");
 761                                return -EINVAL;
 762                        }
 763
 764                        /* the IB should be reserved at this point */
 765                        r = amdgpu_bo_kmap(aobj, (void **)&kptr);
  766                        if (r)
  767                                return r;
 769
 770                        offset = m->start * AMDGPU_GPU_PAGE_SIZE;
 771                        kptr += va_start - offset;
 772
 773                        if (ring->funcs->parse_cs) {
 774                                memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
 775                                amdgpu_bo_kunmap(aobj);
 776
 777                                r = amdgpu_ring_parse_cs(ring, p, j);
 778                                if (r)
 779                                        return r;
 780                        } else {
 781                                ib->ptr = (uint32_t *)kptr;
 782                                r = amdgpu_ring_patch_cs_in_place(ring, p, j);
 783                                amdgpu_bo_kunmap(aobj);
 784                                if (r)
 785                                        return r;
 786                        }
 787
 788                        j++;
 789                }
 790        }
 791
 792        if (!p->job->vm)
 793                return amdgpu_cs_sync_rings(p);
 794
 795
 796        r = amdgpu_vm_clear_freed(adev, vm, NULL);
 797        if (r)
 798                return r;
 799
 800        r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
 801        if (r)
 802                return r;
 803
 804        r = amdgpu_sync_vm_fence(&p->job->sync, fpriv->prt_va->last_pt_update);
 805        if (r)
 806                return r;
 807
 808        if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
 809                bo_va = fpriv->csa_va;
 810                BUG_ON(!bo_va);
 811                r = amdgpu_vm_bo_update(adev, bo_va, false);
 812                if (r)
 813                        return r;
 814
 815                r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
 816                if (r)
 817                        return r;
 818        }
 819
 820        amdgpu_bo_list_for_each_entry(e, p->bo_list) {
 821                /* ignore duplicates */
 822                bo = ttm_to_amdgpu_bo(e->tv.bo);
 823                if (!bo)
 824                        continue;
 825
 826                bo_va = e->bo_va;
 827                if (bo_va == NULL)
 828                        continue;
 829
 830                r = amdgpu_vm_bo_update(adev, bo_va, false);
 831                if (r)
 832                        return r;
 833
 834                r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
 835                if (r)
 836                        return r;
 837        }
 838
 839        r = amdgpu_vm_handle_moved(adev, vm);
 840        if (r)
 841                return r;
 842
 843        r = amdgpu_vm_update_pdes(adev, vm, false);
 844        if (r)
 845                return r;
 846
 847        r = amdgpu_sync_vm_fence(&p->job->sync, vm->last_update);
 848        if (r)
 849                return r;
 850
 851        p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);
 852
 853        if (amdgpu_vm_debug) {
 854                /* Invalidate all BOs to test for userspace bugs */
 855                amdgpu_bo_list_for_each_entry(e, p->bo_list) {
 856                        struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 857
 858                        /* ignore duplicates */
 859                        if (!bo)
 860                                continue;
 861
 862                        amdgpu_vm_bo_invalidate(adev, bo, false);
 863                }
 864        }
 865
 866        return amdgpu_cs_sync_rings(p);
 867}
 868
 869static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 870                             struct amdgpu_cs_parser *parser)
 871{
 872        struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
 873        struct amdgpu_vm *vm = &fpriv->vm;
 874        int r, ce_preempt = 0, de_preempt = 0;
 875        struct amdgpu_ring *ring;
 876        int i, j;
 877
 878        for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
 879                struct amdgpu_cs_chunk *chunk;
 880                struct amdgpu_ib *ib;
 881                struct drm_amdgpu_cs_chunk_ib *chunk_ib;
 882                struct drm_sched_entity *entity;
 883
 884                chunk = &parser->chunks[i];
 885                ib = &parser->job->ibs[j];
 886                chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
 887
 888                if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
 889                        continue;
 890
 891                if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
 892                    (amdgpu_mcbp || amdgpu_sriov_vf(adev))) {
 893                        if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
 894                                if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
 895                                        ce_preempt++;
 896                                else
 897                                        de_preempt++;
 898                        }
 899
  900                        /* each GFX submission allows at most one preemptible CE IB and one DE IB */
 901                        if (ce_preempt > 1 || de_preempt > 1)
 902                                return -EINVAL;
 903                }
 904
 905                r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type,
 906                                          chunk_ib->ip_instance, chunk_ib->ring,
 907                                          &entity);
 908                if (r)
 909                        return r;
 910
 911                if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
 912                        parser->job->preamble_status |=
 913                                AMDGPU_PREAMBLE_IB_PRESENT;
 914
 915                if (parser->entity && parser->entity != entity)
 916                        return -EINVAL;
 917
  918                /* Return if there is no run queue associated with this entity.
  919                 * Possibly because of disabled HW IP. */
 920                if (entity->rq == NULL)
 921                        return -EINVAL;
 922
 923                parser->entity = entity;
 924
 925                ring = to_amdgpu_ring(entity->rq->sched);
  926                r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
 927                                   chunk_ib->ib_bytes : 0,
 928                                   AMDGPU_IB_POOL_DELAYED, ib);
 929                if (r) {
 930                        DRM_ERROR("Failed to get ib !\n");
 931                        return r;
 932                }
 933
 934                ib->gpu_addr = chunk_ib->va_start;
 935                ib->length_dw = chunk_ib->ib_bytes / 4;
 936                ib->flags = chunk_ib->flags;
 937
 938                j++;
 939        }
 940
 941        /* MM engine doesn't support user fences */
 942        ring = to_amdgpu_ring(parser->entity->rq->sched);
 943        if (parser->job->uf_addr && ring->funcs->no_user_fence)
 944                return -EINVAL;
 945
 946        return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity);
 947}
 948
 949static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
 950                                       struct amdgpu_cs_chunk *chunk)
 951{
 952        struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 953        unsigned num_deps;
 954        int i, r;
 955        struct drm_amdgpu_cs_chunk_dep *deps;
 956
 957        deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
 958        num_deps = chunk->length_dw * 4 /
 959                sizeof(struct drm_amdgpu_cs_chunk_dep);
 960
 961        for (i = 0; i < num_deps; ++i) {
 962                struct amdgpu_ctx *ctx;
 963                struct drm_sched_entity *entity;
 964                struct dma_fence *fence;
 965
 966                ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
 967                if (ctx == NULL)
 968                        return -EINVAL;
 969
 970                r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type,
 971                                          deps[i].ip_instance,
 972                                          deps[i].ring, &entity);
 973                if (r) {
 974                        amdgpu_ctx_put(ctx);
 975                        return r;
 976                }
 977
 978                fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle);
 979                amdgpu_ctx_put(ctx);
 980
 981                if (IS_ERR(fence))
 982                        return PTR_ERR(fence);
 983                else if (!fence)
 984                        continue;
 985
 986                if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
 987                        struct drm_sched_fence *s_fence;
 988                        struct dma_fence *old = fence;
 989
 990                        s_fence = to_drm_sched_fence(fence);
 991                        fence = dma_fence_get(&s_fence->scheduled);
 992                        dma_fence_put(old);
 993                }
 994
 995                r = amdgpu_sync_fence(&p->job->sync, fence);
 996                dma_fence_put(fence);
 997                if (r)
 998                        return r;
 999        }
1000        return 0;
1001}
1002
1003static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
1004                                                 uint32_t handle, u64 point,
1005                                                 u64 flags)
1006{
1007        struct dma_fence *fence;
1008        int r;
1009
1010        r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence);
1011        if (r) {
1012                DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n",
1013                          handle, point, r);
1014                return r;
1015        }
1016
1017        r = amdgpu_sync_fence(&p->job->sync, fence);
1018        dma_fence_put(fence);
1019
1020        return r;
1021}
1022
1023static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
1024                                            struct amdgpu_cs_chunk *chunk)
1025{
1026        struct drm_amdgpu_cs_chunk_sem *deps;
1027        unsigned num_deps;
1028        int i, r;
1029
1030        deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
1031        num_deps = chunk->length_dw * 4 /
1032                sizeof(struct drm_amdgpu_cs_chunk_sem);
1033        for (i = 0; i < num_deps; ++i) {
1034                r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle,
1035                                                          0, 0);
1036                if (r)
1037                        return r;
1038        }
1039
1040        return 0;
1041}
1042
1043
1044static int amdgpu_cs_process_syncobj_timeline_in_dep(struct amdgpu_cs_parser *p,
1045                                                     struct amdgpu_cs_chunk *chunk)
1046{
1047        struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
1048        unsigned num_deps;
1049        int i, r;
1050
1051        syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
1052        num_deps = chunk->length_dw * 4 /
1053                sizeof(struct drm_amdgpu_cs_chunk_syncobj);
1054        for (i = 0; i < num_deps; ++i) {
1055                r = amdgpu_syncobj_lookup_and_add_to_sync(p,
1056                                                          syncobj_deps[i].handle,
1057                                                          syncobj_deps[i].point,
1058                                                          syncobj_deps[i].flags);
1059                if (r)
1060                        return r;
1061        }
1062
1063        return 0;
1064}
1065
1066static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
1067                                             struct amdgpu_cs_chunk *chunk)
1068{
1069        struct drm_amdgpu_cs_chunk_sem *deps;
1070        unsigned num_deps;
1071        int i;
1072
1073        deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
1074        num_deps = chunk->length_dw * 4 /
1075                sizeof(struct drm_amdgpu_cs_chunk_sem);
1076
1077        if (p->post_deps)
1078                return -EINVAL;
1079
1080        p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
1081                                     GFP_KERNEL);
1082        p->num_post_deps = 0;
1083
1084        if (!p->post_deps)
1085                return -ENOMEM;
1086
1087
1088        for (i = 0; i < num_deps; ++i) {
1089                p->post_deps[i].syncobj =
1090                        drm_syncobj_find(p->filp, deps[i].handle);
1091                if (!p->post_deps[i].syncobj)
1092                        return -EINVAL;
1093                p->post_deps[i].chain = NULL;
1094                p->post_deps[i].point = 0;
1095                p->num_post_deps++;
1096        }
1097
1098        return 0;
1099}
1100
1101
1102static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p,
1103                                                      struct amdgpu_cs_chunk *chunk)
1104{
1105        struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
1106        unsigned num_deps;
1107        int i;
1108
1109        syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
1110        num_deps = chunk->length_dw * 4 /
1111                sizeof(struct drm_amdgpu_cs_chunk_syncobj);
1112
1113        if (p->post_deps)
1114                return -EINVAL;
1115
1116        p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
1117                                     GFP_KERNEL);
1118        p->num_post_deps = 0;
1119
1120        if (!p->post_deps)
1121                return -ENOMEM;
1122
1123        for (i = 0; i < num_deps; ++i) {
1124                struct amdgpu_cs_post_dep *dep = &p->post_deps[i];
1125
1126                dep->chain = NULL;
1127                if (syncobj_deps[i].point) {
1128                        dep->chain = kmalloc(sizeof(*dep->chain), GFP_KERNEL);
1129                        if (!dep->chain)
1130                                return -ENOMEM;
1131                }
1132
1133                dep->syncobj = drm_syncobj_find(p->filp,
1134                                                syncobj_deps[i].handle);
1135                if (!dep->syncobj) {
1136                        kfree(dep->chain);
1137                        return -EINVAL;
1138                }
1139                dep->point = syncobj_deps[i].point;
1140                p->num_post_deps++;
1141        }
1142
1143        return 0;
1144}
1145
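/*
 * Illustrative note on the chunk sizing used by the syncobj helpers above
 * (the concrete length is a made-up assumption): length_dw counts 32-bit
 * words, so the number of entries is length_dw * 4 divided by the entry
 * size. struct drm_amdgpu_cs_chunk_syncobj carries handle, flags and a
 * 64-bit point (16 bytes), so an AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT
 * chunk with length_dw == 8 describes two entries:
 *
 *	num_deps = 8 * 4 / sizeof(struct drm_amdgpu_cs_chunk_syncobj) = 2
 *
 * Each entry is resolved to a fence with drm_syncobj_find_fence() and added
 * to the job's sync object.
 */
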
1146static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
1147                                  struct amdgpu_cs_parser *p)
1148{
1149        int i, r;
1150
1151        for (i = 0; i < p->nchunks; ++i) {
1152                struct amdgpu_cs_chunk *chunk;
1153
1154                chunk = &p->chunks[i];
1155
1156                switch (chunk->chunk_id) {
1157                case AMDGPU_CHUNK_ID_DEPENDENCIES:
1158                case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
1159                        r = amdgpu_cs_process_fence_dep(p, chunk);
1160                        if (r)
1161                                return r;
1162                        break;
1163                case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
1164                        r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
1165                        if (r)
1166                                return r;
1167                        break;
1168                case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
1169                        r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
1170                        if (r)
1171                                return r;
1172                        break;
1173                case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
1174                        r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk);
1175                        if (r)
1176                                return r;
1177                        break;
1178                case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
1179                        r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk);
1180                        if (r)
1181                                return r;
1182                        break;
1183                }
1184        }
1185
1186        return 0;
1187}
1188
1189static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
1190{
1191        int i;
1192
1193        for (i = 0; i < p->num_post_deps; ++i) {
1194                if (p->post_deps[i].chain && p->post_deps[i].point) {
1195                        drm_syncobj_add_point(p->post_deps[i].syncobj,
1196                                              p->post_deps[i].chain,
1197                                              p->fence, p->post_deps[i].point);
1198                        p->post_deps[i].chain = NULL;
1199                } else {
1200                        drm_syncobj_replace_fence(p->post_deps[i].syncobj,
1201                                                  p->fence);
1202                }
1203        }
1204}
1205
1206static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
1207                            union drm_amdgpu_cs *cs)
1208{
1209        struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
1210        struct drm_sched_entity *entity = p->entity;
1211        struct amdgpu_bo_list_entry *e;
1212        struct amdgpu_job *job;
1213        uint64_t seq;
1214        int r;
1215
1216        job = p->job;
1217        p->job = NULL;
1218
1219        r = drm_sched_job_init(&job->base, entity, &fpriv->vm);
1220        if (r)
1221                goto error_unlock;
1222
 1223        /* No memory allocation is allowed while holding the notifier lock.
 1224         * The lock is held until amdgpu_cs_submit() is finished and the fence
 1225         * is added to the BOs.
 1226         */
1227        mutex_lock(&p->adev->notifier_lock);
1228
 1229        /* If the userptrs were invalidated after amdgpu_cs_parser_bos(), return
 1230         * -EAGAIN; drmIoctl() in libdrm will then restart the amdgpu_cs_ioctl.
 1231         */
1232        amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
1233                struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
1234
1235                r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
1236        }
1237        if (r) {
1238                r = -EAGAIN;
1239                goto error_abort;
1240        }
1241
1242        p->fence = dma_fence_get(&job->base.s_fence->finished);
1243
1244        amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
1245        amdgpu_cs_post_dependencies(p);
1246
1247        if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
1248            !p->ctx->preamble_presented) {
1249                job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
1250                p->ctx->preamble_presented = true;
1251        }
1252
1253        cs->out.handle = seq;
1254        job->uf_sequence = seq;
1255
1256        amdgpu_job_free_resources(job);
1257
1258        trace_amdgpu_cs_ioctl(job);
1259        amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->ticket);
1260        drm_sched_entity_push_job(&job->base, entity);
1261
1262        amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
1263
1264        ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
1265        mutex_unlock(&p->adev->notifier_lock);
1266
1267        return 0;
1268
1269error_abort:
1270        drm_sched_job_cleanup(&job->base);
1271        mutex_unlock(&p->adev->notifier_lock);
1272
1273error_unlock:
1274        amdgpu_job_free(job);
1275        return r;
1276}
1277
1278static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *parser)
1279{
1280        int i;
1281
1282        if (!trace_amdgpu_cs_enabled())
1283                return;
1284
1285        for (i = 0; i < parser->job->num_ibs; i++)
1286                trace_amdgpu_cs(parser, i);
1287}
1288
1289int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
1290{
1291        struct amdgpu_device *adev = drm_to_adev(dev);
1292        union drm_amdgpu_cs *cs = data;
1293        struct amdgpu_cs_parser parser = {};
1294        bool reserved_buffers = false;
1295        int r;
1296
1297        if (amdgpu_ras_intr_triggered())
1298                return -EHWPOISON;
1299
1300        if (!adev->accel_working)
1301                return -EBUSY;
1302
1303        parser.adev = adev;
1304        parser.filp = filp;
1305
1306        r = amdgpu_cs_parser_init(&parser, data);
1307        if (r) {
1308                if (printk_ratelimit())
1309                        DRM_ERROR("Failed to initialize parser %d!\n", r);
1310                goto out;
1311        }
1312
1313        r = amdgpu_cs_ib_fill(adev, &parser);
1314        if (r)
1315                goto out;
1316
1317        r = amdgpu_cs_dependencies(adev, &parser);
1318        if (r) {
1319                DRM_ERROR("Failed in the dependencies handling %d!\n", r);
1320                goto out;
1321        }
1322
1323        r = amdgpu_cs_parser_bos(&parser, data);
1324        if (r) {
1325                if (r == -ENOMEM)
1326                        DRM_ERROR("Not enough memory for command submission!\n");
1327                else if (r != -ERESTARTSYS && r != -EAGAIN)
1328                        DRM_ERROR("Failed to process the buffer list %d!\n", r);
1329                goto out;
1330        }
1331
1332        reserved_buffers = true;
1333
1334        trace_amdgpu_cs_ibs(&parser);
1335
1336        r = amdgpu_cs_vm_handling(&parser);
1337        if (r)
1338                goto out;
1339
1340        r = amdgpu_cs_submit(&parser, cs);
1341
1342out:
1343        amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
1344
1345        return r;
1346}
1347
1348/**
1349 * amdgpu_cs_wait_ioctl - wait for a command submission to finish
1350 *
1351 * @dev: drm device
1352 * @data: data from userspace
1353 * @filp: file private
1354 *
1355 * Wait for the command submission identified by handle to finish.
1356 */
1357int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
1358                         struct drm_file *filp)
1359{
1360        union drm_amdgpu_wait_cs *wait = data;
1361        unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
1362        struct drm_sched_entity *entity;
1363        struct amdgpu_ctx *ctx;
1364        struct dma_fence *fence;
1365        long r;
1366
1367        ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
1368        if (ctx == NULL)
1369                return -EINVAL;
1370
1371        r = amdgpu_ctx_get_entity(ctx, wait->in.ip_type, wait->in.ip_instance,
1372                                  wait->in.ring, &entity);
1373        if (r) {
1374                amdgpu_ctx_put(ctx);
1375                return r;
1376        }
1377
1378        fence = amdgpu_ctx_get_fence(ctx, entity, wait->in.handle);
1379        if (IS_ERR(fence))
1380                r = PTR_ERR(fence);
1381        else if (fence) {
1382                r = dma_fence_wait_timeout(fence, true, timeout);
1383                if (r > 0 && fence->error)
1384                        r = fence->error;
1385                dma_fence_put(fence);
1386        } else
1387                r = 1;
1388
1389        amdgpu_ctx_put(ctx);
1390        if (r < 0)
1391                return r;
1392
1393        memset(wait, 0, sizeof(*wait));
1394        wait->out.status = (r == 0);
1395
1396        return 0;
1397}
1398
1399/**
1400 * amdgpu_cs_get_fence - helper to get fence from drm_amdgpu_fence
1401 *
1402 * @adev: amdgpu device
1403 * @filp: file private
1404 * @user: drm_amdgpu_fence copied from user space
1405 */
1406static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
1407                                             struct drm_file *filp,
1408                                             struct drm_amdgpu_fence *user)
1409{
1410        struct drm_sched_entity *entity;
1411        struct amdgpu_ctx *ctx;
1412        struct dma_fence *fence;
1413        int r;
1414
1415        ctx = amdgpu_ctx_get(filp->driver_priv, user->ctx_id);
1416        if (ctx == NULL)
1417                return ERR_PTR(-EINVAL);
1418
1419        r = amdgpu_ctx_get_entity(ctx, user->ip_type, user->ip_instance,
1420                                  user->ring, &entity);
1421        if (r) {
1422                amdgpu_ctx_put(ctx);
1423                return ERR_PTR(r);
1424        }
1425
1426        fence = amdgpu_ctx_get_fence(ctx, entity, user->seq_no);
1427        amdgpu_ctx_put(ctx);
1428
1429        return fence;
1430}
1431
1432int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
1433                                    struct drm_file *filp)
1434{
1435        struct amdgpu_device *adev = drm_to_adev(dev);
1436        union drm_amdgpu_fence_to_handle *info = data;
1437        struct dma_fence *fence;
1438        struct drm_syncobj *syncobj;
1439        struct sync_file *sync_file;
1440        int fd, r;
1441
1442        fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence);
1443        if (IS_ERR(fence))
1444                return PTR_ERR(fence);
1445
1446        if (!fence)
1447                fence = dma_fence_get_stub();
1448
1449        switch (info->in.what) {
1450        case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ:
1451                r = drm_syncobj_create(&syncobj, 0, fence);
1452                dma_fence_put(fence);
1453                if (r)
1454                        return r;
1455                r = drm_syncobj_get_handle(filp, syncobj, &info->out.handle);
1456                drm_syncobj_put(syncobj);
1457                return r;
1458
1459        case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD:
1460                r = drm_syncobj_create(&syncobj, 0, fence);
1461                dma_fence_put(fence);
1462                if (r)
1463                        return r;
1464                r = drm_syncobj_get_fd(syncobj, (int*)&info->out.handle);
1465                drm_syncobj_put(syncobj);
1466                return r;
1467
1468        case AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD:
1469                fd = get_unused_fd_flags(O_CLOEXEC);
1470                if (fd < 0) {
1471                        dma_fence_put(fence);
1472                        return fd;
1473                }
1474
1475                sync_file = sync_file_create(fence);
1476                dma_fence_put(fence);
1477                if (!sync_file) {
1478                        put_unused_fd(fd);
1479                        return -ENOMEM;
1480                }
1481
1482                fd_install(fd, sync_file->file);
1483                info->out.handle = fd;
1484                return 0;
1485
1486        default:
1487                return -EINVAL;
1488        }
1489}
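
    /*
     * Illustrative userspace sketch (not kernel code) for this ioctl,
     * assuming libdrm's drmIoctl() and the uapi definitions from
     * amdgpu_drm.h; ctx_id and seq_no come from context creation and
     * command submission:
     *
     *        union drm_amdgpu_fence_to_handle fth = {};
     *
     *        fth.in.fence.ctx_id = ctx_id;
     *        fth.in.fence.ip_type = AMDGPU_HW_IP_GFX;
     *        fth.in.fence.ip_instance = 0;
     *        fth.in.fence.ring = 0;
     *        fth.in.fence.seq_no = seq_no;
     *        fth.in.what = AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD;
     *
     *        if (drmIoctl(drm_fd, DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE, &fth) == 0)
     *                sync_file_fd = fth.out.handle;
     */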
1490
1491/**
1492 * amdgpu_cs_wait_all_fences - wait on all fences to signal
1493 *
1494 * @adev: amdgpu device
1495 * @filp: file private
1496 * @wait: wait parameters
1497 * @fences: array of drm_amdgpu_fence
1498 */
1499static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev,
1500                                     struct drm_file *filp,
1501                                     union drm_amdgpu_wait_fences *wait,
1502                                     struct drm_amdgpu_fence *fences)
1503{
1504        uint32_t fence_count = wait->in.fence_count;
1505        unsigned int i;
1506        long r = 1;
1507
1508        for (i = 0; i < fence_count; i++) {
1509                struct dma_fence *fence;
1510                unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);
1511
1512                fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
1513                if (IS_ERR(fence))
1514                        return PTR_ERR(fence);
1515                else if (!fence)
1516                        continue;
1517
1518                r = dma_fence_wait_timeout(fence, true, timeout);
1519                if (r > 0 && fence->error)
1520                        r = fence->error;
1521
1522                dma_fence_put(fence);
1523                if (r < 0)
1524                        return r;
1525
1526                if (r == 0)
1527                        break;
1528        }
1529
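            /*
             * status is 1 when all fences signaled before the timeout,
             * 0 when the wait timed out.
             */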
1530        memset(wait, 0, sizeof(*wait));
1531        wait->out.status = (r > 0);
1532
1533        return 0;
1534}
1535
1536/**
1537 * amdgpu_cs_wait_any_fence - wait on any fence to signal
1538 *
1539 * @adev: amdgpu device
1540 * @filp: file private
1541 * @wait: wait parameters
1542 * @fences: array of drm_amdgpu_fence
1543 */
1544static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev,
1545                                    struct drm_file *filp,
1546                                    union drm_amdgpu_wait_fences *wait,
1547                                    struct drm_amdgpu_fence *fences)
1548{
1549        unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);
1550        uint32_t fence_count = wait->in.fence_count;
1551        uint32_t first = ~0;
1552        struct dma_fence **array;
1553        unsigned int i;
1554        long r;
1555
1556        /* Prepare the fence array */
1557        array = kcalloc(fence_count, sizeof(struct dma_fence *), GFP_KERNEL);
1558
1559        if (array == NULL)
1560                return -ENOMEM;
1561
1562        for (i = 0; i < fence_count; i++) {
1563                struct dma_fence *fence;
1564
1565                fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
1566                if (IS_ERR(fence)) {
1567                        r = PTR_ERR(fence);
1568                        goto err_free_fence_array;
1569                } else if (fence) {
1570                        array[i] = fence;
1571                } else { /* NULL, the fence has been already signaled */
1572                        r = 1;
1573                        first = i;
1574                        goto out;
1575                }
1576        }
1577
1578        r = dma_fence_wait_any_timeout(array, fence_count, true, timeout,
1579                                       &first);
1580        if (r < 0)
1581                goto err_free_fence_array;
1582
1583out:
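            /*
             * status tells userspace whether any fence signaled within the
             * timeout, first_signaled which of the fences it was.
             */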
1584        memset(wait, 0, sizeof(*wait));
1585        wait->out.status = (r > 0);
1586        wait->out.first_signaled = first;
1587
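            /* If the first signaled fence carries an error, return that error */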
1588        if (first < fence_count && array[first])
1589                r = array[first]->error;
1590        else
1591                r = 0;
1592
1593err_free_fence_array:
1594        for (i = 0; i < fence_count; i++)
1595                dma_fence_put(array[i]);
1596        kfree(array);
1597
1598        return r;
1599}
1600
1601/**
1602 * amdgpu_cs_wait_fences_ioctl - wait for multiple command submissions to finish
1603 *
1604 * @dev: drm device
1605 * @data: data from userspace
1606 * @filp: file private
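     *
     * Return: 0 on success with the wait result reported in wait->out,
     * a negative error code otherwise.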
1607 */
1608int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
1609                                struct drm_file *filp)
1610{
1611        struct amdgpu_device *adev = drm_to_adev(dev);
1612        union drm_amdgpu_wait_fences *wait = data;
1613        uint32_t fence_count = wait->in.fence_count;
1614        struct drm_amdgpu_fence *fences_user;
1615        struct drm_amdgpu_fence *fences;
1616        int r;
1617
1618        /* Get the fences from userspace */
1619        fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence),
1620                        GFP_KERNEL);
1621        if (fences == NULL)
1622                return -ENOMEM;
1623
1624        fences_user = u64_to_user_ptr(wait->in.fences);
1625        if (copy_from_user(fences, fences_user,
1626                sizeof(struct drm_amdgpu_fence) * fence_count)) {
1627                r = -EFAULT;
1628                goto err_free_fences;
1629        }
1630
1631        if (wait->in.wait_all)
1632                r = amdgpu_cs_wait_all_fences(adev, filp, wait, fences);
1633        else
1634                r = amdgpu_cs_wait_any_fence(adev, filp, wait, fences);
1635
1636err_free_fences:
1637        kfree(fences);
1638
1639        return r;
1640}
1641
1642/**
1643 * amdgpu_cs_find_mapping - find bo_va for VM address
1644 *
1645 * @parser: command submission parser context
1646 * @addr: VM address
1647 * @bo: resulting BO of the mapping found
1648 * @map: resulting mapping found
1649 *
1650 * Search the buffer objects in the command submission context for a certain
1651 * virtual memory address. Returns 0 when found, an error code otherwise.
1652 */
1653int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
1654                           uint64_t addr, struct amdgpu_bo **bo,
1655                           struct amdgpu_bo_va_mapping **map)
1656{
1657        struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
1658        struct ttm_operation_ctx ctx = { false, false };
1659        struct amdgpu_vm *vm = &fpriv->vm;
1660        struct amdgpu_bo_va_mapping *mapping;
1661        int r;
1662
1663        addr /= AMDGPU_GPU_PAGE_SIZE;
1664
1665        mapping = amdgpu_vm_bo_lookup_mapping(vm, addr);
1666        if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo)
1667                return -EINVAL;
1668
1669        *bo = mapping->bo_va->base.bo;
1670        *map = mapping;
1671
1672        /* Double check that the BO is reserved by this CS */
1673        if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->ticket)
1674                return -EINVAL;
1675
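            /*
             * So that the BO can be used through a single linear GPU address,
             * force contiguous VRAM placement here and set up a GART binding
             * below.
             */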
1676        if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
1677                (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
1678                amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
1679                r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
1680                if (r)
1681                        return r;
1682        }
1683
1684        return amdgpu_ttm_alloc_gart(&(*bo)->tbo);
1685}
1686