linux/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
   1/*
   2 * Copyright 2008 Jerome Glisse.
   3 * All Rights Reserved.
   4 *
   5 * Permission is hereby granted, free of charge, to any person obtaining a
   6 * copy of this software and associated documentation files (the "Software"),
   7 * to deal in the Software without restriction, including without limitation
   8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   9 * and/or sell copies of the Software, and to permit persons to whom the
  10 * Software is furnished to do so, subject to the following conditions:
  11 *
  12 * The above copyright notice and this permission notice (including the next
  13 * paragraph) shall be included in all copies or substantial portions of the
  14 * Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  22 * DEALINGS IN THE SOFTWARE.
  23 *
  24 * Authors:
  25 *    Jerome Glisse <glisse@freedesktop.org>
  26 */
  27
  28#include <linux/file.h>
  29#include <linux/pagemap.h>
  30#include <linux/sync_file.h>
  31#include <linux/dma-buf.h>
  32
  33#include <drm/amdgpu_drm.h>
  34#include <drm/drm_syncobj.h>
  35#include "amdgpu.h"
  36#include "amdgpu_trace.h"
  37#include "amdgpu_gmc.h"
  38#include "amdgpu_gem.h"
  39#include "amdgpu_ras.h"
  40
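/* Set up the user fence from an AMDGPU_CHUNK_ID_FENCE chunk: look up the GEM
 * handle, hold a reference on the BO in p->uf_entry and return the fence
 * offset. The BO must be exactly one page, the 8 byte fence must fit inside
 * it and userptr BOs are rejected.
 */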
  41static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
  42                                      struct drm_amdgpu_cs_chunk_fence *data,
  43                                      uint32_t *offset)
  44{
  45        struct drm_gem_object *gobj;
  46        struct amdgpu_bo *bo;
  47        unsigned long size;
  48        int r;
  49
  50        gobj = drm_gem_object_lookup(p->filp, data->handle);
  51        if (gobj == NULL)
  52                return -EINVAL;
  53
  54        bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
  55        p->uf_entry.priority = 0;
  56        p->uf_entry.tv.bo = &bo->tbo;
  57        /* One for TTM and one for the CS job */
  58        p->uf_entry.tv.num_shared = 2;
  59
  60        drm_gem_object_put(gobj);
  61
  62        size = amdgpu_bo_size(bo);
  63        if (size != PAGE_SIZE || (data->offset + 8) > size) {
  64                r = -EINVAL;
  65                goto error_unref;
  66        }
  67
  68        if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
  69                r = -EINVAL;
  70                goto error_unref;
  71        }
  72
  73        *offset = data->offset;
  74
  75        return 0;
  76
  77error_unref:
  78        amdgpu_bo_unref(&bo);
  79        return r;
  80}
  81
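/* Create p->bo_list from an AMDGPU_CHUNK_ID_BO_HANDLES chunk. The temporary
 * entry array is freed again on both the success and the error path.
 */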
  82static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
  83                                      struct drm_amdgpu_bo_list_in *data)
  84{
  85        int r;
  86        struct drm_amdgpu_bo_list_entry *info = NULL;
  87
  88        r = amdgpu_bo_create_list_entry_array(data, &info);
  89        if (r)
  90                return r;
  91
  92        r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number,
  93                                  &p->bo_list);
  94        if (r)
  95                goto error_free;
  96
  97        kvfree(info);
  98        return 0;
  99
 100error_free:
 101        kvfree(info);
 102
 103        return r;
 104}
 105
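/* First step of the CS ioctl: take a reference on the context, copy the chunk
 * array and each chunk from user space, handle the FENCE and BO_HANDLES
 * chunks right away, count the IBs and allocate the job. Submissions from a
 * guilty context or after a VRAM loss are rejected with -ECANCELED.
 */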
 106static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs)
 107{
 108        struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 109        struct amdgpu_vm *vm = &fpriv->vm;
 110        uint64_t *chunk_array_user;
 111        uint64_t *chunk_array;
 112        unsigned size, num_ibs = 0;
 113        uint32_t uf_offset = 0;
 114        int i;
 115        int ret;
 116
 117        if (cs->in.num_chunks == 0)
 118                return 0;
 119
 120        chunk_array = kmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
 121        if (!chunk_array)
 122                return -ENOMEM;
 123
 124        p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
 125        if (!p->ctx) {
 126                ret = -EINVAL;
 127                goto free_chunk;
 128        }
 129
 130        mutex_lock(&p->ctx->lock);
 131
 132        /* skip guilty context job */
 133        if (atomic_read(&p->ctx->guilty) == 1) {
 134                ret = -ECANCELED;
 135                goto free_chunk;
 136        }
 137
 138        /* get chunks */
 139        chunk_array_user = u64_to_user_ptr(cs->in.chunks);
 140        if (copy_from_user(chunk_array, chunk_array_user,
 141                           sizeof(uint64_t)*cs->in.num_chunks)) {
 142                ret = -EFAULT;
 143                goto free_chunk;
 144        }
 145
 146        p->nchunks = cs->in.num_chunks;
 147        p->chunks = kmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),
 148                            GFP_KERNEL);
 149        if (!p->chunks) {
 150                ret = -ENOMEM;
 151                goto free_chunk;
 152        }
 153
 154        for (i = 0; i < p->nchunks; i++) {
 155                struct drm_amdgpu_cs_chunk __user **chunk_ptr = NULL;
 156                struct drm_amdgpu_cs_chunk user_chunk;
 157                uint32_t __user *cdata;
 158
 159                chunk_ptr = u64_to_user_ptr(chunk_array[i]);
 160                if (copy_from_user(&user_chunk, chunk_ptr,
 161                                       sizeof(struct drm_amdgpu_cs_chunk))) {
 162                        ret = -EFAULT;
 163                        i--;
 164                        goto free_partial_kdata;
 165                }
 166                p->chunks[i].chunk_id = user_chunk.chunk_id;
 167                p->chunks[i].length_dw = user_chunk.length_dw;
 168
 169                size = p->chunks[i].length_dw;
 170                cdata = u64_to_user_ptr(user_chunk.chunk_data);
 171
 172                p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
 173                if (p->chunks[i].kdata == NULL) {
 174                        ret = -ENOMEM;
 175                        i--;
 176                        goto free_partial_kdata;
 177                }
 178                size *= sizeof(uint32_t);
 179                if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
 180                        ret = -EFAULT;
 181                        goto free_partial_kdata;
 182                }
 183
 184                switch (p->chunks[i].chunk_id) {
 185                case AMDGPU_CHUNK_ID_IB:
 186                        ++num_ibs;
 187                        break;
 188
 189                case AMDGPU_CHUNK_ID_FENCE:
 190                        size = sizeof(struct drm_amdgpu_cs_chunk_fence);
 191                        if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
 192                                ret = -EINVAL;
 193                                goto free_partial_kdata;
 194                        }
 195
 196                        ret = amdgpu_cs_user_fence_chunk(p, p->chunks[i].kdata,
 197                                                         &uf_offset);
 198                        if (ret)
 199                                goto free_partial_kdata;
 200
 201                        break;
 202
 203                case AMDGPU_CHUNK_ID_BO_HANDLES:
 204                        size = sizeof(struct drm_amdgpu_bo_list_in);
 205                        if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
 206                                ret = -EINVAL;
 207                                goto free_partial_kdata;
 208                        }
 209
 210                        ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata);
 211                        if (ret)
 212                                goto free_partial_kdata;
 213
 214                        break;
 215
 216                case AMDGPU_CHUNK_ID_DEPENDENCIES:
 217                case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
 218                case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
 219                case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
 220                case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
 221                case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
 222                        break;
 223
 224                default:
 225                        ret = -EINVAL;
 226                        goto free_partial_kdata;
 227                }
 228        }
 229
 230        ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job, vm);
 231        if (ret)
 232                goto free_all_kdata;
 233
 234        if (p->ctx->vram_lost_counter != p->job->vram_lost_counter) {
 235                ret = -ECANCELED;
 236                goto free_all_kdata;
 237        }
 238
 239        if (p->uf_entry.tv.bo)
 240                p->job->uf_addr = uf_offset;
 241        kfree(chunk_array);
 242
 243        /* Use this opportunity to fill in task info for the vm */
 244        amdgpu_vm_set_task_info(vm);
 245
 246        return 0;
 247
 248free_all_kdata:
 249        i = p->nchunks - 1;
 250free_partial_kdata:
 251        for (; i >= 0; i--)
 252                kvfree(p->chunks[i].kdata);
 253        kfree(p->chunks);
 254        p->chunks = NULL;
 255        p->nchunks = 0;
 256free_chunk:
 257        kfree(chunk_array);
 258
 259        return ret;
 260}
 261
 262/* Convert microseconds to bytes. */
 263static u64 us_to_bytes(struct amdgpu_device *adev, s64 us)
 264{
 265        if (us <= 0 || !adev->mm_stats.log2_max_MBps)
 266                return 0;
 267
 268        /* Since accum_us is incremented by a million per second, just
 269         * multiply it by the number of MB/s to get the number of bytes.
 270         */
 271        return us << adev->mm_stats.log2_max_MBps;
 272}
 273
 274static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)
 275{
 276        if (!adev->mm_stats.log2_max_MBps)
 277                return 0;
 278
 279        return bytes >> adev->mm_stats.log2_max_MBps;
 280}
 281
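/* Worked example (hypothetical value, for illustration only): assuming
 * log2_max_MBps == 6, i.e. 64 MB/s, one second of accumulated time
 * (1,000,000 us) converts to 1,000,000 << 6 = 64,000,000 bytes of allowed
 * buffer moves, and bytes_to_us() is simply the inverse shift.
 */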
 282/* Returns how many bytes TTM can move right now. If no bytes can be moved,
 283 * it returns 0. If it returns non-zero, it's OK to move at least one buffer,
 284 * which means it can go over the threshold once. If that happens, the driver
 285 * will be in debt and no other buffer migrations can be done until that debt
 286 * is repaid.
 287 *
 288 * This approach allows moving a buffer of any size (it's important to allow
 289 * that).
 290 *
 291 * The currency is simply time in microseconds and it increases as the clock
 292 * ticks. The accumulated microseconds (us) are converted to bytes and
 293 * returned.
 294 */
 295static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
 296                                              u64 *max_bytes,
 297                                              u64 *max_vis_bytes)
 298{
 299        s64 time_us, increment_us;
 300        u64 free_vram, total_vram, used_vram;
 301        struct ttm_resource_manager *vram_man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
 302        /* Allow a maximum of 200 accumulated ms. This is basically per-IB
 303         * throttling.
 304         *
 305         * It means that in order to get full max MBps, at least 5 IBs per
 306         * second must be submitted and not more than 200ms apart from each
 307         * other.
 308         */
 309        const s64 us_upper_bound = 200000;
 310
 311        if (!adev->mm_stats.log2_max_MBps) {
 312                *max_bytes = 0;
 313                *max_vis_bytes = 0;
 314                return;
 315        }
 316
 317        total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);
 318        used_vram = amdgpu_vram_mgr_usage(vram_man);
 319        free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
 320
 321        spin_lock(&adev->mm_stats.lock);
 322
 323        /* Increase the amount of accumulated us. */
 324        time_us = ktime_to_us(ktime_get());
 325        increment_us = time_us - adev->mm_stats.last_update_us;
 326        adev->mm_stats.last_update_us = time_us;
 327        adev->mm_stats.accum_us = min(adev->mm_stats.accum_us + increment_us,
 328                                      us_upper_bound);
 329
 330        /* This prevents the short period of low performance when the VRAM
 331         * usage is low and the driver is in debt or doesn't have enough
 332         * accumulated us to fill VRAM quickly.
 333         *
 334         * The situation can occur in these cases:
 335         * - a lot of VRAM is freed by userspace
 336         * - the presence of a big buffer causes a lot of evictions
 337         *   (solution: split buffers into smaller ones)
 338         *
 339         * If 128 MB or 1/8th of VRAM is free, start filling it now by setting
 340         * accum_us to a positive number.
 341         */
 342        if (free_vram >= 128 * 1024 * 1024 || free_vram >= total_vram / 8) {
 343                s64 min_us;
 344
 345                /* Be more aggressive on dGPUs. Try to fill a portion of free
 346                 * VRAM now.
 347                 */
 348                if (!(adev->flags & AMD_IS_APU))
 349                        min_us = bytes_to_us(adev, free_vram / 4);
 350                else
 351                        min_us = 0; /* Reset accum_us on APUs. */
 352
 353                adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us);
 354        }
 355
 356        /* This is set to 0 if the driver is in debt to disallow (optional)
 357         * buffer moves.
 358         */
 359        *max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
 360
 361        /* Do the same for visible VRAM if half of it is free */
 362        if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
 363                u64 total_vis_vram = adev->gmc.visible_vram_size;
 364                u64 used_vis_vram =
 365                  amdgpu_vram_mgr_vis_usage(vram_man);
 366
 367                if (used_vis_vram < total_vis_vram) {
 368                        u64 free_vis_vram = total_vis_vram - used_vis_vram;
 369                        adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis +
 370                                                          increment_us, us_upper_bound);
 371
 372                        if (free_vis_vram >= total_vis_vram / 2)
 373                                adev->mm_stats.accum_us_vis =
 374                                        max(bytes_to_us(adev, free_vis_vram / 2),
 375                                            adev->mm_stats.accum_us_vis);
 376                }
 377
 378                *max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis);
 379        } else {
 380                *max_vis_bytes = 0;
 381        }
 382
 383        spin_unlock(&adev->mm_stats.lock);
 384}
 385
 386/* Report how many bytes have really been moved for the last command
 387 * submission. This can result in a debt that can stop buffer migrations
 388 * temporarily.
 389 */
 390void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
 391                                  u64 num_vis_bytes)
 392{
 393        spin_lock(&adev->mm_stats.lock);
 394        adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
 395        adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes);
 396        spin_unlock(&adev->mm_stats.lock);
 397}
 398
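/* Validate a single BO. Pinned BOs are left where they are. As long as the
 * per-CS move budget (bytes_moved_threshold / bytes_moved_vis_threshold) is
 * not exhausted the preferred domains are used, otherwise, and on -ENOMEM,
 * the allowed domains are used as fallback. Moved bytes are accounted.
 */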
 399static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 400                                 struct amdgpu_bo *bo)
 401{
 402        struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 403        struct ttm_operation_ctx ctx = {
 404                .interruptible = true,
 405                .no_wait_gpu = false,
 406                .resv = bo->tbo.base.resv
 407        };
 408        uint32_t domain;
 409        int r;
 410
 411        if (bo->tbo.pin_count)
 412                return 0;
 413
 414        /* Don't move this buffer if we have depleted our allowance
 415         * to move it. Don't move anything if the threshold is zero.
 416         */
 417        if (p->bytes_moved < p->bytes_moved_threshold &&
 418            (!bo->tbo.base.dma_buf ||
 419            list_empty(&bo->tbo.base.dma_buf->attachments))) {
 420                if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
 421                    (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
 422                        /* And don't move a CPU_ACCESS_REQUIRED BO to limited
 423                         * visible VRAM if we've depleted our allowance to do
 424                         * that.
 425                         */
 426                        if (p->bytes_moved_vis < p->bytes_moved_vis_threshold)
 427                                domain = bo->preferred_domains;
 428                        else
 429                                domain = bo->allowed_domains;
 430                } else {
 431                        domain = bo->preferred_domains;
 432                }
 433        } else {
 434                domain = bo->allowed_domains;
 435        }
 436
 437retry:
 438        amdgpu_bo_placement_from_domain(bo, domain);
 439        r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 440
 441        p->bytes_moved += ctx.bytes_moved;
 442        if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
 443            amdgpu_bo_in_cpu_visible_vram(bo))
 444                p->bytes_moved_vis += ctx.bytes_moved;
 445
 446        if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
 447                domain = bo->allowed_domains;
 448                goto retry;
 449        }
 450
 451        return r;
 452}
 453
 454static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo)
 455{
 456        struct amdgpu_cs_parser *p = param;
 457        int r;
 458
 459        r = amdgpu_cs_bo_validate(p, bo);
 460        if (r)
 461                return r;
 462
 463        if (bo->shadow)
 464                r = amdgpu_cs_bo_validate(p, bo->shadow);
 465
 466        return r;
 467}
 468
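/* Validate all BOs on the given list. Userptr BOs owned by a different
 * process are rejected with -EPERM, invalidated userptr BOs are first moved
 * to the CPU domain and rebound to the new pages, then every BO is validated
 * through amdgpu_cs_validate().
 */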
 469static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 470                            struct list_head *validated)
 471{
 472        struct ttm_operation_ctx ctx = { true, false };
 473        struct amdgpu_bo_list_entry *lobj;
 474        int r;
 475
 476        list_for_each_entry(lobj, validated, tv.head) {
 477                struct amdgpu_bo *bo = ttm_to_amdgpu_bo(lobj->tv.bo);
 478                struct mm_struct *usermm;
 479
 480                usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm);
 481                if (usermm && usermm != current->mm)
 482                        return -EPERM;
 483
 484                if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) &&
 485                    lobj->user_invalidated && lobj->user_pages) {
 486                        amdgpu_bo_placement_from_domain(bo,
 487                                                        AMDGPU_GEM_DOMAIN_CPU);
 488                        r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 489                        if (r)
 490                                return r;
 491
 492                        amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
 493                                                     lobj->user_pages);
 494                }
 495
 496                r = amdgpu_cs_validate(p, bo);
 497                if (r)
 498                        return r;
 499
 500                kvfree(lobj->user_pages);
 501                lobj->user_pages = NULL;
 502        }
 503        return 0;
 504}
 505
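/* Gather and reserve all BOs used by this CS: the BO list (from the handle in
 * the ioctl, the BO_HANDLES chunk or a newly created empty list), the VM page
 * directory and the optional user fence BO. Userptr pages are fetched,
 * everything is validated within the current move budget and the GDS/GWS/OA
 * and user fence GPU addresses are filled into the job.
 */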
 506static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 507                                union drm_amdgpu_cs *cs)
 508{
 509        struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 510        struct amdgpu_vm *vm = &fpriv->vm;
 511        struct amdgpu_bo_list_entry *e;
 512        struct list_head duplicates;
 513        struct amdgpu_bo *gds;
 514        struct amdgpu_bo *gws;
 515        struct amdgpu_bo *oa;
 516        int r;
 517
 518        INIT_LIST_HEAD(&p->validated);
 519
 520        /* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */
 521        if (cs->in.bo_list_handle) {
 522                if (p->bo_list)
 523                        return -EINVAL;
 524
 525                r = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle,
 526                                       &p->bo_list);
 527                if (r)
 528                        return r;
 529        } else if (!p->bo_list) {
 530                /* Create an empty bo_list when no handle is provided */
 531                r = amdgpu_bo_list_create(p->adev, p->filp, NULL, 0,
 532                                          &p->bo_list);
 533                if (r)
 534                        return r;
 535        }
 536
 537        /* One for TTM and one for the CS job */
 538        amdgpu_bo_list_for_each_entry(e, p->bo_list)
 539                e->tv.num_shared = 2;
 540
 541        amdgpu_bo_list_get_list(p->bo_list, &p->validated);
 542
 543        INIT_LIST_HEAD(&duplicates);
 544        amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
 545
 546        if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
 547                list_add(&p->uf_entry.tv.head, &p->validated);
 548
 549        /* Get the userptr backing pages. If the pages were updated after being
 550         * registered in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate()
 551         * will do amdgpu_ttm_backend_bind() to flush and invalidate the new pages.
 552         */
 553        amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
 554                struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 555                bool userpage_invalidated = false;
 556                int i;
 557
 558                e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
 559                                        sizeof(struct page *),
 560                                        GFP_KERNEL | __GFP_ZERO);
 561                if (!e->user_pages) {
 562                        DRM_ERROR("calloc failure\n");
 563                        return -ENOMEM;
 564                }
 565
 566                r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages);
 567                if (r) {
 568                        kvfree(e->user_pages);
 569                        e->user_pages = NULL;
 570                        return r;
 571                }
 572
 573                for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
 574                        if (bo->tbo.ttm->pages[i] != e->user_pages[i]) {
 575                                userpage_invalidated = true;
 576                                break;
 577                        }
 578                }
 579                e->user_invalidated = userpage_invalidated;
 580        }
 581
 582        r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
 583                                   &duplicates);
 584        if (unlikely(r != 0)) {
 585                if (r != -ERESTARTSYS)
 586                        DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
 587                goto out;
 588        }
 589
 590        amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
 591                                          &p->bytes_moved_vis_threshold);
 592        p->bytes_moved = 0;
 593        p->bytes_moved_vis = 0;
 594
 595        r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm,
 596                                      amdgpu_cs_validate, p);
 597        if (r) {
 598                DRM_ERROR("amdgpu_vm_validate_pt_bos() failed.\n");
 599                goto error_validate;
 600        }
 601
 602        r = amdgpu_cs_list_validate(p, &duplicates);
 603        if (r)
 604                goto error_validate;
 605
 606        r = amdgpu_cs_list_validate(p, &p->validated);
 607        if (r)
 608                goto error_validate;
 609
 610        amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
 611                                     p->bytes_moved_vis);
 612
 613        gds = p->bo_list->gds_obj;
 614        gws = p->bo_list->gws_obj;
 615        oa = p->bo_list->oa_obj;
 616
 617        amdgpu_bo_list_for_each_entry(e, p->bo_list) {
 618                struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 619
 620                /* Make sure we use the exclusive slot for shared BOs */
 621                if (bo->prime_shared_count)
 622                        e->tv.num_shared = 0;
 623                e->bo_va = amdgpu_vm_bo_find(vm, bo);
 624        }
 625
 626        if (gds) {
 627                p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
 628                p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
 629        }
 630        if (gws) {
 631                p->job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT;
 632                p->job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT;
 633        }
 634        if (oa) {
 635                p->job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT;
 636                p->job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT;
 637        }
 638
 639        if (!r && p->uf_entry.tv.bo) {
 640                struct amdgpu_bo *uf = ttm_to_amdgpu_bo(p->uf_entry.tv.bo);
 641
 642                r = amdgpu_ttm_alloc_gart(&uf->tbo);
 643                p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
 644        }
 645
 646error_validate:
 647        if (r)
 648                ttm_eu_backoff_reservation(&p->ticket, &p->validated);
 649out:
 650        return r;
 651}
 652
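/* Add the fences of all validated BOs as dependencies to the job, using
 * explicit synchronization for BOs created with the explicit sync flag and
 * syncing to fences of other owners otherwise.
 */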
 653static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
 654{
 655        struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 656        struct amdgpu_bo_list_entry *e;
 657        int r;
 658
 659        list_for_each_entry(e, &p->validated, tv.head) {
 660                struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 661                struct dma_resv *resv = bo->tbo.base.resv;
 662                enum amdgpu_sync_mode sync_mode;
 663
 664                sync_mode = amdgpu_bo_explicit_sync(bo) ?
 665                        AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER;
 666                r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, sync_mode,
 667                                     &fpriv->vm);
 668                if (r)
 669                        return r;
 670        }
 671        return 0;
 672}
 673
 674/**
 675 * amdgpu_cs_parser_fini() - clean parser states
 676 * @parser:     parser structure holding parsing context.
 677 * @error:      error number
 678 * @backoff:    indicator to backoff the reservation
 679 *
 680 * If error is set, unvalidate the buffers; otherwise just free the memory
 681 * used by the parsing context.
 682 */
 683static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
 684                                  bool backoff)
 685{
 686        unsigned i;
 687
 688        if (error && backoff)
 689                ttm_eu_backoff_reservation(&parser->ticket,
 690                                           &parser->validated);
 691
 692        for (i = 0; i < parser->num_post_deps; i++) {
 693                drm_syncobj_put(parser->post_deps[i].syncobj);
 694                kfree(parser->post_deps[i].chain);
 695        }
 696        kfree(parser->post_deps);
 697
 698        dma_fence_put(parser->fence);
 699
 700        if (parser->ctx) {
 701                mutex_unlock(&parser->ctx->lock);
 702                amdgpu_ctx_put(parser->ctx);
 703        }
 704        if (parser->bo_list)
 705                amdgpu_bo_list_put(parser->bo_list);
 706
 707        for (i = 0; i < parser->nchunks; i++)
 708                kvfree(parser->chunks[i].kdata);
 709        kfree(parser->chunks);
 710        if (parser->job)
 711                amdgpu_job_free(parser->job);
 712        if (parser->uf_entry.tv.bo) {
 713                struct amdgpu_bo *uf = ttm_to_amdgpu_bo(parser->uf_entry.tv.bo);
 714
 715                amdgpu_bo_unref(&uf);
 716        }
 717}
 718
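/* Handle the VM side of the submission: for rings that parse or patch the CS
 * (UVD/VCE VM emulation) the IBs are mapped and processed in place; for VM
 * submissions the freed mappings are cleared, the PRT, CSA and per-BO VAs as
 * well as the page tables are updated and the resulting fences are added to
 * the job, followed by amdgpu_cs_sync_rings().
 */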
 719static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 720{
 721        struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
 722        struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 723        struct amdgpu_device *adev = p->adev;
 724        struct amdgpu_vm *vm = &fpriv->vm;
 725        struct amdgpu_bo_list_entry *e;
 726        struct amdgpu_bo_va *bo_va;
 727        struct amdgpu_bo *bo;
 728        int r;
 729
 730        /* Only for UVD/VCE VM emulation */
 731        if (ring->funcs->parse_cs || ring->funcs->patch_cs_in_place) {
 732                unsigned i, j;
 733
 734                for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
 735                        struct drm_amdgpu_cs_chunk_ib *chunk_ib;
 736                        struct amdgpu_bo_va_mapping *m;
 737                        struct amdgpu_bo *aobj = NULL;
 738                        struct amdgpu_cs_chunk *chunk;
 739                        uint64_t offset, va_start;
 740                        struct amdgpu_ib *ib;
 741                        uint8_t *kptr;
 742
 743                        chunk = &p->chunks[i];
 744                        ib = &p->job->ibs[j];
 745                        chunk_ib = chunk->kdata;
 746
 747                        if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
 748                                continue;
 749
 750                        va_start = chunk_ib->va_start & AMDGPU_GMC_HOLE_MASK;
 751                        r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
 752                        if (r) {
 753                                DRM_ERROR("IB va_start is invalid\n");
 754                                return r;
 755                        }
 756
 757                        if ((va_start + chunk_ib->ib_bytes) >
 758                            (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
 759                                DRM_ERROR("IB va_start+ib_bytes is invalid\n");
 760                                return -EINVAL;
 761                        }
 762
 763                        /* the IB should be reserved at this point */
 764                        r = amdgpu_bo_kmap(aobj, (void **)&kptr);
 765                        if (r) {
 766                                return r;
 767                        }
 768
 769                        offset = m->start * AMDGPU_GPU_PAGE_SIZE;
 770                        kptr += va_start - offset;
 771
 772                        if (ring->funcs->parse_cs) {
 773                                memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
 774                                amdgpu_bo_kunmap(aobj);
 775
 776                                r = amdgpu_ring_parse_cs(ring, p, j);
 777                                if (r)
 778                                        return r;
 779                        } else {
 780                                ib->ptr = (uint32_t *)kptr;
 781                                r = amdgpu_ring_patch_cs_in_place(ring, p, j);
 782                                amdgpu_bo_kunmap(aobj);
 783                                if (r)
 784                                        return r;
 785                        }
 786
 787                        j++;
 788                }
 789        }
 790
 791        if (!p->job->vm)
 792                return amdgpu_cs_sync_rings(p);
 793
 794
 795        r = amdgpu_vm_clear_freed(adev, vm, NULL);
 796        if (r)
 797                return r;
 798
 799        r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
 800        if (r)
 801                return r;
 802
 803        r = amdgpu_sync_vm_fence(&p->job->sync, fpriv->prt_va->last_pt_update);
 804        if (r)
 805                return r;
 806
 807        if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
 808                bo_va = fpriv->csa_va;
 809                BUG_ON(!bo_va);
 810                r = amdgpu_vm_bo_update(adev, bo_va, false);
 811                if (r)
 812                        return r;
 813
 814                r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
 815                if (r)
 816                        return r;
 817        }
 818
 819        amdgpu_bo_list_for_each_entry(e, p->bo_list) {
 820                /* ignore duplicates */
 821                bo = ttm_to_amdgpu_bo(e->tv.bo);
 822                if (!bo)
 823                        continue;
 824
 825                bo_va = e->bo_va;
 826                if (bo_va == NULL)
 827                        continue;
 828
 829                r = amdgpu_vm_bo_update(adev, bo_va, false);
 830                if (r)
 831                        return r;
 832
 833                r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
 834                if (r)
 835                        return r;
 836        }
 837
 838        r = amdgpu_vm_handle_moved(adev, vm);
 839        if (r)
 840                return r;
 841
 842        r = amdgpu_vm_update_pdes(adev, vm, false);
 843        if (r)
 844                return r;
 845
 846        r = amdgpu_sync_vm_fence(&p->job->sync, vm->last_update);
 847        if (r)
 848                return r;
 849
 850        p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);
 851
 852        if (amdgpu_vm_debug) {
 853                /* Invalidate all BOs to test for userspace bugs */
 854                amdgpu_bo_list_for_each_entry(e, p->bo_list) {
 855                        struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 856
 857                        /* ignore duplicates */
 858                        if (!bo)
 859                                continue;
 860
 861                        amdgpu_vm_bo_invalidate(adev, bo, false);
 862                }
 863        }
 864
 865        return amdgpu_cs_sync_rings(p);
 866}
 867
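/* Fill parser->job->ibs from the IB chunks: enforce the GFX preemption rules
 * (at most one preemptible CE and one preemptible DE IB), resolve the single
 * scheduler entity that all IBs must share, allocate each IB and copy its GPU
 * address, size and flags from the chunk. User fences are rejected on rings
 * that don't support them.
 */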
 868static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 869                             struct amdgpu_cs_parser *parser)
 870{
 871        struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
 872        struct amdgpu_vm *vm = &fpriv->vm;
 873        int r, ce_preempt = 0, de_preempt = 0;
 874        struct amdgpu_ring *ring;
 875        int i, j;
 876
 877        for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
 878                struct amdgpu_cs_chunk *chunk;
 879                struct amdgpu_ib *ib;
 880                struct drm_amdgpu_cs_chunk_ib *chunk_ib;
 881                struct drm_sched_entity *entity;
 882
 883                chunk = &parser->chunks[i];
 884                ib = &parser->job->ibs[j];
 885                chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
 886
 887                if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
 888                        continue;
 889
 890                if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
 891                    (amdgpu_mcbp || amdgpu_sriov_vf(adev))) {
 892                        if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
 893                                if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
 894                                        ce_preempt++;
 895                                else
 896                                        de_preempt++;
 897                        }
 898
 899                        /* each GFX command submission allows at most one preemptible IB for CE and one for DE */
 900                        if (ce_preempt > 1 || de_preempt > 1)
 901                                return -EINVAL;
 902                }
 903
 904                r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type,
 905                                          chunk_ib->ip_instance, chunk_ib->ring,
 906                                          &entity);
 907                if (r)
 908                        return r;
 909
 910                if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
 911                        parser->job->preamble_status |=
 912                                AMDGPU_PREAMBLE_IB_PRESENT;
 913
 914                if (parser->entity && parser->entity != entity)
 915                        return -EINVAL;
 916
 917                /* Return if there is no run queue associated with this entity,
 918                 * possibly because of a disabled HW IP. */
 919                if (entity->rq == NULL)
 920                        return -EINVAL;
 921
 922                parser->entity = entity;
 923
 924                ring = to_amdgpu_ring(entity->rq->sched);
 925                r =  amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
 926                                   chunk_ib->ib_bytes : 0,
 927                                   AMDGPU_IB_POOL_DELAYED, ib);
 928                if (r) {
 929                        DRM_ERROR("Failed to get ib !\n");
 930                        return r;
 931                }
 932
 933                ib->gpu_addr = chunk_ib->va_start;
 934                ib->length_dw = chunk_ib->ib_bytes / 4;
 935                ib->flags = chunk_ib->flags;
 936
 937                j++;
 938        }
 939
 940        /* MM engine doesn't support user fences */
 941        ring = to_amdgpu_ring(parser->entity->rq->sched);
 942        if (parser->job->uf_addr && ring->funcs->no_user_fence)
 943                return -EINVAL;
 944
 945        return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity);
 946}
 947
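/* Add the fences named in a DEPENDENCIES chunk to the job's sync object. For
 * SCHEDULED_DEPENDENCIES the scheduled fence is used instead of the finished
 * fence, so the job only waits for the dependency to be scheduled, not to
 * finish.
 */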
 948static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
 949                                       struct amdgpu_cs_chunk *chunk)
 950{
 951        struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 952        unsigned num_deps;
 953        int i, r;
 954        struct drm_amdgpu_cs_chunk_dep *deps;
 955
 956        deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
 957        num_deps = chunk->length_dw * 4 /
 958                sizeof(struct drm_amdgpu_cs_chunk_dep);
 959
 960        for (i = 0; i < num_deps; ++i) {
 961                struct amdgpu_ctx *ctx;
 962                struct drm_sched_entity *entity;
 963                struct dma_fence *fence;
 964
 965                ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
 966                if (ctx == NULL)
 967                        return -EINVAL;
 968
 969                r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type,
 970                                          deps[i].ip_instance,
 971                                          deps[i].ring, &entity);
 972                if (r) {
 973                        amdgpu_ctx_put(ctx);
 974                        return r;
 975                }
 976
 977                fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle);
 978                amdgpu_ctx_put(ctx);
 979
 980                if (IS_ERR(fence))
 981                        return PTR_ERR(fence);
 982                else if (!fence)
 983                        continue;
 984
 985                if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
 986                        struct drm_sched_fence *s_fence;
 987                        struct dma_fence *old = fence;
 988
 989                        s_fence = to_drm_sched_fence(fence);
 990                        fence = dma_fence_get(&s_fence->scheduled);
 991                        dma_fence_put(old);
 992                }
 993
 994                r = amdgpu_sync_fence(&p->job->sync, fence);
 995                dma_fence_put(fence);
 996                if (r)
 997                        return r;
 998        }
 999        return 0;
1000}
1001
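/* Look up the fence of a syncobj, optionally at a timeline point, and add it
 * as a dependency to the job's sync object.
 */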
1002static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
1003                                                 uint32_t handle, u64 point,
1004                                                 u64 flags)
1005{
1006        struct dma_fence *fence;
1007        int r;
1008
1009        r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence);
1010        if (r) {
1011                DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n",
1012                          handle, point, r);
1013                return r;
1014        }
1015
1016        r = amdgpu_sync_fence(&p->job->sync, fence);
1017        dma_fence_put(fence);
1018
1019        return r;
1020}
1021
1022static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
1023                                            struct amdgpu_cs_chunk *chunk)
1024{
1025        struct drm_amdgpu_cs_chunk_sem *deps;
1026        unsigned num_deps;
1027        int i, r;
1028
1029        deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
1030        num_deps = chunk->length_dw * 4 /
1031                sizeof(struct drm_amdgpu_cs_chunk_sem);
1032        for (i = 0; i < num_deps; ++i) {
1033                r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle,
1034                                                          0, 0);
1035                if (r)
1036                        return r;
1037        }
1038
1039        return 0;
1040}
1041
1042
1043static int amdgpu_cs_process_syncobj_timeline_in_dep(struct amdgpu_cs_parser *p,
1044                                                     struct amdgpu_cs_chunk *chunk)
1045{
1046        struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
1047        unsigned num_deps;
1048        int i, r;
1049
1050        syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
1051        num_deps = chunk->length_dw * 4 /
1052                sizeof(struct drm_amdgpu_cs_chunk_syncobj);
1053        for (i = 0; i < num_deps; ++i) {
1054                r = amdgpu_syncobj_lookup_and_add_to_sync(p,
1055                                                          syncobj_deps[i].handle,
1056                                                          syncobj_deps[i].point,
1057                                                          syncobj_deps[i].flags);
1058                if (r)
1059                        return r;
1060        }
1061
1062        return 0;
1063}
1064
1065static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
1066                                             struct amdgpu_cs_chunk *chunk)
1067{
1068        struct drm_amdgpu_cs_chunk_sem *deps;
1069        unsigned num_deps;
1070        int i;
1071
1072        deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
1073        num_deps = chunk->length_dw * 4 /
1074                sizeof(struct drm_amdgpu_cs_chunk_sem);
1075
1076        if (p->post_deps)
1077                return -EINVAL;
1078
1079        p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
1080                                     GFP_KERNEL);
1081        p->num_post_deps = 0;
1082
1083        if (!p->post_deps)
1084                return -ENOMEM;
1085
1086
1087        for (i = 0; i < num_deps; ++i) {
1088                p->post_deps[i].syncobj =
1089                        drm_syncobj_find(p->filp, deps[i].handle);
1090                if (!p->post_deps[i].syncobj)
1091                        return -EINVAL;
1092                p->post_deps[i].chain = NULL;
1093                p->post_deps[i].point = 0;
1094                p->num_post_deps++;
1095        }
1096
1097        return 0;
1098}
1099
1100
1101static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p,
1102                                                      struct amdgpu_cs_chunk *chunk)
1103{
1104        struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
1105        unsigned num_deps;
1106        int i;
1107
1108        syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
1109        num_deps = chunk->length_dw * 4 /
1110                sizeof(struct drm_amdgpu_cs_chunk_syncobj);
1111
1112        if (p->post_deps)
1113                return -EINVAL;
1114
1115        p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
1116                                     GFP_KERNEL);
1117        p->num_post_deps = 0;
1118
1119        if (!p->post_deps)
1120                return -ENOMEM;
1121
1122        for (i = 0; i < num_deps; ++i) {
1123                struct amdgpu_cs_post_dep *dep = &p->post_deps[i];
1124
1125                dep->chain = NULL;
1126                if (syncobj_deps[i].point) {
1127                        dep->chain = kmalloc(sizeof(*dep->chain), GFP_KERNEL);
1128                        if (!dep->chain)
1129                                return -ENOMEM;
1130                }
1131
1132                dep->syncobj = drm_syncobj_find(p->filp,
1133                                                syncobj_deps[i].handle);
1134                if (!dep->syncobj) {
1135                        kfree(dep->chain);
1136                        return -EINVAL;
1137                }
1138                dep->point = syncobj_deps[i].point;
1139                p->num_post_deps++;
1140        }
1141
1142        return 0;
1143}
1144
1145static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
1146                                  struct amdgpu_cs_parser *p)
1147{
1148        int i, r;
1149
1150        for (i = 0; i < p->nchunks; ++i) {
1151                struct amdgpu_cs_chunk *chunk;
1152
1153                chunk = &p->chunks[i];
1154
1155                switch (chunk->chunk_id) {
1156                case AMDGPU_CHUNK_ID_DEPENDENCIES:
1157                case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
1158                        r = amdgpu_cs_process_fence_dep(p, chunk);
1159                        if (r)
1160                                return r;
1161                        break;
1162                case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
1163                        r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
1164                        if (r)
1165                                return r;
1166                        break;
1167                case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
1168                        r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
1169                        if (r)
1170                                return r;
1171                        break;
1172                case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
1173                        r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk);
1174                        if (r)
1175                                return r;
1176                        break;
1177                case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
1178                        r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk);
1179                        if (r)
1180                                return r;
1181                        break;
1182                }
1183        }
1184
1185        return 0;
1186}
1187
1188static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
1189{
1190        int i;
1191
1192        for (i = 0; i < p->num_post_deps; ++i) {
1193                if (p->post_deps[i].chain && p->post_deps[i].point) {
1194                        drm_syncobj_add_point(p->post_deps[i].syncobj,
1195                                              p->post_deps[i].chain,
1196                                              p->fence, p->post_deps[i].point);
1197                        p->post_deps[i].chain = NULL;
1198                } else {
1199                        drm_syncobj_replace_fence(p->post_deps[i].syncobj,
1200                                                  p->fence);
1201                }
1202        }
1203}
1204
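/* Push the job to the scheduler: initialize the scheduler job, take the
 * notifier lock and re-check that no userptr pages were invalidated (if they
 * were, return -EAGAIN so that user space restarts the ioctl), publish the
 * finished fence as CS fence and sequence number, signal the post
 * dependencies, push the job and attach the fence to all reserved BOs.
 */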
1205static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
1206                            union drm_amdgpu_cs *cs)
1207{
1208        struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
1209        struct drm_sched_entity *entity = p->entity;
1210        struct amdgpu_bo_list_entry *e;
1211        struct amdgpu_job *job;
1212        uint64_t seq;
1213        int r;
1214
1215        job = p->job;
1216        p->job = NULL;
1217
1218        r = drm_sched_job_init(&job->base, entity, &fpriv->vm);
1219        if (r)
1220                goto error_unlock;
1221
1222        /* No memory allocation is allowed while holding the notifier lock.
1223         * The lock is held until amdgpu_cs_submit is finished and the fence
1224         * is added to the BOs.
1225         */
1226        mutex_lock(&p->adev->notifier_lock);
1227
1228        /* If userptrs were invalidated after amdgpu_cs_parser_bos(), return
1229         * -EAGAIN so that drmIoctl in libdrm restarts the amdgpu_cs_ioctl.
1230         */
1231        amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
1232                struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
1233
1234                r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
1235        }
1236        if (r) {
1237                r = -EAGAIN;
1238                goto error_abort;
1239        }
1240
1241        p->fence = dma_fence_get(&job->base.s_fence->finished);
1242
1243        amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
1244        amdgpu_cs_post_dependencies(p);
1245
1246        if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
1247            !p->ctx->preamble_presented) {
1248                job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
1249                p->ctx->preamble_presented = true;
1250        }
1251
1252        cs->out.handle = seq;
1253        job->uf_sequence = seq;
1254
1255        amdgpu_job_free_resources(job);
1256
1257        trace_amdgpu_cs_ioctl(job);
1258        amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->ticket);
1259        drm_sched_entity_push_job(&job->base, entity);
1260
1261        amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
1262
1263        ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
1264        mutex_unlock(&p->adev->notifier_lock);
1265
1266        return 0;
1267
1268error_abort:
1269        drm_sched_job_cleanup(&job->base);
1270        mutex_unlock(&p->adev->notifier_lock);
1271
1272error_unlock:
1273        amdgpu_job_free(job);
1274        return r;
1275}
1276
1277static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *parser)
1278{
1279        int i;
1280
1281        if (!trace_amdgpu_cs_enabled())
1282                return;
1283
1284        for (i = 0; i < parser->job->num_ibs; i++)
1285                trace_amdgpu_cs(parser, i);
1286}
1287
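/**
 * amdgpu_cs_ioctl - process a command submission from user space
 *
 * @dev: drm device
 * @data: union drm_amdgpu_cs from user space
 * @filp: file private
 *
 * Parse the chunks, gather and validate the buffers, resolve the
 * dependencies, do the VM handling and finally hand the job over to the
 * scheduler.
 */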
1288int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
1289{
1290        struct amdgpu_device *adev = drm_to_adev(dev);
1291        union drm_amdgpu_cs *cs = data;
1292        struct amdgpu_cs_parser parser = {};
1293        bool reserved_buffers = false;
1294        int r;
1295
1296        if (amdgpu_ras_intr_triggered())
1297                return -EHWPOISON;
1298
1299        if (!adev->accel_working)
1300                return -EBUSY;
1301
1302        parser.adev = adev;
1303        parser.filp = filp;
1304
1305        r = amdgpu_cs_parser_init(&parser, data);
1306        if (r) {
1307                if (printk_ratelimit())
1308                        DRM_ERROR("Failed to initialize parser %d!\n", r);
1309                goto out;
1310        }
1311
1312        r = amdgpu_cs_ib_fill(adev, &parser);
1313        if (r)
1314                goto out;
1315
1316        r = amdgpu_cs_dependencies(adev, &parser);
1317        if (r) {
1318                DRM_ERROR("Failed in the dependencies handling %d!\n", r);
1319                goto out;
1320        }
1321
1322        r = amdgpu_cs_parser_bos(&parser, data);
1323        if (r) {
1324                if (r == -ENOMEM)
1325                        DRM_ERROR("Not enough memory for command submission!\n");
1326                else if (r != -ERESTARTSYS && r != -EAGAIN)
1327                        DRM_ERROR("Failed to process the buffer list %d!\n", r);
1328                goto out;
1329        }
1330
1331        reserved_buffers = true;
1332
1333        trace_amdgpu_cs_ibs(&parser);
1334
1335        r = amdgpu_cs_vm_handling(&parser);
1336        if (r)
1337                goto out;
1338
1339        r = amdgpu_cs_submit(&parser, cs);
1340
1341out:
1342        amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
1343
1344        return r;
1345}
1346
1347/**
1348 * amdgpu_cs_wait_ioctl - wait for a command submission to finish
1349 *
1350 * @dev: drm device
1351 * @data: data from userspace
1352 * @filp: file private
1353 *
1354 * Wait for the command submission identified by handle to finish.
1355 */
1356int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
1357                         struct drm_file *filp)
1358{
1359        union drm_amdgpu_wait_cs *wait = data;
1360        unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
1361        struct drm_sched_entity *entity;
1362        struct amdgpu_ctx *ctx;
1363        struct dma_fence *fence;
1364        long r;
1365
1366        ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
1367        if (ctx == NULL)
1368                return -EINVAL;
1369
1370        r = amdgpu_ctx_get_entity(ctx, wait->in.ip_type, wait->in.ip_instance,
1371                                  wait->in.ring, &entity);
1372        if (r) {
1373                amdgpu_ctx_put(ctx);
1374                return r;
1375        }
1376
1377        fence = amdgpu_ctx_get_fence(ctx, entity, wait->in.handle);
1378        if (IS_ERR(fence))
1379                r = PTR_ERR(fence);
1380        else if (fence) {
1381                r = dma_fence_wait_timeout(fence, true, timeout);
1382                if (r > 0 && fence->error)
1383                        r = fence->error;
1384                dma_fence_put(fence);
1385        } else
1386                r = 1;
1387
1388        amdgpu_ctx_put(ctx);
1389        if (r < 0)
1390                return r;
1391
1392        memset(wait, 0, sizeof(*wait));
1393        wait->out.status = (r == 0);
1394
1395        return 0;
1396}
1397
1398/**
1399 * amdgpu_cs_get_fence - helper to get fence from drm_amdgpu_fence
1400 *
1401 * @adev: amdgpu device
1402 * @filp: file private
1403 * @user: drm_amdgpu_fence copied from user space
1404 */
1405static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
1406                                             struct drm_file *filp,
1407                                             struct drm_amdgpu_fence *user)
1408{
1409        struct drm_sched_entity *entity;
1410        struct amdgpu_ctx *ctx;
1411        struct dma_fence *fence;
1412        int r;
1413
1414        ctx = amdgpu_ctx_get(filp->driver_priv, user->ctx_id);
1415        if (ctx == NULL)
1416                return ERR_PTR(-EINVAL);
1417
1418        r = amdgpu_ctx_get_entity(ctx, user->ip_type, user->ip_instance,
1419                                  user->ring, &entity);
1420        if (r) {
1421                amdgpu_ctx_put(ctx);
1422                return ERR_PTR(r);
1423        }
1424
1425        fence = amdgpu_ctx_get_fence(ctx, entity, user->seq_no);
1426        amdgpu_ctx_put(ctx);
1427
1428        return fence;
1429}
1430
1431int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
1432                                    struct drm_file *filp)
1433{
1434        struct amdgpu_device *adev = drm_to_adev(dev);
1435        union drm_amdgpu_fence_to_handle *info = data;
1436        struct dma_fence *fence;
1437        struct drm_syncobj *syncobj;
1438        struct sync_file *sync_file;
1439        int fd, r;
1440
1441        fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence);
1442        if (IS_ERR(fence))
1443                return PTR_ERR(fence);
1444
1445        if (!fence)
1446                fence = dma_fence_get_stub();
1447
1448        switch (info->in.what) {
1449        case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ:
1450                r = drm_syncobj_create(&syncobj, 0, fence);
1451                dma_fence_put(fence);
1452                if (r)
1453                        return r;
1454                r = drm_syncobj_get_handle(filp, syncobj, &info->out.handle);
1455                drm_syncobj_put(syncobj);
1456                return r;
1457
1458        case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD:
1459                r = drm_syncobj_create(&syncobj, 0, fence);
1460                dma_fence_put(fence);
1461                if (r)
1462                        return r;
1463                r = drm_syncobj_get_fd(syncobj, (int *)&info->out.handle);
1464                drm_syncobj_put(syncobj);
1465                return r;
1466
1467        case AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD:
1468                fd = get_unused_fd_flags(O_CLOEXEC);
1469                if (fd < 0) {
1470                        dma_fence_put(fence);
1471                        return fd;
1472                }
1473
1474                sync_file = sync_file_create(fence);
1475                dma_fence_put(fence);
1476                if (!sync_file) {
1477                        put_unused_fd(fd);
1478                        return -ENOMEM;
1479                }
1480
1481                fd_install(fd, sync_file->file);
1482                info->out.handle = fd;
1483                return 0;
1484
1485        default:
1486                return -EINVAL;
1487        }
1488}
1489
1490/**
1491 * amdgpu_cs_wait_all_fences - wait on all fences to signal
1492 *
1493 * @adev: amdgpu device
1494 * @filp: file private
1495 * @wait: wait parameters
1496 * @fences: array of drm_amdgpu_fence
1497 */
1498static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev,
1499                                     struct drm_file *filp,
1500                                     union drm_amdgpu_wait_fences *wait,
1501                                     struct drm_amdgpu_fence *fences)
1502{
1503        uint32_t fence_count = wait->in.fence_count;
1504        unsigned int i;
1505        long r = 1;
1506
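            /* r stays 1 (signaled) if every fence has already signaled and been released */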
1507        for (i = 0; i < fence_count; i++) {
1508                struct dma_fence *fence;
1509                unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);
1510
1511                fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
1512                if (IS_ERR(fence))
1513                        return PTR_ERR(fence);
1514                else if (!fence)
1515                        continue;
1516
1517                r = dma_fence_wait_timeout(fence, true, timeout);
1518                if (r > 0 && fence->error)
1519                        r = fence->error;
1520
1521                dma_fence_put(fence);
1522                if (r < 0)
1523                        return r;
1524
1525                if (r == 0)
1526                        break;
1527        }
1528
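            /* status is non-zero only if all fences signaled within the timeout */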
1529        memset(wait, 0, sizeof(*wait));
1530        wait->out.status = (r > 0);
1531
1532        return 0;
1533}
1534
1535/**
1536 * amdgpu_cs_wait_any_fence - wait on any fence to signal
1537 *
1538 * @adev: amdgpu device
1539 * @filp: file private
1540 * @wait: wait parameters
1541 * @fences: array of drm_amdgpu_fence
1542 */
1543static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev,
1544                                    struct drm_file *filp,
1545                                    union drm_amdgpu_wait_fences *wait,
1546                                    struct drm_amdgpu_fence *fences)
1547{
1548        unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);
1549        uint32_t fence_count = wait->in.fence_count;
1550        uint32_t first = ~0;
1551        struct dma_fence **array;
1552        unsigned int i;
1553        long r;
1554
1555        /* Prepare the fence array */
1556        array = kcalloc(fence_count, sizeof(struct dma_fence *), GFP_KERNEL);
1557
1558        if (array == NULL)
1559                return -ENOMEM;
1560
1561        for (i = 0; i < fence_count; i++) {
1562                struct dma_fence *fence;
1563
1564                fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
1565                if (IS_ERR(fence)) {
1566                        r = PTR_ERR(fence);
1567                        goto err_free_fence_array;
1568                } else if (fence) {
1569                        array[i] = fence;
1570                } else { /* NULL, the fence has been already signaled */
1571                        r = 1;
1572                        first = i;
1573                        goto out;
1574                }
1575        }
1576
1577        r = dma_fence_wait_any_timeout(array, fence_count, true, timeout,
1578                                       &first);
1579        if (r < 0)
1580                goto err_free_fence_array;
1581
1582out:
1583        memset(wait, 0, sizeof(*wait));
1584        wait->out.status = (r > 0);
1585        wait->out.first_signaled = first;
1586
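            /* Report the first signaled fence's error, if any, as the return value */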
1587        if (first < fence_count && array[first])
1588                r = array[first]->error;
1589        else
1590                r = 0;
1591
1592err_free_fence_array:
1593        for (i = 0; i < fence_count; i++)
1594                dma_fence_put(array[i]);
1595        kfree(array);
1596
1597        return r;
1598}
1599
1600/**
1601 * amdgpu_cs_wait_fences_ioctl - wait for multiple command submissions to finish
1602 *
1603 * @dev: drm device
1604 * @data: data from userspace
1605 * @filp: file private
1606 */
1607int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
1608                                struct drm_file *filp)
1609{
1610        struct amdgpu_device *adev = drm_to_adev(dev);
1611        union drm_amdgpu_wait_fences *wait = data;
1612        uint32_t fence_count = wait->in.fence_count;
1613        struct drm_amdgpu_fence *fences_user;
1614        struct drm_amdgpu_fence *fences;
1615        int r;
1616
1617        /* Get the fences from userspace */
1618        fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence),
1619                        GFP_KERNEL);
1620        if (fences == NULL)
1621                return -ENOMEM;
1622
1623        fences_user = u64_to_user_ptr(wait->in.fences);
1624        if (copy_from_user(fences, fences_user,
1625                sizeof(struct drm_amdgpu_fence) * fence_count)) {
1626                r = -EFAULT;
1627                goto err_free_fences;
1628        }
1629
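            /* wait_all waits for every fence; otherwise wait for the first one to signal */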
1630        if (wait->in.wait_all)
1631                r = amdgpu_cs_wait_all_fences(adev, filp, wait, fences);
1632        else
1633                r = amdgpu_cs_wait_any_fence(adev, filp, wait, fences);
1634
1635err_free_fences:
1636        kfree(fences);
1637
1638        return r;
1639}
1640
1641/**
1642 * amdgpu_cs_find_mapping - find the BO mapping for a VM address
1643 *
1644 * @parser: command submission parser context
1645 * @addr: VM address
1646 * @bo: resulting BO of the mapping found
1647 * @map: Placeholder to return found BO mapping
1648 *
1649 * Search the buffer objects in the command submission context for a certain
1650 * virtual memory address. Returns 0 and fills in @bo and @map on success,
1651 * or a negative error code otherwise.
1652 */
1653int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
1654                           uint64_t addr, struct amdgpu_bo **bo,
1655                           struct amdgpu_bo_va_mapping **map)
1656{
1657        struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
1658        struct ttm_operation_ctx ctx = { false, false };
1659        struct amdgpu_vm *vm = &fpriv->vm;
1660        struct amdgpu_bo_va_mapping *mapping;
1661        int r;
1662
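            /* VM mappings are tracked in GPU page units rather than bytes */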
1663        addr /= AMDGPU_GPU_PAGE_SIZE;
1664
1665        mapping = amdgpu_vm_bo_lookup_mapping(vm, addr);
1666        if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo)
1667                return -EINVAL;
1668
1669        *bo = mapping->bo_va->base.bo;
1670        *map = mapping;
1671
1672        /* Double check that the BO is reserved by this CS */
1673        if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->ticket)
1674                return -EINVAL;
1675
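            /* Make sure the BO is placed contiguously in VRAM, re-validating it if necessary */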
1676        if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
1677                (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
1678                amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
1679                r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
1680                if (r)
1681                        return r;
1682        }
1683
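            /* Finally make sure the BO has a valid GPU offset, allocating GART space if needed */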
1684        return amdgpu_ttm_alloc_gart(&(*bo)->tbo);
1685}
1686