linux/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
   1/*
   2 * Copyright 2008 Jerome Glisse.
   3 * All Rights Reserved.
   4 *
   5 * Permission is hereby granted, free of charge, to any person obtaining a
   6 * copy of this software and associated documentation files (the "Software"),
   7 * to deal in the Software without restriction, including without limitation
   8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   9 * and/or sell copies of the Software, and to permit persons to whom the
  10 * Software is furnished to do so, subject to the following conditions:
  11 *
  12 * The above copyright notice and this permission notice (including the next
  13 * paragraph) shall be included in all copies or substantial portions of the
  14 * Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  22 * DEALINGS IN THE SOFTWARE.
  23 *
  24 * Authors:
  25 *    Jerome Glisse <glisse@freedesktop.org>
  26 */
  27
  28#include <linux/file.h>
  29#include <linux/pagemap.h>
  30#include <linux/sync_file.h>
  31#include <linux/dma-buf.h>
  32
  33#include <drm/amdgpu_drm.h>
  34#include <drm/drm_syncobj.h>
  35#include "amdgpu.h"
  36#include "amdgpu_trace.h"
  37#include "amdgpu_gmc.h"
  38#include "amdgpu_gem.h"
  39#include "amdgpu_ras.h"
  40
  41static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
  42                                      struct drm_amdgpu_cs_chunk_fence *data,
  43                                      uint32_t *offset)
  44{
  45        struct drm_gem_object *gobj;
  46        struct amdgpu_bo *bo;
  47        unsigned long size;
  48        int r;
  49
  50        gobj = drm_gem_object_lookup(p->filp, data->handle);
  51        if (gobj == NULL)
  52                return -EINVAL;
  53
  54        bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
  55        p->uf_entry.priority = 0;
  56        p->uf_entry.tv.bo = &bo->tbo;
  57        /* One for TTM and one for the CS job */
  58        p->uf_entry.tv.num_shared = 2;
  59
  60        drm_gem_object_put_unlocked(gobj);
  61
  62        size = amdgpu_bo_size(bo);
  63        if (size != PAGE_SIZE || (data->offset + 8) > size) {
  64                r = -EINVAL;
  65                goto error_unref;
  66        }
  67
  68        if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
  69                r = -EINVAL;
  70                goto error_unref;
  71        }
  72
  73        *offset = data->offset;
  74
  75        return 0;
  76
  77error_unref:
  78        amdgpu_bo_unref(&bo);
  79        return r;
  80}
  81
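     /* Note on the user fence set up above: userspace hands in a GEM BO plus an
      * offset; when the job completes, the GPU writes the job's 64-bit sequence
      * number there (hence the "offset + 8 <= size" check). The BO must be a
      * regular single-page allocation and not a userptr BO.
      */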
  82static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
  83                                      struct drm_amdgpu_bo_list_in *data)
  84{
  85        int r;
  86        struct drm_amdgpu_bo_list_entry *info = NULL;
  87
  88        r = amdgpu_bo_create_list_entry_array(data, &info);
  89        if (r)
  90                return r;
  91
  92        r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number,
  93                                  &p->bo_list);
  94        if (r)
  95                goto error_free;
  96
  97        kvfree(info);
  98        return 0;
  99
 100error_free:
 101        if (info)
 102                kvfree(info);
 103
 104        return r;
 105}
 106
 107static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs)
 108{
 109        struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 110        struct amdgpu_vm *vm = &fpriv->vm;
 111        uint64_t *chunk_array_user;
 112        uint64_t *chunk_array;
 113        unsigned size, num_ibs = 0;
 114        uint32_t uf_offset = 0;
 115        int i;
 116        int ret;
 117
 118        if (cs->in.num_chunks == 0)
 119                return 0;
 120
 121        chunk_array = kmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
 122        if (!chunk_array)
 123                return -ENOMEM;
 124
 125        p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
 126        if (!p->ctx) {
 127                ret = -EINVAL;
 128                goto free_chunk;
 129        }
 130
 131        mutex_lock(&p->ctx->lock);
 132
 133        /* skip guilty context job */
 134        if (atomic_read(&p->ctx->guilty) == 1) {
 135                ret = -ECANCELED;
 136                goto free_chunk;
 137        }
 138
 139        /* get chunks */
 140        chunk_array_user = u64_to_user_ptr(cs->in.chunks);
 141        if (copy_from_user(chunk_array, chunk_array_user,
 142                           sizeof(uint64_t)*cs->in.num_chunks)) {
 143                ret = -EFAULT;
 144                goto free_chunk;
 145        }
 146
 147        p->nchunks = cs->in.num_chunks;
 148        p->chunks = kmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),
 149                            GFP_KERNEL);
 150        if (!p->chunks) {
 151                ret = -ENOMEM;
 152                goto free_chunk;
 153        }
 154
 155        for (i = 0; i < p->nchunks; i++) {
 156                struct drm_amdgpu_cs_chunk __user **chunk_ptr = NULL;
 157                struct drm_amdgpu_cs_chunk user_chunk;
 158                uint32_t __user *cdata;
 159
 160                chunk_ptr = u64_to_user_ptr(chunk_array[i]);
 161                if (copy_from_user(&user_chunk, chunk_ptr,
 162                                       sizeof(struct drm_amdgpu_cs_chunk))) {
 163                        ret = -EFAULT;
 164                        i--;
 165                        goto free_partial_kdata;
 166                }
 167                p->chunks[i].chunk_id = user_chunk.chunk_id;
 168                p->chunks[i].length_dw = user_chunk.length_dw;
 169
 170                size = p->chunks[i].length_dw;
 171                cdata = u64_to_user_ptr(user_chunk.chunk_data);
 172
 173                p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
 174                if (p->chunks[i].kdata == NULL) {
 175                        ret = -ENOMEM;
 176                        i--;
 177                        goto free_partial_kdata;
 178                }
 179                size *= sizeof(uint32_t);
 180                if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
 181                        ret = -EFAULT;
 182                        goto free_partial_kdata;
 183                }
 184
 185                switch (p->chunks[i].chunk_id) {
 186                case AMDGPU_CHUNK_ID_IB:
 187                        ++num_ibs;
 188                        break;
 189
 190                case AMDGPU_CHUNK_ID_FENCE:
 191                        size = sizeof(struct drm_amdgpu_cs_chunk_fence);
 192                        if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
 193                                ret = -EINVAL;
 194                                goto free_partial_kdata;
 195                        }
 196
 197                        ret = amdgpu_cs_user_fence_chunk(p, p->chunks[i].kdata,
 198                                                         &uf_offset);
 199                        if (ret)
 200                                goto free_partial_kdata;
 201
 202                        break;
 203
 204                case AMDGPU_CHUNK_ID_BO_HANDLES:
 205                        size = sizeof(struct drm_amdgpu_bo_list_in);
 206                        if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
 207                                ret = -EINVAL;
 208                                goto free_partial_kdata;
 209                        }
 210
 211                        ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata);
 212                        if (ret)
 213                                goto free_partial_kdata;
 214
 215                        break;
 216
 217                case AMDGPU_CHUNK_ID_DEPENDENCIES:
 218                case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
 219                case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
 220                case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
 221                case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
 222                case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
 223                        break;
 224
 225                default:
 226                        ret = -EINVAL;
 227                        goto free_partial_kdata;
 228                }
 229        }
 230
 231        ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job, vm);
 232        if (ret)
 233                goto free_all_kdata;
 234
 235        if (p->ctx->vram_lost_counter != p->job->vram_lost_counter) {
 236                ret = -ECANCELED;
 237                goto free_all_kdata;
 238        }
 239
 240        if (p->uf_entry.tv.bo)
 241                p->job->uf_addr = uf_offset;
 242        kfree(chunk_array);
 243
 244        /* Use this opportunity to fill in task info for the vm */
 245        amdgpu_vm_set_task_info(vm);
 246
 247        return 0;
 248
 249free_all_kdata:
 250        i = p->nchunks - 1;
 251free_partial_kdata:
 252        for (; i >= 0; i--)
 253                kvfree(p->chunks[i].kdata);
 254        kfree(p->chunks);
 255        p->chunks = NULL;
 256        p->nchunks = 0;
 257free_chunk:
 258        kfree(chunk_array);
 259
 260        return ret;
 261}
 262
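     /* The chunk payloads copied in above (p->chunks[i].kdata) stay in kernel
      * memory for the rest of the submission and are released again in
      * amdgpu_cs_parser_fini(), together with the chunk array itself.
      */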
 263/* Convert microseconds to bytes. */
 264static u64 us_to_bytes(struct amdgpu_device *adev, s64 us)
 265{
 266        if (us <= 0 || !adev->mm_stats.log2_max_MBps)
 267                return 0;
 268
 269        /* Since accum_us is incremented by a million per second, just
 270         * multiply it by the number of MB/s to get the number of bytes.
 271         */
 272        return us << adev->mm_stats.log2_max_MBps;
 273}
 274
 275static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)
 276{
 277        if (!adev->mm_stats.log2_max_MBps)
 278                return 0;
 279
 280        return bytes >> adev->mm_stats.log2_max_MBps;
 281}
 282
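     /* Illustrative numbers for the two helpers above (log2_max_MBps == 13 is
      * only an assumed example value): at a peak rate of 8192 MB/s one
      * accumulated microsecond converts to 8192 bytes, so the 200 ms cap used
      * below corresponds to roughly 1.6 GB of buffer moves.
      */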
 283/* Returns how many bytes TTM can move right now. If no bytes can be moved,
 284 * it returns 0. If it returns non-zero, it's OK to move at least one buffer,
 285 * which means it can go over the threshold once. If that happens, the driver
 286 * will be in debt and no other buffer migrations can be done until that debt
 287 * is repaid.
 288 *
 289 * This approach allows moving a buffer of any size (it's important to allow
 290 * that).
 291 *
 292 * The currency is simply time in microseconds and it increases as the clock
 293 * ticks. The accumulated microseconds (us) are converted to bytes and
 294 * returned.
 295 */
 296static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
 297                                              u64 *max_bytes,
 298                                              u64 *max_vis_bytes)
 299{
 300        s64 time_us, increment_us;
 301        u64 free_vram, total_vram, used_vram;
 302
 303        /* Allow a maximum of 200 accumulated ms. This is basically per-IB
 304         * throttling.
 305         *
 306         * It means that in order to get full max MBps, at least 5 IBs per
 307         * second must be submitted and not more than 200ms apart from each
 308         * other.
 309         */
 310        const s64 us_upper_bound = 200000;
 311
 312        if (!adev->mm_stats.log2_max_MBps) {
 313                *max_bytes = 0;
 314                *max_vis_bytes = 0;
 315                return;
 316        }
 317
 318        total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);
 319        used_vram = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
 320        free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
 321
 322        spin_lock(&adev->mm_stats.lock);
 323
 324        /* Increase the amount of accumulated us. */
 325        time_us = ktime_to_us(ktime_get());
 326        increment_us = time_us - adev->mm_stats.last_update_us;
 327        adev->mm_stats.last_update_us = time_us;
 328        adev->mm_stats.accum_us = min(adev->mm_stats.accum_us + increment_us,
 329                                      us_upper_bound);
 330
 331        /* This prevents the short period of low performance when the VRAM
 332         * usage is low and the driver is in debt or doesn't have enough
 333         * accumulated us to fill VRAM quickly.
 334         *
 335         * The situation can occur in these cases:
 336         * - a lot of VRAM is freed by userspace
 337         * - the presence of a big buffer causes a lot of evictions
 338         *   (solution: split buffers into smaller ones)
 339         *
 340         * If 128 MB or 1/8th of VRAM is free, start filling it now by setting
 341         * accum_us to a positive number.
 342         */
 343        if (free_vram >= 128 * 1024 * 1024 || free_vram >= total_vram / 8) {
 344                s64 min_us;
 345
  346                /* Be more aggressive on dGPUs. Try to fill a portion of free
 347                 * VRAM now.
 348                 */
 349                if (!(adev->flags & AMD_IS_APU))
 350                        min_us = bytes_to_us(adev, free_vram / 4);
 351                else
 352                        min_us = 0; /* Reset accum_us on APUs. */
 353
 354                adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us);
 355        }
 356
  357        /* *max_bytes is 0 when the driver is in debt, which disallows
  358         * (optional) buffer moves.
  359         */
 360        *max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
 361
 362        /* Do the same for visible VRAM if half of it is free */
 363        if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
 364                u64 total_vis_vram = adev->gmc.visible_vram_size;
 365                u64 used_vis_vram =
 366                        amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
 367
 368                if (used_vis_vram < total_vis_vram) {
 369                        u64 free_vis_vram = total_vis_vram - used_vis_vram;
 370                        adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis +
 371                                                          increment_us, us_upper_bound);
 372
 373                        if (free_vis_vram >= total_vis_vram / 2)
 374                                adev->mm_stats.accum_us_vis =
 375                                        max(bytes_to_us(adev, free_vis_vram / 2),
 376                                            adev->mm_stats.accum_us_vis);
 377                }
 378
 379                *max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis);
 380        } else {
 381                *max_vis_bytes = 0;
 382        }
 383
 384        spin_unlock(&adev->mm_stats.lock);
 385}
 386
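     /* Example of the free-VRAM boost above (numbers are illustrative only):
      * on a dGPU with 2 GB of VRAM free, accum_us is raised to at least
      * bytes_to_us(512 MB), so the move budget for this submission is roughly
      * half a gigabyte even if the time-based budget had not built up yet.
      */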
 387/* Report how many bytes have really been moved for the last command
 388 * submission. This can result in a debt that can stop buffer migrations
 389 * temporarily.
 390 */
 391void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
 392                                  u64 num_vis_bytes)
 393{
 394        spin_lock(&adev->mm_stats.lock);
 395        adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
 396        adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes);
 397        spin_unlock(&adev->mm_stats.lock);
 398}
 399
 400static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 401                                 struct amdgpu_bo *bo)
 402{
 403        struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 404        struct ttm_operation_ctx ctx = {
 405                .interruptible = true,
 406                .no_wait_gpu = false,
 407                .resv = bo->tbo.base.resv,
 408                .flags = 0
 409        };
 410        uint32_t domain;
 411        int r;
 412
 413        if (bo->pin_count)
 414                return 0;
 415
 416        /* Don't move this buffer if we have depleted our allowance
 417         * to move it. Don't move anything if the threshold is zero.
 418         */
 419        if (p->bytes_moved < p->bytes_moved_threshold &&
 420            (!bo->tbo.base.dma_buf ||
 421            list_empty(&bo->tbo.base.dma_buf->attachments))) {
 422                if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
 423                    (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
 424                        /* And don't move a CPU_ACCESS_REQUIRED BO to limited
 425                         * visible VRAM if we've depleted our allowance to do
 426                         * that.
 427                         */
 428                        if (p->bytes_moved_vis < p->bytes_moved_vis_threshold)
 429                                domain = bo->preferred_domains;
 430                        else
 431                                domain = bo->allowed_domains;
 432                } else {
 433                        domain = bo->preferred_domains;
 434                }
 435        } else {
 436                domain = bo->allowed_domains;
 437        }
 438
 439retry:
 440        amdgpu_bo_placement_from_domain(bo, domain);
 441        r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 442
 443        p->bytes_moved += ctx.bytes_moved;
 444        if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
 445            amdgpu_bo_in_cpu_visible_vram(bo))
 446                p->bytes_moved_vis += ctx.bytes_moved;
 447
 448        if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
 449                domain = bo->allowed_domains;
 450                goto retry;
 451        }
 452
 453        return r;
 454}
 455
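     /* Placement policy of amdgpu_cs_bo_validate() above: while the per-CS move
      * budget lasts, a BO is validated into its preferred domains (with a
      * separate budget for BOs that require CPU-visible VRAM); once the budget
      * is spent, for BOs exported via dma-buf with active attachments, or when
      * TTM returns -ENOMEM for the preferred placement, it falls back to the
      * allowed domains.
      */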
 456static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo)
 457{
 458        struct amdgpu_cs_parser *p = param;
 459        int r;
 460
 461        r = amdgpu_cs_bo_validate(p, bo);
 462        if (r)
 463                return r;
 464
 465        if (bo->shadow)
 466                r = amdgpu_cs_bo_validate(p, bo->shadow);
 467
 468        return r;
 469}
 470
 471static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 472                            struct list_head *validated)
 473{
 474        struct ttm_operation_ctx ctx = { true, false };
 475        struct amdgpu_bo_list_entry *lobj;
 476        int r;
 477
 478        list_for_each_entry(lobj, validated, tv.head) {
 479                struct amdgpu_bo *bo = ttm_to_amdgpu_bo(lobj->tv.bo);
 480                struct mm_struct *usermm;
 481
 482                usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm);
 483                if (usermm && usermm != current->mm)
 484                        return -EPERM;
 485
 486                if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) &&
 487                    lobj->user_invalidated && lobj->user_pages) {
 488                        amdgpu_bo_placement_from_domain(bo,
 489                                                        AMDGPU_GEM_DOMAIN_CPU);
 490                        r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 491                        if (r)
 492                                return r;
 493
 494                        amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
 495                                                     lobj->user_pages);
 496                }
 497
 498                r = amdgpu_cs_validate(p, bo);
 499                if (r)
 500                        return r;
 501
 502                kvfree(lobj->user_pages);
 503                lobj->user_pages = NULL;
 504        }
 505        return 0;
 506}
 507
 508static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 509                                union drm_amdgpu_cs *cs)
 510{
 511        struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 512        struct amdgpu_vm *vm = &fpriv->vm;
 513        struct amdgpu_bo_list_entry *e;
 514        struct list_head duplicates;
 515        struct amdgpu_bo *gds;
 516        struct amdgpu_bo *gws;
 517        struct amdgpu_bo *oa;
 518        int r;
 519
 520        INIT_LIST_HEAD(&p->validated);
 521
 522        /* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */
 523        if (cs->in.bo_list_handle) {
 524                if (p->bo_list)
 525                        return -EINVAL;
 526
 527                r = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle,
 528                                       &p->bo_list);
 529                if (r)
 530                        return r;
 531        } else if (!p->bo_list) {
  532                /* Create an empty bo_list when no handle is provided */
 533                r = amdgpu_bo_list_create(p->adev, p->filp, NULL, 0,
 534                                          &p->bo_list);
 535                if (r)
 536                        return r;
 537        }
 538
 539        /* One for TTM and one for the CS job */
 540        amdgpu_bo_list_for_each_entry(e, p->bo_list)
 541                e->tv.num_shared = 2;
 542
 543        amdgpu_bo_list_get_list(p->bo_list, &p->validated);
 544
 545        INIT_LIST_HEAD(&duplicates);
 546        amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
 547
 548        if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
 549                list_add(&p->uf_entry.tv.head, &p->validated);
 550
  551        /* Get userptr backing pages. If the pages are updated after being
  552         * registered in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate()
  553         * will do amdgpu_ttm_backend_bind() to flush and invalidate new pages.
  554         */
 555        amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
 556                struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 557                bool userpage_invalidated = false;
 558                int i;
 559
 560                e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
 561                                        sizeof(struct page *),
 562                                        GFP_KERNEL | __GFP_ZERO);
 563                if (!e->user_pages) {
 564                        DRM_ERROR("calloc failure\n");
 565                        return -ENOMEM;
 566                }
 567
 568                r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages);
 569                if (r) {
 570                        kvfree(e->user_pages);
 571                        e->user_pages = NULL;
 572                        return r;
 573                }
 574
 575                for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
 576                        if (bo->tbo.ttm->pages[i] != e->user_pages[i]) {
 577                                userpage_invalidated = true;
 578                                break;
 579                        }
 580                }
 581                e->user_invalidated = userpage_invalidated;
 582        }
 583
 584        r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
 585                                   &duplicates);
 586        if (unlikely(r != 0)) {
 587                if (r != -ERESTARTSYS)
 588                        DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
 589                goto out;
 590        }
 591
 592        amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
 593                                          &p->bytes_moved_vis_threshold);
 594        p->bytes_moved = 0;
 595        p->bytes_moved_vis = 0;
 596
 597        r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm,
 598                                      amdgpu_cs_validate, p);
 599        if (r) {
 600                DRM_ERROR("amdgpu_vm_validate_pt_bos() failed.\n");
 601                goto error_validate;
 602        }
 603
 604        r = amdgpu_cs_list_validate(p, &duplicates);
 605        if (r)
 606                goto error_validate;
 607
 608        r = amdgpu_cs_list_validate(p, &p->validated);
 609        if (r)
 610                goto error_validate;
 611
 612        amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
 613                                     p->bytes_moved_vis);
 614
 615        gds = p->bo_list->gds_obj;
 616        gws = p->bo_list->gws_obj;
 617        oa = p->bo_list->oa_obj;
 618
 619        amdgpu_bo_list_for_each_entry(e, p->bo_list) {
 620                struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 621
 622                /* Make sure we use the exclusive slot for shared BOs */
 623                if (bo->prime_shared_count)
 624                        e->tv.num_shared = 0;
 625                e->bo_va = amdgpu_vm_bo_find(vm, bo);
 626        }
 627
 628        if (gds) {
 629                p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
 630                p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
 631        }
 632        if (gws) {
 633                p->job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT;
 634                p->job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT;
 635        }
 636        if (oa) {
 637                p->job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT;
 638                p->job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT;
 639        }
 640
 641        if (!r && p->uf_entry.tv.bo) {
 642                struct amdgpu_bo *uf = ttm_to_amdgpu_bo(p->uf_entry.tv.bo);
 643
 644                r = amdgpu_ttm_alloc_gart(&uf->tbo);
 645                p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
 646        }
 647
 648error_validate:
 649        if (r)
 650                ttm_eu_backoff_reservation(&p->ticket, &p->validated);
 651out:
 652        return r;
 653}
 654
 655static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
 656{
 657        struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 658        struct amdgpu_bo_list_entry *e;
 659        int r;
 660
 661        list_for_each_entry(e, &p->validated, tv.head) {
 662                struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 663                struct dma_resv *resv = bo->tbo.base.resv;
 664                enum amdgpu_sync_mode sync_mode;
 665
 666                sync_mode = amdgpu_bo_explicit_sync(bo) ?
 667                        AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER;
 668                r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, sync_mode,
 669                                     &fpriv->vm);
 670                if (r)
 671                        return r;
 672        }
 673        return 0;
 674}
 675
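     /* amdgpu_cs_sync_rings() above pulls the fences already attached to each
      * reserved BO into the job's sync object. With the default
      * AMDGPU_SYNC_NE_OWNER mode this gives implicit synchronization against
      * other clients but not against previous work from the same VM; BOs
      * created with the explicit-sync flag rely on the dependency chunks
      * instead.
      */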
 676/**
  677 * amdgpu_cs_parser_fini() - clean parser states
 678 * @parser:     parser structure holding parsing context.
 679 * @error:      error number
 680 *
  681 * If error is set, back off the buffer reservations; otherwise just free
  682 * the memory used by the parsing context.
 683 **/
 684static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
 685                                  bool backoff)
 686{
 687        unsigned i;
 688
 689        if (error && backoff)
 690                ttm_eu_backoff_reservation(&parser->ticket,
 691                                           &parser->validated);
 692
 693        for (i = 0; i < parser->num_post_deps; i++) {
 694                drm_syncobj_put(parser->post_deps[i].syncobj);
 695                kfree(parser->post_deps[i].chain);
 696        }
 697        kfree(parser->post_deps);
 698
 699        dma_fence_put(parser->fence);
 700
 701        if (parser->ctx) {
 702                mutex_unlock(&parser->ctx->lock);
 703                amdgpu_ctx_put(parser->ctx);
 704        }
 705        if (parser->bo_list)
 706                amdgpu_bo_list_put(parser->bo_list);
 707
 708        for (i = 0; i < parser->nchunks; i++)
 709                kvfree(parser->chunks[i].kdata);
 710        kfree(parser->chunks);
 711        if (parser->job)
 712                amdgpu_job_free(parser->job);
 713        if (parser->uf_entry.tv.bo) {
 714                struct amdgpu_bo *uf = ttm_to_amdgpu_bo(parser->uf_entry.tv.bo);
 715
 716                amdgpu_bo_unref(&uf);
 717        }
 718}
 719
 720static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 721{
 722        struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
 723        struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 724        struct amdgpu_device *adev = p->adev;
 725        struct amdgpu_vm *vm = &fpriv->vm;
 726        struct amdgpu_bo_list_entry *e;
 727        struct amdgpu_bo_va *bo_va;
 728        struct amdgpu_bo *bo;
 729        int r;
 730
 731        /* Only for UVD/VCE VM emulation */
 732        if (ring->funcs->parse_cs || ring->funcs->patch_cs_in_place) {
 733                unsigned i, j;
 734
 735                for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
 736                        struct drm_amdgpu_cs_chunk_ib *chunk_ib;
 737                        struct amdgpu_bo_va_mapping *m;
 738                        struct amdgpu_bo *aobj = NULL;
 739                        struct amdgpu_cs_chunk *chunk;
 740                        uint64_t offset, va_start;
 741                        struct amdgpu_ib *ib;
 742                        uint8_t *kptr;
 743
 744                        chunk = &p->chunks[i];
 745                        ib = &p->job->ibs[j];
 746                        chunk_ib = chunk->kdata;
 747
 748                        if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
 749                                continue;
 750
 751                        va_start = chunk_ib->va_start & AMDGPU_GMC_HOLE_MASK;
 752                        r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
 753                        if (r) {
 754                                DRM_ERROR("IB va_start is invalid\n");
 755                                return r;
 756                        }
 757
 758                        if ((va_start + chunk_ib->ib_bytes) >
 759                            (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
 760                                DRM_ERROR("IB va_start+ib_bytes is invalid\n");
 761                                return -EINVAL;
 762                        }
 763
 764                        /* the IB should be reserved at this point */
 765                        r = amdgpu_bo_kmap(aobj, (void **)&kptr);
 766                        if (r) {
 767                                return r;
 768                        }
 769
 770                        offset = m->start * AMDGPU_GPU_PAGE_SIZE;
 771                        kptr += va_start - offset;
 772
 773                        if (ring->funcs->parse_cs) {
 774                                memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
 775                                amdgpu_bo_kunmap(aobj);
 776
 777                                r = amdgpu_ring_parse_cs(ring, p, j);
 778                                if (r)
 779                                        return r;
 780                        } else {
 781                                ib->ptr = (uint32_t *)kptr;
 782                                r = amdgpu_ring_patch_cs_in_place(ring, p, j);
 783                                amdgpu_bo_kunmap(aobj);
 784                                if (r)
 785                                        return r;
 786                        }
 787
 788                        j++;
 789                }
 790        }
 791
 792        if (!p->job->vm)
 793                return amdgpu_cs_sync_rings(p);
 794
 795
 796        r = amdgpu_vm_clear_freed(adev, vm, NULL);
 797        if (r)
 798                return r;
 799
 800        r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
 801        if (r)
 802                return r;
 803
 804        r = amdgpu_sync_vm_fence(&p->job->sync, fpriv->prt_va->last_pt_update);
 805        if (r)
 806                return r;
 807
 808        if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
 809                bo_va = fpriv->csa_va;
 810                BUG_ON(!bo_va);
 811                r = amdgpu_vm_bo_update(adev, bo_va, false);
 812                if (r)
 813                        return r;
 814
 815                r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
 816                if (r)
 817                        return r;
 818        }
 819
 820        amdgpu_bo_list_for_each_entry(e, p->bo_list) {
 821                /* ignore duplicates */
 822                bo = ttm_to_amdgpu_bo(e->tv.bo);
 823                if (!bo)
 824                        continue;
 825
 826                bo_va = e->bo_va;
 827                if (bo_va == NULL)
 828                        continue;
 829
 830                r = amdgpu_vm_bo_update(adev, bo_va, false);
 831                if (r)
 832                        return r;
 833
 834                r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
 835                if (r)
 836                        return r;
 837        }
 838
 839        r = amdgpu_vm_handle_moved(adev, vm);
 840        if (r)
 841                return r;
 842
 843        r = amdgpu_vm_update_pdes(adev, vm, false);
 844        if (r)
 845                return r;
 846
 847        r = amdgpu_sync_vm_fence(&p->job->sync, vm->last_update);
 848        if (r)
 849                return r;
 850
 851        p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);
 852
 853        if (amdgpu_vm_debug) {
 854                /* Invalidate all BOs to test for userspace bugs */
 855                amdgpu_bo_list_for_each_entry(e, p->bo_list) {
 856                        struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 857
 858                        /* ignore duplicates */
 859                        if (!bo)
 860                                continue;
 861
 862                        amdgpu_vm_bo_invalidate(adev, bo, false);
 863                }
 864        }
 865
 866        return amdgpu_cs_sync_rings(p);
 867}
 868
 869static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 870                             struct amdgpu_cs_parser *parser)
 871{
 872        struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
 873        struct amdgpu_vm *vm = &fpriv->vm;
 874        int r, ce_preempt = 0, de_preempt = 0;
 875        struct amdgpu_ring *ring;
 876        int i, j;
 877
 878        for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
 879                struct amdgpu_cs_chunk *chunk;
 880                struct amdgpu_ib *ib;
 881                struct drm_amdgpu_cs_chunk_ib *chunk_ib;
 882                struct drm_sched_entity *entity;
 883
 884                chunk = &parser->chunks[i];
 885                ib = &parser->job->ibs[j];
 886                chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
 887
 888                if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
 889                        continue;
 890
 891                if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
 892                    (amdgpu_mcbp || amdgpu_sriov_vf(adev))) {
 893                        if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
 894                                if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
 895                                        ce_preempt++;
 896                                else
 897                                        de_preempt++;
 898                        }
 899
  900                        /* each GFX submission allows at most 1 preemptible IB for CE and 1 for DE */
 901                        if (ce_preempt > 1 || de_preempt > 1)
 902                                return -EINVAL;
 903                }
 904
 905                r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type,
 906                                          chunk_ib->ip_instance, chunk_ib->ring,
 907                                          &entity);
 908                if (r)
 909                        return r;
 910
 911                if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
 912                        parser->job->preamble_status |=
 913                                AMDGPU_PREAMBLE_IB_PRESENT;
 914
 915                if (parser->entity && parser->entity != entity)
 916                        return -EINVAL;
 917
 918                /* Return if there is no run queue associated with this entity.
  919                 * Possibly because of a disabled HW IP. */
 920                if (entity->rq == NULL)
 921                        return -EINVAL;
 922
 923                parser->entity = entity;
 924
 925                ring = to_amdgpu_ring(entity->rq->sched);
  926                r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
 927                                   chunk_ib->ib_bytes : 0, ib);
 928                if (r) {
 929                        DRM_ERROR("Failed to get ib !\n");
 930                        return r;
 931                }
 932
 933                ib->gpu_addr = chunk_ib->va_start;
 934                ib->length_dw = chunk_ib->ib_bytes / 4;
 935                ib->flags = chunk_ib->flags;
 936
 937                j++;
 938        }
 939
 940        /* MM engine doesn't support user fences */
 941        ring = to_amdgpu_ring(parser->entity->rq->sched);
 942        if (parser->job->uf_addr && ring->funcs->no_user_fence)
 943                return -EINVAL;
 944
 945        return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity);
 946}
 947
 948static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
 949                                       struct amdgpu_cs_chunk *chunk)
 950{
 951        struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 952        unsigned num_deps;
 953        int i, r;
 954        struct drm_amdgpu_cs_chunk_dep *deps;
 955
 956        deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
 957        num_deps = chunk->length_dw * 4 /
 958                sizeof(struct drm_amdgpu_cs_chunk_dep);
 959
 960        for (i = 0; i < num_deps; ++i) {
 961                struct amdgpu_ctx *ctx;
 962                struct drm_sched_entity *entity;
 963                struct dma_fence *fence;
 964
 965                ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
 966                if (ctx == NULL)
 967                        return -EINVAL;
 968
 969                r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type,
 970                                          deps[i].ip_instance,
 971                                          deps[i].ring, &entity);
 972                if (r) {
 973                        amdgpu_ctx_put(ctx);
 974                        return r;
 975                }
 976
 977                fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle);
 978                amdgpu_ctx_put(ctx);
 979
 980                if (IS_ERR(fence))
 981                        return PTR_ERR(fence);
 982                else if (!fence)
 983                        continue;
 984
 985                if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
 986                        struct drm_sched_fence *s_fence;
 987                        struct dma_fence *old = fence;
 988
 989                        s_fence = to_drm_sched_fence(fence);
 990                        fence = dma_fence_get(&s_fence->scheduled);
 991                        dma_fence_put(old);
 992                }
 993
 994                r = amdgpu_sync_fence(&p->job->sync, fence, true);
 995                dma_fence_put(fence);
 996                if (r)
 997                        return r;
 998        }
 999        return 0;
1000}
1001
1002static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
1003                                                 uint32_t handle, u64 point,
1004                                                 u64 flags)
1005{
1006        struct dma_fence *fence;
1007        int r;
1008
1009        r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence);
1010        if (r) {
1011                DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n",
1012                          handle, point, r);
1013                return r;
1014        }
1015
1016        r = amdgpu_sync_fence(&p->job->sync, fence, true);
1017        dma_fence_put(fence);
1018
1019        return r;
1020}
1021
1022static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
1023                                            struct amdgpu_cs_chunk *chunk)
1024{
1025        struct drm_amdgpu_cs_chunk_sem *deps;
1026        unsigned num_deps;
1027        int i, r;
1028
1029        deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
1030        num_deps = chunk->length_dw * 4 /
1031                sizeof(struct drm_amdgpu_cs_chunk_sem);
1032        for (i = 0; i < num_deps; ++i) {
1033                r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle,
1034                                                          0, 0);
1035                if (r)
1036                        return r;
1037        }
1038
1039        return 0;
1040}
1041
1042
1043static int amdgpu_cs_process_syncobj_timeline_in_dep(struct amdgpu_cs_parser *p,
1044                                                     struct amdgpu_cs_chunk *chunk)
1045{
1046        struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
1047        unsigned num_deps;
1048        int i, r;
1049
1050        syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
1051        num_deps = chunk->length_dw * 4 /
1052                sizeof(struct drm_amdgpu_cs_chunk_syncobj);
1053        for (i = 0; i < num_deps; ++i) {
1054                r = amdgpu_syncobj_lookup_and_add_to_sync(p,
1055                                                          syncobj_deps[i].handle,
1056                                                          syncobj_deps[i].point,
1057                                                          syncobj_deps[i].flags);
1058                if (r)
1059                        return r;
1060        }
1061
1062        return 0;
1063}
1064
1065static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
1066                                             struct amdgpu_cs_chunk *chunk)
1067{
1068        struct drm_amdgpu_cs_chunk_sem *deps;
1069        unsigned num_deps;
1070        int i;
1071
1072        deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
1073        num_deps = chunk->length_dw * 4 /
1074                sizeof(struct drm_amdgpu_cs_chunk_sem);
1075
1076        if (p->post_deps)
1077                return -EINVAL;
1078
1079        p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
1080                                     GFP_KERNEL);
1081        p->num_post_deps = 0;
1082
1083        if (!p->post_deps)
1084                return -ENOMEM;
1085
1086
1087        for (i = 0; i < num_deps; ++i) {
1088                p->post_deps[i].syncobj =
1089                        drm_syncobj_find(p->filp, deps[i].handle);
1090                if (!p->post_deps[i].syncobj)
1091                        return -EINVAL;
1092                p->post_deps[i].chain = NULL;
1093                p->post_deps[i].point = 0;
1094                p->num_post_deps++;
1095        }
1096
1097        return 0;
1098}
1099
1100
1101static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p,
1102                                                      struct amdgpu_cs_chunk *chunk)
1103{
1104        struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
1105        unsigned num_deps;
1106        int i;
1107
1108        syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
1109        num_deps = chunk->length_dw * 4 /
1110                sizeof(struct drm_amdgpu_cs_chunk_syncobj);
1111
1112        if (p->post_deps)
1113                return -EINVAL;
1114
1115        p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
1116                                     GFP_KERNEL);
1117        p->num_post_deps = 0;
1118
1119        if (!p->post_deps)
1120                return -ENOMEM;
1121
1122        for (i = 0; i < num_deps; ++i) {
1123                struct amdgpu_cs_post_dep *dep = &p->post_deps[i];
1124
1125                dep->chain = NULL;
1126                if (syncobj_deps[i].point) {
1127                        dep->chain = kmalloc(sizeof(*dep->chain), GFP_KERNEL);
1128                        if (!dep->chain)
1129                                return -ENOMEM;
1130                }
1131
1132                dep->syncobj = drm_syncobj_find(p->filp,
1133                                                syncobj_deps[i].handle);
1134                if (!dep->syncobj) {
1135                        kfree(dep->chain);
1136                        return -EINVAL;
1137                }
1138                dep->point = syncobj_deps[i].point;
1139                p->num_post_deps++;
1140        }
1141
1142        return 0;
1143}
1144
1145static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
1146                                  struct amdgpu_cs_parser *p)
1147{
1148        int i, r;
1149
1150        for (i = 0; i < p->nchunks; ++i) {
1151                struct amdgpu_cs_chunk *chunk;
1152
1153                chunk = &p->chunks[i];
1154
1155                switch (chunk->chunk_id) {
1156                case AMDGPU_CHUNK_ID_DEPENDENCIES:
1157                case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
1158                        r = amdgpu_cs_process_fence_dep(p, chunk);
1159                        if (r)
1160                                return r;
1161                        break;
1162                case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
1163                        r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
1164                        if (r)
1165                                return r;
1166                        break;
1167                case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
1168                        r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
1169                        if (r)
1170                                return r;
1171                        break;
1172                case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
1173                        r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk);
1174                        if (r)
1175                                return r;
1176                        break;
1177                case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
1178                        r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk);
1179                        if (r)
1180                                return r;
1181                        break;
1182                }
1183        }
1184
1185        return 0;
1186}
1187
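     /* The post_deps collected by the *_out_dep helpers above are only syncobj
      * handles (plus optional chain nodes for timeline points); they are
      * signalled with the submission's fence by amdgpu_cs_post_dependencies()
      * below, which is called from amdgpu_cs_submit().
      */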
1188static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
1189{
1190        int i;
1191
1192        for (i = 0; i < p->num_post_deps; ++i) {
1193                if (p->post_deps[i].chain && p->post_deps[i].point) {
1194                        drm_syncobj_add_point(p->post_deps[i].syncobj,
1195                                              p->post_deps[i].chain,
1196                                              p->fence, p->post_deps[i].point);
1197                        p->post_deps[i].chain = NULL;
1198                } else {
1199                        drm_syncobj_replace_fence(p->post_deps[i].syncobj,
1200                                                  p->fence);
1201                }
1202        }
1203}
1204
1205static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
1206                            union drm_amdgpu_cs *cs)
1207{
1208        struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
1209        struct drm_sched_entity *entity = p->entity;
1210        enum drm_sched_priority priority;
1211        struct amdgpu_bo_list_entry *e;
1212        struct amdgpu_job *job;
1213        uint64_t seq;
1214        int r;
1215
1216        job = p->job;
1217        p->job = NULL;
1218
1219        r = drm_sched_job_init(&job->base, entity, &fpriv->vm);
1220        if (r)
1221                goto error_unlock;
1222
 1223        /* No memory allocation is allowed while holding the notifier lock.
 1224         * The lock is held until amdgpu_cs_submit() is finished and the
 1225         * fence is added to the BOs.
 1226         */
1227        mutex_lock(&p->adev->notifier_lock);
1228
 1229        /* If userptr pages are invalidated after amdgpu_cs_parser_bos(),
 1230         * return -EAGAIN; drmIoctl() in libdrm will restart the amdgpu_cs_ioctl.
 1231         */
1232        amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
1233                struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
1234
1235                r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
1236        }
1237        if (r) {
1238                r = -EAGAIN;
1239                goto error_abort;
1240        }
1241
1242        p->fence = dma_fence_get(&job->base.s_fence->finished);
1243
1244        amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
1245        amdgpu_cs_post_dependencies(p);
1246
1247        if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
1248            !p->ctx->preamble_presented) {
1249                job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
1250                p->ctx->preamble_presented = true;
1251        }
1252
1253        cs->out.handle = seq;
1254        job->uf_sequence = seq;
1255
1256        amdgpu_job_free_resources(job);
1257
1258        trace_amdgpu_cs_ioctl(job);
1259        amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->ticket);
1260        priority = job->base.s_priority;
1261        drm_sched_entity_push_job(&job->base, entity);
1262
1263        amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
1264
1265        ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
1266        mutex_unlock(&p->adev->notifier_lock);
1267
1268        return 0;
1269
1270error_abort:
1271        drm_sched_job_cleanup(&job->base);
1272        mutex_unlock(&p->adev->notifier_lock);
1273
1274error_unlock:
1275        amdgpu_job_free(job);
1276        return r;
1277}
1278
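     /* Overall CS ioctl flow: parse the chunks (amdgpu_cs_parser_init), set up
      * the IBs and pick the scheduler entity (amdgpu_cs_ib_fill), collect the
      * wait dependencies (amdgpu_cs_dependencies), reserve and validate the
      * buffers (amdgpu_cs_parser_bos), update the page tables and patch UVD/VCE
      * command streams (amdgpu_cs_vm_handling), and finally hand the job to the
      * scheduler (amdgpu_cs_submit).
      */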
1279int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
1280{
1281        struct amdgpu_device *adev = dev->dev_private;
1282        union drm_amdgpu_cs *cs = data;
1283        struct amdgpu_cs_parser parser = {};
1284        bool reserved_buffers = false;
1285        int i, r;
1286
1287        if (amdgpu_ras_intr_triggered())
1288                return -EHWPOISON;
1289
1290        if (!adev->accel_working)
1291                return -EBUSY;
1292
1293        parser.adev = adev;
1294        parser.filp = filp;
1295
1296        r = amdgpu_cs_parser_init(&parser, data);
1297        if (r) {
1298                DRM_ERROR("Failed to initialize parser %d!\n", r);
1299                goto out;
1300        }
1301
1302        r = amdgpu_cs_ib_fill(adev, &parser);
1303        if (r)
1304                goto out;
1305
1306        r = amdgpu_cs_dependencies(adev, &parser);
1307        if (r) {
1308                DRM_ERROR("Failed in the dependencies handling %d!\n", r);
1309                goto out;
1310        }
1311
1312        r = amdgpu_cs_parser_bos(&parser, data);
1313        if (r) {
1314                if (r == -ENOMEM)
1315                        DRM_ERROR("Not enough memory for command submission!\n");
1316                else if (r != -ERESTARTSYS && r != -EAGAIN)
1317                        DRM_ERROR("Failed to process the buffer list %d!\n", r);
1318                goto out;
1319        }
1320
1321        reserved_buffers = true;
1322
1323        for (i = 0; i < parser.job->num_ibs; i++)
1324                trace_amdgpu_cs(&parser, i);
1325
1326        r = amdgpu_cs_vm_handling(&parser);
1327        if (r)
1328                goto out;
1329
1330        r = amdgpu_cs_submit(&parser, cs);
1331
1332out:
1333        amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
1334
1335        return r;
1336}
1337
1338/**
1339 * amdgpu_cs_wait_ioctl - wait for a command submission to finish
1340 *
1341 * @dev: drm device
1342 * @data: data from userspace
1343 * @filp: file private
1344 *
1345 * Wait for the command submission identified by handle to finish.
1346 */
1347int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
1348                         struct drm_file *filp)
1349{
1350        union drm_amdgpu_wait_cs *wait = data;
1351        unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
1352        struct drm_sched_entity *entity;
1353        struct amdgpu_ctx *ctx;
1354        struct dma_fence *fence;
1355        long r;
1356
1357        ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
1358        if (ctx == NULL)
1359                return -EINVAL;
1360
1361        r = amdgpu_ctx_get_entity(ctx, wait->in.ip_type, wait->in.ip_instance,
1362                                  wait->in.ring, &entity);
1363        if (r) {
1364                amdgpu_ctx_put(ctx);
1365                return r;
1366        }
1367
1368        fence = amdgpu_ctx_get_fence(ctx, entity, wait->in.handle);
1369        if (IS_ERR(fence))
1370                r = PTR_ERR(fence);
1371        else if (fence) {
1372                r = dma_fence_wait_timeout(fence, true, timeout);
1373                if (r > 0 && fence->error)
1374                        r = fence->error;
1375                dma_fence_put(fence);
1376        } else
1377                r = 1;
1378
1379        amdgpu_ctx_put(ctx);
1380        if (r < 0)
1381                return r;
1382
1383        memset(wait, 0, sizeof(*wait));
1384        wait->out.status = (r == 0);
1385
1386        return 0;
1387}
1388
1389/**
1390 * amdgpu_cs_get_fence - helper to get fence from drm_amdgpu_fence
1391 *
1392 * @adev: amdgpu device
1393 * @filp: file private
1394 * @user: drm_amdgpu_fence copied from user space
1395 */
1396static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
1397                                             struct drm_file *filp,
1398                                             struct drm_amdgpu_fence *user)
1399{
1400        struct drm_sched_entity *entity;
1401        struct amdgpu_ctx *ctx;
1402        struct dma_fence *fence;
1403        int r;
1404
1405        ctx = amdgpu_ctx_get(filp->driver_priv, user->ctx_id);
1406        if (ctx == NULL)
1407                return ERR_PTR(-EINVAL);
1408
1409        r = amdgpu_ctx_get_entity(ctx, user->ip_type, user->ip_instance,
1410                                  user->ring, &entity);
1411        if (r) {
1412                amdgpu_ctx_put(ctx);
1413                return ERR_PTR(r);
1414        }
1415
1416        fence = amdgpu_ctx_get_fence(ctx, entity, user->seq_no);
1417        amdgpu_ctx_put(ctx);
1418
1419        return fence;
1420}
1421
1422int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
1423                                    struct drm_file *filp)
1424{
1425        struct amdgpu_device *adev = dev->dev_private;
1426        union drm_amdgpu_fence_to_handle *info = data;
1427        struct dma_fence *fence;
1428        struct drm_syncobj *syncobj;
1429        struct sync_file *sync_file;
1430        int fd, r;
1431
1432        fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence);
1433        if (IS_ERR(fence))
1434                return PTR_ERR(fence);
1435
1436        if (!fence)
1437                fence = dma_fence_get_stub();
1438
1439        switch (info->in.what) {
1440        case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ:
1441                r = drm_syncobj_create(&syncobj, 0, fence);
1442                dma_fence_put(fence);
1443                if (r)
1444                        return r;
1445                r = drm_syncobj_get_handle(filp, syncobj, &info->out.handle);
1446                drm_syncobj_put(syncobj);
1447                return r;
1448
1449        case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD:
1450                r = drm_syncobj_create(&syncobj, 0, fence);
1451                dma_fence_put(fence);
1452                if (r)
1453                        return r;
1454                r = drm_syncobj_get_fd(syncobj, (int*)&info->out.handle);
1455                drm_syncobj_put(syncobj);
1456                return r;
1457
1458        case AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD:
1459                fd = get_unused_fd_flags(O_CLOEXEC);
1460                if (fd < 0) {
1461                        dma_fence_put(fence);
1462                        return fd;
1463                }
1464
1465                sync_file = sync_file_create(fence);
1466                dma_fence_put(fence);
1467                if (!sync_file) {
1468                        put_unused_fd(fd);
1469                        return -ENOMEM;
1470                }
1471
1472                fd_install(fd, sync_file->file);
1473                info->out.handle = fd;
1474                return 0;
1475
1476        default:
1477                return -EINVAL;
1478        }
1479}
1480
1481/**
 1482 * amdgpu_cs_wait_all_fences - wait on all fences to signal
1483 *
1484 * @adev: amdgpu device
1485 * @filp: file private
1486 * @wait: wait parameters
1487 * @fences: array of drm_amdgpu_fence
1488 */
1489static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev,
1490                                     struct drm_file *filp,
1491                                     union drm_amdgpu_wait_fences *wait,
1492                                     struct drm_amdgpu_fence *fences)
1493{
1494        uint32_t fence_count = wait->in.fence_count;
1495        unsigned int i;
1496        long r = 1;
1497
1498        for (i = 0; i < fence_count; i++) {
1499                struct dma_fence *fence;
1500                unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);
1501
1502                fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
1503                if (IS_ERR(fence))
1504                        return PTR_ERR(fence);
1505                else if (!fence)
1506                        continue;
1507
1508                r = dma_fence_wait_timeout(fence, true, timeout);
1509                dma_fence_put(fence);
1510                if (r < 0)
1511                        return r;
1512
1513                if (r == 0)
1514                        break;
1515
1516                if (fence->error)
1517                        return fence->error;
1518        }
1519
1520        memset(wait, 0, sizeof(*wait));
1521        wait->out.status = (r > 0);
1522
1523        return 0;
1524}
1525
1526/**
1527 * amdgpu_cs_wait_any_fence - wait on any fence to signal
1528 *
1529 * @adev: amdgpu device
1530 * @filp: file private
1531 * @wait: wait parameters
1532 * @fences: array of drm_amdgpu_fence
1533 */
1534static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev,
1535                                    struct drm_file *filp,
1536                                    union drm_amdgpu_wait_fences *wait,
1537                                    struct drm_amdgpu_fence *fences)
1538{
1539        unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);
1540        uint32_t fence_count = wait->in.fence_count;
1541        uint32_t first = ~0;
1542        struct dma_fence **array;
1543        unsigned int i;
1544        long r;
1545
1546        /* Prepare the fence array */
1547        array = kcalloc(fence_count, sizeof(struct dma_fence *), GFP_KERNEL);
1548
1549        if (array == NULL)
1550                return -ENOMEM;
1551
1552        for (i = 0; i < fence_count; i++) {
1553                struct dma_fence *fence;
1554
1555                fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
1556                if (IS_ERR(fence)) {
1557                        r = PTR_ERR(fence);
1558                        goto err_free_fence_array;
1559                } else if (fence) {
1560                        array[i] = fence;
 1561                } else { /* NULL, the fence has already been signaled */
1562                        r = 1;
1563                        first = i;
1564                        goto out;
1565                }
1566        }
1567
1568        r = dma_fence_wait_any_timeout(array, fence_count, true, timeout,
1569                                       &first);
1570        if (r < 0)
1571                goto err_free_fence_array;
1572
1573out:
1574        memset(wait, 0, sizeof(*wait));
1575        wait->out.status = (r > 0);
1576        wait->out.first_signaled = first;
1577
1578        if (first < fence_count && array[first])
1579                r = array[first]->error;
1580        else
1581                r = 0;
1582
1583err_free_fence_array:
1584        for (i = 0; i < fence_count; i++)
1585                dma_fence_put(array[i]);
1586        kfree(array);
1587
1588        return r;
1589}
1590
1591/**
1592 * amdgpu_cs_wait_fences_ioctl - wait for multiple command submissions to finish
1593 *
1594 * @dev: drm device
1595 * @data: data from userspace
1596 * @filp: file private
1597 */
1598int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
1599                                struct drm_file *filp)
1600{
1601        struct amdgpu_device *adev = dev->dev_private;
1602        union drm_amdgpu_wait_fences *wait = data;
1603        uint32_t fence_count = wait->in.fence_count;
1604        struct drm_amdgpu_fence *fences_user;
1605        struct drm_amdgpu_fence *fences;
1606        int r;
1607
1608        /* Get the fences from userspace */
1609        fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence),
1610                        GFP_KERNEL);
1611        if (fences == NULL)
1612                return -ENOMEM;
1613
1614        fences_user = u64_to_user_ptr(wait->in.fences);
1615        if (copy_from_user(fences, fences_user,
1616                sizeof(struct drm_amdgpu_fence) * fence_count)) {
1617                r = -EFAULT;
1618                goto err_free_fences;
1619        }
1620
1621        if (wait->in.wait_all)
1622                r = amdgpu_cs_wait_all_fences(adev, filp, wait, fences);
1623        else
1624                r = amdgpu_cs_wait_any_fence(adev, filp, wait, fences);
1625
1626err_free_fences:
1627        kfree(fences);
1628
1629        return r;
1630}
1631
1632/**
 1633 * amdgpu_cs_find_mapping - find the BO and mapping for a VM address
 1634 *
 1635 * @parser: command submission parser context
 1636 * @addr: VM address
 1637 * @bo: resulting BO of the mapping found
 1638 * @map: resulting mapping of the address
 1639 *
 1640 * Search the buffer objects in the command submission context for a certain
 1641 * virtual memory address. Returns 0 on success, negative error code otherwise.
1642 */
1643int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
1644                           uint64_t addr, struct amdgpu_bo **bo,
1645                           struct amdgpu_bo_va_mapping **map)
1646{
1647        struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
1648        struct ttm_operation_ctx ctx = { false, false };
1649        struct amdgpu_vm *vm = &fpriv->vm;
1650        struct amdgpu_bo_va_mapping *mapping;
1651        int r;
1652
1653        addr /= AMDGPU_GPU_PAGE_SIZE;
1654
1655        mapping = amdgpu_vm_bo_lookup_mapping(vm, addr);
1656        if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo)
1657                return -EINVAL;
1658
1659        *bo = mapping->bo_va->base.bo;
1660        *map = mapping;
1661
1662        /* Double check that the BO is reserved by this CS */
1663        if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->ticket)
1664                return -EINVAL;
1665
1666        if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
1667                (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
1668                amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
1669                r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
1670                if (r)
1671                        return r;
1672        }
1673
1674        return amdgpu_ttm_alloc_gart(&(*bo)->tbo);
1675}
1676