linux/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: monk liu <monk.liu@amd.com>
 */

#include <drm/drm_auth.h>
#include "amdgpu.h"
#include "amdgpu_sched.h"
#include "amdgpu_ras.h"
#include <linux/nospec.h>

#define to_amdgpu_ctx_entity(e) \
        container_of((e), struct amdgpu_ctx_entity, entity)

const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
        [AMDGPU_HW_IP_GFX]      =       1,
        [AMDGPU_HW_IP_COMPUTE]  =       4,
        [AMDGPU_HW_IP_DMA]      =       2,
        [AMDGPU_HW_IP_UVD]      =       1,
        [AMDGPU_HW_IP_VCE]      =       1,
        [AMDGPU_HW_IP_UVD_ENC]  =       1,
        [AMDGPU_HW_IP_VCN_DEC]  =       1,
        [AMDGPU_HW_IP_VCN_ENC]  =       1,
        [AMDGPU_HW_IP_VCN_JPEG] =       1,
};

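/*
 * Check whether the caller is allowed to create a context at the requested
 * scheduler priority.  NORMAL and below are open to everyone; anything
 * higher requires CAP_SYS_NICE or DRM master status on the file.
 */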
static int amdgpu_ctx_priority_permit(struct drm_file *filp,
                                      enum drm_sched_priority priority)
{
        if (priority < 0 || priority >= DRM_SCHED_PRIORITY_COUNT)
                return -EINVAL;

        /* NORMAL and below are accessible by everyone */
        if (priority <= DRM_SCHED_PRIORITY_NORMAL)
                return 0;

        if (capable(CAP_SYS_NICE))
                return 0;

        if (drm_is_current_master(filp))
                return 0;

        return -EACCES;
}

static enum gfx_pipe_priority amdgpu_ctx_sched_prio_to_compute_prio(enum drm_sched_priority prio)
{
        switch (prio) {
        case DRM_SCHED_PRIORITY_HIGH:
        case DRM_SCHED_PRIORITY_KERNEL:
                return AMDGPU_GFX_PIPE_PRIO_HIGH;
        default:
                return AMDGPU_GFX_PIPE_PRIO_NORMAL;
        }
}

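/*
 * Translate a drm scheduler priority into a hardware ring priority for the
 * given IP type.  Only compute rings are mapped to multiple hardware
 * priorities here; if no scheduler is configured at the chosen level, fall
 * back to the default ring priority.
 */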
static unsigned int amdgpu_ctx_prio_sched_to_hw(struct amdgpu_device *adev,
                                                enum drm_sched_priority prio,
                                                u32 hw_ip)
{
        unsigned int hw_prio;

        hw_prio = (hw_ip == AMDGPU_HW_IP_COMPUTE) ?
                        amdgpu_ctx_sched_prio_to_compute_prio(prio) :
                        AMDGPU_RING_PRIO_DEFAULT;
        hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
        if (adev->gpu_sched[hw_ip][hw_prio].num_scheds == 0)
                hw_prio = AMDGPU_RING_PRIO_DEFAULT;

        return hw_prio;
}

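/*
 * Allocate and initialise the scheduler entity for one ring of an IP type
 * on first use.  The entity carries a ring buffer of amdgpu_sched_jobs
 * fence pointers and is bound to the schedulers that match the context's
 * effective priority.  UVD and VCN engines keep state across dependent
 * jobs, so their entities are pinned to a single scheduler instead of
 * being load balanced.
 */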
static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
                                  const u32 ring)
{
        struct amdgpu_device *adev = ctx->adev;
        struct amdgpu_ctx_entity *entity;
        struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
        unsigned num_scheds = 0;
        unsigned int hw_prio;
        enum drm_sched_priority priority;
        int r;

        entity = kzalloc(struct_size(entity, fences, amdgpu_sched_jobs),
                         GFP_KERNEL);
        if (!entity)
                return -ENOMEM;

        entity->sequence = 1;
        priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
                                ctx->init_priority : ctx->override_priority;
        hw_prio = amdgpu_ctx_prio_sched_to_hw(adev, priority, hw_ip);

        hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
        scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
        num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;

        /* disable load balance if the hw engine retains context among dependent jobs */
        if (hw_ip == AMDGPU_HW_IP_VCN_ENC ||
            hw_ip == AMDGPU_HW_IP_VCN_DEC ||
            hw_ip == AMDGPU_HW_IP_UVD_ENC ||
            hw_ip == AMDGPU_HW_IP_UVD) {
                sched = drm_sched_pick_best(scheds, num_scheds);
                scheds = &sched;
                num_scheds = 1;
        }

        r = drm_sched_entity_init(&entity->entity, priority, scheds, num_scheds,
                                  &ctx->guilty);
        if (r)
                goto error_free_entity;

        ctx->entities[hw_ip][ring] = entity;
        return 0;

error_free_entity:
        kfree(entity);

        return r;
}

static int amdgpu_ctx_init(struct amdgpu_device *adev,
                           enum drm_sched_priority priority,
                           struct drm_file *filp,
                           struct amdgpu_ctx *ctx)
{
        int r;

        r = amdgpu_ctx_priority_permit(filp, priority);
        if (r)
                return r;

        memset(ctx, 0, sizeof(*ctx));

        ctx->adev = adev;

        kref_init(&ctx->refcount);
        spin_lock_init(&ctx->ring_lock);
        mutex_init(&ctx->lock);

        ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
        ctx->reset_counter_query = ctx->reset_counter;
        ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
        ctx->init_priority = priority;
        ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;

        return 0;
}

static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
{
        int i;

        if (!entity)
                return;

        for (i = 0; i < amdgpu_sched_jobs; ++i)
                dma_fence_put(entity->fences[i]);

        kfree(entity);
}

static void amdgpu_ctx_fini(struct kref *ref)
{
        struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
        struct amdgpu_device *adev = ctx->adev;
        unsigned i, j;

        if (!adev)
                return;

        for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
                for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
                        amdgpu_ctx_fini_entity(ctx->entities[i][j]);
                        ctx->entities[i][j] = NULL;
                }
        }

        mutex_destroy(&ctx->lock);
        kfree(ctx);
}

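/*
 * Return (creating on first use) the drm_sched_entity for a hw_ip /
 * instance / ring triple.  Only instance 0 exists, and the ring index is
 * validated against amdgpu_ctx_num_entities[] for that IP type.
 */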
int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
                          u32 ring, struct drm_sched_entity **entity)
{
        int r;

        if (hw_ip >= AMDGPU_HW_IP_NUM) {
                DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
                return -EINVAL;
        }

        /* Right now all IPs have only one instance - multiple rings. */
        if (instance != 0) {
                DRM_DEBUG("invalid ip instance: %d\n", instance);
                return -EINVAL;
        }

        if (ring >= amdgpu_ctx_num_entities[hw_ip]) {
                DRM_DEBUG("invalid ring: %d %d\n", hw_ip, ring);
                return -EINVAL;
        }

        if (ctx->entities[hw_ip][ring] == NULL) {
                r = amdgpu_ctx_init_entity(ctx, hw_ip, ring);
                if (r)
                        return r;
        }

        *entity = &ctx->entities[hw_ip][ring]->entity;
        return 0;
}

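/*
 * Create a new context for this DRM file: reserve an IDR handle under the
 * manager lock, initialise the context at the requested priority and hand
 * the handle back through @id.  The handle is released again if the
 * initialisation fails.
 */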
static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
                            struct amdgpu_fpriv *fpriv,
                            struct drm_file *filp,
                            enum drm_sched_priority priority,
                            uint32_t *id)
{
        struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
        struct amdgpu_ctx *ctx;
        int r;

        ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
        if (!ctx)
                return -ENOMEM;

        mutex_lock(&mgr->lock);
        r = idr_alloc(&mgr->ctx_handles, ctx, 1, AMDGPU_VM_MAX_NUM_CTX, GFP_KERNEL);
        if (r < 0) {
                mutex_unlock(&mgr->lock);
                kfree(ctx);
                return r;
        }

        *id = (uint32_t)r;
        r = amdgpu_ctx_init(adev, priority, filp, ctx);
        if (r) {
                idr_remove(&mgr->ctx_handles, *id);
                *id = 0;
                kfree(ctx);
        }
        mutex_unlock(&mgr->lock);
        return r;
}

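/*
 * Final kref release path for a context: tear down every scheduler entity
 * that was created for it, then free the context itself through
 * amdgpu_ctx_fini().
 */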
static void amdgpu_ctx_do_release(struct kref *ref)
{
        struct amdgpu_ctx *ctx;
        u32 i, j;

        ctx = container_of(ref, struct amdgpu_ctx, refcount);
        for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
                for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
                        if (!ctx->entities[i][j])
                                continue;

                        drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
                }
        }

        amdgpu_ctx_fini(ref);
}

static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id)
{
        struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
        struct amdgpu_ctx *ctx;

        mutex_lock(&mgr->lock);
        ctx = idr_remove(&mgr->ctx_handles, id);
        if (ctx)
                kref_put(&ctx->refcount, amdgpu_ctx_do_release);
        mutex_unlock(&mgr->lock);
        return ctx ? 0 : -EINVAL;
}

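/*
 * Legacy AMDGPU_CTX_OP_QUERY_STATE: tell userspace whether a GPU reset has
 * happened since its previous query by comparing the context's snapshot of
 * gpu_reset_counter with the current value.
 */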
static int amdgpu_ctx_query(struct amdgpu_device *adev,
                            struct amdgpu_fpriv *fpriv, uint32_t id,
                            union drm_amdgpu_ctx_out *out)
{
        struct amdgpu_ctx *ctx;
        struct amdgpu_ctx_mgr *mgr;
        unsigned reset_counter;

        if (!fpriv)
                return -EINVAL;

        mgr = &fpriv->ctx_mgr;
        mutex_lock(&mgr->lock);
        ctx = idr_find(&mgr->ctx_handles, id);
        if (!ctx) {
                mutex_unlock(&mgr->lock);
                return -EINVAL;
        }

        /* TODO: these two are always zero */
        out->state.flags = 0x0;
        out->state.hangs = 0x0;

        /* determine if a GPU reset has occurred since the last call */
        reset_counter = atomic_read(&adev->gpu_reset_counter);
        /* TODO: this should ideally return NO, GUILTY, or INNOCENT. */
        if (ctx->reset_counter_query == reset_counter)
                out->state.reset_status = AMDGPU_CTX_NO_RESET;
        else
                out->state.reset_status = AMDGPU_CTX_UNKNOWN_RESET;
        ctx->reset_counter_query = reset_counter;

        mutex_unlock(&mgr->lock);
        return 0;
}

#define AMDGPU_RAS_COUNTE_DELAY_MS 3000

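/*
 * AMDGPU_CTX_OP_QUERY_STATE2: report reset, VRAM-lost and guilty state as
 * flag bits, plus RAS correctable/uncorrectable error indications.  The RAS
 * numbers come from cached counters; refreshing them is pushed to delayed
 * work so the query itself stays O(1).
 */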
static int amdgpu_ctx_query2(struct amdgpu_device *adev,
                             struct amdgpu_fpriv *fpriv, uint32_t id,
                             union drm_amdgpu_ctx_out *out)
{
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
        struct amdgpu_ctx *ctx;
        struct amdgpu_ctx_mgr *mgr;

        if (!fpriv)
                return -EINVAL;

        mgr = &fpriv->ctx_mgr;
        mutex_lock(&mgr->lock);
        ctx = idr_find(&mgr->ctx_handles, id);
        if (!ctx) {
                mutex_unlock(&mgr->lock);
                return -EINVAL;
        }

        out->state.flags = 0x0;
        out->state.hangs = 0x0;

        if (ctx->reset_counter != atomic_read(&adev->gpu_reset_counter))
                out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET;

        if (ctx->vram_lost_counter != atomic_read(&adev->vram_lost_counter))
                out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST;

        if (atomic_read(&ctx->guilty))
                out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;

        if (adev->ras_enabled && con) {
                /* Return the cached values in O(1),
                 * and schedule delayed work to cache
                 * new values.
                 */
                int ce_count, ue_count;

                ce_count = atomic_read(&con->ras_ce_count);
                ue_count = atomic_read(&con->ras_ue_count);

                if (ce_count != ctx->ras_counter_ce) {
                        ctx->ras_counter_ce = ce_count;
                        out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_CE;
                }

                if (ue_count != ctx->ras_counter_ue) {
                        ctx->ras_counter_ue = ue_count;
                        out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_UE;
                }

                schedule_delayed_work(&con->ras_counte_delay_work,
                                      msecs_to_jiffies(AMDGPU_RAS_COUNTE_DELAY_MS));
        }

        mutex_unlock(&mgr->lock);
        return 0;
}

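/*
 * Context ioctl entry point: dispatches context alloc, free and the two
 * query operations.  A priority value that does not translate to a
 * scheduler priority is treated as NORMAL instead of being rejected, for
 * backwards compatibility.
 */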
int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
                     struct drm_file *filp)
{
        int r;
        uint32_t id;
        enum drm_sched_priority priority;

        union drm_amdgpu_ctx *args = data;
        struct amdgpu_device *adev = drm_to_adev(dev);
        struct amdgpu_fpriv *fpriv = filp->driver_priv;

        id = args->in.ctx_id;
        r = amdgpu_to_sched_priority(args->in.priority, &priority);

        /* For backwards compatibility reasons, we need to accept
         * ioctls with garbage in the priority field */
        if (r == -EINVAL)
                priority = DRM_SCHED_PRIORITY_NORMAL;

        switch (args->in.op) {
        case AMDGPU_CTX_OP_ALLOC_CTX:
                r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id);
                args->out.alloc.ctx_id = id;
                break;
        case AMDGPU_CTX_OP_FREE_CTX:
                r = amdgpu_ctx_free(fpriv, id);
                break;
        case AMDGPU_CTX_OP_QUERY_STATE:
                r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
                break;
        case AMDGPU_CTX_OP_QUERY_STATE2:
                r = amdgpu_ctx_query2(adev, fpriv, id, &args->out);
                break;
        default:
                return -EINVAL;
        }

        return r;
}

struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id)
{
        struct amdgpu_ctx *ctx;
        struct amdgpu_ctx_mgr *mgr;

        if (!fpriv)
                return NULL;

        mgr = &fpriv->ctx_mgr;

        mutex_lock(&mgr->lock);
        ctx = idr_find(&mgr->ctx_handles, id);
        if (ctx)
                kref_get(&ctx->refcount);
        mutex_unlock(&mgr->lock);
        return ctx;
}

int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
{
        if (ctx == NULL)
                return -EINVAL;

        kref_put(&ctx->refcount, amdgpu_ctx_do_release);
        return 0;
}

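/*
 * Store the fence of a freshly submitted job in the entity's fence ring
 * buffer and return its sequence number through @handle.  The ring buffer
 * slot being recycled must already hold a signaled fence (BUG_ON
 * otherwise); the old fence reference is dropped after the new one is
 * published.
 */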
void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
                          struct drm_sched_entity *entity,
                          struct dma_fence *fence, uint64_t *handle)
{
        struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
        uint64_t seq = centity->sequence;
        struct dma_fence *other = NULL;
        unsigned idx = 0;

        idx = seq & (amdgpu_sched_jobs - 1);
        other = centity->fences[idx];
        if (other)
                BUG_ON(!dma_fence_is_signaled(other));

        dma_fence_get(fence);

        spin_lock(&ctx->ring_lock);
        centity->fences[idx] = fence;
        centity->sequence++;
        spin_unlock(&ctx->ring_lock);

        dma_fence_put(other);
        if (handle)
                *handle = seq;
}

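/*
 * Look up a fence previously published with amdgpu_ctx_add_fence() by its
 * sequence number.  ~0ull selects the most recent submission.  Sequence
 * numbers that were never handed out yield -EINVAL; ones whose ring buffer
 * slot has already been recycled yield NULL.
 */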
struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
                                       struct drm_sched_entity *entity,
                                       uint64_t seq)
{
        struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
        struct dma_fence *fence;

        spin_lock(&ctx->ring_lock);

        if (seq == ~0ull)
                seq = centity->sequence - 1;

        if (seq >= centity->sequence) {
                spin_unlock(&ctx->ring_lock);
                return ERR_PTR(-EINVAL);
        }

        if (seq + amdgpu_sched_jobs < centity->sequence) {
                spin_unlock(&ctx->ring_lock);
                return NULL;
        }

        fence = dma_fence_get(centity->fences[seq & (amdgpu_sched_jobs - 1)]);
        spin_unlock(&ctx->ring_lock);

        return fence;
}

static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx,
                                           struct amdgpu_ctx_entity *aentity,
                                           int hw_ip,
                                           enum drm_sched_priority priority)
{
        struct amdgpu_device *adev = ctx->adev;
        unsigned int hw_prio;
        struct drm_gpu_scheduler **scheds = NULL;
        unsigned num_scheds;

        /* set sw priority */
        drm_sched_entity_set_priority(&aentity->entity, priority);

        /* set hw priority */
        if (hw_ip == AMDGPU_HW_IP_COMPUTE) {
                hw_prio = amdgpu_ctx_prio_sched_to_hw(adev, priority,
                                                      AMDGPU_HW_IP_COMPUTE);
                hw_prio = array_index_nospec(hw_prio, AMDGPU_RING_PRIO_MAX);
                scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
                num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
                drm_sched_entity_modify_sched(&aentity->entity, scheds,
                                              num_scheds);
        }
}

void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
                                  enum drm_sched_priority priority)
{
        enum drm_sched_priority ctx_prio;
        unsigned i, j;

        ctx->override_priority = priority;

        ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
                        ctx->init_priority : ctx->override_priority;
        for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
                for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
                        if (!ctx->entities[i][j])
                                continue;

                        amdgpu_ctx_set_entity_priority(ctx, ctx->entities[i][j],
                                                       i, ctx_prio);
                }
        }
}

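/*
 * Throttle command submission on an entity: wait (interruptibly) for the
 * fence currently occupying the ring buffer slot that the next submission
 * will reuse, so amdgpu_ctx_add_fence() never overwrites an unsignaled
 * fence.
 */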
int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
                               struct drm_sched_entity *entity)
{
        struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
        struct dma_fence *other;
        unsigned idx;
        long r;

        spin_lock(&ctx->ring_lock);
        idx = centity->sequence & (amdgpu_sched_jobs - 1);
        other = dma_fence_get(centity->fences[idx]);
        spin_unlock(&ctx->ring_lock);

        if (!other)
                return 0;

        r = dma_fence_wait(other, true);
        if (r < 0 && r != -ERESTARTSYS)
                DRM_ERROR("Error (%ld) waiting for fence!\n", r);

        dma_fence_put(other);
        return r;
}

void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
{
        mutex_init(&mgr->lock);
        idr_init(&mgr->ctx_handles);
}

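/*
 * Flush all scheduler entities of every context owned by this manager,
 * giving their queued jobs up to @timeout to drain; the remaining timeout
 * is passed on to each entity and finally returned.
 */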
long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
{
        struct amdgpu_ctx *ctx;
        struct idr *idp;
        uint32_t id, i, j;

        idp = &mgr->ctx_handles;

        mutex_lock(&mgr->lock);
        idr_for_each_entry(idp, ctx, id) {
                for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
                        for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
                                struct drm_sched_entity *entity;

                                if (!ctx->entities[i][j])
                                        continue;

                                entity = &ctx->entities[i][j]->entity;
                                timeout = drm_sched_entity_flush(entity, timeout);
                        }
                }
        }
        mutex_unlock(&mgr->lock);
        return timeout;
}

void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
{
        struct amdgpu_ctx *ctx;
        struct idr *idp;
        uint32_t id, i, j;

        idp = &mgr->ctx_handles;

        idr_for_each_entry(idp, ctx, id) {
                if (kref_read(&ctx->refcount) != 1) {
                        DRM_ERROR("ctx %p is still alive\n", ctx);
                        continue;
                }

                for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
                        for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
                                struct drm_sched_entity *entity;

                                if (!ctx->entities[i][j])
                                        continue;

                                entity = &ctx->entities[i][j]->entity;
                                drm_sched_entity_fini(entity);
                        }
                }
        }
}

void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
{
        struct amdgpu_ctx *ctx;
        struct idr *idp;
        uint32_t id;

        amdgpu_ctx_mgr_entity_fini(mgr);

        idp = &mgr->ctx_handles;

        idr_for_each_entry(idp, ctx, id) {
                if (kref_put(&ctx->refcount, amdgpu_ctx_fini) != 1)
                        DRM_ERROR("ctx %p is still alive\n", ctx);
        }

        idr_destroy(&mgr->ctx_handles);
        mutex_destroy(&mgr->lock);
}

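/*
 * Accumulate GPU time for a single entity: for every fence still held in
 * its ring buffer, add the span from when the job was scheduled until it
 * finished (or until now if it is still running) to *total, and record in
 * *max the longest time elapsed since any of those jobs was scheduled.
 */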
static void amdgpu_ctx_fence_time(struct amdgpu_ctx *ctx,
                struct amdgpu_ctx_entity *centity, ktime_t *total, ktime_t *max)
{
        ktime_t now, t1;
        uint32_t i;

        *total = *max = 0;

        now = ktime_get();
        for (i = 0; i < amdgpu_sched_jobs; i++) {
                struct dma_fence *fence;
                struct drm_sched_fence *s_fence;

                spin_lock(&ctx->ring_lock);
                fence = dma_fence_get(centity->fences[i]);
                spin_unlock(&ctx->ring_lock);
                if (!fence)
                        continue;
                s_fence = to_drm_sched_fence(fence);
                if (!dma_fence_is_signaled(&s_fence->scheduled)) {
                        dma_fence_put(fence);
                        continue;
                }
                t1 = s_fence->scheduled.timestamp;
                if (!ktime_before(t1, now)) {
                        dma_fence_put(fence);
                        continue;
                }
                if (dma_fence_is_signaled(&s_fence->finished) &&
                        s_fence->finished.timestamp < now)
                        *total += ktime_sub(s_fence->finished.timestamp, t1);
                else
                        *total += ktime_sub(now, t1);
                t1 = ktime_sub(now, t1);
                dma_fence_put(fence);
                *max = max(t1, *max);
        }
}

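/*
 * Sum amdgpu_ctx_fence_time() over every context in the manager for one
 * hw IP and ring index.  Entities whose max/total ratio falls below the
 * AMDGPU_CTX_FENCE_USAGE_MIN_RATIO threshold are skipped (see the comment
 * in the loop).  Returns the accumulated busy time; the largest elapsed
 * time is optionally reported through @elapsed.
 */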
ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr *mgr, uint32_t hwip,
                uint32_t idx, uint64_t *elapsed)
{
        struct idr *idp;
        struct amdgpu_ctx *ctx;
        uint32_t id;
        struct amdgpu_ctx_entity *centity;
        ktime_t total = 0, max = 0;

        if (idx >= AMDGPU_MAX_ENTITY_NUM)
                return 0;
        idp = &mgr->ctx_handles;
        mutex_lock(&mgr->lock);
        idr_for_each_entry(idp, ctx, id) {
                ktime_t ttotal, tmax;

                if (!ctx->entities[hwip][idx])
                        continue;

                centity = ctx->entities[hwip][idx];
                amdgpu_ctx_fence_time(ctx, centity, &ttotal, &tmax);

                /* Harmonic mean approximation diverges for very small
                 * values. If ratio < 0.01% ignore
                 */
                if (AMDGPU_CTX_FENCE_USAGE_MIN_RATIO(tmax, ttotal))
                        continue;

                total = ktime_add(total, ttotal);
                max = ktime_after(tmax, max) ? tmax : max;
        }

        mutex_unlock(&mgr->lock);
        if (elapsed)
                *elapsed = max;

        return total;
}