linux/drivers/gpu/drm/scheduler/sched_entity.c
<<
>>
Prefs
   1/*
   2 * Copyright 2015 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 *
  22 */
  23
  24#include <linux/kthread.h>
  25#include <drm/gpu_scheduler.h>
  26
  27#include "gpu_scheduler_trace.h"
  28
  29#define to_drm_sched_job(sched_job)             \
  30                container_of((sched_job), struct drm_sched_job, queue_node)
  31
  32/**
  33 * drm_sched_entity_init - Init a context entity used by scheduler when
  34 * submit to HW ring.
  35 *
  36 * @entity: scheduler entity to init
  37 * @rq_list: the list of run queue on which jobs from this
  38 *           entity can be submitted
  39 * @num_rq_list: number of run queue in rq_list
  40 * @guilty: atomic_t set to 1 when a job on this queue
  41 *          is found to be guilty causing a timeout
  42 *
  43 * Note: the rq_list should have atleast one element to schedule
  44 *       the entity
  45 *
  46 * Returns 0 on success or a negative error code on failure.
  47 */
  48int drm_sched_entity_init(struct drm_sched_entity *entity,
  49                          struct drm_sched_rq **rq_list,
  50                          unsigned int num_rq_list,
  51                          atomic_t *guilty)
  52{
  53        int i;
  54
  55        if (!(entity && rq_list && (num_rq_list == 0 || rq_list[0])))
  56                return -EINVAL;
  57
  58        memset(entity, 0, sizeof(struct drm_sched_entity));
  59        INIT_LIST_HEAD(&entity->list);
  60        entity->rq = NULL;
  61        entity->guilty = guilty;
  62        entity->num_rq_list = num_rq_list;
  63        entity->rq_list = kcalloc(num_rq_list, sizeof(struct drm_sched_rq *),
  64                                GFP_KERNEL);
  65        if (!entity->rq_list)
  66                return -ENOMEM;
  67
  68        for (i = 0; i < num_rq_list; ++i)
  69                entity->rq_list[i] = rq_list[i];
  70
  71        if (num_rq_list)
  72                entity->rq = rq_list[0];
  73
  74        entity->last_scheduled = NULL;
  75
  76        spin_lock_init(&entity->rq_lock);
  77        spsc_queue_init(&entity->job_queue);
  78
  79        atomic_set(&entity->fence_seq, 0);
  80        entity->fence_context = dma_fence_context_alloc(2);
  81
  82        return 0;
  83}
  84EXPORT_SYMBOL(drm_sched_entity_init);
  85
  86/**
  87 * drm_sched_entity_is_idle - Check if entity is idle
  88 *
  89 * @entity: scheduler entity
  90 *
  91 * Returns true if the entity does not have any unscheduled jobs.
  92 */
  93static bool drm_sched_entity_is_idle(struct drm_sched_entity *entity)
  94{
  95        rmb(); /* for list_empty to work without lock */
  96
  97        if (list_empty(&entity->list) ||
  98            spsc_queue_peek(&entity->job_queue) == NULL)
  99                return true;
 100
 101        return false;
 102}
 103
 104/**
 105 * drm_sched_entity_is_ready - Check if entity is ready
 106 *
 107 * @entity: scheduler entity
 108 *
 109 * Return true if entity could provide a job.
 110 */
 111bool drm_sched_entity_is_ready(struct drm_sched_entity *entity)
 112{
 113        if (spsc_queue_peek(&entity->job_queue) == NULL)
 114                return false;
 115
 116        if (READ_ONCE(entity->dependency))
 117                return false;
 118
 119        return true;
 120}
 121
 122/**
 123 * drm_sched_entity_get_free_sched - Get the rq from rq_list with least load
 124 *
 125 * @entity: scheduler entity
 126 *
 127 * Return the pointer to the rq with least load.
 128 */
 129static struct drm_sched_rq *
 130drm_sched_entity_get_free_sched(struct drm_sched_entity *entity)
 131{
 132        struct drm_sched_rq *rq = NULL;
 133        unsigned int min_jobs = UINT_MAX, num_jobs;
 134        int i;
 135
 136        for (i = 0; i < entity->num_rq_list; ++i) {
 137                struct drm_gpu_scheduler *sched = entity->rq_list[i]->sched;
 138
 139                if (!entity->rq_list[i]->sched->ready) {
 140                        DRM_WARN("sched%s is not ready, skipping", sched->name);
 141                        continue;
 142                }
 143
 144                num_jobs = atomic_read(&sched->num_jobs);
 145                if (num_jobs < min_jobs) {
 146                        min_jobs = num_jobs;
 147                        rq = entity->rq_list[i];
 148                }
 149        }
 150
 151        return rq;
 152}
 153
 154/**
 155 * drm_sched_entity_flush - Flush a context entity
 156 *
 157 * @entity: scheduler entity
 158 * @timeout: time to wait in for Q to become empty in jiffies.
 159 *
 160 * Splitting drm_sched_entity_fini() into two functions, The first one does the
 161 * waiting, removes the entity from the runqueue and returns an error when the
 162 * process was killed.
 163 *
 164 * Returns the remaining time in jiffies left from the input timeout
 165 */
 166long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout)
 167{
 168        struct drm_gpu_scheduler *sched;
 169        struct task_struct *last_user;
 170        long ret = timeout;
 171
 172        if (!entity->rq)
 173                return 0;
 174
 175        sched = entity->rq->sched;
 176        /**
 177         * The client will not queue more IBs during this fini, consume existing
 178         * queued IBs or discard them on SIGKILL
 179         */
 180        if (current->flags & PF_EXITING) {
 181                if (timeout)
 182                        ret = wait_event_timeout(
 183                                        sched->job_scheduled,
 184                                        drm_sched_entity_is_idle(entity),
 185                                        timeout);
 186        } else {
 187                wait_event_killable(sched->job_scheduled,
 188                                    drm_sched_entity_is_idle(entity));
 189        }
 190
 191        /* For killed process disable any more IBs enqueue right now */
 192        last_user = cmpxchg(&entity->last_user, current->group_leader, NULL);
 193        if ((!last_user || last_user == current->group_leader) &&
 194            (current->flags & PF_EXITING) && (current->exit_code == SIGKILL)) {
 195                spin_lock(&entity->rq_lock);
 196                entity->stopped = true;
 197                drm_sched_rq_remove_entity(entity->rq, entity);
 198                spin_unlock(&entity->rq_lock);
 199        }
 200
 201        return ret;
 202}
 203EXPORT_SYMBOL(drm_sched_entity_flush);
 204
 205/**
 206 * drm_sched_entity_kill_jobs - helper for drm_sched_entity_kill_jobs
 207 *
 208 * @f: signaled fence
 209 * @cb: our callback structure
 210 *
 211 * Signal the scheduler finished fence when the entity in question is killed.
 212 */
 213static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f,
 214                                          struct dma_fence_cb *cb)
 215{
 216        struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
 217                                                 finish_cb);
 218
 219        drm_sched_fence_finished(job->s_fence);
 220        WARN_ON(job->s_fence->parent);
 221        job->sched->ops->free_job(job);
 222}
 223
 224/**
 225 * drm_sched_entity_kill_jobs - Make sure all remaining jobs are killed
 226 *
 227 * @entity: entity which is cleaned up
 228 *
 229 * Makes sure that all remaining jobs in an entity are killed before it is
 230 * destroyed.
 231 */
 232static void drm_sched_entity_kill_jobs(struct drm_sched_entity *entity)
 233{
 234        struct drm_sched_job *job;
 235        int r;
 236
 237        while ((job = to_drm_sched_job(spsc_queue_pop(&entity->job_queue)))) {
 238                struct drm_sched_fence *s_fence = job->s_fence;
 239
 240                drm_sched_fence_scheduled(s_fence);
 241                dma_fence_set_error(&s_fence->finished, -ESRCH);
 242
 243                /*
 244                 * When pipe is hanged by older entity, new entity might
 245                 * not even have chance to submit it's first job to HW
 246                 * and so entity->last_scheduled will remain NULL
 247                 */
 248                if (!entity->last_scheduled) {
 249                        drm_sched_entity_kill_jobs_cb(NULL, &job->finish_cb);
 250                        continue;
 251                }
 252
 253                r = dma_fence_add_callback(entity->last_scheduled,
 254                                           &job->finish_cb,
 255                                           drm_sched_entity_kill_jobs_cb);
 256                if (r == -ENOENT)
 257                        drm_sched_entity_kill_jobs_cb(NULL, &job->finish_cb);
 258                else if (r)
 259                        DRM_ERROR("fence add callback failed (%d)\n", r);
 260        }
 261}
 262
 263/**
 264 * drm_sched_entity_cleanup - Destroy a context entity
 265 *
 266 * @entity: scheduler entity
 267 *
 268 * This should be called after @drm_sched_entity_do_release. It goes over the
 269 * entity and signals all jobs with an error code if the process was killed.
 270 *
 271 */
 272void drm_sched_entity_fini(struct drm_sched_entity *entity)
 273{
 274        struct drm_gpu_scheduler *sched = NULL;
 275
 276        if (entity->rq) {
 277                sched = entity->rq->sched;
 278                drm_sched_rq_remove_entity(entity->rq, entity);
 279        }
 280
 281        /* Consumption of existing IBs wasn't completed. Forcefully
 282         * remove them here.
 283         */
 284        if (spsc_queue_peek(&entity->job_queue)) {
 285                if (sched) {
 286                        /* Park the kernel for a moment to make sure it isn't processing
 287                         * our enity.
 288                         */
 289                        kthread_park(sched->thread);
 290                        kthread_unpark(sched->thread);
 291                }
 292                if (entity->dependency) {
 293                        dma_fence_remove_callback(entity->dependency,
 294                                                  &entity->cb);
 295                        dma_fence_put(entity->dependency);
 296                        entity->dependency = NULL;
 297                }
 298
 299                drm_sched_entity_kill_jobs(entity);
 300        }
 301
 302        dma_fence_put(entity->last_scheduled);
 303        entity->last_scheduled = NULL;
 304        kfree(entity->rq_list);
 305}
 306EXPORT_SYMBOL(drm_sched_entity_fini);
 307
 308/**
 309 * drm_sched_entity_fini - Destroy a context entity
 310 *
 311 * @entity: scheduler entity
 312 *
 313 * Calls drm_sched_entity_do_release() and drm_sched_entity_cleanup()
 314 */
 315void drm_sched_entity_destroy(struct drm_sched_entity *entity)
 316{
 317        drm_sched_entity_flush(entity, MAX_WAIT_SCHED_ENTITY_Q_EMPTY);
 318        drm_sched_entity_fini(entity);
 319}
 320EXPORT_SYMBOL(drm_sched_entity_destroy);
 321
 322/**
 323 * drm_sched_entity_clear_dep - callback to clear the entities dependency
 324 */
 325static void drm_sched_entity_clear_dep(struct dma_fence *f,
 326                                       struct dma_fence_cb *cb)
 327{
 328        struct drm_sched_entity *entity =
 329                container_of(cb, struct drm_sched_entity, cb);
 330
 331        entity->dependency = NULL;
 332        dma_fence_put(f);
 333}
 334
 335/**
 336 * drm_sched_entity_clear_dep - callback to clear the entities dependency and
 337 * wake up scheduler
 338 */
 339static void drm_sched_entity_wakeup(struct dma_fence *f,
 340                                    struct dma_fence_cb *cb)
 341{
 342        struct drm_sched_entity *entity =
 343                container_of(cb, struct drm_sched_entity, cb);
 344
 345        drm_sched_entity_clear_dep(f, cb);
 346        drm_sched_wakeup(entity->rq->sched);
 347}
 348
 349/**
 350 * drm_sched_entity_set_rq_priority - helper for drm_sched_entity_set_priority
 351 */
 352static void drm_sched_entity_set_rq_priority(struct drm_sched_rq **rq,
 353                                             enum drm_sched_priority priority)
 354{
 355        *rq = &(*rq)->sched->sched_rq[priority];
 356}
 357
 358/**
 359 * drm_sched_entity_set_priority - Sets priority of the entity
 360 *
 361 * @entity: scheduler entity
 362 * @priority: scheduler priority
 363 *
 364 * Update the priority of runqueus used for the entity.
 365 */
 366void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
 367                                   enum drm_sched_priority priority)
 368{
 369        unsigned int i;
 370
 371        spin_lock(&entity->rq_lock);
 372
 373        for (i = 0; i < entity->num_rq_list; ++i)
 374                drm_sched_entity_set_rq_priority(&entity->rq_list[i], priority);
 375
 376        if (entity->rq) {
 377                drm_sched_rq_remove_entity(entity->rq, entity);
 378                drm_sched_entity_set_rq_priority(&entity->rq, priority);
 379                drm_sched_rq_add_entity(entity->rq, entity);
 380        }
 381
 382        spin_unlock(&entity->rq_lock);
 383}
 384EXPORT_SYMBOL(drm_sched_entity_set_priority);
 385
 386/**
 387 * drm_sched_entity_add_dependency_cb - add callback for the entities dependency
 388 *
 389 * @entity: entity with dependency
 390 *
 391 * Add a callback to the current dependency of the entity to wake up the
 392 * scheduler when the entity becomes available.
 393 */
 394static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity)
 395{
 396        struct drm_gpu_scheduler *sched = entity->rq->sched;
 397        struct dma_fence *fence = entity->dependency;
 398        struct drm_sched_fence *s_fence;
 399
 400        if (fence->context == entity->fence_context ||
 401            fence->context == entity->fence_context + 1) {
 402                /*
 403                 * Fence is a scheduled/finished fence from a job
 404                 * which belongs to the same entity, we can ignore
 405                 * fences from ourself
 406                 */
 407                dma_fence_put(entity->dependency);
 408                return false;
 409        }
 410
 411        s_fence = to_drm_sched_fence(fence);
 412        if (s_fence && s_fence->sched == sched) {
 413
 414                /*
 415                 * Fence is from the same scheduler, only need to wait for
 416                 * it to be scheduled
 417                 */
 418                fence = dma_fence_get(&s_fence->scheduled);
 419                dma_fence_put(entity->dependency);
 420                entity->dependency = fence;
 421                if (!dma_fence_add_callback(fence, &entity->cb,
 422                                            drm_sched_entity_clear_dep))
 423                        return true;
 424
 425                /* Ignore it when it is already scheduled */
 426                dma_fence_put(fence);
 427                return false;
 428        }
 429
 430        if (!dma_fence_add_callback(entity->dependency, &entity->cb,
 431                                    drm_sched_entity_wakeup))
 432                return true;
 433
 434        dma_fence_put(entity->dependency);
 435        return false;
 436}
 437
 438/**
 439 * drm_sched_entity_pop_job - get a ready to be scheduled job from the entity
 440 *
 441 * @entity: entity to get the job from
 442 *
 443 * Process all dependencies and try to get one job from the entities queue.
 444 */
 445struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity)
 446{
 447        struct drm_gpu_scheduler *sched = entity->rq->sched;
 448        struct drm_sched_job *sched_job;
 449
 450        sched_job = to_drm_sched_job(spsc_queue_peek(&entity->job_queue));
 451        if (!sched_job)
 452                return NULL;
 453
 454        while ((entity->dependency =
 455                        sched->ops->dependency(sched_job, entity))) {
 456                trace_drm_sched_job_wait_dep(sched_job, entity->dependency);
 457
 458                if (drm_sched_entity_add_dependency_cb(entity))
 459                        return NULL;
 460        }
 461
 462        /* skip jobs from entity that marked guilty */
 463        if (entity->guilty && atomic_read(entity->guilty))
 464                dma_fence_set_error(&sched_job->s_fence->finished, -ECANCELED);
 465
 466        dma_fence_put(entity->last_scheduled);
 467        entity->last_scheduled = dma_fence_get(&sched_job->s_fence->finished);
 468
 469        spsc_queue_pop(&entity->job_queue);
 470        return sched_job;
 471}
 472
 473/**
 474 * drm_sched_entity_select_rq - select a new rq for the entity
 475 *
 476 * @entity: scheduler entity
 477 *
 478 * Check all prerequisites and select a new rq for the entity for load
 479 * balancing.
 480 */
 481void drm_sched_entity_select_rq(struct drm_sched_entity *entity)
 482{
 483        struct dma_fence *fence;
 484        struct drm_sched_rq *rq;
 485
 486        if (spsc_queue_count(&entity->job_queue) || entity->num_rq_list <= 1)
 487                return;
 488
 489        fence = READ_ONCE(entity->last_scheduled);
 490        if (fence && !dma_fence_is_signaled(fence))
 491                return;
 492
 493        rq = drm_sched_entity_get_free_sched(entity);
 494        if (rq == entity->rq)
 495                return;
 496
 497        spin_lock(&entity->rq_lock);
 498        drm_sched_rq_remove_entity(entity->rq, entity);
 499        entity->rq = rq;
 500        spin_unlock(&entity->rq_lock);
 501}
 502
 503/**
 504 * drm_sched_entity_push_job - Submit a job to the entity's job queue
 505 *
 506 * @sched_job: job to submit
 507 * @entity: scheduler entity
 508 *
 509 * Note: To guarantee that the order of insertion to queue matches
 510 * the job's fence sequence number this function should be
 511 * called with drm_sched_job_init under common lock.
 512 *
 513 * Returns 0 for success, negative error code otherwise.
 514 */
 515void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
 516                               struct drm_sched_entity *entity)
 517{
 518        bool first;
 519
 520        trace_drm_sched_job(sched_job, entity);
 521        atomic_inc(&entity->rq->sched->num_jobs);
 522        WRITE_ONCE(entity->last_user, current->group_leader);
 523        first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node);
 524
 525        /* first job wakes up scheduler */
 526        if (first) {
 527                /* Add the entity to the run queue */
 528                spin_lock(&entity->rq_lock);
 529                if (entity->stopped) {
 530                        spin_unlock(&entity->rq_lock);
 531
 532                        DRM_ERROR("Trying to push to a killed entity\n");
 533                        return;
 534                }
 535                drm_sched_rq_add_entity(entity->rq, entity);
 536                spin_unlock(&entity->rq_lock);
 537                drm_sched_wakeup(entity->rq->sched);
 538        }
 539}
 540EXPORT_SYMBOL(drm_sched_entity_push_job);
 541