linux/drivers/gpu/drm/scheduler/sched_main.c
/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

/**
 * DOC: Overview
 *
 * The GPU scheduler provides entities which allow userspace to push jobs
 * into software queues which are then scheduled on a hardware run queue.
 * The software queues have a priority among them. The scheduler selects the
 * entities from the run queue using a FIFO. The scheduler provides dependency
 * handling features among jobs. The driver is supposed to provide callback
 * functions for backend operations to the scheduler, such as submitting a job
 * to the hardware run queue, returning the dependencies of a job, etc.
 *
 * The organisation of the scheduler is the following:
 *
 * 1. Each hw run queue has one scheduler
 * 2. Each scheduler has multiple run queues with different priorities
 *    (e.g., HIGH_HW, HIGH_SW, KERNEL, NORMAL)
 * 3. Each scheduler run queue has a queue of entities to schedule
 * 4. Entities themselves maintain a queue of jobs that will be scheduled on
 *    the hardware.
 *
 * The jobs in an entity are always scheduled in the order in which they were
 * pushed. The submission flow is sketched in the comment below.
 */
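
/*
 * Illustrative sketch of the submission side described above (hypothetical
 * driver code, not part of this file): the my_* names are made up, and
 * drm_sched_entity_init()/drm_sched_entity_push_job() live in sched_entity.c;
 * their exact signatures may differ between kernel versions.
 *
 *	struct drm_gpu_scheduler *sched_list[] = { &my_dev->ring_sched };
 *	struct drm_sched_entity entity;
 *	int r;
 *
 *	r = drm_sched_entity_init(&entity, DRM_SCHED_PRIORITY_NORMAL,
 *				  sched_list, ARRAY_SIZE(sched_list), NULL);
 *	if (r)
 *		return r;
 *
 *	r = drm_sched_job_init(&my_job->base, &entity, my_ctx);
 *	if (r)
 *		return r;
 *
 *	drm_sched_entity_push_job(&my_job->base, &entity);
 *
 * From there the scheduler resolves the job's dependencies, feeds it to the
 * driver's run_job() callback in push order and eventually releases it via
 * free_job().
 */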

#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/sched.h>
#include <linux/completion.h>
#include <uapi/linux/sched/types.h>

#include <drm/drm_print.h>
#include <drm/gpu_scheduler.h>
#include <drm/spsc_queue.h>

#define CREATE_TRACE_POINTS
#include "gpu_scheduler_trace.h"

#define to_drm_sched_job(sched_job)		\
		container_of((sched_job), struct drm_sched_job, queue_node)

/**
 * drm_sched_rq_init - initialize a given run queue struct
 *
 * @sched: scheduler instance to associate with this run queue
 * @rq: scheduler run queue
 *
 * Initializes a scheduler runqueue.
 */
static void drm_sched_rq_init(struct drm_gpu_scheduler *sched,
			      struct drm_sched_rq *rq)
{
	spin_lock_init(&rq->lock);
	INIT_LIST_HEAD(&rq->entities);
	rq->current_entity = NULL;
	rq->sched = sched;
}

/**
 * drm_sched_rq_add_entity - add an entity
 *
 * @rq: scheduler run queue
 * @entity: scheduler entity
 *
 * Adds a scheduler entity to the run queue.
 */
void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
			     struct drm_sched_entity *entity)
{
	if (!list_empty(&entity->list))
		return;
	spin_lock(&rq->lock);
	atomic_inc(rq->sched->score);
	list_add_tail(&entity->list, &rq->entities);
	spin_unlock(&rq->lock);
}

/**
 * drm_sched_rq_remove_entity - remove an entity
 *
 * @rq: scheduler run queue
 * @entity: scheduler entity
 *
 * Removes a scheduler entity from the run queue.
 */
void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
				struct drm_sched_entity *entity)
{
	if (list_empty(&entity->list))
		return;
	spin_lock(&rq->lock);
	atomic_dec(rq->sched->score);
	list_del_init(&entity->list);
	if (rq->current_entity == entity)
		rq->current_entity = NULL;
	spin_unlock(&rq->lock);
}

/**
 * drm_sched_rq_select_entity - Select an entity which could provide a job to run
 *
 * @rq: scheduler run queue to check.
 *
 * Try to find a ready entity; returns NULL if none is found.
 */
static struct drm_sched_entity *
drm_sched_rq_select_entity(struct drm_sched_rq *rq)
{
	struct drm_sched_entity *entity;

	spin_lock(&rq->lock);

	entity = rq->current_entity;
	if (entity) {
		list_for_each_entry_continue(entity, &rq->entities, list) {
			if (drm_sched_entity_is_ready(entity)) {
				rq->current_entity = entity;
				reinit_completion(&entity->entity_idle);
				spin_unlock(&rq->lock);
				return entity;
			}
		}
	}

	list_for_each_entry(entity, &rq->entities, list) {

		if (drm_sched_entity_is_ready(entity)) {
			rq->current_entity = entity;
			reinit_completion(&entity->entity_idle);
			spin_unlock(&rq->lock);
			return entity;
		}

		if (entity == rq->current_entity)
			break;
	}

	spin_unlock(&rq->lock);

	return NULL;
}

/**
 * drm_sched_job_done - complete a job
 * @s_job: pointer to the job which is done
 *
 * Finish the job's fence and wake up the worker thread.
 */
static void drm_sched_job_done(struct drm_sched_job *s_job)
{
	struct drm_sched_fence *s_fence = s_job->s_fence;
	struct drm_gpu_scheduler *sched = s_fence->sched;

	atomic_dec(&sched->hw_rq_count);
	atomic_dec(sched->score);

	trace_drm_sched_process_job(s_fence);

	dma_fence_get(&s_fence->finished);
	drm_sched_fence_finished(s_fence);
	dma_fence_put(&s_fence->finished);
	wake_up_interruptible(&sched->wake_up_worker);
}

/**
 * drm_sched_job_done_cb - the callback for a done job
 * @f: fence
 * @cb: fence callbacks
 */
static void drm_sched_job_done_cb(struct dma_fence *f, struct dma_fence_cb *cb)
{
	struct drm_sched_job *s_job = container_of(cb, struct drm_sched_job, cb);

	drm_sched_job_done(s_job);
}

/**
 * drm_sched_dependency_optimized - test if the dependency can be optimized
 *
 * @fence: the dependency fence
 * @entity: the entity which depends on the above fence
 *
 * Returns true if the dependency can be optimized and false otherwise.
 */
bool drm_sched_dependency_optimized(struct dma_fence *fence,
				    struct drm_sched_entity *entity)
{
	struct drm_gpu_scheduler *sched = entity->rq->sched;
	struct drm_sched_fence *s_fence;

	if (!fence || dma_fence_is_signaled(fence))
		return false;
	if (fence->context == entity->fence_context)
		return true;
	s_fence = to_drm_sched_fence(fence);
	if (s_fence && s_fence->sched == sched)
		return true;

	return false;
}
EXPORT_SYMBOL(drm_sched_dependency_optimized);
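
/*
 * Illustrative use (hypothetical driver code): when collecting dependencies,
 * a driver can skip an explicit wait if the fence comes from the same
 * scheduler instance, since ring ordering already covers it.
 * my_driver_sync_fence() is a made-up placeholder for whatever mechanism the
 * driver uses to wait for a foreign fence.
 *
 *	if (drm_sched_dependency_optimized(fence, entity))
 *		return 0;
 *	return my_driver_sync_fence(my_sync, fence);
 */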

/**
 * drm_sched_start_timeout - start timeout for reset worker
 *
 * @sched: scheduler instance to start the worker for
 *
 * Start the timeout for the given scheduler.
 */
static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched)
{
	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
	    !list_empty(&sched->pending_list))
		queue_delayed_work(sched->timeout_wq, &sched->work_tdr, sched->timeout);
}

/**
 * drm_sched_fault - immediately start timeout handler
 *
 * @sched: scheduler where the timeout handling should be started.
 *
 * Start timeout handling immediately when the driver detects a hardware fault.
 */
void drm_sched_fault(struct drm_gpu_scheduler *sched)
{
	mod_delayed_work(sched->timeout_wq, &sched->work_tdr, 0);
}
EXPORT_SYMBOL(drm_sched_fault);

/**
 * drm_sched_suspend_timeout - Suspend scheduler job timeout
 *
 * @sched: scheduler instance for which to suspend the timeout
 *
 * Suspend the delayed work timeout for the scheduler. This is done by
 * modifying the delayed work timeout to an arbitrarily large value,
 * MAX_SCHEDULE_TIMEOUT in this case.
 *
 * Returns the timeout remaining.
 */
unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched)
{
	unsigned long sched_timeout, now = jiffies;

	sched_timeout = sched->work_tdr.timer.expires;

	/*
	 * Modify the timeout to an arbitrarily large value. This also prevents
	 * the timeout from being restarted when new submissions arrive.
	 */
	if (mod_delayed_work(sched->timeout_wq, &sched->work_tdr, MAX_SCHEDULE_TIMEOUT)
			&& time_after(sched_timeout, now))
		return sched_timeout - now;
	else
		return sched->timeout;
}
EXPORT_SYMBOL(drm_sched_suspend_timeout);

/**
 * drm_sched_resume_timeout - Resume scheduler job timeout
 *
 * @sched: scheduler instance for which to resume the timeout
 * @remaining: remaining timeout
 *
 * Resume the delayed work timeout for the scheduler.
 */
void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
		unsigned long remaining)
{
	spin_lock(&sched->job_list_lock);

	if (list_empty(&sched->pending_list))
		cancel_delayed_work(&sched->work_tdr);
	else
		mod_delayed_work(sched->timeout_wq, &sched->work_tdr, remaining);

	spin_unlock(&sched->job_list_lock);
}
EXPORT_SYMBOL(drm_sched_resume_timeout);
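
/*
 * Illustrative pairing of the two helpers above (hypothetical driver code):
 * suspend the TDR timer around an operation that must not be accounted as a
 * job timeout, then hand back whatever time was left.
 * my_do_slow_maintenance() is a made-up placeholder.
 *
 *	unsigned long remaining;
 *
 *	remaining = drm_sched_suspend_timeout(sched);
 *	my_do_slow_maintenance(sched);
 *	drm_sched_resume_timeout(sched, remaining);
 */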

static void drm_sched_job_begin(struct drm_sched_job *s_job)
{
	struct drm_gpu_scheduler *sched = s_job->sched;

	spin_lock(&sched->job_list_lock);
	list_add_tail(&s_job->list, &sched->pending_list);
	drm_sched_start_timeout(sched);
	spin_unlock(&sched->job_list_lock);
}

static void drm_sched_job_timedout(struct work_struct *work)
{
	struct drm_gpu_scheduler *sched;
	struct drm_sched_job *job;
	enum drm_gpu_sched_stat status = DRM_GPU_SCHED_STAT_NOMINAL;

	sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);

	/* Protects against concurrent deletion in drm_sched_get_cleanup_job */
	spin_lock(&sched->job_list_lock);
	job = list_first_entry_or_null(&sched->pending_list,
				       struct drm_sched_job, list);

	if (job) {
		/*
		 * Remove the bad job so it cannot be freed by a concurrent
		 * drm_sched_get_cleanup_job(). It will be reinserted after
		 * sched->thread is parked, at which point it's safe.
		 */
		list_del_init(&job->list);
		spin_unlock(&sched->job_list_lock);

		status = job->sched->ops->timedout_job(job);

		/*
		 * The guilty job did complete and hence needs to be manually
		 * removed. See the drm_sched_stop() documentation.
		 */
		if (sched->free_guilty) {
			job->sched->ops->free_job(job);
			sched->free_guilty = false;
		}
	} else {
		spin_unlock(&sched->job_list_lock);
	}

	if (status != DRM_GPU_SCHED_STAT_ENODEV) {
		spin_lock(&sched->job_list_lock);
		drm_sched_start_timeout(sched);
		spin_unlock(&sched->job_list_lock);
	}
}

/**
 * drm_sched_increase_karma - Update sched_entity guilty flag
 *
 * @bad: The job guilty of the timeout
 *
 * Increment on every hang caused by the 'bad' job. If this exceeds the hang
 * limit of the scheduler then the respective sched entity is marked guilty and
 * jobs from it will not be scheduled further.
 */
void drm_sched_increase_karma(struct drm_sched_job *bad)
{
	drm_sched_increase_karma_ext(bad, 1);
}
EXPORT_SYMBOL(drm_sched_increase_karma);

void drm_sched_reset_karma(struct drm_sched_job *bad)
{
	drm_sched_increase_karma_ext(bad, 0);
}
EXPORT_SYMBOL(drm_sched_reset_karma);

/**
 * drm_sched_stop - stop the scheduler
 *
 * @sched: scheduler instance
 * @bad: job which caused the time out
 *
 * Stop the scheduler and also remove and free all completed jobs.
 * Note: the bad job will not be freed as it might be used later, so it is the
 * caller's responsibility to release it manually if it is no longer part of
 * the pending list.
 */
void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
{
	struct drm_sched_job *s_job, *tmp;

	kthread_park(sched->thread);

	/*
	 * Reinsert the bad job here - now it's safe as
	 * drm_sched_get_cleanup_job() cannot race against us and release the
	 * bad job at this point - we parked (waited for) any in-progress
	 * (earlier) cleanups and drm_sched_get_cleanup_job() will not be
	 * called again until the scheduler thread is unparked.
	 */
	if (bad && bad->sched == sched)
		/*
		 * Add at the head of the queue to reflect it was the earliest
		 * job extracted.
		 */
		list_add(&bad->list, &sched->pending_list);

	/*
	 * Iterate the job list from later to earlier one and either deactivate
	 * their HW callbacks or remove them from the pending list if they have
	 * already signaled.
	 * This iteration is thread safe as the sched thread is stopped.
	 */
	list_for_each_entry_safe_reverse(s_job, tmp, &sched->pending_list,
					 list) {
		if (s_job->s_fence->parent &&
		    dma_fence_remove_callback(s_job->s_fence->parent,
					      &s_job->cb)) {
			atomic_dec(&sched->hw_rq_count);
		} else {
			/*
			 * Remove the job from the pending_list.
			 * Locking here is for the concurrent resume timeout.
			 */
			spin_lock(&sched->job_list_lock);
			list_del_init(&s_job->list);
			spin_unlock(&sched->job_list_lock);

			/*
			 * Wait for the job's HW fence callback to finish using
			 * s_job before releasing it.
			 *
			 * The job is still alive, so the fence refcount is at
			 * least 1.
			 */
			dma_fence_wait(&s_job->s_fence->finished, false);

			/*
			 * We must keep the bad job alive for later use during
			 * recovery by some of the drivers but leave a hint
			 * that the guilty job must be released.
			 */
			if (bad != s_job)
				sched->ops->free_job(s_job);
			else
				sched->free_guilty = true;
		}
	}

	/*
	 * Stop the timer in flight as we rearm it in drm_sched_start(). This
	 * prevents the pending timeout work in progress from firing right
	 * after this TDR finished and before the newly restarted jobs had a
	 * chance to complete.
	 */
	cancel_delayed_work(&sched->work_tdr);
}
EXPORT_SYMBOL(drm_sched_stop);

/**
 * drm_sched_start - recover jobs after a reset
 *
 * @sched: scheduler instance
 * @full_recovery: proceed with complete sched restart
 */
void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery)
{
	struct drm_sched_job *s_job, *tmp;
	int r;

	/*
	 * Locking the list is not required here as the sched thread is parked
	 * so no new jobs are being inserted or removed. Also concurrent
	 * GPU recovers can't run in parallel.
	 */
	list_for_each_entry_safe(s_job, tmp, &sched->pending_list, list) {
		struct dma_fence *fence = s_job->s_fence->parent;

		atomic_inc(&sched->hw_rq_count);

		if (!full_recovery)
			continue;

		if (fence) {
			r = dma_fence_add_callback(fence, &s_job->cb,
						   drm_sched_job_done_cb);
			if (r == -ENOENT)
				drm_sched_job_done(s_job);
			else if (r)
				DRM_ERROR("fence add callback failed (%d)\n",
					  r);
		} else {
			drm_sched_job_done(s_job);
		}
	}

	if (full_recovery) {
		spin_lock(&sched->job_list_lock);
		drm_sched_start_timeout(sched);
		spin_unlock(&sched->job_list_lock);
	}

	kthread_unpark(sched->thread);
}
EXPORT_SYMBOL(drm_sched_start);

/**
 * drm_sched_resubmit_jobs - helper to relaunch jobs from the pending list
 *
 * @sched: scheduler instance
 */
void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched)
{
	drm_sched_resubmit_jobs_ext(sched, INT_MAX);
}
EXPORT_SYMBOL(drm_sched_resubmit_jobs);

/**
 * drm_sched_resubmit_jobs_ext - helper to relaunch a certain number of jobs from the pending list
 *
 * @sched: scheduler instance
 * @max: number of jobs to relaunch
 */
void drm_sched_resubmit_jobs_ext(struct drm_gpu_scheduler *sched, int max)
{
	struct drm_sched_job *s_job, *tmp;
	uint64_t guilty_context;
	bool found_guilty = false;
	struct dma_fence *fence;
	int i = 0;

	list_for_each_entry_safe(s_job, tmp, &sched->pending_list, list) {
		struct drm_sched_fence *s_fence = s_job->s_fence;

		if (i >= max)
			break;

		if (!found_guilty && atomic_read(&s_job->karma) > sched->hang_limit) {
			found_guilty = true;
			guilty_context = s_job->s_fence->scheduled.context;
		}

		if (found_guilty && s_job->s_fence->scheduled.context == guilty_context)
			dma_fence_set_error(&s_fence->finished, -ECANCELED);

		dma_fence_put(s_job->s_fence->parent);
		fence = sched->ops->run_job(s_job);
		i++;

		if (IS_ERR_OR_NULL(fence)) {
			if (IS_ERR(fence))
				dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));

			s_job->s_fence->parent = NULL;
		} else {
			s_job->s_fence->parent = fence;
		}
	}
}
EXPORT_SYMBOL(drm_sched_resubmit_jobs_ext);
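
/*
 * Illustrative recovery sequence (hypothetical driver code): a typical
 * timedout_job() callback stops the scheduler, updates karma, resets the
 * hardware and then resubmits and restarts. my_timedout_job() and
 * my_reset_hw() are made-up placeholders for the driver-specific parts.
 *
 *	static enum drm_gpu_sched_stat
 *	my_timedout_job(struct drm_sched_job *bad)
 *	{
 *		struct drm_gpu_scheduler *sched = bad->sched;
 *
 *		drm_sched_stop(sched, bad);
 *		drm_sched_increase_karma(bad);
 *		my_reset_hw(sched);
 *		drm_sched_resubmit_jobs(sched);
 *		drm_sched_start(sched, true);
 *		return DRM_GPU_SCHED_STAT_NOMINAL;
 *	}
 */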

/**
 * drm_sched_job_init - init a scheduler job
 *
 * @job: scheduler job to init
 * @entity: scheduler entity to use
 * @owner: job owner for debugging
 *
 * Refer to drm_sched_entity_push_job() documentation
 * for locking considerations.
 *
 * Returns 0 for success, negative error code otherwise.
 */
int drm_sched_job_init(struct drm_sched_job *job,
		       struct drm_sched_entity *entity,
		       void *owner)
{
	struct drm_gpu_scheduler *sched;

	drm_sched_entity_select_rq(entity);
	if (!entity->rq)
		return -ENOENT;

	sched = entity->rq->sched;

	job->sched = sched;
	job->entity = entity;
	job->s_priority = entity->rq - sched->sched_rq;
	job->s_fence = drm_sched_fence_create(entity, owner);
	if (!job->s_fence)
		return -ENOMEM;
	job->id = atomic64_inc_return(&sched->job_id_count);

	INIT_LIST_HEAD(&job->list);

	return 0;
}
EXPORT_SYMBOL(drm_sched_job_init);
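
/*
 * Illustrative submission path (hypothetical driver code): embed a
 * struct drm_sched_job in the driver's own job, initialize it against an
 * entity and hand it to the entity. struct my_job, struct my_ctx and the
 * push_job signature shown (which differs between kernel versions) are
 * assumptions; in its free_job() callback the driver would typically call
 * drm_sched_job_cleanup(&job->base) before freeing its own structure.
 *
 *	struct my_job {
 *		struct drm_sched_job base;
 *		void *payload;
 *	};
 *
 *	static int my_submit(struct my_ctx *ctx, struct my_job *job)
 *	{
 *		int r;
 *
 *		r = drm_sched_job_init(&job->base, &ctx->entity, ctx);
 *		if (r)
 *			return r;
 *
 *		drm_sched_entity_push_job(&job->base, &ctx->entity);
 *		return 0;
 *	}
 */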

/**
 * drm_sched_job_cleanup - clean up scheduler job resources
 *
 * @job: scheduler job to clean up
 */
void drm_sched_job_cleanup(struct drm_sched_job *job)
{
	dma_fence_put(&job->s_fence->finished);
	job->s_fence = NULL;
}
EXPORT_SYMBOL(drm_sched_job_cleanup);

/**
 * drm_sched_ready - is the scheduler ready
 *
 * @sched: scheduler instance
 *
 * Return true if we can push more jobs to the hw, otherwise false.
 */
static bool drm_sched_ready(struct drm_gpu_scheduler *sched)
{
	return atomic_read(&sched->hw_rq_count) <
		sched->hw_submission_limit;
}

/**
 * drm_sched_wakeup - Wake up the scheduler when it is ready
 *
 * @sched: scheduler instance
 */
void drm_sched_wakeup(struct drm_gpu_scheduler *sched)
{
	if (drm_sched_ready(sched))
		wake_up_interruptible(&sched->wake_up_worker);
}

/**
 * drm_sched_select_entity - Select next entity to process
 *
 * @sched: scheduler instance
 *
 * Returns the entity to process or NULL if none are found.
 */
static struct drm_sched_entity *
drm_sched_select_entity(struct drm_gpu_scheduler *sched)
{
	struct drm_sched_entity *entity;
	int i;

	if (!drm_sched_ready(sched))
		return NULL;

	/* The kernel run queue has higher priority than the normal run queue */
	for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
		entity = drm_sched_rq_select_entity(&sched->sched_rq[i]);
		if (entity)
			break;
	}

	return entity;
}

/**
 * drm_sched_get_cleanup_job - fetch the next finished job to be destroyed
 *
 * @sched: scheduler instance
 *
 * Returns the next finished job from the pending list (if there is one)
 * that is ready to be destroyed.
 */
static struct drm_sched_job *
drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
{
	struct drm_sched_job *job, *next;

	/*
	 * Don't destroy jobs while the timeout worker is running, or while the
	 * thread is being parked and hence assumed to not touch pending_list.
	 */
	if ((sched->timeout != MAX_SCHEDULE_TIMEOUT &&
	    !cancel_delayed_work(&sched->work_tdr)) ||
	    kthread_should_park())
		return NULL;

	spin_lock(&sched->job_list_lock);

	job = list_first_entry_or_null(&sched->pending_list,
				       struct drm_sched_job, list);

	if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
		/* remove job from pending_list */
		list_del_init(&job->list);
		/* make the scheduled timestamp more accurate */
		next = list_first_entry_or_null(&sched->pending_list,
						typeof(*next), list);
		if (next)
			next->s_fence->scheduled.timestamp =
				job->s_fence->finished.timestamp;

	} else {
		job = NULL;
		/* queue timeout for next job */
		drm_sched_start_timeout(sched);
	}

	spin_unlock(&sched->job_list_lock);

	return job;
}

/**
 * drm_sched_pick_best - Get a drm sched from a sched_list with the least load
 * @sched_list: list of drm_gpu_schedulers
 * @num_sched_list: number of drm_gpu_schedulers in the sched_list
 *
 * Returns a pointer to the sched with the least load, or NULL if none of the
 * drm_gpu_schedulers are ready.
 */
struct drm_gpu_scheduler *
drm_sched_pick_best(struct drm_gpu_scheduler **sched_list,
		     unsigned int num_sched_list)
{
	struct drm_gpu_scheduler *sched, *picked_sched = NULL;
	int i;
	unsigned int min_score = UINT_MAX, num_score;

	for (i = 0; i < num_sched_list; ++i) {
		sched = sched_list[i];

		if (!sched->ready) {
			DRM_WARN("scheduler %s is not ready, skipping",
				 sched->name);
			continue;
		}

		num_score = atomic_read(sched->score);
		if (num_score < min_score) {
			min_score = num_score;
			picked_sched = sched;
		}
	}

	return picked_sched;
}
EXPORT_SYMBOL(drm_sched_pick_best);
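
/*
 * Illustrative use (hypothetical driver code): spreading work across several
 * schedulers that drive identical hardware rings; the entity code uses this
 * helper for the same purpose when (re)selecting a run queue. The array
 * layout and my_dev are assumptions.
 *
 *	struct drm_gpu_scheduler *list[] = {
 *		&my_dev->ring_sched[0],
 *		&my_dev->ring_sched[1],
 *	};
 *	struct drm_gpu_scheduler *least_loaded;
 *
 *	least_loaded = drm_sched_pick_best(list, ARRAY_SIZE(list));
 *	if (!least_loaded)
 *		return -ENODEV;
 */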

/**
 * drm_sched_blocked - check if the scheduler is blocked
 *
 * @sched: scheduler instance
 *
 * Returns true if blocked, otherwise false.
 */
static bool drm_sched_blocked(struct drm_gpu_scheduler *sched)
{
	if (kthread_should_park()) {
		kthread_parkme();
		return true;
	}

	return false;
}

/**
 * drm_sched_main - main scheduler thread
 *
 * @param: scheduler instance
 *
 * Returns 0.
 */
static int drm_sched_main(void *param)
{
	struct drm_gpu_scheduler *sched = (struct drm_gpu_scheduler *)param;
	int r;

	sched_set_fifo_low(current);

	while (!kthread_should_stop()) {
		struct drm_sched_entity *entity = NULL;
		struct drm_sched_fence *s_fence;
		struct drm_sched_job *sched_job;
		struct dma_fence *fence;
		struct drm_sched_job *cleanup_job = NULL;

		wait_event_interruptible(sched->wake_up_worker,
					 (cleanup_job = drm_sched_get_cleanup_job(sched)) ||
					 (!drm_sched_blocked(sched) &&
					  (entity = drm_sched_select_entity(sched))) ||
					 kthread_should_stop());

		if (cleanup_job) {
			sched->ops->free_job(cleanup_job);
			/* queue timeout for next job */
			drm_sched_start_timeout(sched);
		}

		if (!entity)
			continue;

		sched_job = drm_sched_entity_pop_job(entity);

		if (!sched_job) {
			complete(&entity->entity_idle);
			continue;
		}

		s_fence = sched_job->s_fence;

		atomic_inc(&sched->hw_rq_count);
		drm_sched_job_begin(sched_job);

		trace_drm_run_job(sched_job, entity);
		fence = sched->ops->run_job(sched_job);
		complete(&entity->entity_idle);
		drm_sched_fence_scheduled(s_fence);

		if (!IS_ERR_OR_NULL(fence)) {
			s_fence->parent = dma_fence_get(fence);
			r = dma_fence_add_callback(fence, &sched_job->cb,
						   drm_sched_job_done_cb);
			if (r == -ENOENT)
				drm_sched_job_done(sched_job);
			else if (r)
				DRM_ERROR("fence add callback failed (%d)\n",
					  r);
			dma_fence_put(fence);
		} else {
			if (IS_ERR(fence))
				dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));

			drm_sched_job_done(sched_job);
		}

		wake_up(&sched->job_scheduled);
	}
	return 0;
}

/**
 * drm_sched_init - Init a gpu scheduler instance
 *
 * @sched: scheduler instance
 * @ops: backend operations for this scheduler
 * @hw_submission: number of hw submissions that can be in flight
 * @hang_limit: number of times to allow a job to hang before dropping it
 * @timeout: timeout value in jiffies for the scheduler
 * @timeout_wq: workqueue to use for timeout work. If NULL, the system_wq is
 *              used
 * @score: optional score atomic shared with other schedulers
 * @name: name used for debugging
 *
 * Return 0 on success, otherwise error code.
 */
int drm_sched_init(struct drm_gpu_scheduler *sched,
		   const struct drm_sched_backend_ops *ops,
		   unsigned hw_submission, unsigned hang_limit,
		   long timeout, struct workqueue_struct *timeout_wq,
		   atomic_t *score, const char *name)
{
	int i, ret;

	sched->ops = ops;
	sched->hw_submission_limit = hw_submission;
	sched->name = name;
	sched->timeout = timeout;
	sched->timeout_wq = timeout_wq ? : system_wq;
	sched->hang_limit = hang_limit;
	sched->score = score ? score : &sched->_score;
	for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_COUNT; i++)
		drm_sched_rq_init(sched, &sched->sched_rq[i]);

	init_waitqueue_head(&sched->wake_up_worker);
	init_waitqueue_head(&sched->job_scheduled);
	INIT_LIST_HEAD(&sched->pending_list);
	spin_lock_init(&sched->job_list_lock);
	atomic_set(&sched->hw_rq_count, 0);
	INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
	atomic_set(&sched->_score, 0);
	atomic64_set(&sched->job_id_count, 0);

	/* Each scheduler will run on a separate kernel thread */
	sched->thread = kthread_run(drm_sched_main, sched, sched->name);
	if (IS_ERR(sched->thread)) {
		ret = PTR_ERR(sched->thread);
		sched->thread = NULL;
		DRM_ERROR("Failed to create scheduler for %s.\n", name);
		return ret;
	}

	sched->ready = true;
	return 0;
}
EXPORT_SYMBOL(drm_sched_init);
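
/*
 * Illustrative initialization (hypothetical driver code): one scheduler per
 * hardware ring, backed by the driver's backend ops. my_sched_ops, its
 * callbacks, my_dev and the numeric limits (16 in-flight submissions, a hang
 * limit of 3, a 10 second timeout, the system workqueue for TDR work and the
 * scheduler's internal score counter) are assumptions; the teardown
 * counterpart is drm_sched_fini() below.
 *
 *	static const struct drm_sched_backend_ops my_sched_ops = {
 *		.run_job	= my_run_job,
 *		.timedout_job	= my_timedout_job,
 *		.free_job	= my_free_job,
 *	};
 *
 *	r = drm_sched_init(&my_dev->ring_sched, &my_sched_ops,
 *			   16, 3, msecs_to_jiffies(10000),
 *			   NULL, NULL, "my_ring");
 *	if (r)
 *		return r;
 */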

/**
 * drm_sched_fini - Destroy a gpu scheduler
 *
 * @sched: scheduler instance
 *
 * Tears down and cleans up the scheduler.
 */
void drm_sched_fini(struct drm_gpu_scheduler *sched)
{
	struct drm_sched_entity *s_entity;
	int i;

	if (sched->thread)
		kthread_stop(sched->thread);

	for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
		struct drm_sched_rq *rq = &sched->sched_rq[i];

		if (!rq)
			continue;

		spin_lock(&rq->lock);
		list_for_each_entry(s_entity, &rq->entities, list)
			/*
			 * Prevents reinsertion and marks job_queue as idle;
			 * it will be removed from the rq in
			 * drm_sched_entity_fini() eventually.
			 */
			s_entity->stopped = true;
		spin_unlock(&rq->lock);

	}

	/* Wake up everyone stuck in drm_sched_entity_flush() for this scheduler */
	wake_up_all(&sched->job_scheduled);

	/* Confirm no work left behind accessing device structures */
	cancel_delayed_work_sync(&sched->work_tdr);

	sched->ready = false;
}
EXPORT_SYMBOL(drm_sched_fini);

/**
 * drm_sched_increase_karma_ext - Update sched_entity guilty flag
 *
 * @bad: The job guilty of the timeout
 * @type: type for increase/reset karma
 */
void drm_sched_increase_karma_ext(struct drm_sched_job *bad, int type)
{
	int i;
	struct drm_sched_entity *tmp;
	struct drm_sched_entity *entity;
	struct drm_gpu_scheduler *sched = bad->sched;

	/* don't change @bad's karma if it's from the KERNEL RQ, because
	 * sometimes a GPU hang would cause kernel jobs (like VM updating jobs)
	 * to be corrupted, but keep in mind that kernel jobs are always
	 * considered good.
	 */
	if (bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) {
		if (type == 0)
			atomic_set(&bad->karma, 0);
		else if (type == 1)
			atomic_inc(&bad->karma);

		for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_KERNEL;
		     i++) {
			struct drm_sched_rq *rq = &sched->sched_rq[i];

			spin_lock(&rq->lock);
			list_for_each_entry_safe(entity, tmp, &rq->entities, list) {
				if (bad->s_fence->scheduled.context ==
				    entity->fence_context) {
					if (entity->guilty)
						atomic_set(entity->guilty, type);
					break;
				}
			}
			spin_unlock(&rq->lock);

			if (&entity->list != &rq->entities)
				break;
		}
	}
}
EXPORT_SYMBOL(drm_sched_increase_karma_ext);