linux/drivers/gpu/drm/scheduler/sched_main.c
/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

/**
 * DOC: Overview
 *
 * The GPU scheduler provides entities which allow userspace to push jobs
 * into software queues which are then scheduled on a hardware run queue.
 * The software queues have a priority among them. The scheduler selects the
 * entities from the run queue using a FIFO. The scheduler provides dependency
 * handling features among jobs. The driver is supposed to provide callback
 * functions for backend operations to the scheduler, like submitting a job to
 * the hardware run queue, returning the dependencies of a job, etc.
 *
 * The organisation of the scheduler is the following:
 *
 * 1. Each hw run queue has one scheduler
 * 2. Each scheduler has multiple run queues with different priorities
 *    (e.g., HIGH_HW, HIGH_SW, KERNEL, NORMAL)
 * 3. Each scheduler run queue has a queue of entities to schedule
 * 4. Entities themselves maintain a queue of jobs that will be scheduled on
 *    the hardware.
 *
 * The jobs in an entity are always scheduled in the order in which they were
 * pushed. A minimal sketch of the driver-side glue follows this comment.
 */
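
/*
 * A minimal, hypothetical sketch of the driver-side glue described above.
 * The foo_* names are illustrative only and are not part of this file or of
 * any real driver:
 *
 *      static const struct drm_sched_backend_ops foo_sched_ops = {
 *              .dependency   = foo_dependency,    // next fence to wait on, or NULL
 *              .run_job      = foo_run_job,       // hand the job to the HW ring
 *              .timedout_job = foo_timedout_job,  // recover from a hung job
 *              .free_job     = foo_free_job,      // release the job's resources
 *      };
 *
 *      // one scheduler per hardware run queue, typically set up at driver load
 *      r = drm_sched_init(&foo->sched, &foo_sched_ops,
 *                         hw_submission, hang_limit,
 *                         msecs_to_jiffies(timeout_ms), "foo_ring");
 */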

#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/sched.h>
#include <linux/completion.h>
#include <uapi/linux/sched/types.h>

#include <drm/drm_print.h>
#include <drm/gpu_scheduler.h>
#include <drm/spsc_queue.h>

#define CREATE_TRACE_POINTS
#include "gpu_scheduler_trace.h"

#define to_drm_sched_job(sched_job)             \
                container_of((sched_job), struct drm_sched_job, queue_node)

static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb);

/**
 * drm_sched_rq_init - initialize a given run queue struct
 *
 * @sched: scheduler instance the run queue belongs to
 * @rq: scheduler run queue
 *
 * Initializes a scheduler runqueue.
 */
static void drm_sched_rq_init(struct drm_gpu_scheduler *sched,
                              struct drm_sched_rq *rq)
{
        spin_lock_init(&rq->lock);
        INIT_LIST_HEAD(&rq->entities);
        rq->current_entity = NULL;
        rq->sched = sched;
}

/**
 * drm_sched_rq_add_entity - add an entity
 *
 * @rq: scheduler run queue
 * @entity: scheduler entity
 *
 * Adds a scheduler entity to the run queue.
 */
void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
                             struct drm_sched_entity *entity)
{
        if (!list_empty(&entity->list))
                return;
        spin_lock(&rq->lock);
        atomic_inc(&rq->sched->score);
        list_add_tail(&entity->list, &rq->entities);
        spin_unlock(&rq->lock);
}

/**
 * drm_sched_rq_remove_entity - remove an entity
 *
 * @rq: scheduler run queue
 * @entity: scheduler entity
 *
 * Removes a scheduler entity from the run queue.
 */
void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
                                struct drm_sched_entity *entity)
{
        if (list_empty(&entity->list))
                return;
        spin_lock(&rq->lock);
        atomic_dec(&rq->sched->score);
        list_del_init(&entity->list);
        if (rq->current_entity == entity)
                rq->current_entity = NULL;
        spin_unlock(&rq->lock);
}

/**
 * drm_sched_rq_select_entity - Select an entity which could provide a job to run
 *
 * @rq: scheduler run queue to check.
 *
 * Try to find a ready entity, returns NULL if none found.
 */
static struct drm_sched_entity *
drm_sched_rq_select_entity(struct drm_sched_rq *rq)
{
        struct drm_sched_entity *entity;

        spin_lock(&rq->lock);

        /* Continue the round-robin scan after the last selected entity */
        entity = rq->current_entity;
        if (entity) {
                list_for_each_entry_continue(entity, &rq->entities, list) {
                        if (drm_sched_entity_is_ready(entity)) {
                                rq->current_entity = entity;
                                reinit_completion(&entity->entity_idle);
                                spin_unlock(&rq->lock);
                                return entity;
                        }
                }
        }

        list_for_each_entry(entity, &rq->entities, list) {

                if (drm_sched_entity_is_ready(entity)) {
                        rq->current_entity = entity;
                        reinit_completion(&entity->entity_idle);
                        spin_unlock(&rq->lock);
                        return entity;
                }

                if (entity == rq->current_entity)
                        break;
        }

        spin_unlock(&rq->lock);

        return NULL;
}


/**
 * drm_sched_dependency_optimized - check whether a dependency can be optimized
 *
 * @fence: the dependency fence
 * @entity: the entity which depends on the above fence
 *
 * Returns true if the dependency can be optimized and false otherwise
 */
bool drm_sched_dependency_optimized(struct dma_fence *fence,
                                    struct drm_sched_entity *entity)
{
        struct drm_gpu_scheduler *sched = entity->rq->sched;
        struct drm_sched_fence *s_fence;

        if (!fence || dma_fence_is_signaled(fence))
                return false;
        if (fence->context == entity->fence_context)
                return true;
        s_fence = to_drm_sched_fence(fence);
        if (s_fence && s_fence->sched == sched)
                return true;

        return false;
}
EXPORT_SYMBOL(drm_sched_dependency_optimized);

/**
 * drm_sched_start_timeout - start timeout for reset worker
 *
 * @sched: scheduler instance to start the worker for
 *
 * Start the timeout for the given scheduler.
 */
static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched)
{
        if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
            !list_empty(&sched->ring_mirror_list))
                schedule_delayed_work(&sched->work_tdr, sched->timeout);
}

/**
 * drm_sched_fault - immediately start timeout handler
 *
 * @sched: scheduler where the timeout handling should be started.
 *
 * Start timeout handling immediately when the driver detects a hardware fault.
 */
void drm_sched_fault(struct drm_gpu_scheduler *sched)
{
        mod_delayed_work(system_wq, &sched->work_tdr, 0);
}
EXPORT_SYMBOL(drm_sched_fault);

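/*
 * drm_sched_fault() is typically called from the driver's fault or error
 * interrupt handler. A hypothetical sketch (the foo_* names are illustrative,
 * not part of this file):
 *
 *      static irqreturn_t foo_fault_irq(int irq, void *data)
 *      {
 *              struct foo_device *foo = data;
 *
 *              // skip waiting for the job timeout, start TDR right away
 *              drm_sched_fault(&foo->sched);
 *              return IRQ_HANDLED;
 *      }
 */
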
/**
 * drm_sched_suspend_timeout - Suspend scheduler job timeout
 *
 * @sched: scheduler instance for which to suspend the timeout
 *
 * Suspend the delayed work timeout for the scheduler. This is done by
 * modifying the delayed work timeout to an arbitrarily large value,
 * MAX_SCHEDULE_TIMEOUT in this case. Note that this function can be
 * called from an IRQ context.
 *
 * Returns the timeout remaining.
 */
unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched)
{
        unsigned long sched_timeout, now = jiffies;

        sched_timeout = sched->work_tdr.timer.expires;

        /*
         * Modify the timeout to an arbitrarily large value. This also prevents
         * the timeout from being restarted when new submissions arrive.
         */
        if (mod_delayed_work(system_wq, &sched->work_tdr, MAX_SCHEDULE_TIMEOUT)
                        && time_after(sched_timeout, now))
                return sched_timeout - now;
        else
                return sched->timeout;
}
EXPORT_SYMBOL(drm_sched_suspend_timeout);

/**
 * drm_sched_resume_timeout - Resume scheduler job timeout
 *
 * @sched: scheduler instance for which to resume the timeout
 * @remaining: remaining timeout
 *
 * Resume the delayed work timeout for the scheduler. Note that
 * this function can be called from an IRQ context.
 */
void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
                unsigned long remaining)
{
        unsigned long flags;

        spin_lock_irqsave(&sched->job_list_lock, flags);

        if (list_empty(&sched->ring_mirror_list))
                cancel_delayed_work(&sched->work_tdr);
        else
                mod_delayed_work(system_wq, &sched->work_tdr, remaining);

        spin_unlock_irqrestore(&sched->job_list_lock, flags);
}
EXPORT_SYMBOL(drm_sched_resume_timeout);

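/*
 * drm_sched_suspend_timeout() and drm_sched_resume_timeout() are meant to be
 * used as a pair around a window in which the job timeout must not fire, for
 * example while the hardware is temporarily taken away from the driver. A
 * hypothetical sketch (the foo_* names are illustrative):
 *
 *      unsigned long remaining;
 *
 *      remaining = drm_sched_suspend_timeout(&foo->sched);
 *      foo_do_work_that_must_not_trigger_tdr(foo);
 *      drm_sched_resume_timeout(&foo->sched, remaining);
 */
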
static void drm_sched_job_begin(struct drm_sched_job *s_job)
{
        struct drm_gpu_scheduler *sched = s_job->sched;
        unsigned long flags;

        spin_lock_irqsave(&sched->job_list_lock, flags);
        list_add_tail(&s_job->node, &sched->ring_mirror_list);
        drm_sched_start_timeout(sched);
        spin_unlock_irqrestore(&sched->job_list_lock, flags);
}

static void drm_sched_job_timedout(struct work_struct *work)
{
        struct drm_gpu_scheduler *sched;
        struct drm_sched_job *job;
        unsigned long flags;

        sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);

        /* Protects against concurrent deletion in drm_sched_get_cleanup_job */
        spin_lock_irqsave(&sched->job_list_lock, flags);
        job = list_first_entry_or_null(&sched->ring_mirror_list,
                                       struct drm_sched_job, node);

        if (job) {
                /*
                 * Remove the bad job so it cannot be freed by a concurrent
                 * drm_sched_get_cleanup_job. It will be reinserted after
                 * sched->thread is parked, at which point it is safe.
                 */
                list_del_init(&job->node);
                spin_unlock_irqrestore(&sched->job_list_lock, flags);

                job->sched->ops->timedout_job(job);

                /*
                 * The guilty job completed in the meantime and hence needs to
                 * be removed manually, see the drm_sched_stop() documentation.
                 */
                if (sched->free_guilty) {
                        job->sched->ops->free_job(job);
                        sched->free_guilty = false;
                }
        } else {
                spin_unlock_irqrestore(&sched->job_list_lock, flags);
        }

        spin_lock_irqsave(&sched->job_list_lock, flags);
        drm_sched_start_timeout(sched);
        spin_unlock_irqrestore(&sched->job_list_lock, flags);
}

/**
 * drm_sched_increase_karma - Update sched_entity guilty flag
 *
 * @bad: The job guilty of time out
 *
 * Increment on every hang caused by the 'bad' job. If this exceeds the hang
 * limit of the scheduler then the respective sched entity is marked guilty and
 * jobs from it will not be scheduled further
 */
void drm_sched_increase_karma(struct drm_sched_job *bad)
{
        int i;
        struct drm_sched_entity *tmp;
        struct drm_sched_entity *entity;
        struct drm_gpu_scheduler *sched = bad->sched;

        /* Don't increase @bad's karma if it's from the KERNEL run queue:
         * a GPU hang can corrupt kernel jobs (like VM updating jobs) as well,
         * but kernel jobs are always considered good.
         */
        if (bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) {
                atomic_inc(&bad->karma);
                for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_KERNEL;
                     i++) {
                        struct drm_sched_rq *rq = &sched->sched_rq[i];

                        spin_lock(&rq->lock);
                        list_for_each_entry_safe(entity, tmp, &rq->entities, list) {
                                if (bad->s_fence->scheduled.context ==
                                    entity->fence_context) {
                                        if (atomic_read(&bad->karma) >
                                            bad->sched->hang_limit)
                                                if (entity->guilty)
                                                        atomic_set(entity->guilty, 1);
                                        break;
                                }
                        }
                        spin_unlock(&rq->lock);
                        if (&entity->list != &rq->entities)
                                break;
                }
        }
}
EXPORT_SYMBOL(drm_sched_increase_karma);

/**
 * drm_sched_stop - stop the scheduler
 *
 * @sched: scheduler instance
 * @bad: job which caused the time out
 *
 * Stop the scheduler; also remove and free all completed jobs.
 * Note: the bad job will not be freed as it might be used later, so it is the
 * caller's responsibility to release it manually if it is no longer part of
 * the mirror list.
 */
void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
{
        struct drm_sched_job *s_job, *tmp;
        unsigned long flags;

        kthread_park(sched->thread);

        /*
         * Reinsert the bad job here - now it's safe as
         * drm_sched_get_cleanup_job cannot race against us and release the
         * bad job at this point - we parked (waited for) any in progress
         * (earlier) cleanups and drm_sched_get_cleanup_job will not be called
         * now until the scheduler thread is unparked.
         */
        if (bad && bad->sched == sched)
                /*
                 * Add at the head of the queue to reflect it was the earliest
                 * job extracted.
                 */
                list_add(&bad->node, &sched->ring_mirror_list);

        /*
         * Iterate the job list from the last to the first job and either
         * deactivate their HW callbacks or remove them from the mirror list
         * if they have already signaled.
         * This iteration is thread safe as the sched thread is parked.
         */
        list_for_each_entry_safe_reverse(s_job, tmp, &sched->ring_mirror_list, node) {
                if (s_job->s_fence->parent &&
                    dma_fence_remove_callback(s_job->s_fence->parent,
                                              &s_job->cb)) {
                        atomic_dec(&sched->hw_rq_count);
                } else {
                        /*
                         * Remove the job from ring_mirror_list.
                         * Locking here protects against a concurrent resume timeout.
                         */
                        spin_lock_irqsave(&sched->job_list_lock, flags);
                        list_del_init(&s_job->node);
                        spin_unlock_irqrestore(&sched->job_list_lock, flags);

                        /*
                         * Wait for the job's HW fence callback to finish using
                         * s_job before releasing it.
                         *
                         * The job is still alive, so the fence refcount is at
                         * least 1.
                         */
                        dma_fence_wait(&s_job->s_fence->finished, false);

                        /*
                         * We must keep the bad job alive for later use during
                         * recovery by some of the drivers, but leave a hint
                         * that the guilty job must be released.
                         */
                        if (bad != s_job)
                                sched->ops->free_job(s_job);
                        else
                                sched->free_guilty = true;
                }
        }

        /*
         * Stop the timeout timer in flight, as it is rearmed in drm_sched_start().
         * This prevents a pending timeout work item from firing right after
         * this TDR finishes and before the newly restarted jobs have had a
         * chance to complete.
         */
        cancel_delayed_work(&sched->work_tdr);
}
EXPORT_SYMBOL(drm_sched_stop);

/**
 * drm_sched_start - recover jobs after a reset
 *
 * @sched: scheduler instance
 * @full_recovery: proceed with complete sched restart
 *
 */
void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery)
{
        struct drm_sched_job *s_job, *tmp;
        unsigned long flags;
        int r;

        /*
         * Locking the list is not required here as the sched thread is parked
         * so no new jobs are being inserted or removed. Also concurrent
         * GPU recovers can't run in parallel.
         */
        list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
                struct dma_fence *fence = s_job->s_fence->parent;

                atomic_inc(&sched->hw_rq_count);

                if (!full_recovery)
                        continue;

                if (fence) {
                        r = dma_fence_add_callback(fence, &s_job->cb,
                                                   drm_sched_process_job);
                        if (r == -ENOENT)
                                drm_sched_process_job(fence, &s_job->cb);
                        else if (r)
                                DRM_ERROR("fence add callback failed (%d)\n",
                                          r);
                } else
                        drm_sched_process_job(NULL, &s_job->cb);
        }

        if (full_recovery) {
                spin_lock_irqsave(&sched->job_list_lock, flags);
                drm_sched_start_timeout(sched);
                spin_unlock_irqrestore(&sched->job_list_lock, flags);
        }

        kthread_unpark(sched->thread);
}
EXPORT_SYMBOL(drm_sched_start);

/**
 * drm_sched_resubmit_jobs - helper to relaunch jobs from the mirror ring list
 *
 * @sched: scheduler instance
 *
 */
void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched)
{
        struct drm_sched_job *s_job, *tmp;
        uint64_t guilty_context;
        bool found_guilty = false;
        struct dma_fence *fence;

        list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
                struct drm_sched_fence *s_fence = s_job->s_fence;

                if (!found_guilty && atomic_read(&s_job->karma) > sched->hang_limit) {
                        found_guilty = true;
                        guilty_context = s_job->s_fence->scheduled.context;
                }

                if (found_guilty && s_job->s_fence->scheduled.context == guilty_context)
                        dma_fence_set_error(&s_fence->finished, -ECANCELED);

                dma_fence_put(s_job->s_fence->parent);
                fence = sched->ops->run_job(s_job);

                if (IS_ERR_OR_NULL(fence)) {
                        if (IS_ERR(fence))
                                dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));

                        s_job->s_fence->parent = NULL;
                } else {
                        s_job->s_fence->parent = fence;
                }
        }
}
EXPORT_SYMBOL(drm_sched_resubmit_jobs);

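/*
 * drm_sched_stop(), drm_sched_increase_karma(), drm_sched_resubmit_jobs() and
 * drm_sched_start() are building blocks for the driver's timeout/reset path.
 * A hypothetical sketch of a timedout_job callback using them (the foo_*
 * names and the reset helper are illustrative, not prescribed by this API):
 *
 *      static void foo_timedout_job(struct drm_sched_job *sched_job)
 *      {
 *              struct drm_gpu_scheduler *sched = sched_job->sched;
 *
 *              // park the scheduler thread and detach HW fence callbacks
 *              drm_sched_stop(sched, sched_job);
 *
 *              // mark the offending entity guilty once it exceeds hang_limit
 *              drm_sched_increase_karma(sched_job);
 *
 *              foo_hw_reset(sched_job);        // driver-specific reset
 *
 *              // re-run the remaining jobs on the freshly reset hardware
 *              drm_sched_resubmit_jobs(sched);
 *
 *              // unpark the thread and re-arm the job timeout
 *              drm_sched_start(sched, true);
 *      }
 */
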
/**
 * drm_sched_job_init - init a scheduler job
 *
 * @job: scheduler job to init
 * @entity: scheduler entity to use
 * @owner: job owner for debugging
 *
 * Refer to drm_sched_entity_push_job() documentation
 * for locking considerations.
 *
 * Returns 0 for success, negative error code otherwise.
 */
int drm_sched_job_init(struct drm_sched_job *job,
                       struct drm_sched_entity *entity,
                       void *owner)
{
        struct drm_gpu_scheduler *sched;

        drm_sched_entity_select_rq(entity);
        if (!entity->rq)
                return -ENOENT;

        sched = entity->rq->sched;

        job->sched = sched;
        job->entity = entity;
        job->s_priority = entity->rq - sched->sched_rq;
        job->s_fence = drm_sched_fence_create(entity, owner);
        if (!job->s_fence)
                return -ENOMEM;
        job->id = atomic64_inc_return(&sched->job_id_count);

        INIT_LIST_HEAD(&job->node);

        return 0;
}
EXPORT_SYMBOL(drm_sched_job_init);

/**
 * drm_sched_job_cleanup - clean up scheduler job resources
 *
 * @job: scheduler job to clean up
 */
void drm_sched_job_cleanup(struct drm_sched_job *job)
{
        dma_fence_put(&job->s_fence->finished);
        job->s_fence = NULL;
}
EXPORT_SYMBOL(drm_sched_job_cleanup);

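/*
 * A hypothetical sketch of the submission path in a driver, showing where
 * drm_sched_job_init() and drm_sched_job_cleanup() fit (the foo_* names are
 * illustrative; real drivers embed struct drm_sched_job in their own job
 * structure):
 *
 *      r = drm_sched_job_init(&foo_job->base, entity, foo_file_priv);
 *      if (r)
 *              return r;
 *
 *      r = foo_collect_dependencies(foo_job);
 *      if (r) {
 *              // nothing was pushed yet, so release the scheduler fences
 *              drm_sched_job_cleanup(&foo_job->base);
 *              return r;
 *      }
 *
 *      // from here on the scheduler owns the job and will call free_job()
 *      drm_sched_entity_push_job(&foo_job->base, entity);
 */
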
/**
 * drm_sched_ready - is the scheduler ready
 *
 * @sched: scheduler instance
 *
 * Return true if we can push more jobs to the hw, otherwise false.
 */
static bool drm_sched_ready(struct drm_gpu_scheduler *sched)
{
        return atomic_read(&sched->hw_rq_count) <
                sched->hw_submission_limit;
}

/**
 * drm_sched_wakeup - Wake up the scheduler when it is ready
 *
 * @sched: scheduler instance
 *
 */
void drm_sched_wakeup(struct drm_gpu_scheduler *sched)
{
        if (drm_sched_ready(sched))
                wake_up_interruptible(&sched->wake_up_worker);
}

/**
 * drm_sched_select_entity - Select next entity to process
 *
 * @sched: scheduler instance
 *
 * Returns the entity to process or NULL if none are found.
 */
static struct drm_sched_entity *
drm_sched_select_entity(struct drm_gpu_scheduler *sched)
{
        struct drm_sched_entity *entity;
        int i;

        if (!drm_sched_ready(sched))
                return NULL;

        /* Kernel run queue has higher priority than normal run queue */
        for (i = DRM_SCHED_PRIORITY_MAX - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
                entity = drm_sched_rq_select_entity(&sched->sched_rq[i]);
                if (entity)
                        break;
        }

        return entity;
}

/**
 * drm_sched_process_job - process a job
 *
 * @f: fence
 * @cb: fence callbacks
 *
 * Called after job has finished execution.
 */
static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
{
        struct drm_sched_job *s_job = container_of(cb, struct drm_sched_job, cb);
        struct drm_sched_fence *s_fence = s_job->s_fence;
        struct drm_gpu_scheduler *sched = s_fence->sched;

        atomic_dec(&sched->hw_rq_count);
        atomic_dec(&sched->score);

        trace_drm_sched_process_job(s_fence);

        dma_fence_get(&s_fence->finished);
        drm_sched_fence_finished(s_fence);
        dma_fence_put(&s_fence->finished);
        wake_up_interruptible(&sched->wake_up_worker);
}

/**
 * drm_sched_get_cleanup_job - fetch the next finished job to be destroyed
 *
 * @sched: scheduler instance
 *
 * Returns the next finished job from the mirror list (if there is one)
 * ready to be destroyed.
 */
static struct drm_sched_job *
drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
{
        struct drm_sched_job *job;
        unsigned long flags;

        /*
         * Don't destroy jobs while the timeout worker is running, or while the
         * thread is being parked and hence assumed not to touch ring_mirror_list.
         */
        if ((sched->timeout != MAX_SCHEDULE_TIMEOUT &&
            !cancel_delayed_work(&sched->work_tdr)) ||
            __kthread_should_park(sched->thread))
                return NULL;

        spin_lock_irqsave(&sched->job_list_lock, flags);

        job = list_first_entry_or_null(&sched->ring_mirror_list,
                                       struct drm_sched_job, node);

        if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
                /* remove job from ring_mirror_list */
                list_del_init(&job->node);
        } else {
                job = NULL;
                /* queue timeout for next job */
                drm_sched_start_timeout(sched);
        }

        spin_unlock_irqrestore(&sched->job_list_lock, flags);

        return job;
}

/**
 * drm_sched_blocked - check if the scheduler is blocked
 *
 * @sched: scheduler instance
 *
 * Returns true if blocked, otherwise false.
 */
static bool drm_sched_blocked(struct drm_gpu_scheduler *sched)
{
        if (kthread_should_park()) {
                kthread_parkme();
                return true;
        }

        return false;
}

/**
 * drm_sched_main - main scheduler thread
 *
 * @param: scheduler instance
 *
 * Returns 0.
 */
static int drm_sched_main(void *param)
{
        struct sched_param sparam = {.sched_priority = 1};
        struct drm_gpu_scheduler *sched = (struct drm_gpu_scheduler *)param;
        int r;

        sched_setscheduler(current, SCHED_FIFO, &sparam);

        while (!kthread_should_stop()) {
                struct drm_sched_entity *entity = NULL;
                struct drm_sched_fence *s_fence;
                struct drm_sched_job *sched_job;
                struct dma_fence *fence;
                struct drm_sched_job *cleanup_job = NULL;

                wait_event_interruptible(sched->wake_up_worker,
                                         (cleanup_job = drm_sched_get_cleanup_job(sched)) ||
                                         (!drm_sched_blocked(sched) &&
                                          (entity = drm_sched_select_entity(sched))) ||
                                         kthread_should_stop());

                if (cleanup_job) {
                        sched->ops->free_job(cleanup_job);
                        /* queue timeout for next job */
                        drm_sched_start_timeout(sched);
                }

                if (!entity)
                        continue;

                sched_job = drm_sched_entity_pop_job(entity);

                complete(&entity->entity_idle);

                if (!sched_job)
                        continue;

                s_fence = sched_job->s_fence;

                atomic_inc(&sched->hw_rq_count);
                drm_sched_job_begin(sched_job);

                fence = sched->ops->run_job(sched_job);
                drm_sched_fence_scheduled(s_fence);

                if (!IS_ERR_OR_NULL(fence)) {
                        s_fence->parent = dma_fence_get(fence);
                        r = dma_fence_add_callback(fence, &sched_job->cb,
                                                   drm_sched_process_job);
                        if (r == -ENOENT)
                                drm_sched_process_job(fence, &sched_job->cb);
                        else if (r)
                                DRM_ERROR("fence add callback failed (%d)\n",
                                          r);
                        dma_fence_put(fence);
                } else {
                        if (IS_ERR(fence))
                                dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));

                        drm_sched_process_job(NULL, &sched_job->cb);
                }

                wake_up(&sched->job_scheduled);
        }
        return 0;
}

/**
 * drm_sched_init - Init a gpu scheduler instance
 *
 * @sched: scheduler instance
 * @ops: backend operations for this scheduler
 * @hw_submission: number of hw submissions that can be in flight
 * @hang_limit: number of times to allow a job to hang before dropping it
 * @timeout: timeout value in jiffies for the scheduler
 * @name: name used for debugging
 *
 * Return 0 on success, otherwise error code.
 */
int drm_sched_init(struct drm_gpu_scheduler *sched,
                   const struct drm_sched_backend_ops *ops,
                   unsigned hw_submission,
                   unsigned hang_limit,
                   long timeout,
                   const char *name)
{
        int i, ret;

        sched->ops = ops;
        sched->hw_submission_limit = hw_submission;
        sched->name = name;
        sched->timeout = timeout;
        sched->hang_limit = hang_limit;
        for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_MAX; i++)
                drm_sched_rq_init(sched, &sched->sched_rq[i]);

        init_waitqueue_head(&sched->wake_up_worker);
        init_waitqueue_head(&sched->job_scheduled);
        INIT_LIST_HEAD(&sched->ring_mirror_list);
        spin_lock_init(&sched->job_list_lock);
        atomic_set(&sched->hw_rq_count, 0);
        INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
        atomic_set(&sched->score, 0);
        atomic64_set(&sched->job_id_count, 0);

        /* Each scheduler will run on a separate kernel thread */
        sched->thread = kthread_run(drm_sched_main, sched, sched->name);
        if (IS_ERR(sched->thread)) {
                ret = PTR_ERR(sched->thread);
                sched->thread = NULL;
                DRM_ERROR("Failed to create scheduler for %s.\n", name);
                return ret;
        }

        sched->ready = true;
        return 0;
}
EXPORT_SYMBOL(drm_sched_init);

/**
 * drm_sched_fini - Destroy a gpu scheduler
 *
 * @sched: scheduler instance
 *
 * Tears down and cleans up the scheduler.
 */
void drm_sched_fini(struct drm_gpu_scheduler *sched)
{
        if (sched->thread)
                kthread_stop(sched->thread);

        sched->ready = false;
}
EXPORT_SYMBOL(drm_sched_fini);
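
/*
 * On driver teardown, entities that feed this scheduler should be flushed and
 * destroyed before the scheduler itself is torn down. A hypothetical sketch
 * (the foo_* names are illustrative):
 *
 *      drm_sched_entity_destroy(&foo->entity); // flushes and kills the entity
 *      drm_sched_fini(&foo->sched);            // stops the scheduler thread
 */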