linux/drivers/gpu/drm/scheduler/sched_main.c
/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

/**
 * DOC: Overview
 *
 * The GPU scheduler provides entities which allow userspace to push jobs
 * into software queues which are then scheduled on a hardware run queue.
 * The software queues have a priority among them. The scheduler selects the
 * entities from the run queue using a FIFO. The scheduler provides dependency
 * handling features among jobs. The driver is supposed to provide callback
 * functions for backend operations to the scheduler, like submitting a job to
 * the hardware run queue, returning the dependencies of a job, etc.
 *
 * The organisation of the scheduler is as follows:
 *
 * 1. Each hw run queue has one scheduler
 * 2. Each scheduler has multiple run queues with different priorities
 *    (e.g., HIGH_HW, HIGH_SW, KERNEL, NORMAL)
 * 3. Each scheduler run queue has a queue of entities to schedule
 * 4. Entities themselves maintain a queue of jobs that will be scheduled on
 *    the hardware.
 *
 * The jobs in an entity are always scheduled in the order in which they were
 * pushed.
 */
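
/*
 * Illustrative sketch (compiled out): the kind of backend callbacks a driver
 * provides to the scheduler, as described in the overview above. The foo_*
 * names and types are hypothetical; only the drm_sched_backend_ops callback
 * names come from <drm/gpu_scheduler.h>.
 */
#if 0
static struct dma_fence *foo_run_job(struct drm_sched_job *sched_job)
{
        /* Push the job to the hardware ring and return its hardware fence. */
        return foo_ring_submit(to_foo_job(sched_job));
}

static const struct drm_sched_backend_ops foo_sched_ops = {
        .dependency     = foo_job_dependency,   /* next fence to wait on, or NULL */
        .run_job        = foo_run_job,          /* submit to the hardware run queue */
        .timedout_job   = foo_job_timedout,     /* called by the TDR worker */
        .free_job       = foo_free_job,         /* release the job's resources */
};
#endif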

#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/sched.h>
#include <linux/completion.h>
#include <uapi/linux/sched/types.h>

#include <drm/drm_print.h>
#include <drm/gpu_scheduler.h>
#include <drm/spsc_queue.h>

#define CREATE_TRACE_POINTS
#include "gpu_scheduler_trace.h"

#define to_drm_sched_job(sched_job)             \
                container_of((sched_job), struct drm_sched_job, queue_node)

static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb);

/**
 * drm_sched_rq_init - initialize a given run queue struct
 *
 * @sched: scheduler instance to associate with this run queue
 * @rq: scheduler run queue
 *
 * Initializes a scheduler runqueue.
 */
static void drm_sched_rq_init(struct drm_gpu_scheduler *sched,
                              struct drm_sched_rq *rq)
{
        spin_lock_init(&rq->lock);
        INIT_LIST_HEAD(&rq->entities);
        rq->current_entity = NULL;
        rq->sched = sched;
}

/**
 * drm_sched_rq_add_entity - add an entity
 *
 * @rq: scheduler run queue
 * @entity: scheduler entity
 *
 * Adds a scheduler entity to the run queue.
 */
void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
                             struct drm_sched_entity *entity)
{
        if (!list_empty(&entity->list))
                return;
        spin_lock(&rq->lock);
        list_add_tail(&entity->list, &rq->entities);
        spin_unlock(&rq->lock);
}

/**
 * drm_sched_rq_remove_entity - remove an entity
 *
 * @rq: scheduler run queue
 * @entity: scheduler entity
 *
 * Removes a scheduler entity from the run queue.
 */
void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
                                struct drm_sched_entity *entity)
{
        if (list_empty(&entity->list))
                return;
        spin_lock(&rq->lock);
        list_del_init(&entity->list);
        if (rq->current_entity == entity)
                rq->current_entity = NULL;
        spin_unlock(&rq->lock);
}

/**
 * drm_sched_rq_select_entity - Select an entity which could provide a job to run
 *
 * @rq: scheduler run queue to check.
 *
 * Try to find a ready entity, returns NULL if none found.
 */
static struct drm_sched_entity *
drm_sched_rq_select_entity(struct drm_sched_rq *rq)
{
        struct drm_sched_entity *entity;

        spin_lock(&rq->lock);

        entity = rq->current_entity;
        if (entity) {
                list_for_each_entry_continue(entity, &rq->entities, list) {
                        if (drm_sched_entity_is_ready(entity)) {
                                rq->current_entity = entity;
                                reinit_completion(&entity->entity_idle);
                                spin_unlock(&rq->lock);
                                return entity;
                        }
                }
        }

        list_for_each_entry(entity, &rq->entities, list) {

                if (drm_sched_entity_is_ready(entity)) {
                        rq->current_entity = entity;
                        reinit_completion(&entity->entity_idle);
                        spin_unlock(&rq->lock);
                        return entity;
                }

                if (entity == rq->current_entity)
                        break;
        }

        spin_unlock(&rq->lock);

        return NULL;
}

/**
 * drm_sched_dependency_optimized - test if the dependency can be optimized
 *
 * @fence: the dependency fence
 * @entity: the entity which depends on the above fence
 *
 * Returns true if the dependency can be optimized and false otherwise.
 */
bool drm_sched_dependency_optimized(struct dma_fence *fence,
                                    struct drm_sched_entity *entity)
{
        struct drm_gpu_scheduler *sched = entity->rq->sched;
        struct drm_sched_fence *s_fence;

        if (!fence || dma_fence_is_signaled(fence))
                return false;
        if (fence->context == entity->fence_context)
                return true;
        s_fence = to_drm_sched_fence(fence);
        if (s_fence && s_fence->sched == sched)
                return true;

        return false;
}
EXPORT_SYMBOL(drm_sched_dependency_optimized);

/**
 * drm_sched_start_timeout - start timeout for reset worker
 *
 * @sched: scheduler instance to start the worker for
 *
 * Start the timeout for the given scheduler.
 */
static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched)
{
        if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
            !list_empty(&sched->ring_mirror_list))
                schedule_delayed_work(&sched->work_tdr, sched->timeout);
}

/**
 * drm_sched_fault - immediately start timeout handler
 *
 * @sched: scheduler where the timeout handling should be started.
 *
 * Start timeout handling immediately when the driver detects a hardware fault.
 */
void drm_sched_fault(struct drm_gpu_scheduler *sched)
{
        mod_delayed_work(system_wq, &sched->work_tdr, 0);
}
EXPORT_SYMBOL(drm_sched_fault);
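
/*
 * Illustrative sketch (compiled out): a driver's fault interrupt handler
 * kicking the timeout worker right away instead of waiting for the regular
 * timeout to expire. The foo_* names are hypothetical.
 */
#if 0
static irqreturn_t foo_fault_irq(int irq, void *arg)
{
        struct foo_ring *ring = arg;

        /* The hardware reported a fault; start timeout handling now. */
        drm_sched_fault(&ring->sched);
        return IRQ_HANDLED;
}
#endif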

/**
 * drm_sched_suspend_timeout - Suspend scheduler job timeout
 *
 * @sched: scheduler instance for which to suspend the timeout
 *
 * Suspend the delayed work timeout for the scheduler. This is done by
 * modifying the delayed work timeout to an arbitrarily large value,
 * MAX_SCHEDULE_TIMEOUT in this case. Note that this function can be
 * called from an IRQ context.
 *
 * Returns the remaining timeout in jiffies.
 */
unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched)
{
        unsigned long sched_timeout, now = jiffies;

        sched_timeout = sched->work_tdr.timer.expires;

        /*
         * Modify the timeout to an arbitrarily large value. This also prevents
         * the timeout from being restarted when new submissions arrive.
         */
        if (mod_delayed_work(system_wq, &sched->work_tdr, MAX_SCHEDULE_TIMEOUT)
                        && time_after(sched_timeout, now))
                return sched_timeout - now;
        else
                return sched->timeout;
}
EXPORT_SYMBOL(drm_sched_suspend_timeout);

/**
 * drm_sched_resume_timeout - Resume scheduler job timeout
 *
 * @sched: scheduler instance for which to resume the timeout
 * @remaining: remaining timeout
 *
 * Resume the delayed work timeout for the scheduler. Note that
 * this function can be called from an IRQ context.
 */
void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
                unsigned long remaining)
{
        unsigned long flags;

        spin_lock_irqsave(&sched->job_list_lock, flags);

        if (list_empty(&sched->ring_mirror_list))
                cancel_delayed_work(&sched->work_tdr);
        else
                mod_delayed_work(system_wq, &sched->work_tdr, remaining);

        spin_unlock_irqrestore(&sched->job_list_lock, flags);
}
EXPORT_SYMBOL(drm_sched_resume_timeout);
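
/*
 * Illustrative sketch (compiled out): pairing drm_sched_suspend_timeout() and
 * drm_sched_resume_timeout() around a driver operation that legitimately
 * stalls the ring, restoring whatever time was left afterwards. The foo_*
 * names and the maintenance step are hypothetical.
 */
#if 0
static void foo_do_slow_maintenance(struct foo_ring *ring)
{
        unsigned long remaining;

        remaining = drm_sched_suspend_timeout(&ring->sched);

        /* May take longer than the scheduler's regular timeout. */
        foo_ring_run_maintenance(ring);

        drm_sched_resume_timeout(&ring->sched, remaining);
}
#endif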

static void drm_sched_job_begin(struct drm_sched_job *s_job)
{
        struct drm_gpu_scheduler *sched = s_job->sched;
        unsigned long flags;

        spin_lock_irqsave(&sched->job_list_lock, flags);
        list_add_tail(&s_job->node, &sched->ring_mirror_list);
        drm_sched_start_timeout(sched);
        spin_unlock_irqrestore(&sched->job_list_lock, flags);
}

static void drm_sched_job_timedout(struct work_struct *work)
{
        struct drm_gpu_scheduler *sched;
        struct drm_sched_job *job;
        unsigned long flags;

        sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
        job = list_first_entry_or_null(&sched->ring_mirror_list,
                                       struct drm_sched_job, node);

        if (job) {
                job->sched->ops->timedout_job(job);

                /*
                 * The guilty job already completed, so it needs to be freed
                 * manually here; see the drm_sched_stop() documentation.
                 */
                if (sched->free_guilty) {
                        job->sched->ops->free_job(job);
                        sched->free_guilty = false;
                }
        }

        spin_lock_irqsave(&sched->job_list_lock, flags);
        drm_sched_start_timeout(sched);
        spin_unlock_irqrestore(&sched->job_list_lock, flags);
}

/**
 * drm_sched_increase_karma - Update sched_entity guilty flag
 *
 * @bad: The job guilty of time out
 *
 * Increment on every hang caused by the 'bad' job. If this exceeds the hang
 * limit of the scheduler then the respective sched entity is marked guilty and
 * jobs from it will not be scheduled further.
 */
void drm_sched_increase_karma(struct drm_sched_job *bad)
{
        int i;
        struct drm_sched_entity *tmp;
        struct drm_sched_entity *entity;
        struct drm_gpu_scheduler *sched = bad->sched;

        /* Don't increase @bad's karma if it's from the KERNEL RQ, because a
         * GPU hang can also corrupt kernel jobs (like VM updating jobs), but
         * kernel jobs are always considered good.
         */
        if (bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) {
                atomic_inc(&bad->karma);
                for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_KERNEL;
                     i++) {
                        struct drm_sched_rq *rq = &sched->sched_rq[i];

                        spin_lock(&rq->lock);
                        list_for_each_entry_safe(entity, tmp, &rq->entities, list) {
                                if (bad->s_fence->scheduled.context ==
                                    entity->fence_context) {
                                        if (atomic_read(&bad->karma) >
                                            bad->sched->hang_limit)
                                                if (entity->guilty)
                                                        atomic_set(entity->guilty, 1);
                                        break;
                                }
                        }
                        spin_unlock(&rq->lock);
                        if (&entity->list != &rq->entities)
                                break;
                }
        }
}
EXPORT_SYMBOL(drm_sched_increase_karma);

/**
 * drm_sched_stop - stop the scheduler
 *
 * @sched: scheduler instance
 * @bad: job which caused the time out
 *
 * Stop the scheduler, and remove and free all completed jobs.
 * Note: the bad job will not be freed as it might be used later, so it is the
 * caller's responsibility to release it manually if it is no longer part of
 * the mirror list.
 */
void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
{
        struct drm_sched_job *s_job, *tmp;
        unsigned long flags;

        kthread_park(sched->thread);

        /*
         * Iterate the job list from the last to the first entry and either
         * deactivate their HW callbacks or remove them from the mirror list
         * if they have already signaled.
         * This iteration is thread safe as the sched thread is stopped.
         */
        list_for_each_entry_safe_reverse(s_job, tmp, &sched->ring_mirror_list, node) {
                if (s_job->s_fence->parent &&
                    dma_fence_remove_callback(s_job->s_fence->parent,
                                              &s_job->cb)) {
                        atomic_dec(&sched->hw_rq_count);
                } else {
                        /*
                         * Remove the job from the ring_mirror_list. Locking
                         * here is for concurrent resume timeout.
                         */
                        spin_lock_irqsave(&sched->job_list_lock, flags);
                        list_del_init(&s_job->node);
                        spin_unlock_irqrestore(&sched->job_list_lock, flags);

                        /*
                         * Wait for the job's HW fence callback to finish using
                         * s_job before releasing it.
                         *
                         * The job is still alive, so the fence refcount is at
                         * least 1.
                         */
                        dma_fence_wait(&s_job->s_fence->finished, false);

                        /*
                         * We must keep the bad job alive for later use during
                         * recovery by some of the drivers but leave a hint
                         * that the guilty job must be released.
                         */
                        if (bad != s_job)
                                sched->ops->free_job(s_job);
                        else
                                sched->free_guilty = true;
                }
        }

        /*
         * Stop the pending timer in flight, as we rearm it in drm_sched_start().
         * This avoids the pending timeout work firing right after this TDR
         * finishes and before the newly restarted jobs have had a chance to
         * complete.
         */
        cancel_delayed_work(&sched->work_tdr);
}
EXPORT_SYMBOL(drm_sched_stop);

/**
 * drm_sched_start - recover jobs after a reset
 *
 * @sched: scheduler instance
 * @full_recovery: proceed with complete sched restart
 */
void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery)
{
        struct drm_sched_job *s_job, *tmp;
        unsigned long flags;
        int r;

        /*
         * Locking the list is not required here as the sched thread is parked
         * so no new jobs are being inserted or removed. Also, concurrent
         * GPU recoveries can't run in parallel.
         */
        list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
                struct dma_fence *fence = s_job->s_fence->parent;

                atomic_inc(&sched->hw_rq_count);

                if (!full_recovery)
                        continue;

                if (fence) {
                        r = dma_fence_add_callback(fence, &s_job->cb,
                                                   drm_sched_process_job);
                        if (r == -ENOENT)
                                drm_sched_process_job(fence, &s_job->cb);
                        else if (r)
                                DRM_ERROR("fence add callback failed (%d)\n",
                                          r);
                } else
                        drm_sched_process_job(NULL, &s_job->cb);
        }

        if (full_recovery) {
                spin_lock_irqsave(&sched->job_list_lock, flags);
                drm_sched_start_timeout(sched);
                spin_unlock_irqrestore(&sched->job_list_lock, flags);
        }

        kthread_unpark(sched->thread);
}
EXPORT_SYMBOL(drm_sched_start);

/**
 * drm_sched_resubmit_jobs - helper to relaunch jobs from the mirror ring list
 *
 * @sched: scheduler instance
 */
void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched)
{
        struct drm_sched_job *s_job, *tmp;
        uint64_t guilty_context;
        bool found_guilty = false;
        struct dma_fence *fence;

        list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
                struct drm_sched_fence *s_fence = s_job->s_fence;

                if (!found_guilty && atomic_read(&s_job->karma) > sched->hang_limit) {
                        found_guilty = true;
                        guilty_context = s_job->s_fence->scheduled.context;
                }

                if (found_guilty && s_job->s_fence->scheduled.context == guilty_context)
                        dma_fence_set_error(&s_fence->finished, -ECANCELED);

                dma_fence_put(s_job->s_fence->parent);
                fence = sched->ops->run_job(s_job);

                if (IS_ERR_OR_NULL(fence)) {
                        if (IS_ERR(fence))
                                dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));

                        s_job->s_fence->parent = NULL;
                } else {
                        s_job->s_fence->parent = fence;
                }
        }
}
EXPORT_SYMBOL(drm_sched_resubmit_jobs);
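
/*
 * Illustrative sketch (compiled out): the usual recovery sequence a driver
 * runs from its timedout_job callback, built from the helpers above
 * (drm_sched_stop(), drm_sched_increase_karma(), drm_sched_resubmit_jobs()
 * and drm_sched_start()). The foo_* names and the single-ring layout are
 * hypothetical.
 */
#if 0
static void foo_job_timedout(struct drm_sched_job *sched_job)
{
        struct foo_ring *ring = to_foo_ring(sched_job->sched);

        /* Park the scheduler thread and prune completed jobs. */
        drm_sched_stop(&ring->sched, sched_job);

        /* Record the hang against the offending entity. */
        drm_sched_increase_karma(sched_job);

        /* Reset the hardware while nothing is in flight. */
        foo_ring_reset(ring);

        /* Re-push the remaining jobs and restart the scheduler + timeout. */
        drm_sched_resubmit_jobs(&ring->sched);
        drm_sched_start(&ring->sched, true);
}
#endif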

/**
 * drm_sched_job_init - init a scheduler job
 *
 * @job: scheduler job to init
 * @entity: scheduler entity to use
 * @owner: job owner for debugging
 *
 * Refer to drm_sched_entity_push_job() documentation
 * for locking considerations.
 *
 * Returns 0 for success, negative error code otherwise.
 */
int drm_sched_job_init(struct drm_sched_job *job,
                       struct drm_sched_entity *entity,
                       void *owner)
{
        struct drm_gpu_scheduler *sched;

        drm_sched_entity_select_rq(entity);
        if (!entity->rq)
                return -ENOENT;

        sched = entity->rq->sched;

        job->sched = sched;
        job->entity = entity;
        job->s_priority = entity->rq - sched->sched_rq;
        job->s_fence = drm_sched_fence_create(entity, owner);
        if (!job->s_fence)
                return -ENOMEM;
        job->id = atomic64_inc_return(&sched->job_id_count);

        INIT_LIST_HEAD(&job->node);

        return 0;
}
EXPORT_SYMBOL(drm_sched_job_init);

/**
 * drm_sched_job_cleanup - clean up scheduler job resources
 *
 * @job: scheduler job to clean up
 */
void drm_sched_job_cleanup(struct drm_sched_job *job)
{
        dma_fence_put(&job->s_fence->finished);
        job->s_fence = NULL;
}
EXPORT_SYMBOL(drm_sched_job_cleanup);
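
/*
 * Illustrative sketch (compiled out): the submission path a driver typically
 * follows with drm_sched_job_init() and drm_sched_job_cleanup().
 * drm_sched_entity_push_job() lives in sched_entity.c; the foo_* names and
 * helpers are hypothetical.
 */
#if 0
static int foo_submit_job(struct foo_job *job, struct drm_sched_entity *entity,
                          void *owner)
{
        int ret;

        ret = drm_sched_job_init(&job->base, entity, owner);
        if (ret)
                return ret;

        /* Collect dependencies so the ->dependency callback can return them. */
        ret = foo_job_add_dependencies(job);
        if (ret) {
                /* Undo drm_sched_job_init() since the job was never pushed. */
                drm_sched_job_cleanup(&job->base);
                return ret;
        }

        /* Hands the job to the entity queue; don't touch it afterwards. */
        drm_sched_entity_push_job(&job->base, entity);

        return 0;
}
#endif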

/**
 * drm_sched_ready - is the scheduler ready
 *
 * @sched: scheduler instance
 *
 * Return true if we can push more jobs to the hw, otherwise false.
 */
static bool drm_sched_ready(struct drm_gpu_scheduler *sched)
{
        return atomic_read(&sched->hw_rq_count) <
                sched->hw_submission_limit;
}

/**
 * drm_sched_wakeup - Wake up the scheduler when it is ready
 *
 * @sched: scheduler instance
 */
void drm_sched_wakeup(struct drm_gpu_scheduler *sched)
{
        if (drm_sched_ready(sched))
                wake_up_interruptible(&sched->wake_up_worker);
}

/**
 * drm_sched_select_entity - Select next entity to process
 *
 * @sched: scheduler instance
 *
 * Returns the entity to process or NULL if none are found.
 */
static struct drm_sched_entity *
drm_sched_select_entity(struct drm_gpu_scheduler *sched)
{
        struct drm_sched_entity *entity;
        int i;

        if (!drm_sched_ready(sched))
                return NULL;

        /* Kernel run queue has higher priority than normal run queue */
        for (i = DRM_SCHED_PRIORITY_MAX - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
                entity = drm_sched_rq_select_entity(&sched->sched_rq[i]);
                if (entity)
                        break;
        }

        return entity;
}

/**
 * drm_sched_process_job - process a job
 *
 * @f: fence
 * @cb: fence callback
 *
 * Called after the job has finished execution.
 */
static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
{
        struct drm_sched_job *s_job = container_of(cb, struct drm_sched_job, cb);
        struct drm_sched_fence *s_fence = s_job->s_fence;
        struct drm_gpu_scheduler *sched = s_fence->sched;

        atomic_dec(&sched->hw_rq_count);
        atomic_dec(&sched->num_jobs);

        trace_drm_sched_process_job(s_fence);

        drm_sched_fence_finished(s_fence);
        wake_up_interruptible(&sched->wake_up_worker);
}

/**
 * drm_sched_get_cleanup_job - fetch the next finished job to be destroyed
 *
 * @sched: scheduler instance
 *
 * Returns the next finished job from the mirror list (if there is one)
 * ready to be destroyed.
 */
static struct drm_sched_job *
drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
{
        struct drm_sched_job *job;
        unsigned long flags;

        /*
         * Don't destroy jobs while the timeout worker is running OR while the
         * thread is being parked and hence assumed to not touch the
         * ring_mirror_list.
         */
        if ((sched->timeout != MAX_SCHEDULE_TIMEOUT &&
            !cancel_delayed_work(&sched->work_tdr)) ||
            __kthread_should_park(sched->thread))
                return NULL;

        spin_lock_irqsave(&sched->job_list_lock, flags);

        job = list_first_entry_or_null(&sched->ring_mirror_list,
                                       struct drm_sched_job, node);

        if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
                /* remove job from ring_mirror_list */
                list_del_init(&job->node);
        } else {
                job = NULL;
                /* queue timeout for next job */
                drm_sched_start_timeout(sched);
        }

        spin_unlock_irqrestore(&sched->job_list_lock, flags);

        return job;
}

/**
 * drm_sched_blocked - check if the scheduler is blocked
 *
 * @sched: scheduler instance
 *
 * Returns true if blocked, otherwise false.
 */
static bool drm_sched_blocked(struct drm_gpu_scheduler *sched)
{
        if (kthread_should_park()) {
                kthread_parkme();
                return true;
        }

        return false;
}

/**
 * drm_sched_main - main scheduler thread
 *
 * @param: scheduler instance
 *
 * Returns 0.
 */
static int drm_sched_main(void *param)
{
        struct sched_param sparam = {.sched_priority = 1};
        struct drm_gpu_scheduler *sched = (struct drm_gpu_scheduler *)param;
        int r;

        sched_setscheduler(current, SCHED_FIFO, &sparam);

        while (!kthread_should_stop()) {
                struct drm_sched_entity *entity = NULL;
                struct drm_sched_fence *s_fence;
                struct drm_sched_job *sched_job;
                struct dma_fence *fence;
                struct drm_sched_job *cleanup_job = NULL;

                wait_event_interruptible(sched->wake_up_worker,
                                         (cleanup_job = drm_sched_get_cleanup_job(sched)) ||
                                         (!drm_sched_blocked(sched) &&
                                          (entity = drm_sched_select_entity(sched))) ||
                                         kthread_should_stop());

                if (cleanup_job) {
                        sched->ops->free_job(cleanup_job);
                        /* queue timeout for next job */
                        drm_sched_start_timeout(sched);
                }

                if (!entity)
                        continue;

                sched_job = drm_sched_entity_pop_job(entity);

                complete(&entity->entity_idle);

                if (!sched_job)
                        continue;

                s_fence = sched_job->s_fence;

                atomic_inc(&sched->hw_rq_count);
                drm_sched_job_begin(sched_job);

                fence = sched->ops->run_job(sched_job);
                drm_sched_fence_scheduled(s_fence);

                if (!IS_ERR_OR_NULL(fence)) {
                        s_fence->parent = dma_fence_get(fence);
                        r = dma_fence_add_callback(fence, &sched_job->cb,
                                                   drm_sched_process_job);
                        if (r == -ENOENT)
                                drm_sched_process_job(fence, &sched_job->cb);
                        else if (r)
                                DRM_ERROR("fence add callback failed (%d)\n",
                                          r);
                        dma_fence_put(fence);
                } else {
                        if (IS_ERR(fence))
                                dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));

                        drm_sched_process_job(NULL, &sched_job->cb);
                }

                wake_up(&sched->job_scheduled);
        }
        return 0;
}

/**
 * drm_sched_init - Init a gpu scheduler instance
 *
 * @sched: scheduler instance
 * @ops: backend operations for this scheduler
 * @hw_submission: number of hw submissions that can be in flight
 * @hang_limit: number of times to allow a job to hang before dropping it
 * @timeout: timeout value in jiffies for the scheduler
 * @name: name used for debugging
 *
 * Returns 0 on success, otherwise an error code.
 */
int drm_sched_init(struct drm_gpu_scheduler *sched,
                   const struct drm_sched_backend_ops *ops,
                   unsigned hw_submission,
                   unsigned hang_limit,
                   long timeout,
                   const char *name)
{
        int i, ret;

        sched->ops = ops;
        sched->hw_submission_limit = hw_submission;
        sched->name = name;
        sched->timeout = timeout;
        sched->hang_limit = hang_limit;
        for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_MAX; i++)
                drm_sched_rq_init(sched, &sched->sched_rq[i]);

        init_waitqueue_head(&sched->wake_up_worker);
        init_waitqueue_head(&sched->job_scheduled);
        INIT_LIST_HEAD(&sched->ring_mirror_list);
        spin_lock_init(&sched->job_list_lock);
        atomic_set(&sched->hw_rq_count, 0);
        INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
        atomic_set(&sched->num_jobs, 0);
        atomic64_set(&sched->job_id_count, 0);

        /* Each scheduler will run on a separate kernel thread */
        sched->thread = kthread_run(drm_sched_main, sched, sched->name);
        if (IS_ERR(sched->thread)) {
                ret = PTR_ERR(sched->thread);
                sched->thread = NULL;
                DRM_ERROR("Failed to create scheduler for %s.\n", name);
                return ret;
        }

        sched->ready = true;
        return 0;
}
EXPORT_SYMBOL(drm_sched_init);
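
/*
 * Illustrative sketch (compiled out): creating one scheduler per hardware
 * ring at probe time, using the drm_sched_init() parameters documented
 * above. The foo_* names, the submission/hang limits and the timeout value
 * are hypothetical.
 */
#if 0
static int foo_ring_sched_init(struct foo_ring *ring)
{
        /* One scheduler per HW run queue: 16 jobs in flight, 500 ms timeout. */
        return drm_sched_init(&ring->sched, &foo_sched_ops,
                              16, 2, msecs_to_jiffies(500),
                              ring->name);
}
#endif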

/**
 * drm_sched_fini - Destroy a gpu scheduler
 *
 * @sched: scheduler instance
 *
 * Tears down and cleans up the scheduler.
 */
void drm_sched_fini(struct drm_gpu_scheduler *sched)
{
        if (sched->thread)
                kthread_stop(sched->thread);

        sched->ready = false;
}
EXPORT_SYMBOL(drm_sched_fini);