linux/drivers/gpu/drm/scheduler/sched_main.c
/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

/**
 * DOC: Overview
 *
 * The GPU scheduler provides entities which allow userspace to push jobs
 * into software queues which are then scheduled on a hardware run queue.
 * The software queues have a priority among them. The scheduler selects
 * entities from the run queue using a FIFO. The scheduler also provides
 * dependency handling between jobs. The driver is expected to provide
 * callback functions for backend operations to the scheduler, such as
 * submitting a job to the hardware run queue, returning the dependencies
 * of a job, etc.
 *
 * The organisation of the scheduler is the following:
 *
 * 1. Each hw run queue has one scheduler
 * 2. Each scheduler has multiple run queues with different priorities
 *    (e.g., HIGH_HW, HIGH_SW, KERNEL, NORMAL)
 * 3. Each scheduler run queue has a queue of entities to schedule
 * 4. Entities themselves maintain a queue of jobs that will be scheduled on
 *    the hardware.
 *
 * The jobs in an entity are always scheduled in the order in which they were
 * pushed.
 */
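
/*
 * A minimal driver-side usage sketch of the flow described above. This is
 * illustrative only (my_sched_ops, my_job, my_file_priv and the numeric
 * values are placeholders, and the drm_sched_entity_init() parameters can
 * differ between kernel versions):
 *
 *      struct drm_gpu_scheduler sched;
 *      struct drm_sched_entity entity;
 *      struct drm_gpu_scheduler *sched_list[] = { &sched };
 *
 *      // One scheduler per hw run queue.
 *      drm_sched_init(&sched, &my_sched_ops, 16, 2,
 *                     msecs_to_jiffies(5000), "my_ring");
 *
 *      // An entity is the per-context software queue that jobs are
 *      // pushed into.
 *      drm_sched_entity_init(&entity, DRM_SCHED_PRIORITY_NORMAL,
 *                            sched_list, 1, NULL);
 *
 *      // Per submission: init the job, then hand it to the entity.
 *      drm_sched_job_init(&my_job->base, &entity, my_file_priv);
 *      drm_sched_entity_push_job(&my_job->base, &entity);
 */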

#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/sched.h>
#include <linux/completion.h>
#include <uapi/linux/sched/types.h>

#include <drm/drm_print.h>
#include <drm/gpu_scheduler.h>
#include <drm/spsc_queue.h>

#define CREATE_TRACE_POINTS
#include "gpu_scheduler_trace.h"

#define to_drm_sched_job(sched_job)             \
                container_of((sched_job), struct drm_sched_job, queue_node)

static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb);

/**
 * drm_sched_rq_init - initialize a given run queue struct
 *
 * @sched: scheduler instance to associate with this run queue
 * @rq: scheduler run queue
 *
 * Initializes a scheduler runqueue.
 */
static void drm_sched_rq_init(struct drm_gpu_scheduler *sched,
                              struct drm_sched_rq *rq)
{
        spin_lock_init(&rq->lock);
        INIT_LIST_HEAD(&rq->entities);
        rq->current_entity = NULL;
        rq->sched = sched;
}

/**
 * drm_sched_rq_add_entity - add an entity
 *
 * @rq: scheduler run queue
 * @entity: scheduler entity
 *
 * Adds a scheduler entity to the run queue.
 */
void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
                             struct drm_sched_entity *entity)
{
        if (!list_empty(&entity->list))
                return;
        spin_lock(&rq->lock);
        atomic_inc(&rq->sched->score);
        list_add_tail(&entity->list, &rq->entities);
        spin_unlock(&rq->lock);
}

/**
 * drm_sched_rq_remove_entity - remove an entity
 *
 * @rq: scheduler run queue
 * @entity: scheduler entity
 *
 * Removes a scheduler entity from the run queue.
 */
void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
                                struct drm_sched_entity *entity)
{
        if (list_empty(&entity->list))
                return;
        spin_lock(&rq->lock);
        atomic_dec(&rq->sched->score);
        list_del_init(&entity->list);
        if (rq->current_entity == entity)
                rq->current_entity = NULL;
        spin_unlock(&rq->lock);
}

/**
 * drm_sched_rq_select_entity - Select an entity which could provide a job to run
 *
 * @rq: scheduler run queue to check.
 *
 * Try to find a ready entity; returns NULL if none is found.
 */
static struct drm_sched_entity *
drm_sched_rq_select_entity(struct drm_sched_rq *rq)
{
        struct drm_sched_entity *entity;

        spin_lock(&rq->lock);

        entity = rq->current_entity;
        if (entity) {
                list_for_each_entry_continue(entity, &rq->entities, list) {
                        if (drm_sched_entity_is_ready(entity)) {
                                rq->current_entity = entity;
                                reinit_completion(&entity->entity_idle);
                                spin_unlock(&rq->lock);
                                return entity;
                        }
                }
        }

        list_for_each_entry(entity, &rq->entities, list) {

                if (drm_sched_entity_is_ready(entity)) {
                        rq->current_entity = entity;
                        reinit_completion(&entity->entity_idle);
                        spin_unlock(&rq->lock);
                        return entity;
                }

                if (entity == rq->current_entity)
                        break;
        }

        spin_unlock(&rq->lock);

        return NULL;
}

/**
 * drm_sched_dependency_optimized - test if the dependency can be optimized
 *
 * @fence: the dependency fence
 * @entity: the entity which depends on the above fence
 *
 * Returns true if the dependency can be optimized and false otherwise
 */
bool drm_sched_dependency_optimized(struct dma_fence *fence,
                                    struct drm_sched_entity *entity)
{
        struct drm_gpu_scheduler *sched = entity->rq->sched;
        struct drm_sched_fence *s_fence;

        if (!fence || dma_fence_is_signaled(fence))
                return false;
        if (fence->context == entity->fence_context)
                return true;
        s_fence = to_drm_sched_fence(fence);
        if (s_fence && s_fence->sched == sched)
                return true;

        return false;
}
EXPORT_SYMBOL(drm_sched_dependency_optimized);

/**
 * drm_sched_start_timeout - start timeout for reset worker
 *
 * @sched: scheduler instance to start the worker for
 *
 * Start the timeout for the given scheduler.
 */
static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched)
{
        if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
            !list_empty(&sched->ring_mirror_list))
                schedule_delayed_work(&sched->work_tdr, sched->timeout);
}

/**
 * drm_sched_fault - immediately start timeout handler
 *
 * @sched: scheduler where the timeout handling should be started.
 *
 * Start timeout handling immediately when the driver detects a hardware fault.
 */
void drm_sched_fault(struct drm_gpu_scheduler *sched)
{
        mod_delayed_work(system_wq, &sched->work_tdr, 0);
}
EXPORT_SYMBOL(drm_sched_fault);

/**
 * drm_sched_suspend_timeout - Suspend scheduler job timeout
 *
 * @sched: scheduler instance for which to suspend the timeout
 *
 * Suspend the delayed work timeout for the scheduler. This is done by
 * modifying the delayed work timeout to an arbitrarily large value,
 * MAX_SCHEDULE_TIMEOUT in this case.
 *
 * Returns the timeout remaining.
 *
 */
unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched)
{
        unsigned long sched_timeout, now = jiffies;

        sched_timeout = sched->work_tdr.timer.expires;

        /*
         * Modify the timeout to an arbitrarily large value. This also prevents
         * the timeout from being restarted when new submissions arrive.
         */
        if (mod_delayed_work(system_wq, &sched->work_tdr, MAX_SCHEDULE_TIMEOUT)
                        && time_after(sched_timeout, now))
                return sched_timeout - now;
        else
                return sched->timeout;
}
EXPORT_SYMBOL(drm_sched_suspend_timeout);

/**
 * drm_sched_resume_timeout - Resume scheduler job timeout
 *
 * @sched: scheduler instance for which to resume the timeout
 * @remaining: remaining timeout
 *
 * Resume the delayed work timeout for the scheduler.
 */
void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
                unsigned long remaining)
{
        spin_lock(&sched->job_list_lock);

        if (list_empty(&sched->ring_mirror_list))
                cancel_delayed_work(&sched->work_tdr);
        else
                mod_delayed_work(system_wq, &sched->work_tdr, remaining);

        spin_unlock(&sched->job_list_lock);
}
EXPORT_SYMBOL(drm_sched_resume_timeout);
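
/*
 * Illustrative pairing of drm_sched_suspend_timeout() and
 * drm_sched_resume_timeout() (hypothetical driver code, not taken from any
 * in-tree user; my_dev and my_dev_do_slow_hw_op() are placeholders). The
 * idea is to park the job timeout around an operation that legitimately
 * stalls the hardware so that it is not reported as a hang:
 *
 *      unsigned long remaining;
 *
 *      remaining = drm_sched_suspend_timeout(&my_dev->sched);
 *      my_dev_do_slow_hw_op(my_dev);
 *      drm_sched_resume_timeout(&my_dev->sched, remaining);
 */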

static void drm_sched_job_begin(struct drm_sched_job *s_job)
{
        struct drm_gpu_scheduler *sched = s_job->sched;

        spin_lock(&sched->job_list_lock);
        list_add_tail(&s_job->node, &sched->ring_mirror_list);
        drm_sched_start_timeout(sched);
        spin_unlock(&sched->job_list_lock);
}

static void drm_sched_job_timedout(struct work_struct *work)
{
        struct drm_gpu_scheduler *sched;
        struct drm_sched_job *job;

        sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);

        /* Protects against concurrent deletion in drm_sched_get_cleanup_job */
        spin_lock(&sched->job_list_lock);
        job = list_first_entry_or_null(&sched->ring_mirror_list,
                                       struct drm_sched_job, node);

        if (job) {
                /*
                 * Remove the bad job so it cannot be freed by a concurrent
                 * drm_sched_get_cleanup_job. It will be reinserted after
                 * sched->thread is parked, at which point that is safe.
                 */
                list_del_init(&job->node);
                spin_unlock(&sched->job_list_lock);

                job->sched->ops->timedout_job(job);

                /*
                 * The guilty job did complete and hence needs to be manually
                 * removed. See the drm_sched_stop() documentation.
                 */
                if (sched->free_guilty) {
                        job->sched->ops->free_job(job);
                        sched->free_guilty = false;
                }
        } else {
                spin_unlock(&sched->job_list_lock);
        }

        spin_lock(&sched->job_list_lock);
        drm_sched_start_timeout(sched);
        spin_unlock(&sched->job_list_lock);
}

/**
 * drm_sched_increase_karma - Update sched_entity guilty flag
 *
 * @bad: The job guilty of time out
 *
 * Increment on every hang caused by the 'bad' job. If this exceeds the hang
 * limit of the scheduler then the respective sched entity is marked guilty and
 * jobs from it will not be scheduled further.
 */
void drm_sched_increase_karma(struct drm_sched_job *bad)
{
        int i;
        struct drm_sched_entity *tmp;
        struct drm_sched_entity *entity;
        struct drm_gpu_scheduler *sched = bad->sched;

        /* Don't increase @bad's karma if it's from the KERNEL RQ, because a
         * GPU hang can corrupt kernel jobs (like VM updating jobs), but
         * kernel jobs are always considered good.
         */
        if (bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) {
                atomic_inc(&bad->karma);
                for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_KERNEL;
                     i++) {
                        struct drm_sched_rq *rq = &sched->sched_rq[i];

                        spin_lock(&rq->lock);
                        list_for_each_entry_safe(entity, tmp, &rq->entities, list) {
                                if (bad->s_fence->scheduled.context ==
                                    entity->fence_context) {
                                        if (atomic_read(&bad->karma) >
                                            bad->sched->hang_limit)
                                                if (entity->guilty)
                                                        atomic_set(entity->guilty, 1);
                                        break;
                                }
                        }
                        spin_unlock(&rq->lock);
                        if (&entity->list != &rq->entities)
                                break;
                }
        }
}
EXPORT_SYMBOL(drm_sched_increase_karma);

/**
 * drm_sched_stop - stop the scheduler
 *
 * @sched: scheduler instance
 * @bad: job which caused the time out
 *
 * Stop the scheduler and also remove and free all completed jobs.
 * Note: the bad job will not be freed as it might be used later, so it is the
 * caller's responsibility to release it manually if it is no longer part of
 * the mirror list.
 *
 */
void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
{
        struct drm_sched_job *s_job, *tmp;

        kthread_park(sched->thread);

        /*
         * Reinsert the bad job here - this is now safe as
         * drm_sched_get_cleanup_job cannot race against us and release the
         * bad job at this point - we parked (waited for) any in progress
         * (earlier) cleanups and drm_sched_get_cleanup_job will not be called
         * again until the scheduler thread is unparked.
         */
        if (bad && bad->sched == sched)
                /*
                 * Add at the head of the queue to reflect it was the earliest
                 * job extracted.
                 */
                list_add(&bad->node, &sched->ring_mirror_list);

        /*
         * Iterate the job list from the latest to the earliest entry and
         * either deactivate its HW callback or remove it from the mirror list
         * if it has already signaled.
         * This iteration is thread safe as the sched thread is parked.
         */
        list_for_each_entry_safe_reverse(s_job, tmp, &sched->ring_mirror_list, node) {
                if (s_job->s_fence->parent &&
                    dma_fence_remove_callback(s_job->s_fence->parent,
                                              &s_job->cb)) {
                        atomic_dec(&sched->hw_rq_count);
                } else {
                        /*
                         * Remove the job from ring_mirror_list.
                         * Locking here is for concurrent resume timeout.
                         */
                        spin_lock(&sched->job_list_lock);
                        list_del_init(&s_job->node);
                        spin_unlock(&sched->job_list_lock);

                        /*
                         * Wait for the job's HW fence callback to finish using
                         * s_job before releasing it.
                         *
                         * The job is still alive, so the fence refcount is at
                         * least 1.
                         */
                        dma_fence_wait(&s_job->s_fence->finished, false);

                        /*
                         * We must keep the bad job alive for later use during
                         * recovery by some of the drivers, but leave a hint
                         * that the guilty job must be released.
                         */
                        if (bad != s_job)
                                sched->ops->free_job(s_job);
                        else
                                sched->free_guilty = true;
                }
        }

        /*
         * Stop the pending timer in flight as we rearm it in drm_sched_start.
         * This prevents the pending timeout work in progress from firing right
         * away after this TDR finishes and before the newly restarted jobs have
         * had a chance to complete.
         */
        cancel_delayed_work(&sched->work_tdr);
}
EXPORT_SYMBOL(drm_sched_stop);

/**
 * drm_sched_start - recover jobs after a reset
 *
 * @sched: scheduler instance
 * @full_recovery: proceed with complete sched restart
 *
 */
void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery)
{
        struct drm_sched_job *s_job, *tmp;
        int r;

        /*
         * Locking the list is not required here as the sched thread is parked
         * so no new jobs are being inserted or removed. Also, concurrent
         * GPU recoveries can't run in parallel.
         */
        list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
                struct dma_fence *fence = s_job->s_fence->parent;

                atomic_inc(&sched->hw_rq_count);

                if (!full_recovery)
                        continue;

                if (fence) {
                        r = dma_fence_add_callback(fence, &s_job->cb,
                                                   drm_sched_process_job);
                        if (r == -ENOENT)
                                drm_sched_process_job(fence, &s_job->cb);
                        else if (r)
                                DRM_ERROR("fence add callback failed (%d)\n",
                                          r);
                } else {
                        drm_sched_process_job(NULL, &s_job->cb);
                }
        }

        if (full_recovery) {
                spin_lock(&sched->job_list_lock);
                drm_sched_start_timeout(sched);
                spin_unlock(&sched->job_list_lock);
        }

        kthread_unpark(sched->thread);
}
EXPORT_SYMBOL(drm_sched_start);

/**
 * drm_sched_resubmit_jobs - helper to relaunch jobs from the ring mirror list
 *
 * @sched: scheduler instance
 *
 */
void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched)
{
        struct drm_sched_job *s_job, *tmp;
        uint64_t guilty_context;
        bool found_guilty = false;
        struct dma_fence *fence;

        list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
                struct drm_sched_fence *s_fence = s_job->s_fence;

                if (!found_guilty && atomic_read(&s_job->karma) > sched->hang_limit) {
                        found_guilty = true;
                        guilty_context = s_job->s_fence->scheduled.context;
                }

                if (found_guilty && s_job->s_fence->scheduled.context == guilty_context)
                        dma_fence_set_error(&s_fence->finished, -ECANCELED);

                dma_fence_put(s_job->s_fence->parent);
                fence = sched->ops->run_job(s_job);

                if (IS_ERR_OR_NULL(fence)) {
                        if (IS_ERR(fence))
                                dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));

                        s_job->s_fence->parent = NULL;
                } else {
                        s_job->s_fence->parent = fence;
                }
        }
}
EXPORT_SYMBOL(drm_sched_resubmit_jobs);
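
/*
 * A hedged sketch of how the recovery helpers above are commonly combined in
 * a driver's timedout_job (TDR) handler; my_dev and my_dev_reset_hw() are
 * placeholders and the exact sequence is driver specific:
 *
 *      drm_sched_stop(&my_dev->sched, bad_job);    // park thread, pull jobs
 *      drm_sched_increase_karma(bad_job);          // mark repeat offenders
 *      my_dev_reset_hw(my_dev);                    // driver-specific reset
 *      drm_sched_resubmit_jobs(&my_dev->sched);    // re-run pending jobs
 *      drm_sched_start(&my_dev->sched, true);      // rearm timeout, unpark
 */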

/**
 * drm_sched_job_init - init a scheduler job
 *
 * @job: scheduler job to init
 * @entity: scheduler entity to use
 * @owner: job owner for debugging
 *
 * Refer to drm_sched_entity_push_job() documentation
 * for locking considerations.
 *
 * Returns 0 for success, negative error code otherwise.
 */
int drm_sched_job_init(struct drm_sched_job *job,
                       struct drm_sched_entity *entity,
                       void *owner)
{
        struct drm_gpu_scheduler *sched;

        drm_sched_entity_select_rq(entity);
        if (!entity->rq)
                return -ENOENT;

        sched = entity->rq->sched;

        job->sched = sched;
        job->entity = entity;
        job->s_priority = entity->rq - sched->sched_rq;
        job->s_fence = drm_sched_fence_create(entity, owner);
        if (!job->s_fence)
                return -ENOMEM;
        job->id = atomic64_inc_return(&sched->job_id_count);

        INIT_LIST_HEAD(&job->node);

        return 0;
}
EXPORT_SYMBOL(drm_sched_job_init);

/**
 * drm_sched_job_cleanup - clean up scheduler job resources
 *
 * @job: scheduler job to clean up
 */
void drm_sched_job_cleanup(struct drm_sched_job *job)
{
        dma_fence_put(&job->s_fence->finished);
        job->s_fence = NULL;
}
EXPORT_SYMBOL(drm_sched_job_cleanup);
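
/*
 * A hedged sketch of how drm_sched_job_init() and drm_sched_job_cleanup()
 * can pair up on a driver's submission error path (my_job, my_prepare_job()
 * and file_priv are placeholders; many drivers do the cleanup inside their
 * own job-free helper instead):
 *
 *      r = drm_sched_job_init(&my_job->base, entity, file_priv);
 *      if (r)
 *              return r;
 *
 *      r = my_prepare_job(my_job);
 *      if (r) {
 *              // The job never reached the entity: drop the scheduler fence.
 *              drm_sched_job_cleanup(&my_job->base);
 *              return r;
 *      }
 *
 *      drm_sched_entity_push_job(&my_job->base, entity);
 */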

/**
 * drm_sched_ready - is the scheduler ready
 *
 * @sched: scheduler instance
 *
 * Return true if we can push more jobs to the hw, otherwise false.
 */
static bool drm_sched_ready(struct drm_gpu_scheduler *sched)
{
        return atomic_read(&sched->hw_rq_count) <
                sched->hw_submission_limit;
}

/**
 * drm_sched_wakeup - Wake up the scheduler when it is ready
 *
 * @sched: scheduler instance
 *
 */
void drm_sched_wakeup(struct drm_gpu_scheduler *sched)
{
        if (drm_sched_ready(sched))
                wake_up_interruptible(&sched->wake_up_worker);
}

/**
 * drm_sched_select_entity - Select next entity to process
 *
 * @sched: scheduler instance
 *
 * Returns the entity to process or NULL if none are found.
 */
static struct drm_sched_entity *
drm_sched_select_entity(struct drm_gpu_scheduler *sched)
{
        struct drm_sched_entity *entity;
        int i;

        if (!drm_sched_ready(sched))
                return NULL;

        /* Kernel run queue has higher priority than normal run queue */
        for (i = DRM_SCHED_PRIORITY_MAX - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
                entity = drm_sched_rq_select_entity(&sched->sched_rq[i]);
                if (entity)
                        break;
        }

        return entity;
}

/**
 * drm_sched_process_job - process a job
 *
 * @f: fence
 * @cb: fence callbacks
 *
 * Called after job has finished execution.
 */
static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
{
        struct drm_sched_job *s_job = container_of(cb, struct drm_sched_job, cb);
        struct drm_sched_fence *s_fence = s_job->s_fence;
        struct drm_gpu_scheduler *sched = s_fence->sched;

        atomic_dec(&sched->hw_rq_count);
        atomic_dec(&sched->score);

        trace_drm_sched_process_job(s_fence);

        dma_fence_get(&s_fence->finished);
        drm_sched_fence_finished(s_fence);
        dma_fence_put(&s_fence->finished);
        wake_up_interruptible(&sched->wake_up_worker);
}

/**
 * drm_sched_get_cleanup_job - fetch the next finished job to be destroyed
 *
 * @sched: scheduler instance
 *
 * Returns the next finished job from the mirror list (if there is one),
 * ready to be destroyed.
 */
static struct drm_sched_job *
drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
{
        struct drm_sched_job *job;

        /*
         * Don't destroy jobs while the timeout worker is running OR while the
         * thread is being parked and hence assumed not to touch ring_mirror_list
         */
        if ((sched->timeout != MAX_SCHEDULE_TIMEOUT &&
            !cancel_delayed_work(&sched->work_tdr)) ||
            kthread_should_park())
                return NULL;

        spin_lock(&sched->job_list_lock);

        job = list_first_entry_or_null(&sched->ring_mirror_list,
                                       struct drm_sched_job, node);

        if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
                /* remove job from ring_mirror_list */
                list_del_init(&job->node);
        } else {
                job = NULL;
                /* queue timeout for next job */
                drm_sched_start_timeout(sched);
        }

        spin_unlock(&sched->job_list_lock);

        return job;
}

/**
 * drm_sched_pick_best - Get a drm sched from a sched_list with the least load
 * @sched_list: list of drm_gpu_schedulers
 * @num_sched_list: number of drm_gpu_schedulers in the sched_list
 *
 * Returns a pointer to the scheduler with the least load, or NULL if none of
 * the drm_gpu_schedulers are ready.
 */
struct drm_gpu_scheduler *
drm_sched_pick_best(struct drm_gpu_scheduler **sched_list,
                     unsigned int num_sched_list)
{
        struct drm_gpu_scheduler *sched, *picked_sched = NULL;
        int i;
        unsigned int min_score = UINT_MAX, num_score;

        for (i = 0; i < num_sched_list; ++i) {
                sched = sched_list[i];

                if (!sched->ready) {
                        DRM_WARN("scheduler %s is not ready, skipping",
                                 sched->name);
                        continue;
                }

                num_score = atomic_read(&sched->score);
                if (num_score < min_score) {
                        min_score = num_score;
                        picked_sched = sched;
                }
        }

        return picked_sched;
}
EXPORT_SYMBOL(drm_sched_pick_best);

/**
 * drm_sched_blocked - check if the scheduler is blocked
 *
 * @sched: scheduler instance
 *
 * Returns true if blocked, otherwise false.
 */
static bool drm_sched_blocked(struct drm_gpu_scheduler *sched)
{
        if (kthread_should_park()) {
                kthread_parkme();
                return true;
        }

        return false;
}

/**
 * drm_sched_main - main scheduler thread
 *
 * @param: scheduler instance
 *
 * Returns 0.
 */
static int drm_sched_main(void *param)
{
        struct drm_gpu_scheduler *sched = (struct drm_gpu_scheduler *)param;
        int r;

        sched_set_fifo_low(current);

        while (!kthread_should_stop()) {
                struct drm_sched_entity *entity = NULL;
                struct drm_sched_fence *s_fence;
                struct drm_sched_job *sched_job;
                struct dma_fence *fence;
                struct drm_sched_job *cleanup_job = NULL;

                wait_event_interruptible(sched->wake_up_worker,
                                         (cleanup_job = drm_sched_get_cleanup_job(sched)) ||
                                         (!drm_sched_blocked(sched) &&
                                          (entity = drm_sched_select_entity(sched))) ||
                                         kthread_should_stop());

                if (cleanup_job) {
                        sched->ops->free_job(cleanup_job);
                        /* queue timeout for next job */
                        drm_sched_start_timeout(sched);
                }

                if (!entity)
                        continue;

                sched_job = drm_sched_entity_pop_job(entity);

                complete(&entity->entity_idle);

                if (!sched_job)
                        continue;

                s_fence = sched_job->s_fence;

                atomic_inc(&sched->hw_rq_count);
                drm_sched_job_begin(sched_job);

                trace_drm_run_job(sched_job, entity);
                fence = sched->ops->run_job(sched_job);
                drm_sched_fence_scheduled(s_fence);

                if (!IS_ERR_OR_NULL(fence)) {
                        s_fence->parent = dma_fence_get(fence);
                        r = dma_fence_add_callback(fence, &sched_job->cb,
                                                   drm_sched_process_job);
                        if (r == -ENOENT)
                                drm_sched_process_job(fence, &sched_job->cb);
                        else if (r)
                                DRM_ERROR("fence add callback failed (%d)\n",
                                          r);
                        dma_fence_put(fence);
                } else {
                        if (IS_ERR(fence))
                                dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));

                        drm_sched_process_job(NULL, &sched_job->cb);
                }

                wake_up(&sched->job_scheduled);
        }
        return 0;
}

/**
 * drm_sched_init - Init a gpu scheduler instance
 *
 * @sched: scheduler instance
 * @ops: backend operations for this scheduler
 * @hw_submission: number of hw submissions that can be in flight
 * @hang_limit: number of times to allow a job to hang before dropping it
 * @timeout: timeout value in jiffies for the scheduler
 * @name: name used for debugging
 *
 * Return 0 on success, otherwise error code.
 */
int drm_sched_init(struct drm_gpu_scheduler *sched,
                   const struct drm_sched_backend_ops *ops,
                   unsigned hw_submission,
                   unsigned hang_limit,
                   long timeout,
                   const char *name)
{
        int i, ret;

        sched->ops = ops;
        sched->hw_submission_limit = hw_submission;
        sched->name = name;
        sched->timeout = timeout;
        sched->hang_limit = hang_limit;
        for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_MAX; i++)
                drm_sched_rq_init(sched, &sched->sched_rq[i]);

        init_waitqueue_head(&sched->wake_up_worker);
        init_waitqueue_head(&sched->job_scheduled);
        INIT_LIST_HEAD(&sched->ring_mirror_list);
        spin_lock_init(&sched->job_list_lock);
        atomic_set(&sched->hw_rq_count, 0);
        INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
        atomic_set(&sched->score, 0);
        atomic64_set(&sched->job_id_count, 0);

        /* Each scheduler will run on a separate kernel thread */
        sched->thread = kthread_run(drm_sched_main, sched, sched->name);
        if (IS_ERR(sched->thread)) {
                ret = PTR_ERR(sched->thread);
                sched->thread = NULL;
                DRM_ERROR("Failed to create scheduler for %s.\n", name);
                return ret;
        }

        sched->ready = true;
        return 0;
}
EXPORT_SYMBOL(drm_sched_init);
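
/*
 * A hedged sketch of a driver wiring up its backend ops and creating a
 * scheduler instance (my_dependency/my_run_job/my_timedout_job/my_free_job
 * and the numeric values are placeholders, not from any in-tree driver):
 *
 *      static const struct drm_sched_backend_ops my_sched_ops = {
 *              .dependency   = my_dependency,
 *              .run_job      = my_run_job,
 *              .timedout_job = my_timedout_job,
 *              .free_job     = my_free_job,
 *      };
 *
 *      r = drm_sched_init(&my_dev->sched, &my_sched_ops,
 *                         32,                       // hw_submission
 *                         2,                        // hang_limit
 *                         msecs_to_jiffies(10000),  // timeout
 *                         "my_ring");
 */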

/**
 * drm_sched_fini - Destroy a gpu scheduler
 *
 * @sched: scheduler instance
 *
 * Tears down and cleans up the scheduler.
 */
void drm_sched_fini(struct drm_gpu_scheduler *sched)
{
        if (sched->thread)
                kthread_stop(sched->thread);

        sched->ready = false;
}
EXPORT_SYMBOL(drm_sched_fini);