linux/include/drm/gpu_scheduler.h
/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef _DRM_GPU_SCHEDULER_H_
#define _DRM_GPU_SCHEDULER_H_

#include <drm/spsc_queue.h>
#include <linux/dma-fence.h>
#include <linux/completion.h>
#include <linux/xarray.h>
#include <linux/irq_work.h>

#define MAX_WAIT_SCHED_ENTITY_Q_EMPTY msecs_to_jiffies(1000)

struct drm_gem_object;

struct drm_gpu_scheduler;
struct drm_sched_rq;

/* These are often used as an (initial) index
 * to an array, and as such should start at 0.
 */
enum drm_sched_priority {
        DRM_SCHED_PRIORITY_MIN,
        DRM_SCHED_PRIORITY_NORMAL,
        DRM_SCHED_PRIORITY_HIGH,
        DRM_SCHED_PRIORITY_KERNEL,

        DRM_SCHED_PRIORITY_COUNT,
        DRM_SCHED_PRIORITY_UNSET = -2
};

/**
 * struct drm_sched_entity - A wrapper around a job queue (typically
 * attached to the DRM file_priv).
 *
 * Entities will emit jobs in order to their corresponding hardware
 * ring, and the scheduler will alternate between entities based on
 * scheduling policy.
 */
struct drm_sched_entity {
        /**
         * @list:
         *
         * Used to append this struct to the list of entities in the runqueue
         * @rq under &drm_sched_rq.entities.
         *
         * Protected by &drm_sched_rq.lock of @rq.
         */
        struct list_head                list;

        /**
         * @rq:
         *
         * Runqueue on which this entity is currently scheduled.
         *
         * FIXME: Locking is very unclear for this. Writers are protected by
         * @rq_lock, but readers are generally lockless and seem to just race
         * with not even a READ_ONCE.
         */
        struct drm_sched_rq             *rq;

        /**
         * @sched_list:
         *
         * A list of schedulers (struct drm_gpu_scheduler).  Jobs from this entity can
         * be scheduled on any scheduler on this list.
         *
         * This can be modified by calling drm_sched_entity_modify_sched().
         * Locking is entirely up to the driver, see the above function for more
         * details.
         *
         * This will be set to NULL if &num_sched_list equals 1 and @rq has been
         * set already.
         *
         * FIXME: This means priority changes through
         * drm_sched_entity_set_priority() will be lost henceforth in this case.
         */
        struct drm_gpu_scheduler        **sched_list;

        /**
         * @num_sched_list:
         *
         * Number of drm_gpu_schedulers in the @sched_list.
         */
        unsigned int                    num_sched_list;

        /**
         * @priority:
         *
         * Priority of the entity. This can be modified by calling
         * drm_sched_entity_set_priority(). Protected by &rq_lock.
         */
        enum drm_sched_priority         priority;

        /**
         * @rq_lock:
         *
         * Lock to modify the runqueue to which this entity belongs.
         */
        spinlock_t                      rq_lock;

        /**
         * @job_queue: the list of jobs of this entity.
         */
        struct spsc_queue               job_queue;

        /**
         * @fence_seq:
         *
         * A linearly increasing seqno incremented with each new
         * &drm_sched_fence which is part of the entity.
         *
         * FIXME: Callers of drm_sched_job_arm() need to ensure correct locking,
         * this doesn't need to be atomic.
         */
        atomic_t                        fence_seq;

        /**
         * @fence_context:
         *
         * A unique context for all the fences which belong to this entity.  The
         * &drm_sched_fence.scheduled uses the fence_context but
         * &drm_sched_fence.finished uses fence_context + 1.
         */
        uint64_t                        fence_context;

        /**
         * @dependency:
         *
         * The dependency fence of the job which is on the top of the job queue.
         */
        struct dma_fence                *dependency;

        /**
         * @cb:
         *
         * Callback for the dependency fence above.
         */
        struct dma_fence_cb             cb;

        /**
         * @guilty:
         *
         * Points to the driver-supplied guilty flag for this entity, passed
         * in at drm_sched_entity_init() time.
         */
        atomic_t                        *guilty;

        /**
         * @last_scheduled:
         *
         * Points to the finished fence of the last scheduled job. Only written
         * by the scheduler thread, can be accessed locklessly from
         * drm_sched_job_arm() iff the queue is empty.
         */
        struct dma_fence                *last_scheduled;

        /**
         * @last_user: last group leader pushing a job into the entity.
         */
        struct task_struct              *last_user;

        /**
         * @stopped:
         *
         * Marks the entity as removed from the rq and destined for
         * termination. This is set by calling drm_sched_entity_flush() and by
         * drm_sched_fini().
         */
        bool                            stopped;

        /**
         * @entity_idle:
         *
         * Signals when the entity is not in use, used to sequence entity
         * cleanup in drm_sched_entity_fini().
         */
        struct completion               entity_idle;
};

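/*
 * Example: creating an entity bound to a single scheduler and tearing it
 * down again. This is only an illustrative sketch, not part of this header;
 * "foo_ctx" and "foo_device" are hypothetical driver types. With more than
 * one scheduler in the list, the array must stay valid for the entity's
 * lifetime.
 *
 *      int foo_ctx_init(struct foo_ctx *ctx, struct foo_device *fdev)
 *      {
 *              struct drm_gpu_scheduler *sched_list[] = { &fdev->sched };
 *
 *              return drm_sched_entity_init(&ctx->entity,
 *                                           DRM_SCHED_PRIORITY_NORMAL,
 *                                           sched_list,
 *                                           ARRAY_SIZE(sched_list),
 *                                           NULL);
 *      }
 *
 *      void foo_ctx_fini(struct foo_ctx *ctx)
 *      {
 *              drm_sched_entity_destroy(&ctx->entity);
 *      }
 */
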
/**
 * struct drm_sched_rq - queue of entities to be scheduled.
 *
 * @lock: to modify the entities list.
 * @sched: the scheduler to which this rq belongs.
 * @entities: list of the entities to be scheduled.
 * @current_entity: the entity which is to be scheduled.
 *
 * Run queue is a set of entities scheduling command submissions for
 * one specific ring. It implements the scheduling policy that selects
 * the next entity to emit commands from.
 */
struct drm_sched_rq {
        spinlock_t                      lock;
        struct drm_gpu_scheduler        *sched;
        struct list_head                entities;
        struct drm_sched_entity         *current_entity;
};

/**
 * struct drm_sched_fence - fences corresponding to the scheduling of a job.
 */
struct drm_sched_fence {
        /**
         * @scheduled: this fence is what will be signaled by the scheduler
         * when the job is scheduled.
         */
        struct dma_fence                scheduled;

        /**
         * @finished: this fence is what will be signaled by the scheduler
         * when the job is completed.
         *
         * When setting up an out fence for the job, you should use
         * this, since it's available immediately upon
         * drm_sched_job_init(), and the fence returned by the driver
         * from run_job() won't be created until the dependencies have
         * resolved.
         */
        struct dma_fence                finished;

        /**
         * @parent: the fence returned by &drm_sched_backend_ops.run_job
         * when scheduling the job on hardware. We signal the
         * &drm_sched_fence.finished fence once parent is signalled.
         */
        struct dma_fence                *parent;
        /**
         * @sched: the scheduler instance to which the job having this struct
         * belongs.
         */
        struct drm_gpu_scheduler        *sched;
        /**
         * @lock: the lock used by the scheduled and the finished fences.
         */
        spinlock_t                      lock;
        /**
         * @owner: job owner for debugging
         */
        void                            *owner;
};

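/*
 * Example: exposing the finished fence as the submission's "out fence", as
 * suggested by the &drm_sched_fence.finished documentation above. This is an
 * illustrative sketch only; "foo_job" is a hypothetical driver job type that
 * embeds a &struct drm_sched_job as its "base" member. The finished fence
 * exists as soon as drm_sched_job_init() has succeeded, long before run_job()
 * produces a hardware fence, so it is the fence to hand back to userspace.
 *
 *      struct dma_fence *foo_job_out_fence(struct foo_job *fjob)
 *      {
 *              return dma_fence_get(&fjob->base.s_fence->finished);
 *      }
 */
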
struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);

/**
 * struct drm_sched_job - A job to be run by an entity.
 *
 * @queue_node: used to append this struct to the queue of jobs in an entity.
 * @list: used to link this job into the scheduler's pending list.
 * @sched: the scheduler instance on which this job is scheduled.
 * @s_fence: contains the fences for the scheduling of the job.
 * @finish_cb: the callback for the finished fence.
 * @id: a unique id assigned to each job scheduled on the scheduler.
 * @karma: incremented on every hang caused by this job. If this exceeds the
 *         hang limit of the scheduler then the job is marked guilty and will
 *         not be scheduled further.
 * @s_priority: the priority of the job.
 * @entity: the entity to which this job belongs.
 * @cb: the callback for the parent fence in s_fence.
 *
 * A job is created by the driver using drm_sched_job_init(); once its
 * dependencies are set up, the driver arms it with drm_sched_job_arm() and
 * queues it with drm_sched_entity_push_job() when it wants the scheduler to
 * schedule the job (see the example below).
 */
struct drm_sched_job {
        struct spsc_node                queue_node;
        struct list_head                list;
        struct drm_gpu_scheduler        *sched;
        struct drm_sched_fence          *s_fence;

        /*
         * work is used only after finish_cb has been used and will not be
         * accessed anymore.
         */
        union {
                struct dma_fence_cb             finish_cb;
                struct irq_work                 work;
        };

        uint64_t                        id;
        atomic_t                        karma;
        enum drm_sched_priority         s_priority;
        struct drm_sched_entity         *entity;
        struct dma_fence_cb             cb;
        /**
         * @dependencies:
         *
         * Contains the dependencies as struct dma_fence for this job, see
         * drm_sched_job_add_dependency() and
         * drm_sched_job_add_implicit_dependencies().
         */
        struct xarray                   dependencies;

        /** @last_dependency: tracks @dependencies as they signal */
        unsigned long                   last_dependency;
};

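/*
 * Example: typical driver-side submission flow, as described above. This is
 * an illustrative sketch only; "foo_job" is a hypothetical driver job type
 * embedding a &struct drm_sched_job as "base", and error handling is reduced
 * to the essentials.
 *
 *      int foo_submit(struct foo_job *fjob, struct drm_sched_entity *entity,
 *                     struct drm_gem_object *bo, bool writes_bo)
 *      {
 *              int ret;
 *
 *              ret = drm_sched_job_init(&fjob->base, entity, fjob);
 *              if (ret)
 *                      return ret;
 *
 *              ret = drm_sched_job_add_implicit_dependencies(&fjob->base, bo,
 *                                                            writes_bo);
 *              if (ret)
 *                      goto err_cleanup;
 *
 *              drm_sched_job_arm(&fjob->base);
 *              drm_sched_entity_push_job(&fjob->base);
 *              return 0;
 *
 *      err_cleanup:
 *              drm_sched_job_cleanup(&fjob->base);
 *              return ret;
 *      }
 */
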
/**
 * drm_sched_invalidate_job - check whether a job has crossed its hang limit
 * @s_job: the job to check
 * @threshold: the scheduler's hang limit
 *
 * Increments the job's karma and returns true if it now exceeds @threshold.
 */
static inline bool drm_sched_invalidate_job(struct drm_sched_job *s_job,
                                            int threshold)
{
        return s_job && atomic_inc_return(&s_job->karma) > threshold;
}

/**
 * enum drm_gpu_sched_stat - status returned by &drm_sched_backend_ops.timedout_job
 *
 * @DRM_GPU_SCHED_STAT_NONE: Reserved. Do not use.
 * @DRM_GPU_SCHED_STAT_NOMINAL: Operation succeeded.
 * @DRM_GPU_SCHED_STAT_ENODEV: Error: Device is not available anymore.
 */
enum drm_gpu_sched_stat {
        DRM_GPU_SCHED_STAT_NONE, /* Reserve 0 */
        DRM_GPU_SCHED_STAT_NOMINAL,
        DRM_GPU_SCHED_STAT_ENODEV,
};

/**
 * struct drm_sched_backend_ops
 *
 * Define the backend operations called by the scheduler;
 * these functions should be implemented by the driver.
 */
struct drm_sched_backend_ops {
        /**
         * @dependency:
         *
         * Called when the scheduler is considering scheduling this job next, to
         * get another struct dma_fence for this job to block on.  Once it
         * returns NULL, run_job() may be called.
         *
         * If a driver exclusively uses drm_sched_job_add_dependency() and
         * drm_sched_job_add_implicit_dependencies() this can be omitted and
         * left as NULL.
         */
        struct dma_fence *(*dependency)(struct drm_sched_job *sched_job,
                                        struct drm_sched_entity *s_entity);

        /**
         * @run_job: Called to execute the job once all of the dependencies
         * have been resolved.  This may be called multiple times, if
         * timedout_job() has happened and drm_sched_job_recovery()
         * decides to try it again.
         */
        struct dma_fence *(*run_job)(struct drm_sched_job *sched_job);

        /**
         * @timedout_job: Called when a job has taken too long to execute,
         * to trigger GPU recovery.
         *
         * This method is called in a workqueue context.
         *
         * Drivers typically issue a reset to recover from GPU hangs, and this
         * procedure usually follows the following workflow:
         *
         * 1. Stop the scheduler using drm_sched_stop(). This will park the
         *    scheduler thread and cancel the timeout work, guaranteeing that
         *    nothing is queued while we reset the hardware queue.
         * 2. Try to gracefully stop non-faulty jobs (optional).
         * 3. Issue a GPU reset (driver-specific).
         * 4. Re-submit jobs using drm_sched_resubmit_jobs().
         * 5. Restart the scheduler using drm_sched_start(). At that point, new
         *    jobs can be queued, and the scheduler thread is unblocked.
         *
         * Note that some GPUs have distinct hardware queues but need to reset
         * the GPU globally, which requires extra synchronization between the
         * timeout handlers of the different &drm_gpu_scheduler instances. One
         * way to achieve this synchronization is to create an ordered
         * workqueue (using alloc_ordered_workqueue()) at the driver level, and
         * pass this queue to drm_sched_init(), to guarantee that timeout
         * handlers are executed sequentially. The above workflow needs to be
         * slightly adjusted in that case:
         *
         * 1. Stop all schedulers impacted by the reset using drm_sched_stop().
         * 2. Try to gracefully stop non-faulty jobs on all queues impacted by
         *    the reset (optional).
         * 3. Issue a GPU reset on all faulty queues (driver-specific).
         * 4. Re-submit jobs on all schedulers impacted by the reset using
         *    drm_sched_resubmit_jobs().
         * 5. Restart all schedulers that were stopped in step #1 using
         *    drm_sched_start().
         *
         * A sketch of the single-queue workflow is shown in the example after
         * this structure.
         *
         * Return DRM_GPU_SCHED_STAT_NOMINAL when all is normal and the
         * underlying driver has started or completed recovery.
         *
         * Return DRM_GPU_SCHED_STAT_ENODEV if the device is no longer
         * available, i.e. has been unplugged.
         */
        enum drm_gpu_sched_stat (*timedout_job)(struct drm_sched_job *sched_job);

        /**
         * @free_job: Called once the job's finished fence has been signaled
         * and it's time to clean it up.
         */
        void (*free_job)(struct drm_sched_job *sched_job);
};

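/*
 * Example: a minimal set of backend operations, including a timeout handler
 * following the single-queue recovery workflow documented above. This is an
 * illustrative sketch only; the foo_*() helpers and the "foo_job" type
 * (embedding a &struct drm_sched_job as "base") are hypothetical.
 *
 *      static struct dma_fence *foo_run_job(struct drm_sched_job *sched_job)
 *      {
 *              struct foo_job *fjob = container_of(sched_job, struct foo_job,
 *                                                  base);
 *
 *              return foo_hw_submit(fjob);     // hardware fence or ERR_PTR()
 *      }
 *
 *      static enum drm_gpu_sched_stat
 *      foo_timedout_job(struct drm_sched_job *sched_job)
 *      {
 *              struct drm_gpu_scheduler *sched = sched_job->sched;
 *
 *              drm_sched_stop(sched, sched_job);       // 1. park the scheduler
 *              drm_sched_increase_karma(sched_job);    //    blame the hanging job
 *              foo_hw_reset(sched);                    // 3. driver-specific reset
 *              drm_sched_resubmit_jobs(sched);         // 4. re-queue pending jobs
 *              drm_sched_start(sched, true);           // 5. unpark the scheduler
 *
 *              return DRM_GPU_SCHED_STAT_NOMINAL;
 *      }
 *
 *      static void foo_free_job(struct drm_sched_job *sched_job)
 *      {
 *              struct foo_job *fjob = container_of(sched_job, struct foo_job,
 *                                                  base);
 *
 *              drm_sched_job_cleanup(sched_job);
 *              kfree(fjob);
 *      }
 *
 *      static const struct drm_sched_backend_ops foo_sched_ops = {
 *              .run_job        = foo_run_job,
 *              .timedout_job   = foo_timedout_job,
 *              .free_job       = foo_free_job,
 *      };
 */
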
/**
 * struct drm_gpu_scheduler
 *
 * @ops: backend operations provided by the driver.
 * @hw_submission_limit: the max size of the hardware queue.
 * @timeout: the time after which a job is removed from the scheduler.
 * @name: name of the ring for which this scheduler is being used.
 * @sched_rq: priority-wise array of run queues.
 * @wake_up_worker: the wait queue on which the scheduler sleeps until a job
 *                  is ready to be scheduled.
 * @job_scheduled: once drm_sched_entity_flush() is called the scheduler
 *                 waits on this wait queue until all the scheduled jobs are
 *                 finished.
 * @hw_rq_count: the number of jobs currently in the hardware queue.
 * @job_id_count: used to assign a unique id to each job.
 * @timeout_wq: workqueue used to queue @work_tdr.
 * @work_tdr: schedules a delayed call to drm_sched_job_timedout() after the
 *            timeout interval is over.
 * @thread: the kthread on which the scheduler runs.
 * @pending_list: the list of jobs which are currently in the job queue.
 * @job_list_lock: lock to protect the pending_list.
 * @hang_limit: once the number of hangs caused by a job crosses this limit,
 *              the job is marked guilty and it will no longer be considered
 *              for scheduling.
 * @score: score to help the load balancer pick an idle scheduler.
 * @_score: score used when the driver doesn't provide one.
 * @ready: marks if the underlying HW is ready to work.
 * @free_guilty: a hint to the timeout handler to free the guilty job.
 *
 * One scheduler is implemented for each hardware ring.
 */
struct drm_gpu_scheduler {
        const struct drm_sched_backend_ops      *ops;
        uint32_t                        hw_submission_limit;
        long                            timeout;
        const char                      *name;
        struct drm_sched_rq             sched_rq[DRM_SCHED_PRIORITY_COUNT];
        wait_queue_head_t               wake_up_worker;
        wait_queue_head_t               job_scheduled;
        atomic_t                        hw_rq_count;
        atomic64_t                      job_id_count;
        struct workqueue_struct         *timeout_wq;
        struct delayed_work             work_tdr;
        struct task_struct              *thread;
        struct list_head                pending_list;
        spinlock_t                      job_list_lock;
        int                             hang_limit;
        atomic_t                        *score;
        atomic_t                        _score;
        bool                            ready;
        bool                            free_guilty;
};

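/*
 * Example: bringing up one scheduler instance per hardware ring. This is an
 * illustrative sketch only; "foo_ring", "foo_sched_ops" and the limits used
 * here are hypothetical. Passing a NULL timeout workqueue makes the scheduler
 * fall back to the system workqueue.
 *
 *      int foo_ring_sched_init(struct foo_ring *ring)
 *      {
 *              // 16 jobs in flight, mark a job guilty after 2 hangs,
 *              // 500 ms timeout, default timeout workqueue, no shared score.
 *              return drm_sched_init(&ring->sched, &foo_sched_ops,
 *                                    16, 2, msecs_to_jiffies(500),
 *                                    NULL, NULL, ring->name);
 *      }
 *
 *      void foo_ring_sched_fini(struct foo_ring *ring)
 *      {
 *              drm_sched_fini(&ring->sched);
 *      }
 */
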
int drm_sched_init(struct drm_gpu_scheduler *sched,
                   const struct drm_sched_backend_ops *ops,
                   uint32_t hw_submission, unsigned hang_limit,
                   long timeout, struct workqueue_struct *timeout_wq,
                   atomic_t *score, const char *name);

void drm_sched_fini(struct drm_gpu_scheduler *sched);
int drm_sched_job_init(struct drm_sched_job *job,
                       struct drm_sched_entity *entity,
                       void *owner);
void drm_sched_job_arm(struct drm_sched_job *job);
int drm_sched_job_add_dependency(struct drm_sched_job *job,
                                 struct dma_fence *fence);
int drm_sched_job_add_implicit_dependencies(struct drm_sched_job *job,
                                            struct drm_gem_object *obj,
                                            bool write);

void drm_sched_entity_modify_sched(struct drm_sched_entity *entity,
                                   struct drm_gpu_scheduler **sched_list,
                                   unsigned int num_sched_list);

void drm_sched_job_cleanup(struct drm_sched_job *job);
void drm_sched_wakeup(struct drm_gpu_scheduler *sched);
void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad);
void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery);
void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched);
void drm_sched_resubmit_jobs_ext(struct drm_gpu_scheduler *sched, int max);
void drm_sched_increase_karma(struct drm_sched_job *bad);
void drm_sched_reset_karma(struct drm_sched_job *bad);
void drm_sched_increase_karma_ext(struct drm_sched_job *bad, int type);
bool drm_sched_dependency_optimized(struct dma_fence *fence,
                                    struct drm_sched_entity *entity);
void drm_sched_fault(struct drm_gpu_scheduler *sched);
void drm_sched_job_kickout(struct drm_sched_job *s_job);

void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
                             struct drm_sched_entity *entity);
void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
                                struct drm_sched_entity *entity);

int drm_sched_entity_init(struct drm_sched_entity *entity,
                          enum drm_sched_priority priority,
                          struct drm_gpu_scheduler **sched_list,
                          unsigned int num_sched_list,
                          atomic_t *guilty);
long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout);
void drm_sched_entity_fini(struct drm_sched_entity *entity);
void drm_sched_entity_destroy(struct drm_sched_entity *entity);
void drm_sched_entity_select_rq(struct drm_sched_entity *entity);
struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity);
void drm_sched_entity_push_job(struct drm_sched_job *sched_job);
void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
                                   enum drm_sched_priority priority);
bool drm_sched_entity_is_ready(struct drm_sched_entity *entity);

struct drm_sched_fence *drm_sched_fence_alloc(
        struct drm_sched_entity *s_entity, void *owner);
void drm_sched_fence_init(struct drm_sched_fence *fence,
                          struct drm_sched_entity *entity);
void drm_sched_fence_free(struct drm_sched_fence *fence);

void drm_sched_fence_scheduled(struct drm_sched_fence *fence);
void drm_sched_fence_finished(struct drm_sched_fence *fence);

unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched);
void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
                              unsigned long remaining);
struct drm_gpu_scheduler *
drm_sched_pick_best(struct drm_gpu_scheduler **sched_list,
                    unsigned int num_sched_list);

#endif