linux/drivers/gpu/drm/v3d/v3d_sched.c
// SPDX-License-Identifier: GPL-2.0+
/* Copyright (C) 2018 Broadcom */

/**
 * DOC: Broadcom V3D scheduling
 *
 * The shared DRM GPU scheduler is used to coordinate submitting jobs
 * to the hardware.  Each DRM fd (roughly a client process) gets its
 * own scheduler entity, which will process jobs in order.  The GPU
 * scheduler will round-robin between clients to submit the next job.
 *
 * For simplicity, and in order to keep latency low for interactive
 * jobs when bulk background jobs are queued up, we submit a new job
 * to the HW only when it has completed the last one, instead of
 * filling up the CT[01]Q FIFOs with jobs.  Similarly, we use
 * v3d_job_dependency() to manage the dependency between bin and
 * render, instead of having the clients submit jobs using the HW's
 * semaphores to interlock between them.
 */

#include <linux/kthread.h>

#include "v3d_drv.h"
#include "v3d_regs.h"
#include "v3d_trace.h"

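/* Downcast helpers: each job type embeds a struct v3d_job, which in
 * turn embeds the drm_sched_job handed to us by the scheduler, so
 * container_of() recovers the driver-specific job structure.
 */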
static struct v3d_job *
to_v3d_job(struct drm_sched_job *sched_job)
{
	return container_of(sched_job, struct v3d_job, base);
}

static struct v3d_bin_job *
to_bin_job(struct drm_sched_job *sched_job)
{
	return container_of(sched_job, struct v3d_bin_job, base.base);
}

static struct v3d_render_job *
to_render_job(struct drm_sched_job *sched_job)
{
	return container_of(sched_job, struct v3d_render_job, base.base);
}

static struct v3d_tfu_job *
to_tfu_job(struct drm_sched_job *sched_job)
{
	return container_of(sched_job, struct v3d_tfu_job, base.base);
}

static struct v3d_csd_job *
to_csd_job(struct drm_sched_job *sched_job)
{
	return container_of(sched_job, struct v3d_csd_job, base.base);
}

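/* Scheduler callback for freeing a finished job: detach it from the
 * scheduler and drop the reference the scheduler held on it.
 */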
static void
v3d_job_free(struct drm_sched_job *sched_job)
{
	struct v3d_job *job = to_v3d_job(sched_job);

	drm_sched_job_cleanup(sched_job);
	v3d_job_put(job);
}

/*
 * Returns the fences that the job depends on, one by one.
 *
 * If placed in the scheduler's .dependency method, the corresponding
 * .run_job won't be called until all of them have been signaled.
 */
static struct dma_fence *
v3d_job_dependency(struct drm_sched_job *sched_job,
		   struct drm_sched_entity *s_entity)
{
	struct v3d_job *job = to_v3d_job(sched_job);

	/* XXX: Wait on a fence for switching the GMP if necessary,
	 * and then do so.
	 */

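	/* Dependencies were stashed in the job->deps xarray at submit
	 * time; hand them back one at a time, erasing each entry so it
	 * is only returned (and waited on) once.
	 */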
	if (!xa_empty(&job->deps))
		return xa_erase(&job->deps, job->last_dep++);

	return NULL;
}

static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
{
	struct v3d_bin_job *job = to_bin_job(sched_job);
	struct v3d_dev *v3d = job->base.v3d;
	struct drm_device *dev = &v3d->drm;
	struct dma_fence *fence;
	unsigned long irqflags;

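	/* The scheduler sets an error on the finished fence when it has
	 * decided not to re-run a job (e.g. it was deemed guilty of a
	 * hang during a GPU reset); in that case leave the HW alone.
	 */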
	if (unlikely(job->base.base.s_fence->finished.error))
		return NULL;

	/* Lock required around bin_job update vs
	 * v3d_overflow_mem_work().
	 */
	spin_lock_irqsave(&v3d->job_lock, irqflags);
	v3d->bin_job = job;
	/* Clear out the overflow allocation, so we don't
	 * reuse the overflow attached to a previous job.
	 */
	V3D_CORE_WRITE(0, V3D_PTB_BPOS, 0);
	spin_unlock_irqrestore(&v3d->job_lock, irqflags);

	v3d_invalidate_caches(v3d);

	fence = v3d_fence_create(v3d, V3D_BIN);
	if (IS_ERR(fence))
		return NULL;

	if (job->base.irq_fence)
		dma_fence_put(job->base.irq_fence);
	job->base.irq_fence = dma_fence_get(fence);

	trace_v3d_submit_cl(dev, false, to_v3d_fence(fence)->seqno,
			    job->start, job->end);

	/* Set the current and end address of the control list.
	 * Writing the end register is what starts the job.
	 */
	if (job->qma) {
		V3D_CORE_WRITE(0, V3D_CLE_CT0QMA, job->qma);
		V3D_CORE_WRITE(0, V3D_CLE_CT0QMS, job->qms);
	}
	if (job->qts) {
		V3D_CORE_WRITE(0, V3D_CLE_CT0QTS,
			       V3D_CLE_CT0QTS_ENABLE |
			       job->qts);
	}
	V3D_CORE_WRITE(0, V3D_CLE_CT0QBA, job->start);
	V3D_CORE_WRITE(0, V3D_CLE_CT0QEA, job->end);

	return fence;
}

static struct dma_fence *v3d_render_job_run(struct drm_sched_job *sched_job)
{
	struct v3d_render_job *job = to_render_job(sched_job);
	struct v3d_dev *v3d = job->base.v3d;
	struct drm_device *dev = &v3d->drm;
	struct dma_fence *fence;

	if (unlikely(job->base.base.s_fence->finished.error))
		return NULL;

	v3d->render_job = job;

	/* Can we avoid this flush?  We need to be careful of
	 * scheduling, though -- imagine job0 rendering to texture and
	 * job1 reading, and them being executed as bin0, bin1,
	 * render0, render1, so that render1's flush at bin time
	 * wasn't enough.
	 */
	v3d_invalidate_caches(v3d);

	fence = v3d_fence_create(v3d, V3D_RENDER);
	if (IS_ERR(fence))
		return NULL;

	if (job->base.irq_fence)
		dma_fence_put(job->base.irq_fence);
	job->base.irq_fence = dma_fence_get(fence);

	trace_v3d_submit_cl(dev, true, to_v3d_fence(fence)->seqno,
			    job->start, job->end);

	/* XXX: Set the QCFG */

	/* Set the current and end address of the control list.
	 * Writing the end register is what starts the job.
	 */
	V3D_CORE_WRITE(0, V3D_CLE_CT1QBA, job->start);
	V3D_CORE_WRITE(0, V3D_CLE_CT1QEA, job->end);

	return fence;
}

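/* The TFU (texture formatting unit) runs image conversion jobs, e.g.
 * layout conversion and mipmap generation.  The job descriptor is
 * written straight into the TFU registers; the ICFG write at the end
 * is what kicks it off.
 */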
static struct dma_fence *
v3d_tfu_job_run(struct drm_sched_job *sched_job)
{
	struct v3d_tfu_job *job = to_tfu_job(sched_job);
	struct v3d_dev *v3d = job->base.v3d;
	struct drm_device *dev = &v3d->drm;
	struct dma_fence *fence;

	fence = v3d_fence_create(v3d, V3D_TFU);
	if (IS_ERR(fence))
		return NULL;

	v3d->tfu_job = job;
	if (job->base.irq_fence)
		dma_fence_put(job->base.irq_fence);
	job->base.irq_fence = dma_fence_get(fence);

	trace_v3d_submit_tfu(dev, to_v3d_fence(fence)->seqno);

	V3D_WRITE(V3D_TFU_IIA, job->args.iia);
	V3D_WRITE(V3D_TFU_IIS, job->args.iis);
	V3D_WRITE(V3D_TFU_ICA, job->args.ica);
	V3D_WRITE(V3D_TFU_IUA, job->args.iua);
	V3D_WRITE(V3D_TFU_IOA, job->args.ioa);
	V3D_WRITE(V3D_TFU_IOS, job->args.ios);
	V3D_WRITE(V3D_TFU_COEF0, job->args.coef[0]);
	if (job->args.coef[0] & V3D_TFU_COEF0_USECOEF) {
		V3D_WRITE(V3D_TFU_COEF1, job->args.coef[1]);
		V3D_WRITE(V3D_TFU_COEF2, job->args.coef[2]);
		V3D_WRITE(V3D_TFU_COEF3, job->args.coef[3]);
	}
	/* ICFG kicks off the job. */
	V3D_WRITE(V3D_TFU_ICFG, job->args.icfg | V3D_TFU_ICFG_IOC);

	return fence;
}

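/* The CSD (compute shader dispatch) queue handles compute jobs.  The
 * dispatch is described by seven config registers; CFG0 is written
 * last because writing it is what queues the dispatch.
 */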
static struct dma_fence *
v3d_csd_job_run(struct drm_sched_job *sched_job)
{
	struct v3d_csd_job *job = to_csd_job(sched_job);
	struct v3d_dev *v3d = job->base.v3d;
	struct drm_device *dev = &v3d->drm;
	struct dma_fence *fence;
	int i;

	v3d->csd_job = job;

	v3d_invalidate_caches(v3d);

	fence = v3d_fence_create(v3d, V3D_CSD);
	if (IS_ERR(fence))
		return NULL;

	if (job->base.irq_fence)
		dma_fence_put(job->base.irq_fence);
	job->base.irq_fence = dma_fence_get(fence);

	trace_v3d_submit_csd(dev, to_v3d_fence(fence)->seqno);

	for (i = 1; i <= 6; i++)
		V3D_CORE_WRITE(0, V3D_CSD_QUEUED_CFG0 + 4 * i, job->args.cfg[i]);
	/* CFG0 write kicks off the job. */
	V3D_CORE_WRITE(0, V3D_CSD_QUEUED_CFG0, job->args.cfg[0]);

	return fence;
}

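/* Cache-clean jobs don't queue anything to the hardware: the clean is
 * performed synchronously here, and returning NULL tells the scheduler
 * the job is already complete.
 */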
static struct dma_fence *
v3d_cache_clean_job_run(struct drm_sched_job *sched_job)
{
	struct v3d_job *job = to_v3d_job(sched_job);
	struct v3d_dev *v3d = job->v3d;

	v3d_clean_caches(v3d);

	return NULL;
}

static void
v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job)
{
	enum v3d_queue q;

	mutex_lock(&v3d->reset_lock);

	/* block scheduler */
	for (q = 0; q < V3D_MAX_QUEUES; q++)
		drm_sched_stop(&v3d->queue[q].sched, sched_job);

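	/* Bump the hung job's karma so a context that keeps hanging the
	 * GPU can eventually be flagged as guilty instead of being
	 * resubmitted forever.
	 */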
	if (sched_job)
		drm_sched_increase_karma(sched_job);

	/* get the GPU back into the init state */
	v3d_reset(v3d);

	for (q = 0; q < V3D_MAX_QUEUES; q++)
		drm_sched_resubmit_jobs(&v3d->queue[q].sched);

	/* Unblock schedulers and restart their jobs. */
	for (q = 0; q < V3D_MAX_QUEUES; q++) {
		drm_sched_start(&v3d->queue[q].sched, true);
	}

	mutex_unlock(&v3d->reset_lock);
}

/* If the current address or return address has changed, then the GPU
 * has probably made progress and we should delay the reset.  This
 * could fail if the GPU got in an infinite loop in the CL, but that
 * is pretty unlikely outside of an i-g-t testcase.
 */
static void
v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q,
		    u32 *timedout_ctca, u32 *timedout_ctra)
{
	struct v3d_job *job = to_v3d_job(sched_job);
	struct v3d_dev *v3d = job->v3d;
	u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(q));
	u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(q));

	if (*timedout_ctca != ctca || *timedout_ctra != ctra) {
		*timedout_ctca = ctca;
		*timedout_ctra = ctra;
		return;
	}

	v3d_gpu_reset_for_timeout(v3d, sched_job);
}

static void
v3d_bin_job_timedout(struct drm_sched_job *sched_job)
{
	struct v3d_bin_job *job = to_bin_job(sched_job);

	v3d_cl_job_timedout(sched_job, V3D_BIN,
			    &job->timedout_ctca, &job->timedout_ctra);
}

static void
v3d_render_job_timedout(struct drm_sched_job *sched_job)
{
	struct v3d_render_job *job = to_render_job(sched_job);

	v3d_cl_job_timedout(sched_job, V3D_RENDER,
			    &job->timedout_ctca, &job->timedout_ctra);
}

static void
v3d_generic_job_timedout(struct drm_sched_job *sched_job)
{
	struct v3d_job *job = to_v3d_job(sched_job);

	v3d_gpu_reset_for_timeout(job->v3d, sched_job);
}

static void
v3d_csd_job_timedout(struct drm_sched_job *sched_job)
{
	struct v3d_csd_job *job = to_csd_job(sched_job);
	struct v3d_dev *v3d = job->base.v3d;
	u32 batches = V3D_CORE_READ(0, V3D_CSD_CURRENT_CFG4);

	/* If we've made progress, skip reset and let the timer get
	 * rearmed.
	 */
	if (job->timedout_batches != batches) {
		job->timedout_batches = batches;
		return;
	}

	v3d_gpu_reset_for_timeout(v3d, sched_job);
}

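/* One set of scheduler callbacks per queue.  They share dependency
 * handling and job freeing; they differ in how a job is kicked off and
 * in how a timeout decides whether the GPU has made progress.
 */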
static const struct drm_sched_backend_ops v3d_bin_sched_ops = {
	.dependency = v3d_job_dependency,
	.run_job = v3d_bin_job_run,
	.timedout_job = v3d_bin_job_timedout,
	.free_job = v3d_job_free,
};

static const struct drm_sched_backend_ops v3d_render_sched_ops = {
	.dependency = v3d_job_dependency,
	.run_job = v3d_render_job_run,
	.timedout_job = v3d_render_job_timedout,
	.free_job = v3d_job_free,
};

static const struct drm_sched_backend_ops v3d_tfu_sched_ops = {
	.dependency = v3d_job_dependency,
	.run_job = v3d_tfu_job_run,
	.timedout_job = v3d_generic_job_timedout,
	.free_job = v3d_job_free,
};

static const struct drm_sched_backend_ops v3d_csd_sched_ops = {
	.dependency = v3d_job_dependency,
	.run_job = v3d_csd_job_run,
	.timedout_job = v3d_csd_job_timedout,
	.free_job = v3d_job_free
};

static const struct drm_sched_backend_ops v3d_cache_clean_sched_ops = {
	.dependency = v3d_job_dependency,
	.run_job = v3d_cache_clean_job_run,
	.timedout_job = v3d_generic_job_timedout,
	.free_job = v3d_job_free
};

int
v3d_sched_init(struct v3d_dev *v3d)
{
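	/* Per the DOC comment above, queue at most one job to the HW at
	 * a time, and treat a job as hung if it hasn't completed (or,
	 * for the CL/CSD timeout handlers, made progress) within 500ms.
	 */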
	int hw_jobs_limit = 1;
	int job_hang_limit = 0;
	int hang_limit_ms = 500;
	int ret;

	ret = drm_sched_init(&v3d->queue[V3D_BIN].sched,
			     &v3d_bin_sched_ops,
			     hw_jobs_limit, job_hang_limit,
			     msecs_to_jiffies(hang_limit_ms),
			     "v3d_bin");
	if (ret) {
		dev_err(v3d->drm.dev, "Failed to create bin scheduler: %d.", ret);
		return ret;
	}

	ret = drm_sched_init(&v3d->queue[V3D_RENDER].sched,
			     &v3d_render_sched_ops,
			     hw_jobs_limit, job_hang_limit,
			     msecs_to_jiffies(hang_limit_ms),
			     "v3d_render");
	if (ret) {
		dev_err(v3d->drm.dev, "Failed to create render scheduler: %d.",
			ret);
		v3d_sched_fini(v3d);
		return ret;
	}

	ret = drm_sched_init(&v3d->queue[V3D_TFU].sched,
			     &v3d_tfu_sched_ops,
			     hw_jobs_limit, job_hang_limit,
			     msecs_to_jiffies(hang_limit_ms),
			     "v3d_tfu");
	if (ret) {
		dev_err(v3d->drm.dev, "Failed to create TFU scheduler: %d.",
			ret);
		v3d_sched_fini(v3d);
		return ret;
	}

	if (v3d_has_csd(v3d)) {
		ret = drm_sched_init(&v3d->queue[V3D_CSD].sched,
				     &v3d_csd_sched_ops,
				     hw_jobs_limit, job_hang_limit,
				     msecs_to_jiffies(hang_limit_ms),
				     "v3d_csd");
		if (ret) {
			dev_err(v3d->drm.dev, "Failed to create CSD scheduler: %d.",
				ret);
			v3d_sched_fini(v3d);
			return ret;
		}

		ret = drm_sched_init(&v3d->queue[V3D_CACHE_CLEAN].sched,
				     &v3d_cache_clean_sched_ops,
				     hw_jobs_limit, job_hang_limit,
				     msecs_to_jiffies(hang_limit_ms),
				     "v3d_cache_clean");
		if (ret) {
			dev_err(v3d->drm.dev, "Failed to create CACHE_CLEAN scheduler: %d.",
				ret);
			v3d_sched_fini(v3d);
			return ret;
		}
	}

	return 0;
}

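/* Tear down whichever schedulers were brought up; the sched.ready check
 * lets this also serve as the error-unwind path for v3d_sched_init().
 */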
void
v3d_sched_fini(struct v3d_dev *v3d)
{
	enum v3d_queue q;

	for (q = 0; q < V3D_MAX_QUEUES; q++) {
		if (v3d->queue[q].sched.ready)
			drm_sched_fini(&v3d->queue[q].sched);
	}
}