linux/drivers/gpu/drm/vc4/vc4_gem.c
/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/device.h>
#include <linux/io.h>

#include "uapi/drm/vc4_drm.h"
#include "vc4_drv.h"
#include "vc4_regs.h"
#include "vc4_trace.h"

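/* Arms (or re-arms) the hangcheck timer to fire roughly 100ms from
 * now, rounded up so that timer wakeups can be batched.
 */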
static void
vc4_queue_hangcheck(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	mod_timer(&vc4->hangcheck.timer,
		  round_jiffies_up(jiffies + msecs_to_jiffies(100)));
}

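/* Snapshot of the GPU state captured when a hang was detected:
 * user_state holds the register dump handed back to userspace, and
 * bo[] holds references on the BOs used by the hung jobs.
 */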
struct vc4_hang_state {
	struct drm_vc4_get_hang_state user_state;

	u32 bo_count;
	struct drm_gem_object **bo;
};

static void
vc4_free_hang_state(struct drm_device *dev, struct vc4_hang_state *state)
{
	unsigned int i;

	for (i = 0; i < state->user_state.bo_count; i++)
		drm_gem_object_unreference_unlocked(state->bo[i]);

	kfree(state);
}

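/* Hands the most recently captured hang state over to userspace.  The
 * caller typically queries once with a too-small array to learn
 * bo_count, then calls again with enough room for the BO entries.
 */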
int
vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file_priv)
{
	struct drm_vc4_get_hang_state *get_state = data;
	struct drm_vc4_get_hang_state_bo *bo_state;
	struct vc4_hang_state *kernel_state;
	struct drm_vc4_get_hang_state *state;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	unsigned long irqflags;
	u32 i;
	int ret = 0;

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	kernel_state = vc4->hang_state;
	if (!kernel_state) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return -ENOENT;
	}
	state = &kernel_state->user_state;

	/* If the user's array isn't big enough, just return the
	 * required array size.
	 */
	if (get_state->bo_count < state->bo_count) {
		get_state->bo_count = state->bo_count;
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return 0;
	}

	vc4->hang_state = NULL;
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	/* Save the user's BO pointer, so we don't stomp it with the memcpy. */
	state->bo = get_state->bo;
	memcpy(get_state, state, sizeof(*state));

	bo_state = kcalloc(state->bo_count, sizeof(*bo_state), GFP_KERNEL);
	if (!bo_state) {
		ret = -ENOMEM;
		goto err_free;
	}

	for (i = 0; i < state->bo_count; i++) {
		struct vc4_bo *vc4_bo = to_vc4_bo(kernel_state->bo[i]);
		u32 handle;

		ret = drm_gem_handle_create(file_priv, kernel_state->bo[i],
					    &handle);

		if (ret) {
			state->bo_count = i;
			goto err_delete_handle;
		}
		bo_state[i].handle = handle;
		bo_state[i].paddr = vc4_bo->base.paddr;
		bo_state[i].size = vc4_bo->base.base.size;
	}

	if (copy_to_user((void __user *)(uintptr_t)get_state->bo,
			 bo_state,
			 state->bo_count * sizeof(*bo_state)))
		ret = -EFAULT;

err_delete_handle:
	if (ret) {
		for (i = 0; i < state->bo_count; i++)
			drm_gem_handle_delete(file_priv, bo_state[i].handle);
	}

err_free:
	vc4_free_hang_state(dev, kernel_state);
	kfree(bo_state);

	return ret;
}

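/* Called from the reset path to snapshot the hung jobs' BOs and the
 * V3D register state before the GPU is reset, so userspace can fetch
 * them later through the GET_HANG_STATE ioctl.
 */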
static void
vc4_save_hang_state(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct drm_vc4_get_hang_state *state;
	struct vc4_hang_state *kernel_state;
	struct vc4_exec_info *exec[2];
	struct vc4_bo *bo;
	unsigned long irqflags;
	unsigned int i, j, unref_list_count, prev_idx;

	kernel_state = kcalloc(1, sizeof(*kernel_state), GFP_KERNEL);
	if (!kernel_state)
		return;

	state = &kernel_state->user_state;

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	exec[0] = vc4_first_bin_job(vc4);
	exec[1] = vc4_first_render_job(vc4);
	if (!exec[0] && !exec[1]) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		kfree(kernel_state);
		return;
	}

	/* Get the bos from both binner and renderer into hang state. */
	state->bo_count = 0;
	for (i = 0; i < 2; i++) {
		if (!exec[i])
			continue;

		unref_list_count = 0;
		list_for_each_entry(bo, &exec[i]->unref_list, unref_head)
			unref_list_count++;
		state->bo_count += exec[i]->bo_count + unref_list_count;
	}

	kernel_state->bo = kcalloc(state->bo_count,
				   sizeof(*kernel_state->bo), GFP_ATOMIC);

	if (!kernel_state->bo) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		kfree(kernel_state);
		return;
	}

	prev_idx = 0;
	for (i = 0; i < 2; i++) {
		if (!exec[i])
			continue;

		for (j = 0; j < exec[i]->bo_count; j++) {
			drm_gem_object_reference(&exec[i]->bo[j]->base);
			kernel_state->bo[j + prev_idx] = &exec[i]->bo[j]->base;
		}

		list_for_each_entry(bo, &exec[i]->unref_list, unref_head) {
			drm_gem_object_reference(&bo->base.base);
			kernel_state->bo[j + prev_idx] = &bo->base.base;
			j++;
		}
		prev_idx += j;
	}

	if (exec[0])
		state->start_bin = exec[0]->ct0ca;
	if (exec[1])
		state->start_render = exec[1]->ct1ca;

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	state->ct0ca = V3D_READ(V3D_CTNCA(0));
	state->ct0ea = V3D_READ(V3D_CTNEA(0));

	state->ct1ca = V3D_READ(V3D_CTNCA(1));
	state->ct1ea = V3D_READ(V3D_CTNEA(1));

	state->ct0cs = V3D_READ(V3D_CTNCS(0));
	state->ct1cs = V3D_READ(V3D_CTNCS(1));

	state->ct0ra0 = V3D_READ(V3D_CT00RA0);
	state->ct1ra0 = V3D_READ(V3D_CT01RA0);

	state->bpca = V3D_READ(V3D_BPCA);
	state->bpcs = V3D_READ(V3D_BPCS);
	state->bpoa = V3D_READ(V3D_BPOA);
	state->bpos = V3D_READ(V3D_BPOS);

	state->vpmbase = V3D_READ(V3D_VPMBASE);

	state->dbge = V3D_READ(V3D_DBGE);
	state->fdbgo = V3D_READ(V3D_FDBGO);
	state->fdbgb = V3D_READ(V3D_FDBGB);
	state->fdbgr = V3D_READ(V3D_FDBGR);
	state->fdbgs = V3D_READ(V3D_FDBGS);
	state->errstat = V3D_READ(V3D_ERRSTAT);

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	if (vc4->hang_state) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		vc4_free_hang_state(dev, kernel_state);
	} else {
		vc4->hang_state = kernel_state;
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
	}
}

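/* Resets the GPU by cycling its power through runtime PM, then
 * re-initializes interrupt state so any jobs that were queued behind
 * the hung one get kicked off again.
 */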
static void
vc4_reset(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	DRM_INFO("Resetting GPU.\n");

	mutex_lock(&vc4->power_lock);
	if (vc4->power_refcount) {
		/* Power the device off and back on by dropping and
		 * reacquiring our reference on runtime PM.
		 */
		pm_runtime_put_sync_suspend(&vc4->v3d->pdev->dev);
		pm_runtime_get_sync(&vc4->v3d->pdev->dev);
	}
	mutex_unlock(&vc4->power_lock);

	vc4_irq_reset(dev);

	/* Rearm the hangcheck -- another job might have been waiting
	 * for our hung one to get kicked off, and vc4_irq_reset()
	 * would have started it.
	 */
	vc4_queue_hangcheck(dev);
}

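/* Work item scheduled by the hangcheck timer when the GPU has stopped
 * making progress: saves the hang state and resets the GPU, which can
 * sleep and therefore can't be done from the timer itself.
 */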
static void
vc4_reset_work(struct work_struct *work)
{
	struct vc4_dev *vc4 =
		container_of(work, struct vc4_dev, hangcheck.reset_work);

	vc4_save_hang_state(vc4->dev);

	vc4_reset(vc4->dev);
}

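/* Hangcheck timer callback.  If neither the binner nor the renderer
 * control list has advanced since the last tick, the GPU is considered
 * hung and the reset work is scheduled; otherwise the timer is
 * re-armed.
 */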
static void
vc4_hangcheck_elapsed(unsigned long data)
{
	struct drm_device *dev = (struct drm_device *)data;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	uint32_t ct0ca, ct1ca;
	unsigned long irqflags;
	struct vc4_exec_info *bin_exec, *render_exec;

	spin_lock_irqsave(&vc4->job_lock, irqflags);

	bin_exec = vc4_first_bin_job(vc4);
	render_exec = vc4_first_render_job(vc4);

	/* If idle, we can stop watching for hangs. */
	if (!bin_exec && !render_exec) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return;
	}

	ct0ca = V3D_READ(V3D_CTNCA(0));
	ct1ca = V3D_READ(V3D_CTNCA(1));

	/* If we've made any progress in execution, rearm the timer
	 * and wait.
	 */
	if ((bin_exec && ct0ca != bin_exec->last_ct0ca) ||
	    (render_exec && ct1ca != render_exec->last_ct1ca)) {
		if (bin_exec)
			bin_exec->last_ct0ca = ct0ca;
		if (render_exec)
			render_exec->last_ct1ca = ct1ca;
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		vc4_queue_hangcheck(dev);
		return;
	}

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	/* We've gone too long with no progress, reset.  This has to
	 * be done from a work struct, since resetting can sleep and
	 * this timer hook isn't allowed to.
	 */
	schedule_work(&vc4->hangcheck.reset_work);
}

static void
submit_cl(struct drm_device *dev, uint32_t thread, uint32_t start, uint32_t end)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	/* Set the current and end address of the control list.
	 * Writing the end register is what starts the job.
	 */
	V3D_WRITE(V3D_CTNCA(thread), start);
	V3D_WRITE(V3D_CTNEA(thread), end);
}

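/* Blocks until the GPU has signalled completion of the given seqno,
 * the timeout expires, or (if interruptible) a signal arrives.  A
 * timeout_ns of ~0ull means wait forever; 0 means don't wait at all.
 */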
int
vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno, uint64_t timeout_ns,
		   bool interruptible)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	int ret = 0;
	unsigned long timeout_expire;
	DEFINE_WAIT(wait);

	if (vc4->finished_seqno >= seqno)
		return 0;

	if (timeout_ns == 0)
		return -ETIME;

	timeout_expire = jiffies + nsecs_to_jiffies(timeout_ns);

	trace_vc4_wait_for_seqno_begin(dev, seqno, timeout_ns);
	for (;;) {
		prepare_to_wait(&vc4->job_wait_queue, &wait,
				interruptible ? TASK_INTERRUPTIBLE :
				TASK_UNINTERRUPTIBLE);

		if (interruptible && signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		if (vc4->finished_seqno >= seqno)
			break;

		if (timeout_ns != ~0ull) {
			if (time_after_eq(jiffies, timeout_expire)) {
				ret = -ETIME;
				break;
			}
			schedule_timeout(timeout_expire - jiffies);
		} else {
			schedule();
		}
	}

	finish_wait(&vc4->job_wait_queue, &wait);
	trace_vc4_wait_for_seqno_end(dev, seqno);

	return ret;
}

static void
vc4_flush_caches(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	/* Flush the GPU L2 caches.  These caches sit on top of system
	 * L3 (the 128kb or so shared with the CPU), and are
	 * non-allocating in the L3.
	 */
	V3D_WRITE(V3D_L2CACTL,
		  V3D_L2CACTL_L2CCLR);

	V3D_WRITE(V3D_SLCACTL,
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T1CC) |
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T0CC) |
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_UCC) |
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_ICC));
}

/* Sets the registers for the next job to actually be executed in the
 * hardware.
 *
 * The job_lock should be held during this.
 */
void
vc4_submit_next_bin_job(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_exec_info *exec;

again:
	exec = vc4_first_bin_job(vc4);
	if (!exec)
		return;

	vc4_flush_caches(dev);

	/* Disable the binner's pre-loaded overflow memory address */
	V3D_WRITE(V3D_BPOA, 0);
	V3D_WRITE(V3D_BPOS, 0);

	/* Either put the job in the binner if it uses the binner, or
	 * immediately move it to the to-be-rendered queue.
	 */
	if (exec->ct0ca != exec->ct0ea) {
		submit_cl(dev, 0, exec->ct0ca, exec->ct0ea);
	} else {
		vc4_move_job_to_render(dev, exec);
		goto again;
	}
}

void
vc4_submit_next_render_job(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_exec_info *exec = vc4_first_render_job(vc4);

	if (!exec)
		return;

	submit_cl(dev, 1, exec->ct1ca, exec->ct1ea);
}

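/* Moves a job whose binning is done onto the render queue, and kicks
 * off the render thread if it was idle.
 */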
void
vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	bool was_empty = list_empty(&vc4->render_job_list);

	list_move_tail(&exec->head, &vc4->render_job_list);
	if (was_empty)
		vc4_submit_next_render_job(dev);
}

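/* Stamps every BO used by the job (including the temporary ones on the
 * unref list) with the job's seqno, so waiters on those BOs know which
 * seqno has to complete before the BO is idle.
 */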
static void
vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
{
	struct vc4_bo *bo;
	unsigned i;

	for (i = 0; i < exec->bo_count; i++) {
		bo = to_vc4_bo(&exec->bo[i]->base);
		bo->seqno = seqno;
	}

	list_for_each_entry(bo, &exec->unref_list, unref_head) {
		bo->seqno = seqno;
	}
}

/* Queues a struct vc4_exec_info for execution.  If no job is
 * currently executing, then submits it.
 *
 * Unlike most GPUs, our hardware only handles one command list at a
 * time.  To queue multiple jobs at once, we'd need to edit the
 * previous command list to have a jump to the new one at the end, and
 * then bump the end address.  That's a change for a later date,
 * though.
 */
static void
vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	uint64_t seqno;
	unsigned long irqflags;

	spin_lock_irqsave(&vc4->job_lock, irqflags);

	seqno = ++vc4->emit_seqno;
	exec->seqno = seqno;
	vc4_update_bo_seqnos(exec, seqno);

	list_add_tail(&exec->head, &vc4->bin_job_list);

	/* If no job was executing, kick ours off.  Otherwise, it'll
	 * get started when the previous job's flush done interrupt
	 * occurs.
	 */
	if (vc4_first_bin_job(vc4) == exec) {
		vc4_submit_next_bin_job(dev);
		vc4_queue_hangcheck(dev);
	}

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
}

/**
 * Looks up a bunch of GEM handles for BOs and stores the array for
 * use in the command validator that actually writes relocated
 * addresses pointing to them.
 */
static int
vc4_cl_lookup_bos(struct drm_device *dev,
		  struct drm_file *file_priv,
		  struct vc4_exec_info *exec)
{
	struct drm_vc4_submit_cl *args = exec->args;
	uint32_t *handles;
	int ret = 0;
	int i;

	exec->bo_count = args->bo_handle_count;

	if (!exec->bo_count) {
		/* See comment on bo_index for why we have to check
		 * this.
		 */
		DRM_ERROR("Rendering requires BOs to validate\n");
		return -EINVAL;
	}

	exec->bo = drm_calloc_large(exec->bo_count,
				    sizeof(struct drm_gem_cma_object *));
	if (!exec->bo) {
		DRM_ERROR("Failed to allocate validated BO pointers\n");
		return -ENOMEM;
	}

	handles = drm_malloc_ab(exec->bo_count, sizeof(uint32_t));
	if (!handles) {
		ret = -ENOMEM;
		DRM_ERROR("Failed to allocate incoming GEM handles\n");
		goto fail;
	}

	if (copy_from_user(handles,
			   (void __user *)(uintptr_t)args->bo_handles,
			   exec->bo_count * sizeof(uint32_t))) {
		ret = -EFAULT;
		DRM_ERROR("Failed to copy in GEM handles\n");
		goto fail;
	}

	spin_lock(&file_priv->table_lock);
	for (i = 0; i < exec->bo_count; i++) {
		struct drm_gem_object *bo = idr_find(&file_priv->object_idr,
						     handles[i]);
		if (!bo) {
			DRM_ERROR("Failed to look up GEM BO %d: %d\n",
				  i, handles[i]);
			ret = -EINVAL;
			spin_unlock(&file_priv->table_lock);
			goto fail;
		}
		drm_gem_object_reference(bo);
		exec->bo[i] = (struct drm_gem_cma_object *)bo;
	}
	spin_unlock(&file_priv->table_lock);

fail:
	drm_free_large(handles);
	return ret;
}

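/* Copies in the user's binner command list, shader records and
 * uniforms, allocates a BO for the validated contents, and runs them
 * through the command list and shader record validators.
 */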
static int
vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
{
	struct drm_vc4_submit_cl *args = exec->args;
	void *temp = NULL;
	void *bin;
	int ret = 0;
	uint32_t bin_offset = 0;
	uint32_t shader_rec_offset = roundup(bin_offset + args->bin_cl_size,
					     16);
	uint32_t uniforms_offset = shader_rec_offset + args->shader_rec_size;
	uint32_t exec_size = uniforms_offset + args->uniforms_size;
	uint32_t temp_size = exec_size + (sizeof(struct vc4_shader_state) *
					  args->shader_rec_count);
	struct vc4_bo *bo;

	if (uniforms_offset < shader_rec_offset ||
	    exec_size < uniforms_offset ||
	    args->shader_rec_count >= (UINT_MAX /
					  sizeof(struct vc4_shader_state)) ||
	    temp_size < exec_size) {
		DRM_ERROR("overflow in exec arguments\n");
		ret = -EINVAL;
		goto fail;
	}

	/* Allocate space where we'll store the copied in user command lists
	 * and shader records.
	 *
	 * We don't just copy directly into the BOs because we need to
	 * read the contents back for validation, and I think the
	 * bo->vaddr is uncached access.
	 */
	temp = drm_malloc_ab(temp_size, 1);
	if (!temp) {
		DRM_ERROR("Failed to allocate storage for copying "
			  "in bin/render CLs.\n");
		ret = -ENOMEM;
		goto fail;
	}
	bin = temp + bin_offset;
	exec->shader_rec_u = temp + shader_rec_offset;
	exec->uniforms_u = temp + uniforms_offset;
	exec->shader_state = temp + exec_size;
	exec->shader_state_size = args->shader_rec_count;

	if (copy_from_user(bin,
			   (void __user *)(uintptr_t)args->bin_cl,
			   args->bin_cl_size)) {
		ret = -EFAULT;
		goto fail;
	}

	if (copy_from_user(exec->shader_rec_u,
			   (void __user *)(uintptr_t)args->shader_rec,
			   args->shader_rec_size)) {
		ret = -EFAULT;
		goto fail;
	}

	if (copy_from_user(exec->uniforms_u,
			   (void __user *)(uintptr_t)args->uniforms,
			   args->uniforms_size)) {
		ret = -EFAULT;
		goto fail;
	}

	bo = vc4_bo_create(dev, exec_size, true);
	if (IS_ERR(bo)) {
		DRM_ERROR("Couldn't allocate BO for binning\n");
		ret = PTR_ERR(bo);
		goto fail;
	}
	exec->exec_bo = &bo->base;

	list_add_tail(&to_vc4_bo(&exec->exec_bo->base)->unref_head,
		      &exec->unref_list);

	exec->ct0ca = exec->exec_bo->paddr + bin_offset;

	exec->bin_u = bin;

	exec->shader_rec_v = exec->exec_bo->vaddr + shader_rec_offset;
	exec->shader_rec_p = exec->exec_bo->paddr + shader_rec_offset;
	exec->shader_rec_size = args->shader_rec_size;

	exec->uniforms_v = exec->exec_bo->vaddr + uniforms_offset;
	exec->uniforms_p = exec->exec_bo->paddr + uniforms_offset;
	exec->uniforms_size = args->uniforms_size;

	ret = vc4_validate_bin_cl(dev,
				  exec->exec_bo->vaddr + bin_offset,
				  bin,
				  exec);
	if (ret)
		goto fail;

	ret = vc4_validate_shader_recs(dev, exec);

fail:
	drm_free_large(temp);
	return ret;
}

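/* Frees an exec once it's done (or failed to be queued): drops the
 * references on its BOs, releases our power reference if this was the
 * last outstanding job, and frees the exec struct itself.
 */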
static void
vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	unsigned i;

	if (exec->bo) {
		for (i = 0; i < exec->bo_count; i++)
			drm_gem_object_unreference_unlocked(&exec->bo[i]->base);
		drm_free_large(exec->bo);
	}

	while (!list_empty(&exec->unref_list)) {
		struct vc4_bo *bo = list_first_entry(&exec->unref_list,
						     struct vc4_bo, unref_head);
		list_del(&bo->unref_head);
		drm_gem_object_unreference_unlocked(&bo->base.base);
	}

	mutex_lock(&vc4->power_lock);
	if (--vc4->power_refcount == 0)
		pm_runtime_put(&vc4->v3d->pdev->dev);
	mutex_unlock(&vc4->power_lock);

	kfree(exec);
}

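/* Retires every job on the done list by handing it to
 * vc4_complete_exec(), then schedules any seqno callbacks whose seqno
 * has now completed.
 */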
void
vc4_job_handle_completed(struct vc4_dev *vc4)
{
	unsigned long irqflags;
	struct vc4_seqno_cb *cb, *cb_temp;

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	while (!list_empty(&vc4->job_done_list)) {
		struct vc4_exec_info *exec =
			list_first_entry(&vc4->job_done_list,
					 struct vc4_exec_info, head);
		list_del(&exec->head);

		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		vc4_complete_exec(vc4->dev, exec);
		spin_lock_irqsave(&vc4->job_lock, irqflags);
	}

	list_for_each_entry_safe(cb, cb_temp, &vc4->seqno_cb_list, work.entry) {
		if (cb->seqno <= vc4->finished_seqno) {
			list_del_init(&cb->work.entry);
			schedule_work(&cb->work);
		}
	}

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
}

static void vc4_seqno_cb_work(struct work_struct *work)
{
	struct vc4_seqno_cb *cb = container_of(work, struct vc4_seqno_cb, work);

	cb->func(cb);
}

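/* Registers a callback to be run (from a workqueue) once the given
 * seqno has completed.  If it already has, the callback is scheduled
 * immediately.
 */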
int vc4_queue_seqno_cb(struct drm_device *dev,
		       struct vc4_seqno_cb *cb, uint64_t seqno,
		       void (*func)(struct vc4_seqno_cb *cb))
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	int ret = 0;
	unsigned long irqflags;

	cb->func = func;
	INIT_WORK(&cb->work, vc4_seqno_cb_work);

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	if (seqno > vc4->finished_seqno) {
		cb->seqno = seqno;
		list_add_tail(&cb->work.entry, &vc4->seqno_cb_list);
	} else {
		schedule_work(&cb->work);
	}
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	return ret;
}

/* Scheduled when any job has been completed, this walks the list of
 * jobs that had completed and unrefs their BOs and frees their exec
 * structs.
 */
static void
vc4_job_done_work(struct work_struct *work)
{
	struct vc4_dev *vc4 =
		container_of(work, struct vc4_dev, job_done_work);

	vc4_job_handle_completed(vc4);
}

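/* Shared helper for the wait ioctls: waits for the seqno and, if the
 * wait was interrupted, reduces the user-visible timeout by however
 * long we already waited, so a restarted ioctl doesn't wait longer
 * than requested.
 */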
static int
vc4_wait_for_seqno_ioctl_helper(struct drm_device *dev,
				uint64_t seqno,
				uint64_t *timeout_ns)
{
	unsigned long start = jiffies;
	int ret = vc4_wait_for_seqno(dev, seqno, *timeout_ns, true);

	if ((ret == -EINTR || ret == -ERESTARTSYS) && *timeout_ns != ~0ull) {
		uint64_t delta = jiffies_to_nsecs(jiffies - start);

		if (*timeout_ns >= delta)
			*timeout_ns -= delta;
	}

	return ret;
}

int
vc4_wait_seqno_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct drm_vc4_wait_seqno *args = data;

	return vc4_wait_for_seqno_ioctl_helper(dev, args->seqno,
					       &args->timeout_ns);
}

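/* Waits for the last job that used the given BO (tracked via the BO's
 * seqno) to complete.
 */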
int
vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
		  struct drm_file *file_priv)
{
	int ret;
	struct drm_vc4_wait_bo *args = data;
	struct drm_gem_object *gem_obj;
	struct vc4_bo *bo;

	if (args->pad != 0)
		return -EINVAL;

	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
	if (!gem_obj) {
		DRM_ERROR("Failed to look up GEM BO %d\n", args->handle);
		return -EINVAL;
	}
	bo = to_vc4_bo(gem_obj);

	ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno,
					      &args->timeout_ns);

	drm_gem_object_unreference_unlocked(gem_obj);
	return ret;
}

/**
 * Submits a command list to the VC4.
 *
 * This is what is called batchbuffer emitting on other hardware.
 */
int
vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct drm_vc4_submit_cl *args = data;
	struct vc4_exec_info *exec;
	int ret = 0;

	if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) {
		DRM_ERROR("Unknown flags: 0x%02x\n", args->flags);
		return -EINVAL;
	}

	exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
	if (!exec) {
		DRM_ERROR("malloc failure on exec struct\n");
		return -ENOMEM;
	}

	mutex_lock(&vc4->power_lock);
	if (vc4->power_refcount++ == 0)
		ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
	mutex_unlock(&vc4->power_lock);
	if (ret < 0) {
		kfree(exec);
		return ret;
	}

	exec->args = args;
	INIT_LIST_HEAD(&exec->unref_list);

	ret = vc4_cl_lookup_bos(dev, file_priv, exec);
	if (ret)
		goto fail;

	if (exec->args->bin_cl_size != 0) {
		ret = vc4_get_bcl(dev, exec);
		if (ret)
			goto fail;
	} else {
		exec->ct0ca = 0;
		exec->ct0ea = 0;
	}

	ret = vc4_get_rcl(dev, exec);
	if (ret)
		goto fail;

	/* Clear this out of the struct we'll be putting in the queue,
	 * since it's part of our stack.
	 */
	exec->args = NULL;

	vc4_queue_submit(dev, exec);

	/* Return the seqno for our job. */
	args->seqno = vc4->emit_seqno;

	return 0;

fail:
	vc4_complete_exec(vc4->dev, exec);

	return ret;
}

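/* One-time setup of the job lists, locks, hangcheck timer and work
 * items used by the GEM submission and completion paths.
 */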
void
vc4_gem_init(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	INIT_LIST_HEAD(&vc4->bin_job_list);
	INIT_LIST_HEAD(&vc4->render_job_list);
	INIT_LIST_HEAD(&vc4->job_done_list);
	INIT_LIST_HEAD(&vc4->seqno_cb_list);
	spin_lock_init(&vc4->job_lock);

	INIT_WORK(&vc4->hangcheck.reset_work, vc4_reset_work);
	setup_timer(&vc4->hangcheck.timer,
		    vc4_hangcheck_elapsed,
		    (unsigned long)dev);

	INIT_WORK(&vc4->job_done_work, vc4_job_done_work);

	mutex_init(&vc4->power_lock);
}

void
vc4_gem_destroy(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	/* Waiting for exec to finish would need to be done before
	 * unregistering V3D.
	 */
	WARN_ON(vc4->emit_seqno != vc4->finished_seqno);

	/* V3D should already have disabled its interrupt and cleared
	 * the overflow allocation registers.  Now free the object.
	 */
	if (vc4->overflow_mem) {
		drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base);
		vc4->overflow_mem = NULL;
	}

	if (vc4->hang_state)
		vc4_free_hang_state(dev, vc4->hang_state);

	vc4_bo_cache_destroy(dev);
}
 950