linux/drivers/gpu/drm/radeon/radeon_fence.c
   1/*
   2 * Copyright 2009 Jerome Glisse.
   3 * All Rights Reserved.
   4 *
   5 * Permission is hereby granted, free of charge, to any person obtaining a
   6 * copy of this software and associated documentation files (the
   7 * "Software"), to deal in the Software without restriction, including
   8 * without limitation the rights to use, copy, modify, merge, publish,
   9 * distribute, sub license, and/or sell copies of the Software, and to
  10 * permit persons to whom the Software is furnished to do so, subject to
  11 * the following conditions:
  12 *
  13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  16 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
  17 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  18 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  19 * USE OR OTHER DEALINGS IN THE SOFTWARE.
  20 *
  21 * The above copyright notice and this permission notice (including the
  22 * next paragraph) shall be included in all copies or substantial portions
  23 * of the Software.
  24 *
  25 */
  26/*
  27 * Authors:
  28 *    Jerome Glisse <glisse@freedesktop.org>
  29 *    Dave Airlie
  30 */
  31
  32#include <linux/atomic.h>
  33#include <linux/firmware.h>
  34#include <linux/kref.h>
  35#include <linux/sched/signal.h>
  36#include <linux/seq_file.h>
  37#include <linux/slab.h>
  38#include <linux/wait.h>
  39
  40#include <drm/drm_debugfs.h>
  41#include <drm/drm_device.h>
  42#include <drm/drm_file.h>
  43
  44#include "radeon.h"
  45#include "radeon_reg.h"
  46#include "radeon_trace.h"
  47
  48/*
  49 * Fences
  50 * Fences mark an event in the GPU's pipeline and are used
  51 * for GPU/CPU synchronization.  When the fence is written,
  52 * it is expected that all buffers associated with that fence
  53 * are no longer in use by the associated ring on the GPU and
  54 * that the relevant GPU caches have been flushed.  Whether
  55 * we use a scratch register or memory location depends on the asic
  56 * and whether writeback is enabled.
  57 */
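
    /*
     * Typical fence lifecycle, as an illustrative sketch only (not driver
     * code): the helpers named below are defined later in this file, and
     * the locking each of them documents is assumed to be handled by the
     * caller.
     *
     *        struct radeon_fence *fence;
     *        int r;
     *
     *        r = radeon_fence_emit(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX);
     *        if (!r) {
     *                r = radeon_fence_wait(fence, false);
     *                radeon_fence_unref(&fence);
     *        }
     */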
  58
  59/**
  60 * radeon_fence_write - write a fence value
  61 *
  62 * @rdev: radeon_device pointer
  63 * @seq: sequence number to write
  64 * @ring: ring index the fence is associated with
  65 *
  66 * Writes a fence value to memory or a scratch register (all asics).
  67 */
  68static void radeon_fence_write(struct radeon_device *rdev, u32 seq, int ring)
  69{
  70        struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
  71        if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
  72                if (drv->cpu_addr) {
  73                        *drv->cpu_addr = cpu_to_le32(seq);
  74                }
  75        } else {
  76                WREG32(drv->scratch_reg, seq);
  77        }
  78}
  79
  80/**
  81 * radeon_fence_read - read a fence value
  82 *
  83 * @rdev: radeon_device pointer
  84 * @ring: ring index the fence is associated with
  85 *
  86 * Reads a fence value from memory or a scratch register (all asics).
  87 * Returns the value of the fence read from memory or register.
  88 */
  89static u32 radeon_fence_read(struct radeon_device *rdev, int ring)
  90{
  91        struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
  92        u32 seq = 0;
  93
  94        if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
  95                if (drv->cpu_addr) {
  96                        seq = le32_to_cpu(*drv->cpu_addr);
  97                } else {
  98                        seq = lower_32_bits(atomic64_read(&drv->last_seq));
  99                }
 100        } else {
 101                seq = RREG32(drv->scratch_reg);
 102        }
 103        return seq;
 104}
 105
 106/**
 107 * radeon_fence_schedule_check - schedule lockup check
 108 *
 109 * @rdev: radeon_device pointer
 110 * @ring: ring index we should work with
 111 *
 112 * Queues a delayed work item to check for lockups.
 113 */
 114static void radeon_fence_schedule_check(struct radeon_device *rdev, int ring)
 115{
 116        /*
 117         * Do not reset the timer here with mod_delayed_work,
 118         * this can livelock in an interaction with TTM delayed destroy.
 119         */
 120        queue_delayed_work(system_power_efficient_wq,
 121                           &rdev->fence_drv[ring].lockup_work,
 122                           RADEON_FENCE_JIFFIES_TIMEOUT);
 123}
 124
 125/**
 126 * radeon_fence_emit - emit a fence on the requested ring
 127 *
 128 * @rdev: radeon_device pointer
 129 * @fence: radeon fence object
 130 * @ring: ring index the fence is associated with
 131 *
 132 * Emits a fence command on the requested ring (all asics).
 133 * Returns 0 on success, -ENOMEM on failure.
 134 */
 135int radeon_fence_emit(struct radeon_device *rdev,
 136                      struct radeon_fence **fence,
 137                      int ring)
 138{
 139        u64 seq;
 140
 141        /* we are protected by the ring emission mutex */
 142        *fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL);
 143        if ((*fence) == NULL) {
 144                return -ENOMEM;
 145        }
 146        (*fence)->rdev = rdev;
 147        (*fence)->seq = seq = ++rdev->fence_drv[ring].sync_seq[ring];
 148        (*fence)->ring = ring;
 149        (*fence)->is_vm_update = false;
 150        dma_fence_init(&(*fence)->base, &radeon_fence_ops,
 151                       &rdev->fence_queue.lock,
 152                       rdev->fence_context + ring,
 153                       seq);
 154        radeon_fence_ring_emit(rdev, ring, *fence);
 155        trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq);
 156        radeon_fence_schedule_check(rdev, ring);
 157        return 0;
 158}
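
    /*
     * Rough sketch of how a caller typically pairs radeon_fence_emit() with
     * ring emission (illustrative only; "ndw" and the command stream are
     * placeholders and error handling is elided):
     *
     *        r = radeon_ring_lock(rdev, ring, ndw);
     *        if (r)
     *                return r;
     *        ... emit command stream on the ring ...
     *        r = radeon_fence_emit(rdev, &fence, ring->idx);
     *        radeon_ring_unlock_commit(rdev, ring, false);
     */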
 159
 160/**
 161 * radeon_fence_check_signaled - callback from fence_queue
 162 *
 163 * This function is called with the fence_queue lock held, which is also
 164 * used for the fence locking itself, so the unlocked variants are used for
 165 * fence_signal and remove_wait_queue.
 166 */
 167static int radeon_fence_check_signaled(wait_queue_entry_t *wait, unsigned mode, int flags, void *key)
 168{
 169        struct radeon_fence *fence;
 170        u64 seq;
 171
 172        fence = container_of(wait, struct radeon_fence, fence_wake);
 173
 174        /*
 175         * We cannot use radeon_fence_process here because we're already
 176         * in the waitqueue, in a call from wake_up_all.
 177         */
 178        seq = atomic64_read(&fence->rdev->fence_drv[fence->ring].last_seq);
 179        if (seq >= fence->seq) {
 180                int ret = dma_fence_signal_locked(&fence->base);
 181
 182                if (!ret)
 183                        DMA_FENCE_TRACE(&fence->base, "signaled from irq context\n");
 184                else
 185                        DMA_FENCE_TRACE(&fence->base, "was already signaled\n");
 186
 187                radeon_irq_kms_sw_irq_put(fence->rdev, fence->ring);
 188                __remove_wait_queue(&fence->rdev->fence_queue, &fence->fence_wake);
 189                dma_fence_put(&fence->base);
 190        } else
 191                DMA_FENCE_TRACE(&fence->base, "pending\n");
 192        return 0;
 193}
 194
 195/**
 196 * radeon_fence_activity - check for fence activity
 197 *
 198 * @rdev: radeon_device pointer
 199 * @ring: ring index the fence is associated with
 200 *
 201 * Checks the current fence value and calculates the last
 202 * signaled fence value. Returns true if activity occurred
 203 * on the ring and the fence_queue should be woken up.
 204 */
 205static bool radeon_fence_activity(struct radeon_device *rdev, int ring)
 206{
 207        uint64_t seq, last_seq, last_emitted;
 208        unsigned count_loop = 0;
 209        bool wake = false;
 210
 211        /* Note there is a scenario here for an infinite loop but it is
 212         * very unlikely to happen. For it to happen, the current polling
 213         * process needs to be interrupted by another process, and that
 214         * other process needs to update last_seq between the atomic read
 215         * and the xchg of the current process.
 216         *
 217         * Moreover, for this to become an infinite loop there needs to be
 218         * a continuous stream of newly signaled fences, i.e. radeon_fence_read
 219         * needs to return a different value each time for both the currently
 220         * polling process and the other process that xchgs last_seq
 221         * between the atomic read and xchg of the current process. And the
 222         * value the other process sets as last seq must be higher than
 223         * the seq value we just read, which means that the current process
 224         * needs to be interrupted after radeon_fence_read and before the
 225         * atomic xchg.
 226         *
 227         * To be extra safe we count the number of times we loop and
 228         * bail out after 10 loops, accepting the fact that we might
 229         * have temporarily set last_seq not to the real last
 230         * seq but to an older one.
 231         */
 232        last_seq = atomic64_read(&rdev->fence_drv[ring].last_seq);
 233        do {
 234                last_emitted = rdev->fence_drv[ring].sync_seq[ring];
 235                seq = radeon_fence_read(rdev, ring);
 236                seq |= last_seq & 0xffffffff00000000LL;
 237                if (seq < last_seq) {
 238                        seq &= 0xffffffff;
 239                        seq |= last_emitted & 0xffffffff00000000LL;
 240                }
 241
 242                if (seq <= last_seq || seq > last_emitted) {
 243                        break;
 244                }
 245                /* If we loop over we don't want to return without
 246                 * checking if a fence is signaled as it means that the
 247                 * seq we just read is different from the previous one.
 248                 */
 249                wake = true;
 250                last_seq = seq;
 251                if ((count_loop++) > 10) {
 252                        /* We looped over too many times, leave with the
 253                         * fact that we might have set an older fence
 254                         * seq than the current real last seq as signaled
 255                         * by the hw.
 256                         */
 257                        break;
 258                }
 259        } while (atomic64_xchg(&rdev->fence_drv[ring].last_seq, seq) > seq);
 260
 261        if (seq < last_emitted)
 262                radeon_fence_schedule_check(rdev, ring);
 263
 264        return wake;
 265}
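
    /*
     * Worked example of the 64-bit extension above (illustrative numbers):
     * with last_seq = 0x00000001fffffff0 and a hardware readback of
     * 0x00000005, seq first becomes 0x0000000100000005, which is below
     * last_seq, so the upper 32 bits are taken from last_emitted instead
     * (say 0x0000000200000010), giving 0x0000000200000005. The 32-bit
     * counter wrapped, and the extension keeps the 64-bit value monotonic.
     */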
 266
 267/**
 268 * radeon_fence_check_lockup - check for hardware lockup
 269 *
 270 * @work: delayed work item
 271 *
 272 * Checks for fence activity and, if there is none, probes
 273 * the hardware to see if a lockup occurred.
 274 */
 275static void radeon_fence_check_lockup(struct work_struct *work)
 276{
 277        struct radeon_fence_driver *fence_drv;
 278        struct radeon_device *rdev;
 279        int ring;
 280
 281        fence_drv = container_of(work, struct radeon_fence_driver,
 282                                 lockup_work.work);
 283        rdev = fence_drv->rdev;
 284        ring = fence_drv - &rdev->fence_drv[0];
 285
 286        if (!down_read_trylock(&rdev->exclusive_lock)) {
 287                /* just reschedule the check if a reset is going on */
 288                radeon_fence_schedule_check(rdev, ring);
 289                return;
 290        }
 291
 292        if (fence_drv->delayed_irq && rdev->ddev->irq_enabled) {
 293                unsigned long irqflags;
 294
 295                fence_drv->delayed_irq = false;
 296                spin_lock_irqsave(&rdev->irq.lock, irqflags);
 297                radeon_irq_set(rdev);
 298                spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
 299        }
 300
 301        if (radeon_fence_activity(rdev, ring))
 302                wake_up_all(&rdev->fence_queue);
 303
 304        else if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) {
 305
 306                /* good news we believe it's a lockup */
 307                dev_warn(rdev->dev, "GPU lockup (current fence id "
 308                         "0x%016llx last fence id 0x%016llx on ring %d)\n",
 309                         (uint64_t)atomic64_read(&fence_drv->last_seq),
 310                         fence_drv->sync_seq[ring], ring);
 311
 312                /* remember that we need a reset */
 313                rdev->needs_reset = true;
 314                wake_up_all(&rdev->fence_queue);
 315        }
 316        up_read(&rdev->exclusive_lock);
 317}
 318
 319/**
 320 * radeon_fence_process - process a fence
 321 *
 322 * @rdev: radeon_device pointer
 323 * @ring: ring index the fence is associated with
 324 *
 325 * Checks the current fence value and wakes the fence queue
 326 * if the sequence number has increased (all asics).
 327 */
 328void radeon_fence_process(struct radeon_device *rdev, int ring)
 329{
 330        if (radeon_fence_activity(rdev, ring))
 331                wake_up_all(&rdev->fence_queue);
 332}
 333
 334/**
 335 * radeon_fence_seq_signaled - check if a fence sequence number has signaled
 336 *
 337 * @rdev: radeon device pointer
 338 * @seq: sequence number
 339 * @ring: ring index the fence is associated with
 340 *
 341 * Check if the last signaled fence sequence number is >= the requested
 342 * sequence number (all asics).
 343 * Returns true if the fence has signaled (current fence value
 344 * is >= requested value) or false if it has not (current fence
 345 * value is < the requested value).  Helper function for
 346 * radeon_fence_signaled().
 347 */
 348static bool radeon_fence_seq_signaled(struct radeon_device *rdev,
 349                                      u64 seq, unsigned ring)
 350{
 351        if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
 352                return true;
 353        }
 354        /* poll new last sequence at least once */
 355        radeon_fence_process(rdev, ring);
 356        if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
 357                return true;
 358        }
 359        return false;
 360}
 361
 362static bool radeon_fence_is_signaled(struct dma_fence *f)
 363{
 364        struct radeon_fence *fence = to_radeon_fence(f);
 365        struct radeon_device *rdev = fence->rdev;
 366        unsigned ring = fence->ring;
 367        u64 seq = fence->seq;
 368
 369        if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
 370                return true;
 371        }
 372
 373        if (down_read_trylock(&rdev->exclusive_lock)) {
 374                radeon_fence_process(rdev, ring);
 375                up_read(&rdev->exclusive_lock);
 376
 377                if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
 378                        return true;
 379                }
 380        }
 381        return false;
 382}
 383
 384/**
 385 * radeon_fence_enable_signaling - enable signaling on fence
 386 * @f: fence
 387 *
 388 * This function is called with fence_queue lock held, and adds a callback
 389 * to fence_queue that checks if this fence is signaled, and if so it
 390 * signals the fence and removes itself.
 391 */
 392static bool radeon_fence_enable_signaling(struct dma_fence *f)
 393{
 394        struct radeon_fence *fence = to_radeon_fence(f);
 395        struct radeon_device *rdev = fence->rdev;
 396
 397        if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq)
 398                return false;
 399
 400        if (down_read_trylock(&rdev->exclusive_lock)) {
 401                radeon_irq_kms_sw_irq_get(rdev, fence->ring);
 402
 403                if (radeon_fence_activity(rdev, fence->ring))
 404                        wake_up_all_locked(&rdev->fence_queue);
 405
 406                /* did fence get signaled after we enabled the sw irq? */
 407                if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq) {
 408                        radeon_irq_kms_sw_irq_put(rdev, fence->ring);
 409                        up_read(&rdev->exclusive_lock);
 410                        return false;
 411                }
 412
 413                up_read(&rdev->exclusive_lock);
 414        } else {
 415                /* we're probably in a lockup, let's not fiddle too much */
 416                if (radeon_irq_kms_sw_irq_get_delayed(rdev, fence->ring))
 417                        rdev->fence_drv[fence->ring].delayed_irq = true;
 418                radeon_fence_schedule_check(rdev, fence->ring);
 419        }
 420
 421        fence->fence_wake.flags = 0;
 422        fence->fence_wake.private = NULL;
 423        fence->fence_wake.func = radeon_fence_check_signaled;
 424        __add_wait_queue(&rdev->fence_queue, &fence->fence_wake);
 425        dma_fence_get(f);
 426
 427        DMA_FENCE_TRACE(&fence->base, "armed on ring %i!\n", fence->ring);
 428        return true;
 429}
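
    /*
     * Note: the hook above is not called directly by the driver; the
     * dma_fence core invokes it the first time someone asks to be notified
     * about the fence, for example (sketch, "my_cb" is a hypothetical
     * callback):
     *
     *        dma_fence_add_callback(&fence->base, &cb, my_cb);
     *
     * or when a process blocks in dma_fence_wait() on &fence->base.
     */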
 430
 431/**
 432 * radeon_fence_signaled - check if a fence has signaled
 433 *
 434 * @fence: radeon fence object
 435 *
 436 * Check if the requested fence has signaled (all asics).
 437 * Returns true if the fence has signaled or false if it has not.
 438 */
 439bool radeon_fence_signaled(struct radeon_fence *fence)
 440{
 441        if (!fence)
 442                return true;
 443
 444        if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) {
 445                int ret;
 446
 447                ret = dma_fence_signal(&fence->base);
 448                if (!ret)
 449                        DMA_FENCE_TRACE(&fence->base, "signaled from radeon_fence_signaled\n");
 450                return true;
 451        }
 452        return false;
 453}
 454
 455/**
 456 * radeon_fence_any_seq_signaled - check if any sequence number is signaled
 457 *
 458 * @rdev: radeon device pointer
 459 * @seq: sequence numbers
 460 *
 461 * Check if the last signaled fence sequence number is >= the requested
 462 * sequence number (all asics).
 463 * Returns true if any has signaled (current value is >= requested value)
 464 * or false if it has not. Helper function for radeon_fence_wait_seq.
 465 */
 466static bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq)
 467{
 468        unsigned i;
 469
 470        for (i = 0; i < RADEON_NUM_RINGS; ++i) {
 471                if (seq[i] && radeon_fence_seq_signaled(rdev, seq[i], i))
 472                        return true;
 473        }
 474        return false;
 475}
 476
 477/**
 478 * radeon_fence_wait_seq_timeout - wait for a specific sequence numbers
 479 *
 480 * @rdev: radeon device pointer
 481 * @target_seq: sequence number(s) we want to wait for
 482 * @intr: use interruptible sleep
 483 * @timeout: maximum time to wait, or MAX_SCHEDULE_TIMEOUT for infinite wait
 484 *
 485 * Wait for the requested sequence number(s) to be written by any ring
 486 * (all asics).  Sequence number array is indexed by ring id.
 487 * @intr selects whether to use interruptible (true) or non-interruptible
 488 * (false) sleep when waiting for the sequence number.  Helper function
 489 * for radeon_fence_wait_*().
 490 * Returns remaining time if the sequence number has passed, 0 when
 491 * the wait timed out, or an error for all other cases.
 492 * -EDEADLK is returned when a GPU lockup has been detected.
 493 */
 494static long radeon_fence_wait_seq_timeout(struct radeon_device *rdev,
 495                                          u64 *target_seq, bool intr,
 496                                          long timeout)
 497{
 498        long r;
 499        int i;
 500
 501        if (radeon_fence_any_seq_signaled(rdev, target_seq))
 502                return timeout;
 503
 504        /* enable IRQs and tracing */
 505        for (i = 0; i < RADEON_NUM_RINGS; ++i) {
 506                if (!target_seq[i])
 507                        continue;
 508
 509                trace_radeon_fence_wait_begin(rdev->ddev, i, target_seq[i]);
 510                radeon_irq_kms_sw_irq_get(rdev, i);
 511        }
 512
 513        if (intr) {
 514                r = wait_event_interruptible_timeout(rdev->fence_queue, (
 515                        radeon_fence_any_seq_signaled(rdev, target_seq)
 516                         || rdev->needs_reset), timeout);
 517        } else {
 518                r = wait_event_timeout(rdev->fence_queue, (
 519                        radeon_fence_any_seq_signaled(rdev, target_seq)
 520                         || rdev->needs_reset), timeout);
 521        }
 522
 523        if (rdev->needs_reset)
 524                r = -EDEADLK;
 525
 526        for (i = 0; i < RADEON_NUM_RINGS; ++i) {
 527                if (!target_seq[i])
 528                        continue;
 529
 530                radeon_irq_kms_sw_irq_put(rdev, i);
 531                trace_radeon_fence_wait_end(rdev->ddev, i, target_seq[i]);
 532        }
 533
 534        return r;
 535}
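
    /*
     * Example of the target_seq convention (illustrative sketch, not driver
     * code): waiting interruptibly, with no time limit, for fence 5 on the
     * GFX ring and fence 9 on the DMA ring at the same time:
     *
     *        u64 target_seq[RADEON_NUM_RINGS] = {};
     *
     *        target_seq[RADEON_RING_TYPE_GFX_INDEX] = 5;
     *        target_seq[R600_RING_TYPE_DMA_INDEX] = 9;
     *        r = radeon_fence_wait_seq_timeout(rdev, target_seq, true,
     *                                          MAX_SCHEDULE_TIMEOUT);
     */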
 536
 537/**
 538 * radeon_fence_wait_timeout - wait for a fence to signal with timeout
 539 *
 540 * @fence: radeon fence object
 541 * @intr: use interruptible sleep
 542 * @timeout: maximum time to wait, or MAX_SCHEDULE_TIMEOUT for infinite wait
 543 *
 544 * Wait for the requested fence to signal (all asics).
 545 * @intr selects whether to use interruptible (true) or non-interruptible
 546 * (false) sleep when waiting for the fence.
 547 * Returns remaining time if the fence has signaled, 0 when
 548 * the wait timed out, or an error for all other cases.
 549 */
 550long radeon_fence_wait_timeout(struct radeon_fence *fence, bool intr, long timeout)
 551{
 552        uint64_t seq[RADEON_NUM_RINGS] = {};
 553        long r;
 554        int r_sig;
 555
 556        /*
 557         * This function should not be called on !radeon fences.
 558         * If this is the case, it would mean this function can
 559         * also be called on radeon fences belonging to another card.
 560         * exclusive_lock is not held in that case.
 561         */
 562        if (WARN_ON_ONCE(!to_radeon_fence(&fence->base)))
 563                return dma_fence_wait(&fence->base, intr);
 564
 565        seq[fence->ring] = fence->seq;
 566        r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, timeout);
 567        if (r <= 0) {
 568                return r;
 569        }
 570
 571        r_sig = dma_fence_signal(&fence->base);
 572        if (!r_sig)
 573                DMA_FENCE_TRACE(&fence->base, "signaled from fence_wait\n");
 574        return r;
 575}
 576
 577/**
 578 * radeon_fence_wait - wait for a fence to signal
 579 *
 580 * @fence: radeon fence object
 581 * @intr: use interruptible sleep
 582 *
 583 * Wait for the requested fence to signal (all asics).
 584 * @intr selects whether to use interruptible (true) or non-interruptible
 585 * (false) sleep when waiting for the fence.
 586 * Returns 0 if the fence has passed, error for all other cases.
 587 */
 588int radeon_fence_wait(struct radeon_fence *fence, bool intr)
 589{
 590        long r = radeon_fence_wait_timeout(fence, intr, MAX_SCHEDULE_TIMEOUT);
 591        if (r > 0) {
 592                return 0;
 593        } else {
 594                return r;
 595        }
 596}
 597
 598/**
 599 * radeon_fence_wait_any - wait for a fence to signal on any ring
 600 *
 601 * @rdev: radeon device pointer
 602 * @fences: radeon fence object(s)
 603 * @intr: use interruptible sleep
 604 *
 605 * Wait for any requested fence to signal (all asics).  Fence
 606 * array is indexed by ring id.  @intr selects whether to use
 607 * interruptible (true) or non-interruptible (false) sleep when
 608 * waiting for the fences. Used by the suballocator.
 609 * Returns 0 if any fence has passed, error for all other cases.
 610 */
 611int radeon_fence_wait_any(struct radeon_device *rdev,
 612                          struct radeon_fence **fences,
 613                          bool intr)
 614{
 615        uint64_t seq[RADEON_NUM_RINGS];
 616        unsigned i, num_rings = 0;
 617        long r;
 618
 619        for (i = 0; i < RADEON_NUM_RINGS; ++i) {
 620                seq[i] = 0;
 621
 622                if (!fences[i]) {
 623                        continue;
 624                }
 625
 626                seq[i] = fences[i]->seq;
 627                ++num_rings;
 628        }
 629
 630        /* nothing to wait for ? */
 631        if (num_rings == 0)
 632                return -ENOENT;
 633
 634        r = radeon_fence_wait_seq_timeout(rdev, seq, intr, MAX_SCHEDULE_TIMEOUT);
 635        if (r < 0) {
 636                return r;
 637        }
 638        return 0;
 639}
 640
 641/**
 642 * radeon_fence_wait_next - wait for the next fence to signal
 643 *
 644 * @rdev: radeon device pointer
 645 * @ring: ring index the fence is associated with
 646 *
 647 * Wait for the next fence on the requested ring to signal (all asics).
 648 * Returns 0 if the next fence has passed, error for all other cases.
 649 * Caller must hold ring lock.
 650 */
 651int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
 652{
 653        uint64_t seq[RADEON_NUM_RINGS] = {};
 654        long r;
 655
 656        seq[ring] = atomic64_read(&rdev->fence_drv[ring].last_seq) + 1ULL;
 657        if (seq[ring] >= rdev->fence_drv[ring].sync_seq[ring]) {
 658                /* nothing to wait for, last_seq is
 659                   already the last emitted fence */
 660                return -ENOENT;
 661        }
 662        r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT);
 663        if (r < 0)
 664                return r;
 665        return 0;
 666}
 667
 668/**
 669 * radeon_fence_wait_empty - wait for all fences to signal
 670 *
 671 * @rdev: radeon device pointer
 672 * @ring: ring index the fence is associated with
 673 *
 674 * Wait for all fences on the requested ring to signal (all asics).
 675 * Returns 0 if the fences have passed, error for all other cases.
 676 * Caller must hold ring lock.
 677 */
 678int radeon_fence_wait_empty(struct radeon_device *rdev, int ring)
 679{
 680        uint64_t seq[RADEON_NUM_RINGS] = {};
 681        long r;
 682
 683        seq[ring] = rdev->fence_drv[ring].sync_seq[ring];
 684        if (!seq[ring])
 685                return 0;
 686
 687        r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT);
 688        if (r < 0) {
 689                if (r == -EDEADLK)
 690                        return -EDEADLK;
 691
 692                dev_err(rdev->dev, "error waiting for ring[%d] to become idle (%ld)\n",
 693                        ring, r);
 694        }
 695        return 0;
 696}
 697
 698/**
 699 * radeon_fence_ref - take a ref on a fence
 700 *
 701 * @fence: radeon fence object
 702 *
 703 * Take a reference on a fence (all asics).
 704 * Returns the fence.
 705 */
 706struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
 707{
 708        dma_fence_get(&fence->base);
 709        return fence;
 710}
 711
 712/**
 713 * radeon_fence_unref - remove a ref on a fence
 714 *
 715 * @fence: radeon fence object
 716 *
 717 * Remove a reference on a fence (all asics).
 718 */
 719void radeon_fence_unref(struct radeon_fence **fence)
 720{
 721        struct radeon_fence *tmp = *fence;
 722
 723        *fence = NULL;
 724        if (tmp) {
 725                dma_fence_put(&tmp->base);
 726        }
 727}
 728
 729/**
 730 * radeon_fence_count_emitted - get the count of emitted fences
 731 *
 732 * @rdev: radeon device pointer
 733 * @ring: ring index the fence is associated with
 734 *
 735 * Get the number of fences emitted on the requested ring (all asics).
 736 * Returns the number of emitted fences on the ring.  Used by the
 737 * dynpm code to track ring activity.
 738 */
 739unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring)
 740{
 741        uint64_t emitted;
 742
 743        /* We are not protected by ring lock when reading the last sequence
 744         * but it's ok to report slightly wrong fence count here.
 745         */
 746        radeon_fence_process(rdev, ring);
 747        emitted = rdev->fence_drv[ring].sync_seq[ring]
 748                - atomic64_read(&rdev->fence_drv[ring].last_seq);
 749        /* to avoid 32-bit wrap-around */
 750        if (emitted > 0x10000000) {
 751                emitted = 0x10000000;
 752        }
 753        return (unsigned)emitted;
 754}
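
    /*
     * Example of the arithmetic above (illustrative numbers): with
     * sync_seq[ring] at 102 and last_seq at 97, five fences are still
     * outstanding and this returns 5. The 0x10000000 clamp only caps
     * pathological differences so the reported count stays sane.
     */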
 755
 756/**
 757 * radeon_fence_need_sync - do we need a semaphore
 758 *
 759 * @fence: radeon fence object
 760 * @dst_ring: which ring to check against
 761 *
 762 * Check if the fence needs to be synced against another ring
 763 * (all asics).  If so, we need to emit a semaphore.
 764 * Returns true if we need to sync with another ring, false if
 765 * not.
 766 */
 767bool radeon_fence_need_sync(struct radeon_fence *fence, int dst_ring)
 768{
 769        struct radeon_fence_driver *fdrv;
 770
 771        if (!fence) {
 772                return false;
 773        }
 774
 775        if (fence->ring == dst_ring) {
 776                return false;
 777        }
 778
 779        /* we are protected by the ring mutex */
 780        fdrv = &fence->rdev->fence_drv[dst_ring];
 781        if (fence->seq <= fdrv->sync_seq[fence->ring]) {
 782                return false;
 783        }
 784
 785        return true;
 786}
 787
 788/**
 789 * radeon_fence_note_sync - record the sync point
 790 *
 791 * @fence: radeon fence object
 792 * @dst_ring: which ring to check against
 793 *
 794 * Note the sequence number at which point the fence will
 795 * be synced with the requested ring (all asics).
 796 */
 797void radeon_fence_note_sync(struct radeon_fence *fence, int dst_ring)
 798{
 799        struct radeon_fence_driver *dst, *src;
 800        unsigned i;
 801
 802        if (!fence) {
 803                return;
 804        }
 805
 806        if (fence->ring == dst_ring) {
 807                return;
 808        }
 809
 810        /* we are protected by the ring mutex */
 811        src = &fence->rdev->fence_drv[fence->ring];
 812        dst = &fence->rdev->fence_drv[dst_ring];
 813        for (i = 0; i < RADEON_NUM_RINGS; ++i) {
 814                if (i == dst_ring) {
 815                        continue;
 816                }
 817                dst->sync_seq[i] = max(dst->sync_seq[i], src->sync_seq[i]);
 818        }
 819}
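
    /*
     * How the two helpers above fit together (rough sketch; the semaphore
     * emission in the middle is pseudo-code and is done by the caller,
     * e.g. the semaphore/sync code):
     *
     *        if (radeon_fence_need_sync(fence, dst_ring)) {
     *                ... emit a semaphore wait on dst_ring for fence->ring ...
     *                radeon_fence_note_sync(fence, dst_ring);
     *        }
     */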
 820
 821/**
 822 * radeon_fence_driver_start_ring - make the fence driver
 823 * ready for use on the requested ring.
 824 *
 825 * @rdev: radeon device pointer
 826 * @ring: ring index to start the fence driver on
 827 *
 828 * Make the fence driver ready for processing (all asics).
 829 * Not all asics have all rings, so each asic will only
 830 * start the fence driver on the rings it has.
 831 * Returns 0 for success, errors for failure.
 832 */
 833int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
 834{
 835        uint64_t index;
 836        int r;
 837
 838        radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
 839        if (rdev->wb.use_event || !radeon_ring_supports_scratch_reg(rdev, &rdev->ring[ring])) {
 840                rdev->fence_drv[ring].scratch_reg = 0;
 841                if (ring != R600_RING_TYPE_UVD_INDEX) {
 842                        index = R600_WB_EVENT_OFFSET + ring * 4;
 843                        rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
 844                        rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr +
 845                                                         index;
 846
 847                } else {
 848                        /* put fence directly behind firmware */
 849                        index = ALIGN(rdev->uvd_fw->size, 8);
 850                        rdev->fence_drv[ring].cpu_addr = rdev->uvd.cpu_addr + index;
 851                        rdev->fence_drv[ring].gpu_addr = rdev->uvd.gpu_addr + index;
 852                }
 853
 854        } else {
 855                r = radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg);
 856                if (r) {
 857                        dev_err(rdev->dev, "fence failed to get scratch register\n");
 858                        return r;
 859                }
 860                index = RADEON_WB_SCRATCH_OFFSET +
 861                        rdev->fence_drv[ring].scratch_reg -
 862                        rdev->scratch.reg_base;
 863                rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
 864                rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
 865        }
 866        radeon_fence_write(rdev, atomic64_read(&rdev->fence_drv[ring].last_seq), ring);
 867        rdev->fence_drv[ring].initialized = true;
 868        dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016llx and cpu addr 0x%p\n",
 869                 ring, rdev->fence_drv[ring].gpu_addr, rdev->fence_drv[ring].cpu_addr);
 870        return 0;
 871}
 872
 873/**
 874 * radeon_fence_driver_init_ring - init the fence driver
 875 * for the requested ring.
 876 *
 877 * @rdev: radeon device pointer
 878 * @ring: ring index to start the fence driver on
 879 *
 880 * Init the fence driver for the requested ring (all asics).
 881 * Helper function for radeon_fence_driver_init().
 882 */
 883static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring)
 884{
 885        int i;
 886
 887        rdev->fence_drv[ring].scratch_reg = -1;
 888        rdev->fence_drv[ring].cpu_addr = NULL;
 889        rdev->fence_drv[ring].gpu_addr = 0;
 890        for (i = 0; i < RADEON_NUM_RINGS; ++i)
 891                rdev->fence_drv[ring].sync_seq[i] = 0;
 892        atomic64_set(&rdev->fence_drv[ring].last_seq, 0);
 893        rdev->fence_drv[ring].initialized = false;
 894        INIT_DELAYED_WORK(&rdev->fence_drv[ring].lockup_work,
 895                          radeon_fence_check_lockup);
 896        rdev->fence_drv[ring].rdev = rdev;
 897}
 898
 899/**
 900 * radeon_fence_driver_init - init the fence driver
 901 * for all possible rings.
 902 *
 903 * @rdev: radeon device pointer
 904 *
 905 * Init the fence driver for all possible rings (all asics).
 906 * Not all asics have all rings, so each asic will only
 907 * start the fence driver on the rings it has using
 908 * radeon_fence_driver_start_ring().
 909 * Returns 0 for success.
 910 */
 911int radeon_fence_driver_init(struct radeon_device *rdev)
 912{
 913        int ring;
 914
 915        init_waitqueue_head(&rdev->fence_queue);
 916        for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
 917                radeon_fence_driver_init_ring(rdev, ring);
 918        }
 919        if (radeon_debugfs_fence_init(rdev)) {
 920                dev_err(rdev->dev, "fence debugfs file creation failed\n");
 921        }
 922        return 0;
 923}
 924
 925/**
 926 * radeon_fence_driver_fini - tear down the fence driver
 927 * for all possible rings.
 928 *
 929 * @rdev: radeon device pointer
 930 *
 931 * Tear down the fence driver for all possible rings (all asics).
 932 */
 933void radeon_fence_driver_fini(struct radeon_device *rdev)
 934{
 935        int ring, r;
 936
 937        mutex_lock(&rdev->ring_lock);
 938        for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
 939                if (!rdev->fence_drv[ring].initialized)
 940                        continue;
 941                r = radeon_fence_wait_empty(rdev, ring);
 942                if (r) {
 943                        /* no need to trigger GPU reset as we are unloading */
 944                        radeon_fence_driver_force_completion(rdev, ring);
 945                }
 946                cancel_delayed_work_sync(&rdev->fence_drv[ring].lockup_work);
 947                wake_up_all(&rdev->fence_queue);
 948                radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
 949                rdev->fence_drv[ring].initialized = false;
 950        }
 951        mutex_unlock(&rdev->ring_lock);
 952}
 953
 954/**
 955 * radeon_fence_driver_force_completion - force all fence waiters to complete
 956 *
 957 * @rdev: radeon device pointer
 958 * @ring: the ring to complete
 959 *
 960 * In case of GPU reset failure, make sure no process keeps waiting on a
 961 * fence that will never complete.
 962 */
 963void radeon_fence_driver_force_completion(struct radeon_device *rdev, int ring)
 964{
 965        if (rdev->fence_drv[ring].initialized) {
 966                radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring);
 967                cancel_delayed_work_sync(&rdev->fence_drv[ring].lockup_work);
 968        }
 969}
 970
 971
 972/*
 973 * Fence debugfs
 974 */
 975#if defined(CONFIG_DEBUG_FS)
 976static int radeon_debugfs_fence_info(struct seq_file *m, void *data)
 977{
 978        struct drm_info_node *node = (struct drm_info_node *)m->private;
 979        struct drm_device *dev = node->minor->dev;
 980        struct radeon_device *rdev = dev->dev_private;
 981        int i, j;
 982
 983        for (i = 0; i < RADEON_NUM_RINGS; ++i) {
 984                if (!rdev->fence_drv[i].initialized)
 985                        continue;
 986
 987                radeon_fence_process(rdev, i);
 988
 989                seq_printf(m, "--- ring %d ---\n", i);
 990                seq_printf(m, "Last signaled fence 0x%016llx\n",
 991                           (unsigned long long)atomic64_read(&rdev->fence_drv[i].last_seq));
 992                seq_printf(m, "Last emitted        0x%016llx\n",
 993                           rdev->fence_drv[i].sync_seq[i]);
 994
 995                for (j = 0; j < RADEON_NUM_RINGS; ++j) {
 996                        if (i != j && rdev->fence_drv[j].initialized)
 997                                seq_printf(m, "Last sync to ring %d 0x%016llx\n",
 998                                           j, rdev->fence_drv[i].sync_seq[j]);
 999                }
1000        }
1001        return 0;
1002}
1003
1004/**
1005 * radeon_debugfs_gpu_reset - manually trigger a gpu reset
1006 *
1007 * Manually trigger a gpu reset at the next fence wait.
1008 */
1009static int radeon_debugfs_gpu_reset(struct seq_file *m, void *data)
1010{
1011        struct drm_info_node *node = (struct drm_info_node *) m->private;
1012        struct drm_device *dev = node->minor->dev;
1013        struct radeon_device *rdev = dev->dev_private;
1014
1015        down_read(&rdev->exclusive_lock);
1016        seq_printf(m, "%d\n", rdev->needs_reset);
1017        rdev->needs_reset = true;
1018        wake_up_all(&rdev->fence_queue);
1019        up_read(&rdev->exclusive_lock);
1020
1021        return 0;
1022}
1023
1024static struct drm_info_list radeon_debugfs_fence_list[] = {
1025        {"radeon_fence_info", &radeon_debugfs_fence_info, 0, NULL},
1026        {"radeon_gpu_reset", &radeon_debugfs_gpu_reset, 0, NULL}
1027};
1028#endif
1029
1030int radeon_debugfs_fence_init(struct radeon_device *rdev)
1031{
1032#if defined(CONFIG_DEBUG_FS)
1033        return radeon_debugfs_add_files(rdev, radeon_debugfs_fence_list, 2);
1034#else
1035        return 0;
1036#endif
1037}
1038
1039static const char *radeon_fence_get_driver_name(struct dma_fence *fence)
1040{
1041        return "radeon";
1042}
1043
1044static const char *radeon_fence_get_timeline_name(struct dma_fence *f)
1045{
1046        struct radeon_fence *fence = to_radeon_fence(f);
1047        switch (fence->ring) {
1048        case RADEON_RING_TYPE_GFX_INDEX: return "radeon.gfx";
1049        case CAYMAN_RING_TYPE_CP1_INDEX: return "radeon.cp1";
1050        case CAYMAN_RING_TYPE_CP2_INDEX: return "radeon.cp2";
1051        case R600_RING_TYPE_DMA_INDEX: return "radeon.dma";
1052        case CAYMAN_RING_TYPE_DMA1_INDEX: return "radeon.dma1";
1053        case R600_RING_TYPE_UVD_INDEX: return "radeon.uvd";
1054        case TN_RING_TYPE_VCE1_INDEX: return "radeon.vce1";
1055        case TN_RING_TYPE_VCE2_INDEX: return "radeon.vce2";
1056        default: WARN_ON_ONCE(1); return "radeon.unk";
1057        }
1058}
1059
1060static inline bool radeon_test_signaled(struct radeon_fence *fence)
1061{
1062        return test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->base.flags);
1063}
1064
1065struct radeon_wait_cb {
1066        struct dma_fence_cb base;
1067        struct task_struct *task;
1068};
1069
1070static void
1071radeon_fence_wait_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
1072{
1073        struct radeon_wait_cb *wait =
1074                container_of(cb, struct radeon_wait_cb, base);
1075
1076        wake_up_process(wait->task);
1077}
1078
1079static signed long radeon_fence_default_wait(struct dma_fence *f, bool intr,
1080                                             signed long t)
1081{
1082        struct radeon_fence *fence = to_radeon_fence(f);
1083        struct radeon_device *rdev = fence->rdev;
1084        struct radeon_wait_cb cb;
1085
1086        cb.task = current;
1087
1088        if (dma_fence_add_callback(f, &cb.base, radeon_fence_wait_cb))
1089                return t;
1090
1091        while (t > 0) {
1092                if (intr)
1093                        set_current_state(TASK_INTERRUPTIBLE);
1094                else
1095                        set_current_state(TASK_UNINTERRUPTIBLE);
1096
1097                /*
1098                 * radeon_test_signaled must be called after
1099                 * set_current_state to prevent a race with wake_up_process
1100                 */
1101                if (radeon_test_signaled(fence))
1102                        break;
1103
1104                if (rdev->needs_reset) {
1105                        t = -EDEADLK;
1106                        break;
1107                }
1108
1109                t = schedule_timeout(t);
1110
1111                if (t > 0 && intr && signal_pending(current))
1112                        t = -ERESTARTSYS;
1113        }
1114
1115        __set_current_state(TASK_RUNNING);
1116        dma_fence_remove_callback(f, &cb.base);
1117
1118        return t;
1119}
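
    /*
     * This hook is reached through the generic dma_fence API, e.g.
     * (illustrative call, timeout value arbitrary):
     *
     *        long t = dma_fence_wait_timeout(&fence->base, true,
     *                                        msecs_to_jiffies(100));
     *
     * which lets code outside this driver wait on radeon fences without
     * knowing about rings or sequence numbers.
     */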
1120
1121const struct dma_fence_ops radeon_fence_ops = {
1122        .get_driver_name = radeon_fence_get_driver_name,
1123        .get_timeline_name = radeon_fence_get_timeline_name,
1124        .enable_signaling = radeon_fence_enable_signaling,
1125        .signaled = radeon_fence_is_signaled,
1126        .wait = radeon_fence_default_wait,
1127        .release = NULL,
1128};
1129