linux/drivers/gpu/drm/radeon/radeon_fence.c
<<
>>
Prefs
   1/*
   2 * Copyright 2009 Jerome Glisse.
   3 * All Rights Reserved.
   4 *
   5 * Permission is hereby granted, free of charge, to any person obtaining a
   6 * copy of this software and associated documentation files (the
   7 * "Software"), to deal in the Software without restriction, including
   8 * without limitation the rights to use, copy, modify, merge, publish,
   9 * distribute, sub license, and/or sell copies of the Software, and to
  10 * permit persons to whom the Software is furnished to do so, subject to
  11 * the following conditions:
  12 *
  13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  16 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
  17 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  18 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  19 * USE OR OTHER DEALINGS IN THE SOFTWARE.
  20 *
  21 * The above copyright notice and this permission notice (including the
  22 * next paragraph) shall be included in all copies or substantial portions
  23 * of the Software.
  24 *
  25 */
  26/*
  27 * Authors:
  28 *    Jerome Glisse <glisse@freedesktop.org>
  29 *    Dave Airlie
  30 */
  31
  32#include <linux/atomic.h>
  33#include <linux/firmware.h>
  34#include <linux/kref.h>
  35#include <linux/sched/signal.h>
  36#include <linux/seq_file.h>
  37#include <linux/slab.h>
  38#include <linux/wait.h>
  39
  40#include <drm/drm_device.h>
  41#include <drm/drm_file.h>
  42
  43#include "radeon.h"
  44#include "radeon_reg.h"
  45#include "radeon_trace.h"
  46
/*
 * Fences
 * A fence marks an event in the GPU's pipeline and is used for
 * GPU/CPU synchronization.  Once the fence has been written, all
 * buffers associated with that fence are expected to be no longer
 * in use by the associated ring on the GPU, and the relevant GPU
 * caches are expected to have been flushed.  Whether a scratch
 * register or a memory location is used depends on the asic and on
 * whether writeback is enabled.
 */
  57
  58/**
  59 * radeon_fence_write - write a fence value
  60 *
  61 * @rdev: radeon_device pointer
  62 * @seq: sequence number to write
  63 * @ring: ring index the fence is associated with
  64 *
  65 * Writes a fence value to memory or a scratch register (all asics).
  66 */
  67static void radeon_fence_write(struct radeon_device *rdev, u32 seq, int ring)
  68{
  69        struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
  70        if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
  71                if (drv->cpu_addr) {
  72                        *drv->cpu_addr = cpu_to_le32(seq);
  73                }
  74        } else {
  75                WREG32(drv->scratch_reg, seq);
  76        }
  77}
  78
  79/**
  80 * radeon_fence_read - read a fence value
  81 *
  82 * @rdev: radeon_device pointer
  83 * @ring: ring index the fence is associated with
  84 *
  85 * Reads a fence value from memory or a scratch register (all asics).
  86 * Returns the value of the fence read from memory or register.
  87 */
  88static u32 radeon_fence_read(struct radeon_device *rdev, int ring)
  89{
  90        struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
  91        u32 seq = 0;
  92
  93        if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
  94                if (drv->cpu_addr) {
  95                        seq = le32_to_cpu(*drv->cpu_addr);
  96                } else {
  97                        seq = lower_32_bits(atomic64_read(&drv->last_seq));
  98                }
  99        } else {
 100                seq = RREG32(drv->scratch_reg);
 101        }
 102        return seq;
 103}
 104
 105/**
 106 * radeon_fence_schedule_check - schedule lockup check
 107 *
 108 * @rdev: radeon_device pointer
 109 * @ring: ring index we should work with
 110 *
 111 * Queues a delayed work item to check for lockups.
 112 */
 113static void radeon_fence_schedule_check(struct radeon_device *rdev, int ring)
 114{
 115        /*
 116         * Do not reset the timer here with mod_delayed_work,
 117         * this can livelock in an interaction with TTM delayed destroy.
 118         */
 119        queue_delayed_work(system_power_efficient_wq,
 120                           &rdev->fence_drv[ring].lockup_work,
 121                           RADEON_FENCE_JIFFIES_TIMEOUT);
 122}
 123
 124/**
 125 * radeon_fence_emit - emit a fence on the requested ring
 126 *
 127 * @rdev: radeon_device pointer
 128 * @fence: radeon fence object
 129 * @ring: ring index the fence is associated with
 130 *
 131 * Emits a fence command on the requested ring (all asics).
 132 * Returns 0 on success, -ENOMEM on failure.
 133 */
 134int radeon_fence_emit(struct radeon_device *rdev,
 135                      struct radeon_fence **fence,
 136                      int ring)
 137{
 138        u64 seq;
 139
 140        /* we are protected by the ring emission mutex */
 141        *fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL);
 142        if ((*fence) == NULL) {
 143                return -ENOMEM;
 144        }
 145        (*fence)->rdev = rdev;
 146        (*fence)->seq = seq = ++rdev->fence_drv[ring].sync_seq[ring];
 147        (*fence)->ring = ring;
 148        (*fence)->is_vm_update = false;
 149        dma_fence_init(&(*fence)->base, &radeon_fence_ops,
 150                       &rdev->fence_queue.lock,
 151                       rdev->fence_context + ring,
 152                       seq);
 153        radeon_fence_ring_emit(rdev, ring, *fence);
 154        trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq);
 155        radeon_fence_schedule_check(rdev, ring);
 156        return 0;
 157}
 158
 159/*
 160 * radeon_fence_check_signaled - callback from fence_queue
 161 *
 162 * this function is called with fence_queue lock held, which is also used
 163 * for the fence locking itself, so unlocked variants are used for
 164 * fence_signal, and remove_wait_queue.
 165 */
 166static int radeon_fence_check_signaled(wait_queue_entry_t *wait, unsigned mode, int flags, void *key)
 167{
 168        struct radeon_fence *fence;
 169        u64 seq;
 170
 171        fence = container_of(wait, struct radeon_fence, fence_wake);
 172
 173        /*
 174         * We cannot use radeon_fence_process here because we're already
 175         * in the waitqueue, in a call from wake_up_all.
 176         */
 177        seq = atomic64_read(&fence->rdev->fence_drv[fence->ring].last_seq);
 178        if (seq >= fence->seq) {
 179                int ret = dma_fence_signal_locked(&fence->base);
 180
 181                if (!ret)
 182                        DMA_FENCE_TRACE(&fence->base, "signaled from irq context\n");
 183                else
 184                        DMA_FENCE_TRACE(&fence->base, "was already signaled\n");
 185
 186                radeon_irq_kms_sw_irq_put(fence->rdev, fence->ring);
 187                __remove_wait_queue(&fence->rdev->fence_queue, &fence->fence_wake);
 188                dma_fence_put(&fence->base);
 189        } else
 190                DMA_FENCE_TRACE(&fence->base, "pending\n");
 191        return 0;
 192}
 193
 194/**
 195 * radeon_fence_activity - check for fence activity
 196 *
 197 * @rdev: radeon_device pointer
 198 * @ring: ring index the fence is associated with
 199 *
 200 * Checks the current fence value and calculates the last
 201 * signalled fence value. Returns true if activity occured
 202 * on the ring, and the fence_queue should be waken up.
 203 */
 204static bool radeon_fence_activity(struct radeon_device *rdev, int ring)
 205{
 206        uint64_t seq, last_seq, last_emitted;
 207        unsigned count_loop = 0;
 208        bool wake = false;
 209
 210        /* Note there is a scenario here for an infinite loop but it's
 211         * very unlikely to happen. For it to happen, the current polling
 212         * process need to be interrupted by another process and another
 213         * process needs to update the last_seq btw the atomic read and
 214         * xchg of the current process.
 215         *
 216         * More over for this to go in infinite loop there need to be
 217         * continuously new fence signaled ie radeon_fence_read needs
 218         * to return a different value each time for both the currently
 219         * polling process and the other process that xchg the last_seq
 220         * btw atomic read and xchg of the current process. And the
 221         * value the other process set as last seq must be higher than
 222         * the seq value we just read. Which means that current process
 223         * need to be interrupted after radeon_fence_read and before
 224         * atomic xchg.
 225         *
 226         * To be even more safe we count the number of time we loop and
 227         * we bail after 10 loop just accepting the fact that we might
 228         * have temporarly set the last_seq not to the true real last
 229         * seq but to an older one.
 230         */
 231        last_seq = atomic64_read(&rdev->fence_drv[ring].last_seq);
 232        do {
 233                last_emitted = rdev->fence_drv[ring].sync_seq[ring];
 234                seq = radeon_fence_read(rdev, ring);
 235                seq |= last_seq & 0xffffffff00000000LL;
 236                if (seq < last_seq) {
 237                        seq &= 0xffffffff;
 238                        seq |= last_emitted & 0xffffffff00000000LL;
 239                }
 240
 241                if (seq <= last_seq || seq > last_emitted) {
 242                        break;
 243                }
 244                /* If we loop over we don't want to return without
 245                 * checking if a fence is signaled as it means that the
 246                 * seq we just read is different from the previous on.
 247                 */
 248                wake = true;
 249                last_seq = seq;
 250                if ((count_loop++) > 10) {
 251                        /* We looped over too many time leave with the
 252                         * fact that we might have set an older fence
 253                         * seq then the current real last seq as signaled
 254                         * by the hw.
 255                         */
 256                        break;
 257                }
 258        } while (atomic64_xchg(&rdev->fence_drv[ring].last_seq, seq) > seq);
 259
 260        if (seq < last_emitted)
 261                radeon_fence_schedule_check(rdev, ring);
 262
 263        return wake;
 264}
 265
 266/**
 267 * radeon_fence_check_lockup - check for hardware lockup
 268 *
 269 * @work: delayed work item
 270 *
 271 * Checks for fence activity and if there is none probe
 272 * the hardware if a lockup occured.
 273 */
 274static void radeon_fence_check_lockup(struct work_struct *work)
 275{
 276        struct radeon_fence_driver *fence_drv;
 277        struct radeon_device *rdev;
 278        int ring;
 279
 280        fence_drv = container_of(work, struct radeon_fence_driver,
 281                                 lockup_work.work);
 282        rdev = fence_drv->rdev;
 283        ring = fence_drv - &rdev->fence_drv[0];
 284
 285        if (!down_read_trylock(&rdev->exclusive_lock)) {
 286                /* just reschedule the check if a reset is going on */
 287                radeon_fence_schedule_check(rdev, ring);
 288                return;
 289        }
 290
 291        if (fence_drv->delayed_irq && rdev->irq.installed) {
 292                unsigned long irqflags;
 293
 294                fence_drv->delayed_irq = false;
 295                spin_lock_irqsave(&rdev->irq.lock, irqflags);
 296                radeon_irq_set(rdev);
 297                spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
 298        }
 299
 300        if (radeon_fence_activity(rdev, ring))
 301                wake_up_all(&rdev->fence_queue);
 302
 303        else if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) {
 304
 305                /* good news we believe it's a lockup */
 306                dev_warn(rdev->dev, "GPU lockup (current fence id "
 307                         "0x%016llx last fence id 0x%016llx on ring %d)\n",
 308                         (uint64_t)atomic64_read(&fence_drv->last_seq),
 309                         fence_drv->sync_seq[ring], ring);
 310
 311                /* remember that we need an reset */
 312                rdev->needs_reset = true;
 313                wake_up_all(&rdev->fence_queue);
 314        }
 315        up_read(&rdev->exclusive_lock);
 316}
 317
 318/**
 319 * radeon_fence_process - process a fence
 320 *
 321 * @rdev: radeon_device pointer
 322 * @ring: ring index the fence is associated with
 323 *
 324 * Checks the current fence value and wakes the fence queue
 325 * if the sequence number has increased (all asics).
 326 */
 327void radeon_fence_process(struct radeon_device *rdev, int ring)
 328{
 329        if (radeon_fence_activity(rdev, ring))
 330                wake_up_all(&rdev->fence_queue);
 331}
 332
 333/**
 334 * radeon_fence_seq_signaled - check if a fence sequence number has signaled
 335 *
 336 * @rdev: radeon device pointer
 337 * @seq: sequence number
 338 * @ring: ring index the fence is associated with
 339 *
 340 * Check if the last signaled fence sequnce number is >= the requested
 341 * sequence number (all asics).
 342 * Returns true if the fence has signaled (current fence value
 343 * is >= requested value) or false if it has not (current fence
 344 * value is < the requested value.  Helper function for
 345 * radeon_fence_signaled().
 346 */
 347static bool radeon_fence_seq_signaled(struct radeon_device *rdev,
 348                                      u64 seq, unsigned ring)
 349{
 350        if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
 351                return true;
 352        }
 353        /* poll new last sequence at least once */
 354        radeon_fence_process(rdev, ring);
 355        if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
 356                return true;
 357        }
 358        return false;
 359}
 360
 361static bool radeon_fence_is_signaled(struct dma_fence *f)
 362{
 363        struct radeon_fence *fence = to_radeon_fence(f);
 364        struct radeon_device *rdev = fence->rdev;
 365        unsigned ring = fence->ring;
 366        u64 seq = fence->seq;
 367
 368        if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
 369                return true;
 370        }
 371
 372        if (down_read_trylock(&rdev->exclusive_lock)) {
 373                radeon_fence_process(rdev, ring);
 374                up_read(&rdev->exclusive_lock);
 375
 376                if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
 377                        return true;
 378                }
 379        }
 380        return false;
 381}
 382
 383/**
 384 * radeon_fence_enable_signaling - enable signalling on fence
 385 * @f: fence
 386 *
 387 * This function is called with fence_queue lock held, and adds a callback
 388 * to fence_queue that checks if this fence is signaled, and if so it
 389 * signals the fence and removes itself.
 390 */
 391static bool radeon_fence_enable_signaling(struct dma_fence *f)
 392{
 393        struct radeon_fence *fence = to_radeon_fence(f);
 394        struct radeon_device *rdev = fence->rdev;
 395
 396        if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq)
 397                return false;
 398
 399        if (down_read_trylock(&rdev->exclusive_lock)) {
 400                radeon_irq_kms_sw_irq_get(rdev, fence->ring);
 401
 402                if (radeon_fence_activity(rdev, fence->ring))
 403                        wake_up_all_locked(&rdev->fence_queue);
 404
 405                /* did fence get signaled after we enabled the sw irq? */
 406                if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq) {
 407                        radeon_irq_kms_sw_irq_put(rdev, fence->ring);
 408                        up_read(&rdev->exclusive_lock);
 409                        return false;
 410                }
 411
 412                up_read(&rdev->exclusive_lock);
 413        } else {
 414                /* we're probably in a lockup, lets not fiddle too much */
 415                if (radeon_irq_kms_sw_irq_get_delayed(rdev, fence->ring))
 416                        rdev->fence_drv[fence->ring].delayed_irq = true;
 417                radeon_fence_schedule_check(rdev, fence->ring);
 418        }
 419
 420        fence->fence_wake.flags = 0;
 421        fence->fence_wake.private = NULL;
 422        fence->fence_wake.func = radeon_fence_check_signaled;
 423        __add_wait_queue(&rdev->fence_queue, &fence->fence_wake);
 424        dma_fence_get(f);
 425
 426        DMA_FENCE_TRACE(&fence->base, "armed on ring %i!\n", fence->ring);
 427        return true;
 428}
 429
 430/**
 431 * radeon_fence_signaled - check if a fence has signaled
 432 *
 433 * @fence: radeon fence object
 434 *
 435 * Check if the requested fence has signaled (all asics).
 436 * Returns true if the fence has signaled or false if it has not.
 437 */
 438bool radeon_fence_signaled(struct radeon_fence *fence)
 439{
 440        if (!fence)
 441                return true;
 442
 443        if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) {
 444                int ret;
 445
 446                ret = dma_fence_signal(&fence->base);
 447                if (!ret)
 448                        DMA_FENCE_TRACE(&fence->base, "signaled from radeon_fence_signaled\n");
 449                return true;
 450        }
 451        return false;
 452}
 453
 454/**
 455 * radeon_fence_any_seq_signaled - check if any sequence number is signaled
 456 *
 457 * @rdev: radeon device pointer
 458 * @seq: sequence numbers
 459 *
 460 * Check if the last signaled fence sequnce number is >= the requested
 461 * sequence number (all asics).
 462 * Returns true if any has signaled (current value is >= requested value)
 463 * or false if it has not. Helper function for radeon_fence_wait_seq.
 464 */
 465static bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq)
 466{
 467        unsigned i;
 468
 469        for (i = 0; i < RADEON_NUM_RINGS; ++i) {
 470                if (seq[i] && radeon_fence_seq_signaled(rdev, seq[i], i))
 471                        return true;
 472        }
 473        return false;
 474}
 475
 476/**
 477 * radeon_fence_wait_seq_timeout - wait for a specific sequence numbers
 478 *
 479 * @rdev: radeon device pointer
 480 * @target_seq: sequence number(s) we want to wait for
 481 * @intr: use interruptable sleep
 482 * @timeout: maximum time to wait, or MAX_SCHEDULE_TIMEOUT for infinite wait
 483 *
 484 * Wait for the requested sequence number(s) to be written by any ring
 485 * (all asics).  Sequnce number array is indexed by ring id.
 486 * @intr selects whether to use interruptable (true) or non-interruptable
 487 * (false) sleep when waiting for the sequence number.  Helper function
 488 * for radeon_fence_wait_*().
 489 * Returns remaining time if the sequence number has passed, 0 when
 490 * the wait timeout, or an error for all other cases.
 491 * -EDEADLK is returned when a GPU lockup has been detected.
 492 */
 493static long radeon_fence_wait_seq_timeout(struct radeon_device *rdev,
 494                                          u64 *target_seq, bool intr,
 495                                          long timeout)
 496{
 497        long r;
 498        int i;
 499
 500        if (radeon_fence_any_seq_signaled(rdev, target_seq))
 501                return timeout;
 502
 503        /* enable IRQs and tracing */
 504        for (i = 0; i < RADEON_NUM_RINGS; ++i) {
 505                if (!target_seq[i])
 506                        continue;
 507
 508                trace_radeon_fence_wait_begin(rdev->ddev, i, target_seq[i]);
 509                radeon_irq_kms_sw_irq_get(rdev, i);
 510        }
 511
 512        if (intr) {
 513                r = wait_event_interruptible_timeout(rdev->fence_queue, (
 514                        radeon_fence_any_seq_signaled(rdev, target_seq)
 515                         || rdev->needs_reset), timeout);
 516        } else {
 517                r = wait_event_timeout(rdev->fence_queue, (
 518                        radeon_fence_any_seq_signaled(rdev, target_seq)
 519                         || rdev->needs_reset), timeout);
 520        }
 521
 522        if (rdev->needs_reset)
 523                r = -EDEADLK;
 524
 525        for (i = 0; i < RADEON_NUM_RINGS; ++i) {
 526                if (!target_seq[i])
 527                        continue;
 528
 529                radeon_irq_kms_sw_irq_put(rdev, i);
 530                trace_radeon_fence_wait_end(rdev->ddev, i, target_seq[i]);
 531        }
 532
 533        return r;
 534}
 535
 536/**
 537 * radeon_fence_wait_timeout - wait for a fence to signal with timeout
 538 *
 539 * @fence: radeon fence object
 540 * @intr: use interruptible sleep
 541 *
 542 * Wait for the requested fence to signal (all asics).
 543 * @intr selects whether to use interruptable (true) or non-interruptable
 544 * (false) sleep when waiting for the fence.
 545 * @timeout: maximum time to wait, or MAX_SCHEDULE_TIMEOUT for infinite wait
 546 * Returns remaining time if the sequence number has passed, 0 when
 547 * the wait timeout, or an error for all other cases.
 548 */
 549long radeon_fence_wait_timeout(struct radeon_fence *fence, bool intr, long timeout)
 550{
 551        uint64_t seq[RADEON_NUM_RINGS] = {};
 552        long r;
 553        int r_sig;
 554
 555        /*
 556         * This function should not be called on !radeon fences.
 557         * If this is the case, it would mean this function can
 558         * also be called on radeon fences belonging to another card.
 559         * exclusive_lock is not held in that case.
 560         */
 561        if (WARN_ON_ONCE(!to_radeon_fence(&fence->base)))
 562                return dma_fence_wait(&fence->base, intr);
 563
 564        seq[fence->ring] = fence->seq;
 565        r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, timeout);
 566        if (r <= 0) {
 567                return r;
 568        }
 569
 570        r_sig = dma_fence_signal(&fence->base);
 571        if (!r_sig)
 572                DMA_FENCE_TRACE(&fence->base, "signaled from fence_wait\n");
 573        return r;
 574}
 575
 576/**
 577 * radeon_fence_wait - wait for a fence to signal
 578 *
 579 * @fence: radeon fence object
 580 * @intr: use interruptible sleep
 581 *
 582 * Wait for the requested fence to signal (all asics).
 583 * @intr selects whether to use interruptable (true) or non-interruptable
 584 * (false) sleep when waiting for the fence.
 585 * Returns 0 if the fence has passed, error for all other cases.
 586 */
 587int radeon_fence_wait(struct radeon_fence *fence, bool intr)
 588{
 589        long r = radeon_fence_wait_timeout(fence, intr, MAX_SCHEDULE_TIMEOUT);
 590        if (r > 0) {
 591                return 0;
 592        } else {
 593                return r;
 594        }
 595}
 596
 597/**
 598 * radeon_fence_wait_any - wait for a fence to signal on any ring
 599 *
 600 * @rdev: radeon device pointer
 601 * @fences: radeon fence object(s)
 602 * @intr: use interruptable sleep
 603 *
 604 * Wait for any requested fence to signal (all asics).  Fence
 605 * array is indexed by ring id.  @intr selects whether to use
 606 * interruptable (true) or non-interruptable (false) sleep when
 607 * waiting for the fences. Used by the suballocator.
 608 * Returns 0 if any fence has passed, error for all other cases.
 609 */
 610int radeon_fence_wait_any(struct radeon_device *rdev,
 611                          struct radeon_fence **fences,
 612                          bool intr)
 613{
 614        uint64_t seq[RADEON_NUM_RINGS];
 615        unsigned i, num_rings = 0;
 616        long r;
 617
 618        for (i = 0; i < RADEON_NUM_RINGS; ++i) {
 619                seq[i] = 0;
 620
 621                if (!fences[i]) {
 622                        continue;
 623                }
 624
 625                seq[i] = fences[i]->seq;
 626                ++num_rings;
 627        }
 628
 629        /* nothing to wait for ? */
 630        if (num_rings == 0)
 631                return -ENOENT;
 632
 633        r = radeon_fence_wait_seq_timeout(rdev, seq, intr, MAX_SCHEDULE_TIMEOUT);
 634        if (r < 0) {
 635                return r;
 636        }
 637        return 0;
 638}
 639
 640/**
 641 * radeon_fence_wait_next - wait for the next fence to signal
 642 *
 643 * @rdev: radeon device pointer
 644 * @ring: ring index the fence is associated with
 645 *
 646 * Wait for the next fence on the requested ring to signal (all asics).
 647 * Returns 0 if the next fence has passed, error for all other cases.
 648 * Caller must hold ring lock.
 649 */
 650int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
 651{
 652        uint64_t seq[RADEON_NUM_RINGS] = {};
 653        long r;
 654
 655        seq[ring] = atomic64_read(&rdev->fence_drv[ring].last_seq) + 1ULL;
 656        if (seq[ring] >= rdev->fence_drv[ring].sync_seq[ring]) {
 657                /* nothing to wait for, last_seq is
 658                   already the last emited fence */
 659                return -ENOENT;
 660        }
 661        r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT);
 662        if (r < 0)
 663                return r;
 664        return 0;
 665}
 666
 667/**
 668 * radeon_fence_wait_empty - wait for all fences to signal
 669 *
 670 * @rdev: radeon device pointer
 671 * @ring: ring index the fence is associated with
 672 *
 673 * Wait for all fences on the requested ring to signal (all asics).
 674 * Returns 0 if the fences have passed, error for all other cases.
 675 * Caller must hold ring lock.
 676 */
 677int radeon_fence_wait_empty(struct radeon_device *rdev, int ring)
 678{
 679        uint64_t seq[RADEON_NUM_RINGS] = {};
 680        long r;
 681
 682        seq[ring] = rdev->fence_drv[ring].sync_seq[ring];
 683        if (!seq[ring])
 684                return 0;
 685
 686        r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT);
 687        if (r < 0) {
 688                if (r == -EDEADLK)
 689                        return -EDEADLK;
 690
 691                dev_err(rdev->dev, "error waiting for ring[%d] to become idle (%ld)\n",
 692                        ring, r);
 693        }
 694        return 0;
 695}
 696
 697/**
 698 * radeon_fence_ref - take a ref on a fence
 699 *
 700 * @fence: radeon fence object
 701 *
 702 * Take a reference on a fence (all asics).
 703 * Returns the fence.
 704 */
 705struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
 706{
 707        dma_fence_get(&fence->base);
 708        return fence;
 709}
 710
 711/**
 712 * radeon_fence_unref - remove a ref on a fence
 713 *
 714 * @fence: radeon fence object
 715 *
 716 * Remove a reference on a fence (all asics).
 717 */
 718void radeon_fence_unref(struct radeon_fence **fence)
 719{
 720        struct radeon_fence *tmp = *fence;
 721
 722        *fence = NULL;
 723        if (tmp) {
 724                dma_fence_put(&tmp->base);
 725        }
 726}
 727
 728/**
 729 * radeon_fence_count_emitted - get the count of emitted fences
 730 *
 731 * @rdev: radeon device pointer
 732 * @ring: ring index the fence is associated with
 733 *
 734 * Get the number of fences emitted on the requested ring (all asics).
 735 * Returns the number of emitted fences on the ring.  Used by the
 736 * dynpm code to ring track activity.
 737 */
 738unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring)
 739{
 740        uint64_t emitted;
 741
 742        /* We are not protected by ring lock when reading the last sequence
 743         * but it's ok to report slightly wrong fence count here.
 744         */
 745        radeon_fence_process(rdev, ring);
 746        emitted = rdev->fence_drv[ring].sync_seq[ring]
 747                - atomic64_read(&rdev->fence_drv[ring].last_seq);
 748        /* to avoid 32bits warp around */
 749        if (emitted > 0x10000000) {
 750                emitted = 0x10000000;
 751        }
 752        return (unsigned)emitted;
 753}
 754
 755/**
 756 * radeon_fence_need_sync - do we need a semaphore
 757 *
 758 * @fence: radeon fence object
 759 * @dst_ring: which ring to check against
 760 *
 761 * Check if the fence needs to be synced against another ring
 762 * (all asics).  If so, we need to emit a semaphore.
 763 * Returns true if we need to sync with another ring, false if
 764 * not.
 765 */
 766bool radeon_fence_need_sync(struct radeon_fence *fence, int dst_ring)
 767{
 768        struct radeon_fence_driver *fdrv;
 769
 770        if (!fence) {
 771                return false;
 772        }
 773
 774        if (fence->ring == dst_ring) {
 775                return false;
 776        }
 777
 778        /* we are protected by the ring mutex */
 779        fdrv = &fence->rdev->fence_drv[dst_ring];
 780        if (fence->seq <= fdrv->sync_seq[fence->ring]) {
 781                return false;
 782        }
 783
 784        return true;
 785}
 786
 787/**
 788 * radeon_fence_note_sync - record the sync point
 789 *
 790 * @fence: radeon fence object
 791 * @dst_ring: which ring to check against
 792 *
 793 * Note the sequence number at which point the fence will
 794 * be synced with the requested ring (all asics).
 795 */
 796void radeon_fence_note_sync(struct radeon_fence *fence, int dst_ring)
 797{
 798        struct radeon_fence_driver *dst, *src;
 799        unsigned i;
 800
 801        if (!fence) {
 802                return;
 803        }
 804
 805        if (fence->ring == dst_ring) {
 806                return;
 807        }
 808
 809        /* we are protected by the ring mutex */
 810        src = &fence->rdev->fence_drv[fence->ring];
 811        dst = &fence->rdev->fence_drv[dst_ring];
 812        for (i = 0; i < RADEON_NUM_RINGS; ++i) {
 813                if (i == dst_ring) {
 814                        continue;
 815                }
 816                dst->sync_seq[i] = max(dst->sync_seq[i], src->sync_seq[i]);
 817        }
 818}
 819
 820/**
 821 * radeon_fence_driver_start_ring - make the fence driver
 822 * ready for use on the requested ring.
 823 *
 824 * @rdev: radeon device pointer
 825 * @ring: ring index to start the fence driver on
 826 *
 827 * Make the fence driver ready for processing (all asics).
 828 * Not all asics have all rings, so each asic will only
 829 * start the fence driver on the rings it has.
 830 * Returns 0 for success, errors for failure.
 831 */
 832int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
 833{
 834        uint64_t index;
 835        int r;
 836
 837        radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
 838        if (rdev->wb.use_event || !radeon_ring_supports_scratch_reg(rdev, &rdev->ring[ring])) {
 839                rdev->fence_drv[ring].scratch_reg = 0;
 840                if (ring != R600_RING_TYPE_UVD_INDEX) {
 841                        index = R600_WB_EVENT_OFFSET + ring * 4;
 842                        rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
 843                        rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr +
 844                                                         index;
 845
 846                } else {
 847                        /* put fence directly behind firmware */
 848                        index = ALIGN(rdev->uvd_fw->size, 8);
 849                        rdev->fence_drv[ring].cpu_addr = rdev->uvd.cpu_addr + index;
 850                        rdev->fence_drv[ring].gpu_addr = rdev->uvd.gpu_addr + index;
 851                }
 852
 853        } else {
 854                r = radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg);
 855                if (r) {
 856                        dev_err(rdev->dev, "fence failed to get scratch register\n");
 857                        return r;
 858                }
 859                index = RADEON_WB_SCRATCH_OFFSET +
 860                        rdev->fence_drv[ring].scratch_reg -
 861                        rdev->scratch.reg_base;
 862                rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
 863                rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
 864        }
 865        radeon_fence_write(rdev, atomic64_read(&rdev->fence_drv[ring].last_seq), ring);
 866        rdev->fence_drv[ring].initialized = true;
 867        dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016llx\n",
 868                 ring, rdev->fence_drv[ring].gpu_addr);
 869        return 0;
 870}
 871
 872/**
 873 * radeon_fence_driver_init_ring - init the fence driver
 874 * for the requested ring.
 875 *
 876 * @rdev: radeon device pointer
 877 * @ring: ring index to start the fence driver on
 878 *
 879 * Init the fence driver for the requested ring (all asics).
 880 * Helper function for radeon_fence_driver_init().
 881 */
 882static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring)
 883{
 884        int i;
 885
 886        rdev->fence_drv[ring].scratch_reg = -1;
 887        rdev->fence_drv[ring].cpu_addr = NULL;
 888        rdev->fence_drv[ring].gpu_addr = 0;
 889        for (i = 0; i < RADEON_NUM_RINGS; ++i)
 890                rdev->fence_drv[ring].sync_seq[i] = 0;
 891        atomic64_set(&rdev->fence_drv[ring].last_seq, 0);
 892        rdev->fence_drv[ring].initialized = false;
 893        INIT_DELAYED_WORK(&rdev->fence_drv[ring].lockup_work,
 894                          radeon_fence_check_lockup);
 895        rdev->fence_drv[ring].rdev = rdev;
 896}
 897
 898/**
 899 * radeon_fence_driver_init - init the fence driver
 900 * for all possible rings.
 901 *
 902 * @rdev: radeon device pointer
 903 *
 904 * Init the fence driver for all possible rings (all asics).
 905 * Not all asics have all rings, so each asic will only
 906 * start the fence driver on the rings it has using
 907 * radeon_fence_driver_start_ring().
 908 */
 909void radeon_fence_driver_init(struct radeon_device *rdev)
 910{
 911        int ring;
 912
 913        init_waitqueue_head(&rdev->fence_queue);
 914        for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
 915                radeon_fence_driver_init_ring(rdev, ring);
 916        }
 917
 918        radeon_debugfs_fence_init(rdev);
 919}
 920
 921/**
 922 * radeon_fence_driver_fini - tear down the fence driver
 923 * for all possible rings.
 924 *
 925 * @rdev: radeon device pointer
 926 *
 927 * Tear down the fence driver for all possible rings (all asics).
 928 */
 929void radeon_fence_driver_fini(struct radeon_device *rdev)
 930{
 931        int ring, r;
 932
 933        mutex_lock(&rdev->ring_lock);
 934        for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
 935                if (!rdev->fence_drv[ring].initialized)
 936                        continue;
 937                r = radeon_fence_wait_empty(rdev, ring);
 938                if (r) {
 939                        /* no need to trigger GPU reset as we are unloading */
 940                        radeon_fence_driver_force_completion(rdev, ring);
 941                }
 942                cancel_delayed_work_sync(&rdev->fence_drv[ring].lockup_work);
 943                wake_up_all(&rdev->fence_queue);
 944                radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
 945                rdev->fence_drv[ring].initialized = false;
 946        }
 947        mutex_unlock(&rdev->ring_lock);
 948}
 949
 950/**
 951 * radeon_fence_driver_force_completion - force all fence waiter to complete
 952 *
 953 * @rdev: radeon device pointer
 954 * @ring: the ring to complete
 955 *
 956 * In case of GPU reset failure make sure no process keep waiting on fence
 957 * that will never complete.
 958 */
 959void radeon_fence_driver_force_completion(struct radeon_device *rdev, int ring)
 960{
 961        if (rdev->fence_drv[ring].initialized) {
 962                radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring);
 963                cancel_delayed_work_sync(&rdev->fence_drv[ring].lockup_work);
 964        }
 965}
 966
 967
 968/*
 969 * Fence debugfs
 970 */
 971#if defined(CONFIG_DEBUG_FS)
 972static int radeon_debugfs_fence_info_show(struct seq_file *m, void *data)
 973{
 974        struct radeon_device *rdev = (struct radeon_device *)m->private;
 975        int i, j;
 976
 977        for (i = 0; i < RADEON_NUM_RINGS; ++i) {
 978                if (!rdev->fence_drv[i].initialized)
 979                        continue;
 980
 981                radeon_fence_process(rdev, i);
 982
 983                seq_printf(m, "--- ring %d ---\n", i);
 984                seq_printf(m, "Last signaled fence 0x%016llx\n",
 985                           (unsigned long long)atomic64_read(&rdev->fence_drv[i].last_seq));
 986                seq_printf(m, "Last emitted        0x%016llx\n",
 987                           rdev->fence_drv[i].sync_seq[i]);
 988
 989                for (j = 0; j < RADEON_NUM_RINGS; ++j) {
 990                        if (i != j && rdev->fence_drv[j].initialized)
 991                                seq_printf(m, "Last sync to ring %d 0x%016llx\n",
 992                                           j, rdev->fence_drv[i].sync_seq[j]);
 993                }
 994        }
 995        return 0;
 996}
 997
 998/*
 999 * radeon_debugfs_gpu_reset - manually trigger a gpu reset
1000 *
1001 * Manually trigger a gpu reset at the next fence wait.
1002 */
1003static int radeon_debugfs_gpu_reset(void *data, u64 *val)
1004{
1005        struct radeon_device *rdev = (struct radeon_device *)data;
1006
1007        down_read(&rdev->exclusive_lock);
1008        *val = rdev->needs_reset;
1009        rdev->needs_reset = true;
1010        wake_up_all(&rdev->fence_queue);
1011        up_read(&rdev->exclusive_lock);
1012
1013        return 0;
1014}
1015DEFINE_SHOW_ATTRIBUTE(radeon_debugfs_fence_info);
1016DEFINE_DEBUGFS_ATTRIBUTE(radeon_debugfs_gpu_reset_fops,
1017                         radeon_debugfs_gpu_reset, NULL, "%lld\n");
1018#endif
1019
/*
 * radeon_debugfs_fence_init - register the fence debugfs files
 *
 * Creates the "radeon_gpu_reset" and "radeon_fence_info" entries below
 * the debugfs root of the primary DRM minor.  Compiles to an empty
 * function when CONFIG_DEBUG_FS is not set.
 */
void radeon_debugfs_fence_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	struct dentry *dir = rdev->ddev->primary->debugfs_root;

	debugfs_create_file("radeon_gpu_reset", 0444, dir, rdev,
			    &radeon_debugfs_gpu_reset_fops);
	debugfs_create_file("radeon_fence_info", 0444, dir, rdev,
			    &radeon_debugfs_fence_info_fops);
#endif
}
1033
/*
 * radeon_fence_get_driver_name - name of the driver that owns a fence
 *
 * Returns the constant string "radeon" regardless of @fence.
 */
static const char *radeon_fence_get_driver_name(struct dma_fence *fence)
{
	static const char driver_name[] = "radeon";

	return driver_name;
}
1038
1039static const char *radeon_fence_get_timeline_name(struct dma_fence *f)
1040{
1041        struct radeon_fence *fence = to_radeon_fence(f);
1042        switch (fence->ring) {
1043        case RADEON_RING_TYPE_GFX_INDEX: return "radeon.gfx";
1044        case CAYMAN_RING_TYPE_CP1_INDEX: return "radeon.cp1";
1045        case CAYMAN_RING_TYPE_CP2_INDEX: return "radeon.cp2";
1046        case R600_RING_TYPE_DMA_INDEX: return "radeon.dma";
1047        case CAYMAN_RING_TYPE_DMA1_INDEX: return "radeon.dma1";
1048        case R600_RING_TYPE_UVD_INDEX: return "radeon.uvd";
1049        case TN_RING_TYPE_VCE1_INDEX: return "radeon.vce1";
1050        case TN_RING_TYPE_VCE2_INDEX: return "radeon.vce2";
1051        default: WARN_ON_ONCE(1); return "radeon.unk";
1052        }
1053}
1054
1055static inline bool radeon_test_signaled(struct radeon_fence *fence)
1056{
1057        return test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->base.flags);
1058}
1059
1060struct radeon_wait_cb {
1061        struct dma_fence_cb base;
1062        struct task_struct *task;
1063};
1064
1065static void
1066radeon_fence_wait_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
1067{
1068        struct radeon_wait_cb *wait =
1069                container_of(cb, struct radeon_wait_cb, base);
1070
1071        wake_up_process(wait->task);
1072}
1073
1074static signed long radeon_fence_default_wait(struct dma_fence *f, bool intr,
1075                                             signed long t)
1076{
1077        struct radeon_fence *fence = to_radeon_fence(f);
1078        struct radeon_device *rdev = fence->rdev;
1079        struct radeon_wait_cb cb;
1080
1081        cb.task = current;
1082
1083        if (dma_fence_add_callback(f, &cb.base, radeon_fence_wait_cb))
1084                return t;
1085
1086        while (t > 0) {
1087                if (intr)
1088                        set_current_state(TASK_INTERRUPTIBLE);
1089                else
1090                        set_current_state(TASK_UNINTERRUPTIBLE);
1091
1092                /*
1093                 * radeon_test_signaled must be called after
1094                 * set_current_state to prevent a race with wake_up_process
1095                 */
1096                if (radeon_test_signaled(fence))
1097                        break;
1098
1099                if (rdev->needs_reset) {
1100                        t = -EDEADLK;
1101                        break;
1102                }
1103
1104                t = schedule_timeout(t);
1105
1106                if (t > 0 && intr && signal_pending(current))
1107                        t = -ERESTARTSYS;
1108        }
1109
1110        __set_current_state(TASK_RUNNING);
1111        dma_fence_remove_callback(f, &cb.base);
1112
1113        return t;
1114}
1115
1116const struct dma_fence_ops radeon_fence_ops = {
1117        .get_driver_name = radeon_fence_get_driver_name,
1118        .get_timeline_name = radeon_fence_get_timeline_name,
1119        .enable_signaling = radeon_fence_enable_signaling,
1120        .signaled = radeon_fence_is_signaled,
1121        .wait = radeon_fence_default_wait,
1122        .release = NULL,
1123};
1124