linux/drivers/gpu/drm/radeon/radeon_fence.c
   1/*
   2 * Copyright 2009 Jerome Glisse.
   3 * All Rights Reserved.
   4 *
   5 * Permission is hereby granted, free of charge, to any person obtaining a
   6 * copy of this software and associated documentation files (the
   7 * "Software"), to deal in the Software without restriction, including
   8 * without limitation the rights to use, copy, modify, merge, publish,
   9 * distribute, sub license, and/or sell copies of the Software, and to
  10 * permit persons to whom the Software is furnished to do so, subject to
  11 * the following conditions:
  12 *
  13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  16 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
  17 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  18 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  19 * USE OR OTHER DEALINGS IN THE SOFTWARE.
  20 *
  21 * The above copyright notice and this permission notice (including the
  22 * next paragraph) shall be included in all copies or substantial portions
  23 * of the Software.
  24 *
  25 */
  26/*
  27 * Authors:
  28 *    Jerome Glisse <glisse@freedesktop.org>
  29 *    Dave Airlie
  30 */
  31#include <linux/seq_file.h>
  32#include <linux/atomic.h>
  33#include <linux/wait.h>
  34#include <linux/kref.h>
  35#include <linux/slab.h>
  36#include <linux/firmware.h>
  37#include <drm/drmP.h>
  38#include "radeon_reg.h"
  39#include "radeon.h"
  40#include "radeon_trace.h"
  41
  42/*
  43 * Fences
   44 * Fences mark an event in the GPU's pipeline and are used
  45 * for GPU/CPU synchronization.  When the fence is written,
  46 * it is expected that all buffers associated with that fence
  47 * are no longer in use by the associated ring on the GPU and
   48 * that the relevant GPU caches have been flushed.  Whether
  49 * we use a scratch register or memory location depends on the asic
  50 * and whether writeback is enabled.
  51 */
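/*
 * A rough sketch of the typical fence lifecycle as seen from this file
 * (illustrative only; locking and error handling are omitted):
 *
 *   struct radeon_fence *fence;
 *
 *   radeon_fence_emit(rdev, &fence, ring);   // queue a fence cmd on the ring
 *   ...                                      // GPU works through the ring
 *   radeon_fence_wait(fence, true);          // sleep until it signals
 *   radeon_fence_unref(&fence);              // drop the reference
 */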
  52
  53/**
  54 * radeon_fence_write - write a fence value
  55 *
  56 * @rdev: radeon_device pointer
  57 * @seq: sequence number to write
  58 * @ring: ring index the fence is associated with
  59 *
  60 * Writes a fence value to memory or a scratch register (all asics).
  61 */
  62static void radeon_fence_write(struct radeon_device *rdev, u32 seq, int ring)
  63{
  64        struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
  65        if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
  66                if (drv->cpu_addr) {
  67                        *drv->cpu_addr = cpu_to_le32(seq);
  68                }
  69        } else {
  70                WREG32(drv->scratch_reg, seq);
  71        }
  72}
  73
  74/**
  75 * radeon_fence_read - read a fence value
  76 *
  77 * @rdev: radeon_device pointer
  78 * @ring: ring index the fence is associated with
  79 *
  80 * Reads a fence value from memory or a scratch register (all asics).
  81 * Returns the value of the fence read from memory or register.
  82 */
  83static u32 radeon_fence_read(struct radeon_device *rdev, int ring)
  84{
  85        struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
  86        u32 seq = 0;
  87
  88        if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
  89                if (drv->cpu_addr) {
  90                        seq = le32_to_cpu(*drv->cpu_addr);
  91                } else {
  92                        seq = lower_32_bits(atomic64_read(&drv->last_seq));
  93                }
  94        } else {
  95                seq = RREG32(drv->scratch_reg);
  96        }
  97        return seq;
  98}
  99
 100/**
 101 * radeon_fence_emit - emit a fence on the requested ring
 102 *
 103 * @rdev: radeon_device pointer
 104 * @fence: radeon fence object
 105 * @ring: ring index the fence is associated with
 106 *
 107 * Emits a fence command on the requested ring (all asics).
 108 * Returns 0 on success, -ENOMEM on failure.
 109 */
 110int radeon_fence_emit(struct radeon_device *rdev,
 111                      struct radeon_fence **fence,
 112                      int ring)
 113{
 114        /* we are protected by the ring emission mutex */
 115        *fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL);
 116        if ((*fence) == NULL) {
 117                return -ENOMEM;
 118        }
 119        kref_init(&((*fence)->kref));
 120        (*fence)->rdev = rdev;
 121        (*fence)->seq = ++rdev->fence_drv[ring].sync_seq[ring];
 122        (*fence)->ring = ring;
 123        radeon_fence_ring_emit(rdev, ring, *fence);
 124        trace_radeon_fence_emit(rdev->ddev, (*fence)->seq);
 125        return 0;
 126}
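/*
 * Illustrative call pattern (a sketch, not lifted from an actual caller):
 * fences are emitted while the ring is locked for writing, e.g.
 *
 *   r = radeon_ring_lock(rdev, ring, ndw);
 *   ... write the command packets ...
 *   r = radeon_fence_emit(rdev, &fence, ring_index);
 *   radeon_ring_unlock_commit(rdev, ring);
 *
 * so the sync_seq[] increment above is serialized per ring.
 */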
 127
 128/**
 129 * radeon_fence_process - process a fence
 130 *
 131 * @rdev: radeon_device pointer
 132 * @ring: ring index the fence is associated with
 133 *
 134 * Checks the current fence value and wakes the fence queue
 135 * if the sequence number has increased (all asics).
 136 */
 137void radeon_fence_process(struct radeon_device *rdev, int ring)
 138{
 139        uint64_t seq, last_seq, last_emitted;
 140        unsigned count_loop = 0;
 141        bool wake = false;
 142
  143        /* Note there is a scenario here for an infinite loop but it's
  144         * very unlikely to happen. For it to happen, the current polling
  145         * process needs to be interrupted by another process, and that
  146         * other process must update last_seq between the atomic read and
  147         * the xchg of the current process.
  148         *
  149         * Moreover, for this to turn into an infinite loop, new fences
  150         * must be signaled continuously, i.e. radeon_fence_read needs to
  151         * return a different value each time for both the currently
  152         * polling process and the other process that updates last_seq
  153         * between the atomic read and xchg of the current process. And
  154         * the value the other process sets as last_seq must be higher
  155         * than the seq value we just read, which means the current
  156         * process has to be interrupted after radeon_fence_read and
  157         * before the atomic xchg.
  158         *
  159         * To be even safer we count the number of times we loop and
  160         * bail out after 10 iterations, accepting the fact that we might
  161         * have temporarily set last_seq not to the true last signaled
  162         * seq but to an older one.
  163         */
 164        last_seq = atomic64_read(&rdev->fence_drv[ring].last_seq);
 165        do {
 166                last_emitted = rdev->fence_drv[ring].sync_seq[ring];
 167                seq = radeon_fence_read(rdev, ring);
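                /* The hardware only writes the lower 32 bits of the
                 * sequence; splice in the upper 32 bits of the last known
                 * sequence to rebuild a full 64 bit value.  If the result
                 * goes backwards, the 32 bit counter wrapped, so borrow the
                 * upper half of the last emitted sequence instead.
                 */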
 168                seq |= last_seq & 0xffffffff00000000LL;
 169                if (seq < last_seq) {
 170                        seq &= 0xffffffff;
 171                        seq |= last_emitted & 0xffffffff00000000LL;
 172                }
 173
 174                if (seq <= last_seq || seq > last_emitted) {
 175                        break;
 176                }
  177                /* If we loop again we don't want to return without
  178                 * checking whether a fence has signaled, since it means
  179                 * the seq we just read differs from the previous one.
  180                 */
 181                wake = true;
 182                last_seq = seq;
 183                if ((count_loop++) > 10) {
  184                        /* We looped too many times; bail out, accepting
  185                         * the fact that we might have recorded an older
  186                         * fence seq than the current real last seq
  187                         * signaled by the hw.
  188                         */
 189                        break;
 190                }
 191        } while (atomic64_xchg(&rdev->fence_drv[ring].last_seq, seq) > seq);
 192
 193        if (wake) {
 194                rdev->fence_drv[ring].last_activity = jiffies;
 195                wake_up_all(&rdev->fence_queue);
 196        }
 197}
 198
 199/**
 200 * radeon_fence_destroy - destroy a fence
 201 *
 202 * @kref: fence kref
 203 *
 204 * Frees the fence object (all asics).
 205 */
 206static void radeon_fence_destroy(struct kref *kref)
 207{
 208        struct radeon_fence *fence;
 209
 210        fence = container_of(kref, struct radeon_fence, kref);
 211        kfree(fence);
 212}
 213
 214/**
  215 * radeon_fence_seq_signaled - check if a fence sequence number has signaled
 216 *
 217 * @rdev: radeon device pointer
 218 * @seq: sequence number
 219 * @ring: ring index the fence is associated with
 220 *
  221 * Check if the last signaled fence sequence number is >= the requested
  222 * sequence number (all asics).
  223 * Returns true if the fence has signaled (current fence value
  224 * is >= requested value) or false if it has not (current fence
  225 * value is < the requested value).  Helper function for
 226 * radeon_fence_signaled().
 227 */
 228static bool radeon_fence_seq_signaled(struct radeon_device *rdev,
 229                                      u64 seq, unsigned ring)
 230{
 231        if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
 232                return true;
 233        }
 234        /* poll new last sequence at least once */
 235        radeon_fence_process(rdev, ring);
 236        if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
 237                return true;
 238        }
 239        return false;
 240}
 241
 242/**
 243 * radeon_fence_signaled - check if a fence has signaled
 244 *
 245 * @fence: radeon fence object
 246 *
 247 * Check if the requested fence has signaled (all asics).
 248 * Returns true if the fence has signaled or false if it has not.
 249 */
 250bool radeon_fence_signaled(struct radeon_fence *fence)
 251{
 252        if (!fence) {
 253                return true;
 254        }
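        /* RADEON_FENCE_SIGNALED_SEQ is a sentinel cached in fence->seq once
         * the fence is known to have signaled, so later queries can return
         * without touching the hardware again.
         */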
 255        if (fence->seq == RADEON_FENCE_SIGNALED_SEQ) {
 256                return true;
 257        }
 258        if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) {
 259                fence->seq = RADEON_FENCE_SIGNALED_SEQ;
 260                return true;
 261        }
 262        return false;
 263}
 264
 265/**
 266 * radeon_fence_wait_seq - wait for a specific sequence number
 267 *
 268 * @rdev: radeon device pointer
 269 * @target_seq: sequence number we want to wait for
 270 * @ring: ring index the fence is associated with
  271 * @intr: use interruptible sleep
 272 * @lock_ring: whether the ring should be locked or not
 273 *
 274 * Wait for the requested sequence number to be written (all asics).
  275 * @intr selects whether to use interruptible (true) or non-interruptible
 276 * (false) sleep when waiting for the sequence number.  Helper function
 277 * for radeon_fence_wait(), et al.
 278 * Returns 0 if the sequence number has passed, error for all other cases.
 279 * -EDEADLK is returned when a GPU lockup has been detected and the ring is
 280 * marked as not ready so no further jobs get scheduled until a successful
 281 * reset.
 282 */
 283static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 target_seq,
 284                                 unsigned ring, bool intr, bool lock_ring)
 285{
 286        unsigned long timeout, last_activity;
 287        uint64_t seq;
 288        unsigned i;
 289        bool signaled;
 290        int r;
 291
 292        while (target_seq > atomic64_read(&rdev->fence_drv[ring].last_seq)) {
 293                if (!rdev->ring[ring].ready) {
 294                        return -EBUSY;
 295                }
 296
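                /* Sleep at most RADEON_FENCE_JIFFIES_TIMEOUT past the last
                 * recorded fence activity; if nothing has been signaled for
                 * that long (or jiffies wrapped), fall back to the shortest
                 * possible wait and then run the lockup check below.
                 */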
 297                timeout = jiffies - RADEON_FENCE_JIFFIES_TIMEOUT;
 298                if (time_after(rdev->fence_drv[ring].last_activity, timeout)) {
 299                        /* the normal case, timeout is somewhere before last_activity */
 300                        timeout = rdev->fence_drv[ring].last_activity - timeout;
 301                } else {
  302                        /* either jiffies wrapped around, or no fence was signaled in the last 500ms;
  303                         * either way we just wait for the minimum amount and then check for a lockup
 304                         */
 305                        timeout = 1;
 306                }
 307                seq = atomic64_read(&rdev->fence_drv[ring].last_seq);
  308                /* Save current last activity value, used to check for GPU lockups */
 309                last_activity = rdev->fence_drv[ring].last_activity;
 310
 311                trace_radeon_fence_wait_begin(rdev->ddev, seq);
 312                radeon_irq_kms_sw_irq_get(rdev, ring);
 313                if (intr) {
 314                        r = wait_event_interruptible_timeout(rdev->fence_queue,
 315                                (signaled = radeon_fence_seq_signaled(rdev, target_seq, ring)),
 316                                timeout);
 317                } else {
 318                        r = wait_event_timeout(rdev->fence_queue,
 319                                (signaled = radeon_fence_seq_signaled(rdev, target_seq, ring)),
 320                                timeout);
 321                }
 322                radeon_irq_kms_sw_irq_put(rdev, ring);
 323                if (unlikely(r < 0)) {
 324                        return r;
 325                }
 326                trace_radeon_fence_wait_end(rdev->ddev, seq);
 327
 328                if (unlikely(!signaled)) {
  329                        /* we were interrupted for some reason and the
  330                         * fence isn't signaled yet, so resume waiting */
 331                        if (r) {
 332                                continue;
 333                        }
 334
 335                        /* check if sequence value has changed since last_activity */
 336                        if (seq != atomic64_read(&rdev->fence_drv[ring].last_seq)) {
 337                                continue;
 338                        }
 339
 340                        if (lock_ring) {
 341                                mutex_lock(&rdev->ring_lock);
 342                        }
 343
 344                        /* test if somebody else has already decided that this is a lockup */
 345                        if (last_activity != rdev->fence_drv[ring].last_activity) {
 346                                if (lock_ring) {
 347                                        mutex_unlock(&rdev->ring_lock);
 348                                }
 349                                continue;
 350                        }
 351
 352                        if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) {
  353                        /* good news, we believe it's a lockup */
 354                                dev_warn(rdev->dev, "GPU lockup (waiting for 0x%016llx last fence id 0x%016llx)\n",
 355                                         target_seq, seq);
 356
  357                        /* change last activity so nobody else thinks there is a lockup */
 358                                for (i = 0; i < RADEON_NUM_RINGS; ++i) {
 359                                        rdev->fence_drv[i].last_activity = jiffies;
 360                                }
 361
 362                                /* mark the ring as not ready any more */
 363                                rdev->ring[ring].ready = false;
 364                                if (lock_ring) {
 365                                        mutex_unlock(&rdev->ring_lock);
 366                                }
 367                                return -EDEADLK;
 368                        }
 369
 370                        if (lock_ring) {
 371                                mutex_unlock(&rdev->ring_lock);
 372                        }
 373                }
 374        }
 375        return 0;
 376}
 377
 378/**
 379 * radeon_fence_wait - wait for a fence to signal
 380 *
 381 * @fence: radeon fence object
  382 * @intr: use interruptible sleep
 383 *
 384 * Wait for the requested fence to signal (all asics).
  385 * @intr selects whether to use interruptible (true) or non-interruptible
 386 * (false) sleep when waiting for the fence.
 387 * Returns 0 if the fence has passed, error for all other cases.
 388 */
 389int radeon_fence_wait(struct radeon_fence *fence, bool intr)
 390{
 391        int r;
 392
 393        if (fence == NULL) {
 394                WARN(1, "Querying an invalid fence : %p !\n", fence);
 395                return -EINVAL;
 396        }
 397
 398        r = radeon_fence_wait_seq(fence->rdev, fence->seq,
 399                                  fence->ring, intr, true);
 400        if (r) {
 401                return r;
 402        }
 403        fence->seq = RADEON_FENCE_SIGNALED_SEQ;
 404        return 0;
 405}
 406
 407static bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq)
 408{
 409        unsigned i;
 410
 411        for (i = 0; i < RADEON_NUM_RINGS; ++i) {
 412                if (seq[i] && radeon_fence_seq_signaled(rdev, seq[i], i)) {
 413                        return true;
 414                }
 415        }
 416        return false;
 417}
 418
 419/**
 420 * radeon_fence_wait_any_seq - wait for a sequence number on any ring
 421 *
 422 * @rdev: radeon device pointer
 423 * @target_seq: sequence number(s) we want to wait for
  424 * @intr: use interruptible sleep
 425 *
 426 * Wait for the requested sequence number(s) to be written by any ring
  427 * (all asics).  Sequence number array is indexed by ring id.
  428 * @intr selects whether to use interruptible (true) or non-interruptible
 429 * (false) sleep when waiting for the sequence number.  Helper function
 430 * for radeon_fence_wait_any(), et al.
 431 * Returns 0 if the sequence number has passed, error for all other cases.
 432 */
 433static int radeon_fence_wait_any_seq(struct radeon_device *rdev,
 434                                     u64 *target_seq, bool intr)
 435{
 436        unsigned long timeout, last_activity, tmp;
 437        unsigned i, ring = RADEON_NUM_RINGS;
 438        bool signaled;
 439        int r;
 440
 441        for (i = 0, last_activity = 0; i < RADEON_NUM_RINGS; ++i) {
 442                if (!target_seq[i]) {
 443                        continue;
 444                }
 445
 446                /* use the most recent one as indicator */
 447                if (time_after(rdev->fence_drv[i].last_activity, last_activity)) {
 448                        last_activity = rdev->fence_drv[i].last_activity;
 449                }
 450
 451                /* For lockup detection just pick the lowest ring we are
 452                 * actively waiting for
 453                 */
 454                if (i < ring) {
 455                        ring = i;
 456                }
 457        }
 458
  459        /* nothing to wait for? */
 460        if (ring == RADEON_NUM_RINGS) {
 461                return -ENOENT;
 462        }
 463
 464        while (!radeon_fence_any_seq_signaled(rdev, target_seq)) {
 465                timeout = jiffies - RADEON_FENCE_JIFFIES_TIMEOUT;
 466                if (time_after(last_activity, timeout)) {
 467                        /* the normal case, timeout is somewhere before last_activity */
 468                        timeout = last_activity - timeout;
 469                } else {
  470                        /* either jiffies wrapped around, or no fence was signaled in the last 500ms;
  471                         * either way we just wait for the minimum amount and then check for a lockup
 472                         */
 473                        timeout = 1;
 474                }
 475
 476                trace_radeon_fence_wait_begin(rdev->ddev, target_seq[ring]);
 477                for (i = 0; i < RADEON_NUM_RINGS; ++i) {
 478                        if (target_seq[i]) {
 479                                radeon_irq_kms_sw_irq_get(rdev, i);
 480                        }
 481                }
 482                if (intr) {
 483                        r = wait_event_interruptible_timeout(rdev->fence_queue,
 484                                (signaled = radeon_fence_any_seq_signaled(rdev, target_seq)),
 485                                timeout);
 486                } else {
 487                        r = wait_event_timeout(rdev->fence_queue,
 488                                (signaled = radeon_fence_any_seq_signaled(rdev, target_seq)),
 489                                timeout);
 490                }
 491                for (i = 0; i < RADEON_NUM_RINGS; ++i) {
 492                        if (target_seq[i]) {
 493                                radeon_irq_kms_sw_irq_put(rdev, i);
 494                        }
 495                }
 496                if (unlikely(r < 0)) {
 497                        return r;
 498                }
 499                trace_radeon_fence_wait_end(rdev->ddev, target_seq[ring]);
 500
 501                if (unlikely(!signaled)) {
  502                        /* we were interrupted for some reason and the
  503                         * fence isn't signaled yet, so resume waiting */
 504                        if (r) {
 505                                continue;
 506                        }
 507
 508                        mutex_lock(&rdev->ring_lock);
 509                        for (i = 0, tmp = 0; i < RADEON_NUM_RINGS; ++i) {
 510                                if (time_after(rdev->fence_drv[i].last_activity, tmp)) {
 511                                        tmp = rdev->fence_drv[i].last_activity;
 512                                }
 513                        }
 514                        /* test if somebody else has already decided that this is a lockup */
 515                        if (last_activity != tmp) {
 516                                last_activity = tmp;
 517                                mutex_unlock(&rdev->ring_lock);
 518                                continue;
 519                        }
 520
 521                        if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) {
  522                                /* good news, we believe it's a lockup */
 523                                dev_warn(rdev->dev, "GPU lockup (waiting for 0x%016llx)\n",
 524                                         target_seq[ring]);
 525
  526                                /* change last activity so nobody else thinks there is a lockup */
 527                                for (i = 0; i < RADEON_NUM_RINGS; ++i) {
 528                                        rdev->fence_drv[i].last_activity = jiffies;
 529                                }
 530
 531                                /* mark the ring as not ready any more */
 532                                rdev->ring[ring].ready = false;
 533                                mutex_unlock(&rdev->ring_lock);
 534                                return -EDEADLK;
 535                        }
 536                        mutex_unlock(&rdev->ring_lock);
 537                }
 538        }
 539        return 0;
 540}
 541
 542/**
 543 * radeon_fence_wait_any - wait for a fence to signal on any ring
 544 *
 545 * @rdev: radeon device pointer
 546 * @fences: radeon fence object(s)
  547 * @intr: use interruptible sleep
 548 *
 549 * Wait for any requested fence to signal (all asics).  Fence
 550 * array is indexed by ring id.  @intr selects whether to use
  551 * interruptible (true) or non-interruptible (false) sleep when
 552 * waiting for the fences. Used by the suballocator.
 553 * Returns 0 if any fence has passed, error for all other cases.
 554 */
 555int radeon_fence_wait_any(struct radeon_device *rdev,
 556                          struct radeon_fence **fences,
 557                          bool intr)
 558{
 559        uint64_t seq[RADEON_NUM_RINGS];
 560        unsigned i;
 561        int r;
 562
 563        for (i = 0; i < RADEON_NUM_RINGS; ++i) {
 564                seq[i] = 0;
 565
 566                if (!fences[i]) {
 567                        continue;
 568                }
 569
 570                if (fences[i]->seq == RADEON_FENCE_SIGNALED_SEQ) {
  571                        /* something was already signaled */
 572                        return 0;
 573                }
 574
 575                seq[i] = fences[i]->seq;
 576        }
 577
 578        r = radeon_fence_wait_any_seq(rdev, seq, intr);
 579        if (r) {
 580                return r;
 581        }
 582        return 0;
 583}
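/*
 * Illustrative use (a sketch of how a suballocator-style caller might wait
 * for whichever ring frees a slot first; the fence variables here are
 * hypothetical):
 *
 *   struct radeon_fence *fences[RADEON_NUM_RINGS] = {};
 *
 *   fences[RADEON_RING_TYPE_GFX_INDEX] = gfx_slot_fence;
 *   fences[R600_RING_TYPE_DMA_INDEX] = dma_slot_fence;
 *   r = radeon_fence_wait_any(rdev, fences, false);
 */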
 584
 585/**
 586 * radeon_fence_wait_next_locked - wait for the next fence to signal
 587 *
 588 * @rdev: radeon device pointer
 589 * @ring: ring index the fence is associated with
 590 *
 591 * Wait for the next fence on the requested ring to signal (all asics).
 592 * Returns 0 if the next fence has passed, error for all other cases.
 593 * Caller must hold ring lock.
 594 */
 595int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring)
 596{
 597        uint64_t seq;
 598
 599        seq = atomic64_read(&rdev->fence_drv[ring].last_seq) + 1ULL;
 600        if (seq >= rdev->fence_drv[ring].sync_seq[ring]) {
 601                /* nothing to wait for, last_seq is
  602                   already the last emitted fence */
 603                return -ENOENT;
 604        }
 605        return radeon_fence_wait_seq(rdev, seq, ring, false, false);
 606}
 607
 608/**
 609 * radeon_fence_wait_empty_locked - wait for all fences to signal
 610 *
 611 * @rdev: radeon device pointer
 612 * @ring: ring index the fence is associated with
 613 *
 614 * Wait for all fences on the requested ring to signal (all asics).
 615 * Returns 0 if the fences have passed, error for all other cases.
 616 * Caller must hold ring lock.
 617 */
 618int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring)
 619{
 620        uint64_t seq = rdev->fence_drv[ring].sync_seq[ring];
 621        int r;
 622
 623        r = radeon_fence_wait_seq(rdev, seq, ring, false, false);
 624        if (r) {
 625                if (r == -EDEADLK) {
 626                        return -EDEADLK;
 627                }
 628                dev_err(rdev->dev, "error waiting for ring[%d] to become idle (%d)\n",
 629                        ring, r);
 630        }
 631        return 0;
 632}
 633
 634/**
 635 * radeon_fence_ref - take a ref on a fence
 636 *
 637 * @fence: radeon fence object
 638 *
 639 * Take a reference on a fence (all asics).
 640 * Returns the fence.
 641 */
 642struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
 643{
 644        kref_get(&fence->kref);
 645        return fence;
 646}
 647
 648/**
 649 * radeon_fence_unref - remove a ref on a fence
 650 *
 651 * @fence: radeon fence object
 652 *
 653 * Remove a reference on a fence (all asics).
 654 */
 655void radeon_fence_unref(struct radeon_fence **fence)
 656{
 657        struct radeon_fence *tmp = *fence;
 658
 659        *fence = NULL;
 660        if (tmp) {
 661                kref_put(&tmp->kref, radeon_fence_destroy);
 662        }
 663}
 664
 665/**
 666 * radeon_fence_count_emitted - get the count of emitted fences
 667 *
 668 * @rdev: radeon device pointer
 669 * @ring: ring index the fence is associated with
 670 *
 671 * Get the number of fences emitted on the requested ring (all asics).
 672 * Returns the number of emitted fences on the ring.  Used by the
  673 * dynpm code to track ring activity.
 674 */
 675unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring)
 676{
 677        uint64_t emitted;
 678
  679        /* We are not protected by the ring lock when reading the last sequence
 680         * but it's ok to report slightly wrong fence count here.
 681         */
 682        radeon_fence_process(rdev, ring);
 683        emitted = rdev->fence_drv[ring].sync_seq[ring]
 684                - atomic64_read(&rdev->fence_drv[ring].last_seq);
  685        /* to avoid 32 bit wrap around */
 686        if (emitted > 0x10000000) {
 687                emitted = 0x10000000;
 688        }
 689        return (unsigned)emitted;
 690}
 691
 692/**
 693 * radeon_fence_need_sync - do we need a semaphore
 694 *
 695 * @fence: radeon fence object
 696 * @dst_ring: which ring to check against
 697 *
 698 * Check if the fence needs to be synced against another ring
 699 * (all asics).  If so, we need to emit a semaphore.
 700 * Returns true if we need to sync with another ring, false if
 701 * not.
 702 */
 703bool radeon_fence_need_sync(struct radeon_fence *fence, int dst_ring)
 704{
 705        struct radeon_fence_driver *fdrv;
 706
 707        if (!fence) {
 708                return false;
 709        }
 710
 711        if (fence->ring == dst_ring) {
 712                return false;
 713        }
 714
 715        /* we are protected by the ring mutex */
 716        fdrv = &fence->rdev->fence_drv[dst_ring];
 717        if (fence->seq <= fdrv->sync_seq[fence->ring]) {
 718                return false;
 719        }
 720
 721        return true;
 722}
 723
 724/**
 725 * radeon_fence_note_sync - record the sync point
 726 *
 727 * @fence: radeon fence object
 728 * @dst_ring: which ring to check against
 729 *
 730 * Note the sequence number at which point the fence will
 731 * be synced with the requested ring (all asics).
 732 */
 733void radeon_fence_note_sync(struct radeon_fence *fence, int dst_ring)
 734{
 735        struct radeon_fence_driver *dst, *src;
 736        unsigned i;
 737
 738        if (!fence) {
 739                return;
 740        }
 741
 742        if (fence->ring == dst_ring) {
 743                return;
 744        }
 745
 746        /* we are protected by the ring mutex */
 747        src = &fence->rdev->fence_drv[fence->ring];
 748        dst = &fence->rdev->fence_drv[dst_ring];
 749        for (i = 0; i < RADEON_NUM_RINGS; ++i) {
 750                if (i == dst_ring) {
 751                        continue;
 752                }
 753                dst->sync_seq[i] = max(dst->sync_seq[i], src->sync_seq[i]);
 754        }
 755}
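/*
 * Typical pairing (sketch only; the actual semaphore emission is handled by
 * the driver's semaphore helpers and is just hinted at here):
 *
 *   if (radeon_fence_need_sync(fence, dst_ring)) {
 *           // emit a semaphore wait on dst_ring for fence->ring
 *           radeon_fence_note_sync(fence, dst_ring);
 *   }
 */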
 756
 757/**
 758 * radeon_fence_driver_start_ring - make the fence driver
 759 * ready for use on the requested ring.
 760 *
 761 * @rdev: radeon device pointer
 762 * @ring: ring index to start the fence driver on
 763 *
 764 * Make the fence driver ready for processing (all asics).
 765 * Not all asics have all rings, so each asic will only
 766 * start the fence driver on the rings it has.
 767 * Returns 0 for success, errors for failure.
 768 */
 769int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
 770{
 771        uint64_t index;
 772        int r;
 773
 774        radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
 775        if (rdev->wb.use_event || !radeon_ring_supports_scratch_reg(rdev, &rdev->ring[ring])) {
 776                rdev->fence_drv[ring].scratch_reg = 0;
 777                if (ring != R600_RING_TYPE_UVD_INDEX) {
 778                        index = R600_WB_EVENT_OFFSET + ring * 4;
 779                        rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
 780                        rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr +
 781                                                         index;
 782
 783                } else {
 784                        /* put fence directly behind firmware */
 785                        index = ALIGN(rdev->uvd_fw->size, 8);
 786                        rdev->fence_drv[ring].cpu_addr = rdev->uvd.cpu_addr + index;
 787                        rdev->fence_drv[ring].gpu_addr = rdev->uvd.gpu_addr + index;
 788                }
 789
 790        } else {
 791                r = radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg);
 792                if (r) {
 793                        dev_err(rdev->dev, "fence failed to get scratch register\n");
 794                        return r;
 795                }
 796                index = RADEON_WB_SCRATCH_OFFSET +
 797                        rdev->fence_drv[ring].scratch_reg -
 798                        rdev->scratch.reg_base;
 799                rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
 800                rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
 801        }
 802        radeon_fence_write(rdev, atomic64_read(&rdev->fence_drv[ring].last_seq), ring);
 803        rdev->fence_drv[ring].initialized = true;
 804        dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016llx and cpu addr 0x%p\n",
 805                 ring, rdev->fence_drv[ring].gpu_addr, rdev->fence_drv[ring].cpu_addr);
 806        return 0;
 807}
 808
 809/**
 810 * radeon_fence_driver_init_ring - init the fence driver
 811 * for the requested ring.
 812 *
 813 * @rdev: radeon device pointer
 814 * @ring: ring index to start the fence driver on
 815 *
 816 * Init the fence driver for the requested ring (all asics).
 817 * Helper function for radeon_fence_driver_init().
 818 */
 819static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring)
 820{
 821        int i;
 822
 823        rdev->fence_drv[ring].scratch_reg = -1;
 824        rdev->fence_drv[ring].cpu_addr = NULL;
 825        rdev->fence_drv[ring].gpu_addr = 0;
 826        for (i = 0; i < RADEON_NUM_RINGS; ++i)
 827                rdev->fence_drv[ring].sync_seq[i] = 0;
 828        atomic64_set(&rdev->fence_drv[ring].last_seq, 0);
 829        rdev->fence_drv[ring].last_activity = jiffies;
 830        rdev->fence_drv[ring].initialized = false;
 831}
 832
 833/**
 834 * radeon_fence_driver_init - init the fence driver
 835 * for all possible rings.
 836 *
 837 * @rdev: radeon device pointer
 838 *
 839 * Init the fence driver for all possible rings (all asics).
 840 * Not all asics have all rings, so each asic will only
 841 * start the fence driver on the rings it has using
 842 * radeon_fence_driver_start_ring().
 843 * Returns 0 for success.
 844 */
 845int radeon_fence_driver_init(struct radeon_device *rdev)
 846{
 847        int ring;
 848
 849        init_waitqueue_head(&rdev->fence_queue);
 850        for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
 851                radeon_fence_driver_init_ring(rdev, ring);
 852        }
 853        if (radeon_debugfs_fence_init(rdev)) {
 854                dev_err(rdev->dev, "fence debugfs file creation failed\n");
 855        }
 856        return 0;
 857}
 858
 859/**
 860 * radeon_fence_driver_fini - tear down the fence driver
 861 * for all possible rings.
 862 *
 863 * @rdev: radeon device pointer
 864 *
 865 * Tear down the fence driver for all possible rings (all asics).
 866 */
 867void radeon_fence_driver_fini(struct radeon_device *rdev)
 868{
 869        int ring, r;
 870
 871        mutex_lock(&rdev->ring_lock);
 872        for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
 873                if (!rdev->fence_drv[ring].initialized)
 874                        continue;
 875                r = radeon_fence_wait_empty_locked(rdev, ring);
 876                if (r) {
 877                        /* no need to trigger GPU reset as we are unloading */
 878                        radeon_fence_driver_force_completion(rdev);
 879                }
 880                wake_up_all(&rdev->fence_queue);
 881                radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
 882                rdev->fence_drv[ring].initialized = false;
 883        }
 884        mutex_unlock(&rdev->ring_lock);
 885}
 886
 887/**
  888 * radeon_fence_driver_force_completion - force all fence waiters to complete
 889 *
 890 * @rdev: radeon device pointer
 891 *
  892 * In case of GPU reset failure, make sure no process keeps waiting on a
  893 * fence that will never complete.
 894 */
 895void radeon_fence_driver_force_completion(struct radeon_device *rdev)
 896{
 897        int ring;
 898
 899        for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
 900                if (!rdev->fence_drv[ring].initialized)
 901                        continue;
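                /* Writing the last emitted sequence number as the signaled
                 * value makes every outstanding fence on this ring look
                 * signaled, so any remaining waiters can finish.
                 */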
 902                radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring);
 903        }
 904}
 905
 906
 907/*
 908 * Fence debugfs
 909 */
 910#if defined(CONFIG_DEBUG_FS)
 911static int radeon_debugfs_fence_info(struct seq_file *m, void *data)
 912{
 913        struct drm_info_node *node = (struct drm_info_node *)m->private;
 914        struct drm_device *dev = node->minor->dev;
 915        struct radeon_device *rdev = dev->dev_private;
 916        int i, j;
 917
 918        for (i = 0; i < RADEON_NUM_RINGS; ++i) {
 919                if (!rdev->fence_drv[i].initialized)
 920                        continue;
 921
 922                seq_printf(m, "--- ring %d ---\n", i);
 923                seq_printf(m, "Last signaled fence 0x%016llx\n",
 924                           (unsigned long long)atomic64_read(&rdev->fence_drv[i].last_seq));
 925                seq_printf(m, "Last emitted        0x%016llx\n",
 926                           rdev->fence_drv[i].sync_seq[i]);
 927
 928                for (j = 0; j < RADEON_NUM_RINGS; ++j) {
 929                        if (i != j && rdev->fence_drv[j].initialized)
 930                                seq_printf(m, "Last sync to ring %d 0x%016llx\n",
 931                                           j, rdev->fence_drv[i].sync_seq[j]);
 932                }
 933        }
 934        return 0;
 935}
 936
 937static struct drm_info_list radeon_debugfs_fence_list[] = {
 938        {"radeon_fence_info", &radeon_debugfs_fence_info, 0, NULL},
 939};
 940#endif
 941
 942int radeon_debugfs_fence_init(struct radeon_device *rdev)
 943{
 944#if defined(CONFIG_DEBUG_FS)
 945        return radeon_debugfs_add_files(rdev, radeon_debugfs_fence_list, 1);
 946#else
 947        return 0;
 948#endif
 949}
 950