linux/drivers/gpu/drm/radeon/r600_dma.c
/*
 * Copyright 2013 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_asic.h"
#include "r600d.h"

u32 r600_gpu_check_soft_reset(struct radeon_device *rdev);

/*
 * DMA
 * Starting with R600, the GPU has an asynchronous
 * DMA engine.  The programming model is very similar
 * to the 3D engine (ring buffer, IBs, etc.), but the
 * DMA controller has its own packet format that is
 * different from the PM4 format used by the 3D engine.
 * It supports copying data, writing embedded data,
 * solid fills, and a number of other things.  It also
 * has support for tiling/detiling of buffers.
 */

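/*
 * Packets on the DMA ring are built with the DMA_PACKET() helper from
 * r600d.h.  As a rough sketch (r600d.h is authoritative), the header
 * dword packs the opcode into the top nibble, a tiling bit, a
 * semaphore-direction bit, and a dword count:
 *
 *   DMA_PACKET(cmd, t, s, n) ~ ((cmd & 0xF) << 28) | ((t & 0x1) << 23) |
 *                              ((s & 0x1) << 22) | (n & 0xFFFF)
 */
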
/**
 * r600_dma_get_rptr - get the current read pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Get the current rptr from the hardware (r6xx+).
 */
uint32_t r600_dma_get_rptr(struct radeon_device *rdev,
                           struct radeon_ring *ring)
{
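        /* the hardware reports a byte offset; convert it to a dword index */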
        return (radeon_ring_generic_get_rptr(rdev, ring) & 0x3fffc) >> 2;
}

/**
 * r600_dma_get_wptr - get the current write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Get the current wptr from the hardware (r6xx+).
 */
uint32_t r600_dma_get_wptr(struct radeon_device *rdev,
                           struct radeon_ring *ring)
{
        return (RREG32(ring->wptr_reg) & 0x3fffc) >> 2;
}

/**
 * r600_dma_set_wptr - commit the write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Write the wptr back to the hardware (r6xx+).
 */
void r600_dma_set_wptr(struct radeon_device *rdev,
                       struct radeon_ring *ring)
{
        WREG32(ring->wptr_reg, (ring->wptr << 2) & 0x3fffc);
}

/**
 * r600_dma_stop - stop the async dma engine
 *
 * @rdev: radeon_device pointer
 *
 * Stop the async dma engine (r6xx-evergreen).
 */
void r600_dma_stop(struct radeon_device *rdev)
{
        u32 rb_cntl = RREG32(DMA_RB_CNTL);

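        /* with the DMA engine down, buffer moves fall back to the CPU,
         * so limit TTM to the CPU-visible portion of VRAM
         */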
        radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);

        rb_cntl &= ~DMA_RB_ENABLE;
        WREG32(DMA_RB_CNTL, rb_cntl);

        rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
}

/**
 * r600_dma_resume - setup and start the async dma engine
 *
 * @rdev: radeon_device pointer
 *
 * Set up the DMA ring buffer and enable it (r6xx-evergreen).
 * Returns 0 for success, error for failure.
 */
int r600_dma_resume(struct radeon_device *rdev)
{
        struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
        u32 rb_cntl, dma_cntl, ib_cntl;
        u32 rb_bufsz;
        int r;

        /* Reset dma */
        if (rdev->family >= CHIP_RV770)
                WREG32(SRBM_SOFT_RESET, RV770_SOFT_RESET_DMA);
        else
                WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA);
        RREG32(SRBM_SOFT_RESET);
        udelay(50);
        WREG32(SRBM_SOFT_RESET, 0);

        WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL, 0);
        WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL, 0);

        /* Set ring buffer size in dwords */
        rb_bufsz = order_base_2(ring->ring_size / 4);
        rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
        rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
#endif
        WREG32(DMA_RB_CNTL, rb_cntl);

        /* Initialize the ring buffer's read and write pointers */
        WREG32(DMA_RB_RPTR, 0);
        WREG32(DMA_RB_WPTR, 0);

        /* set the wb address whether it's enabled or not */
        WREG32(DMA_RB_RPTR_ADDR_HI,
               upper_32_bits(rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFF);
        WREG32(DMA_RB_RPTR_ADDR_LO,
               ((rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFFFFFFFC));

        if (rdev->wb.enabled)
                rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;

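        /* the ring base register takes the address in 256-byte units */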
        WREG32(DMA_RB_BASE, ring->gpu_addr >> 8);

        /* enable DMA IBs */
        ib_cntl = DMA_IB_ENABLE;
#ifdef __BIG_ENDIAN
        ib_cntl |= DMA_IB_SWAP_ENABLE;
#endif
        WREG32(DMA_IB_CNTL, ib_cntl);

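        /* turn off the context-empty interrupt */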
        dma_cntl = RREG32(DMA_CNTL);
        dma_cntl &= ~CTXEMPTY_INT_ENABLE;
        WREG32(DMA_CNTL, dma_cntl);

        if (rdev->family >= CHIP_RV770)
                WREG32(DMA_MODE, 1);

        ring->wptr = 0;
        WREG32(DMA_RB_WPTR, ring->wptr << 2);

        ring->rptr = RREG32(DMA_RB_RPTR) >> 2;

        WREG32(DMA_RB_CNTL, rb_cntl | DMA_RB_ENABLE);

        ring->ready = true;

        r = radeon_ring_test(rdev, R600_RING_TYPE_DMA_INDEX, ring);
        if (r) {
                ring->ready = false;
                return r;
        }

        radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

        return 0;
}

/**
 * r600_dma_fini - tear down the async dma engine
 *
 * @rdev: radeon_device pointer
 *
 * Stop the async dma engine and free the ring (r6xx-evergreen).
 */
void r600_dma_fini(struct radeon_device *rdev)
{
        r600_dma_stop(rdev);
        radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
}

/**
 * r600_dma_is_lockup - Check if the DMA engine is locked up
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Check if the async DMA engine is locked up.
 * Returns true if the engine appears to be locked up, false if not.
 */
bool r600_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
        u32 reset_mask = r600_gpu_check_soft_reset(rdev);

        if (!(reset_mask & RADEON_RESET_DMA)) {
                radeon_ring_lockup_update(ring);
                return false;
        }
        /* force ring activities */
        radeon_ring_force_activity(rdev, ring);
        return radeon_ring_test_lockup(rdev, ring);
}

/**
 * r600_dma_ring_test - simple async dma engine test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Test the DMA engine by using it to write a value to memory (r6xx-SI).
 * Returns 0 for success, error for failure.
 */
int r600_dma_ring_test(struct radeon_device *rdev,
                       struct radeon_ring *ring)
{
        unsigned i;
        int r;
        void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
        u32 tmp;

        if (!ptr) {
                DRM_ERROR("invalid vram scratch pointer\n");
                return -EINVAL;
        }

        tmp = 0xCAFEDEAD;
        writel(tmp, ptr);

        r = radeon_ring_lock(rdev, ring, 4);
        if (r) {
                DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
                return r;
        }
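        /* emit a one-dword DMA write of 0xDEADBEEF to the vram scratch buffer */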
        radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
        radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
        radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff);
        radeon_ring_write(ring, 0xDEADBEEF);
        radeon_ring_unlock_commit(rdev, ring);

        for (i = 0; i < rdev->usec_timeout; i++) {
                tmp = readl(ptr);
                if (tmp == 0xDEADBEEF)
                        break;
                DRM_UDELAY(1);
        }

        if (i < rdev->usec_timeout) {
                DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
        } else {
                DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
                          ring->idx, tmp);
                r = -EINVAL;
        }
        return r;
}

/**
 * r600_dma_fence_ring_emit - emit a fence on the DMA ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Add a DMA fence packet to the ring to write
 * the fence seq number and a DMA trap packet to generate
 * an interrupt if needed (r6xx-r7xx).
 */
void r600_dma_fence_ring_emit(struct radeon_device *rdev,
                              struct radeon_fence *fence)
{
        struct radeon_ring *ring = &rdev->ring[fence->ring];
        u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

        /* write the fence */
        radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0));
        radeon_ring_write(ring, addr & 0xfffffffc);
        radeon_ring_write(ring, (upper_32_bits(addr) & 0xff));
        radeon_ring_write(ring, lower_32_bits(fence->seq));
        /* generate an interrupt */
        radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0));
}

/**
 * r600_dma_semaphore_ring_emit - emit a semaphore on the dma ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @semaphore: radeon semaphore object
 * @emit_wait: wait or signal semaphore
 *
 * Add a DMA semaphore packet to the ring to wait on or signal
 * other rings (r6xx-SI).
 */
bool r600_dma_semaphore_ring_emit(struct radeon_device *rdev,
                                  struct radeon_ring *ring,
                                  struct radeon_semaphore *semaphore,
                                  bool emit_wait)
{
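        /* the s field of the packet header selects signal (1) vs. wait (0) */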
        u64 addr = semaphore->gpu_addr;
        u32 s = emit_wait ? 0 : 1;

        radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SEMAPHORE, 0, s, 0));
        radeon_ring_write(ring, addr & 0xfffffffc);
        radeon_ring_write(ring, upper_32_bits(addr) & 0xff);

        return true;
}

/**
 * r600_dma_ib_test - test an IB on the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Test a simple IB in the DMA ring (r6xx-SI).
 * Returns 0 on success, error on failure.
 */
int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
        struct radeon_ib ib;
        unsigned i;
        int r;
        void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
        u32 tmp = 0;

        if (!ptr) {
                DRM_ERROR("invalid vram scratch pointer\n");
                return -EINVAL;
        }

        tmp = 0xCAFEDEAD;
        writel(tmp, ptr);

        r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
        if (r) {
                DRM_ERROR("radeon: failed to get ib (%d).\n", r);
                return r;
        }

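        /* the IB contains a single one-dword DMA write of the test pattern */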
        ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1);
        ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
        ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff;
        ib.ptr[3] = 0xDEADBEEF;
        ib.length_dw = 4;

        r = radeon_ib_schedule(rdev, &ib, NULL);
        if (r) {
                radeon_ib_free(rdev, &ib);
                DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
                return r;
        }
        r = radeon_fence_wait(ib.fence, false);
        if (r) {
                DRM_ERROR("radeon: fence wait failed (%d).\n", r);
                radeon_ib_free(rdev, &ib);
                return r;
        }
        for (i = 0; i < rdev->usec_timeout; i++) {
                tmp = readl(ptr);
                if (tmp == 0xDEADBEEF)
                        break;
                DRM_UDELAY(1);
        }
        if (i < rdev->usec_timeout) {
                DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
        } else {
                DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
                r = -EINVAL;
        }
        radeon_ib_free(rdev, &ib);
        return r;
}

/**
 * r600_dma_ring_ib_execute - Schedule an IB on the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ib: IB object to schedule
 *
 * Schedule an IB in the DMA ring (r6xx-r7xx).
 */
void r600_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
        struct radeon_ring *ring = &rdev->ring[ib->ring];

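        /* with writeback enabled, emit a DMA write that records the wptr the
         * ring will have once the padded INDIRECT_BUFFER packet below is fetched
         */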
        if (rdev->wb.enabled) {
                u32 next_rptr = ring->wptr + 4;
                while ((next_rptr & 7) != 5)
                        next_rptr++;
                next_rptr += 3;
                radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
                radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
                radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
                radeon_ring_write(ring, next_rptr);
        }

        /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
         * Pad as necessary with NOPs.
         */
        while ((ring->wptr & 7) != 5)
                radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
        radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0, 0));
        radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
        radeon_ring_write(ring, (ib->length_dw << 16) | (upper_32_bits(ib->gpu_addr) & 0xFF));
}

/**
 * r600_copy_dma - copy pages using the DMA engine
 *
 * @rdev: radeon_device pointer
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @num_gpu_pages: number of GPU pages to xfer
 * @fence: radeon fence object
 *
 * Copy GPU pages using the DMA engine (r6xx).
 * Used by the radeon ttm implementation to move pages if
 * registered as the asic copy callback.
 */
int r600_copy_dma(struct radeon_device *rdev,
                  uint64_t src_offset, uint64_t dst_offset,
                  unsigned num_gpu_pages,
                  struct radeon_fence **fence)
{
        struct radeon_semaphore *sem = NULL;
        int ring_index = rdev->asic->copy.dma_ring_index;
        struct radeon_ring *ring = &rdev->ring[ring_index];
        u32 size_in_dw, cur_size_in_dw;
        int i, num_loops;
        int r = 0;

        r = radeon_semaphore_create(rdev, &sem);
        if (r) {
                DRM_ERROR("radeon: moving bo (%d).\n", r);
                return r;
        }

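        /* each COPY packet moves at most 0xFFFE dwords, so reserve 4 ring
         * dwords per packet plus 8 for the semaphore sync and fence
         */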
        size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
        num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFE);
        r = radeon_ring_lock(rdev, ring, num_loops * 4 + 8);
        if (r) {
                DRM_ERROR("radeon: moving bo (%d).\n", r);
                radeon_semaphore_free(rdev, &sem, NULL);
                return r;
        }

        radeon_semaphore_sync_to(sem, *fence);
        radeon_semaphore_sync_rings(rdev, sem, ring->idx);

        for (i = 0; i < num_loops; i++) {
                cur_size_in_dw = size_in_dw;
                if (cur_size_in_dw > 0xFFFE)
                        cur_size_in_dw = 0xFFFE;
                size_in_dw -= cur_size_in_dw;
                radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw));
                radeon_ring_write(ring, dst_offset & 0xfffffffc);
                radeon_ring_write(ring, src_offset & 0xfffffffc);
                radeon_ring_write(ring, (((upper_32_bits(dst_offset) & 0xff) << 16) |
                                         (upper_32_bits(src_offset) & 0xff)));
                src_offset += cur_size_in_dw * 4;
                dst_offset += cur_size_in_dw * 4;
        }

        r = radeon_fence_emit(rdev, fence, ring->idx);
        if (r) {
                radeon_ring_unlock_undo(rdev, ring);
                radeon_semaphore_free(rdev, &sem, NULL);
                return r;
        }

        radeon_ring_unlock_commit(rdev, ring);
        radeon_semaphore_free(rdev, &sem, *fence);

        return r;
}