linux/drivers/gpu/drm/i915/intel_ringbuffer.c
   1/*
   2 * Copyright © 2008-2010 Intel Corporation
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice (including the next
  12 * paragraph) shall be included in all copies or substantial portions of the
  13 * Software.
  14 *
  15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21 * IN THE SOFTWARE.
  22 *
  23 * Authors:
  24 *    Eric Anholt <eric@anholt.net>
  25 *    Zou Nan hai <nanhai.zou@intel.com>
   26 *    Xiang Hai hao <haihao.xiang@intel.com>
  27 *
  28 */
  29
  30#include <drm/drmP.h>
  31#include "i915_drv.h"
  32#include <drm/i915_drm.h>
  33#include "i915_trace.h"
  34#include "intel_drv.h"
  35
  36/*
  37 * 965+ support PIPE_CONTROL commands, which provide finer grained control
  38 * over cache flushing.
  39 */
  40struct pipe_control {
  41        struct drm_i915_gem_object *obj;
  42        volatile u32 *cpu_page;
  43        u32 gtt_offset;
  44};
  45
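     /*
      * Free space between the CS read pointer (head) and our write pointer
      * (tail).  Keep an 8 byte (two dword) gap so that a completely full
      * ring is never mistaken for an empty one (head == tail means empty).
      */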
  46static inline int ring_space(struct intel_ring_buffer *ring)
  47{
  48        int space = (ring->head & HEAD_ADDR) - (ring->tail + 8);
  49        if (space < 0)
  50                space += ring->size;
  51        return space;
  52}
  53
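     /*
      * Flush for gen2/3 rings: a single MI_FLUSH does the job.  The write
      * (render cache) flush is suppressed via MI_NO_WRITE_FLUSH unless the
      * render domain is involved, and the sampler (read) caches are only
      * flushed when explicitly invalidated with MI_READ_FLUSH.
      */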
  54static int
  55gen2_render_ring_flush(struct intel_ring_buffer *ring,
  56                       u32      invalidate_domains,
  57                       u32      flush_domains)
  58{
  59        u32 cmd;
  60        int ret;
  61
  62        cmd = MI_FLUSH;
  63        if (((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) == 0)
  64                cmd |= MI_NO_WRITE_FLUSH;
  65
  66        if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
  67                cmd |= MI_READ_FLUSH;
  68
  69        ret = intel_ring_begin(ring, 2);
  70        if (ret)
  71                return ret;
  72
  73        intel_ring_emit(ring, cmd);
  74        intel_ring_emit(ring, MI_NOOP);
  75        intel_ring_advance(ring);
  76
  77        return 0;
  78}
  79
  80static int
  81gen4_render_ring_flush(struct intel_ring_buffer *ring,
  82                       u32      invalidate_domains,
  83                       u32      flush_domains)
  84{
  85        struct drm_device *dev = ring->dev;
  86        u32 cmd;
  87        int ret;
  88
  89        /*
  90         * read/write caches:
  91         *
  92         * I915_GEM_DOMAIN_RENDER is always invalidated, but is
  93         * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
  94         * also flushed at 2d versus 3d pipeline switches.
  95         *
  96         * read-only caches:
  97         *
  98         * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
  99         * MI_READ_FLUSH is set, and is always flushed on 965.
 100         *
 101         * I915_GEM_DOMAIN_COMMAND may not exist?
 102         *
 103         * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
 104         * invalidated when MI_EXE_FLUSH is set.
 105         *
 106         * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
 107         * invalidated with every MI_FLUSH.
 108         *
 109         * TLBs:
 110         *
 111         * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
  112         * and I915_GEM_DOMAIN_CPU are invalidated at PTE write and
 113         * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
 114         * are flushed at any MI_FLUSH.
 115         */
 116
 117        cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
 118        if ((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER)
 119                cmd &= ~MI_NO_WRITE_FLUSH;
 120        if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
 121                cmd |= MI_EXE_FLUSH;
 122
 123        if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
 124            (IS_G4X(dev) || IS_GEN5(dev)))
 125                cmd |= MI_INVALIDATE_ISP;
 126
 127        ret = intel_ring_begin(ring, 2);
 128        if (ret)
 129                return ret;
 130
 131        intel_ring_emit(ring, cmd);
 132        intel_ring_emit(ring, MI_NOOP);
 133        intel_ring_advance(ring);
 134
 135        return 0;
 136}
 137
 138/**
 139 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
 140 * implementing two workarounds on gen6.  From section 1.4.7.1
 141 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
 142 *
 143 * [DevSNB-C+{W/A}] Before any depth stall flush (including those
 144 * produced by non-pipelined state commands), software needs to first
 145 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
 146 * 0.
 147 *
 148 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
 149 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
 150 *
 151 * And the workaround for these two requires this workaround first:
 152 *
 153 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
 154 * BEFORE the pipe-control with a post-sync op and no write-cache
 155 * flushes.
 156 *
 157 * And this last workaround is tricky because of the requirements on
 158 * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
 159 * volume 2 part 1:
 160 *
 161 *     "1 of the following must also be set:
 162 *      - Render Target Cache Flush Enable ([12] of DW1)
 163 *      - Depth Cache Flush Enable ([0] of DW1)
 164 *      - Stall at Pixel Scoreboard ([1] of DW1)
 165 *      - Depth Stall ([13] of DW1)
 166 *      - Post-Sync Operation ([13] of DW1)
 167 *      - Notify Enable ([8] of DW1)"
 168 *
 169 * The cache flushes require the workaround flush that triggered this
 170 * one, so we can't use it.  Depth stall would trigger the same.
 171 * Post-sync nonzero is what triggered this second workaround, so we
 172 * can't use that one either.  Notify enable is IRQs, which aren't
 173 * really our business.  That leaves only stall at scoreboard.
 174 */
 175static int
 176intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
 177{
 178        struct pipe_control *pc = ring->private;
 179        u32 scratch_addr = pc->gtt_offset + 128;
 180        int ret;
 181
 182
 183        ret = intel_ring_begin(ring, 6);
 184        if (ret)
 185                return ret;
 186
 187        intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
 188        intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
 189                        PIPE_CONTROL_STALL_AT_SCOREBOARD);
 190        intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
 191        intel_ring_emit(ring, 0); /* low dword */
 192        intel_ring_emit(ring, 0); /* high dword */
 193        intel_ring_emit(ring, MI_NOOP);
 194        intel_ring_advance(ring);
 195
 196        ret = intel_ring_begin(ring, 6);
 197        if (ret)
 198                return ret;
 199
 200        intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
 201        intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
 202        intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
 203        intel_ring_emit(ring, 0);
 204        intel_ring_emit(ring, 0);
 205        intel_ring_emit(ring, MI_NOOP);
 206        intel_ring_advance(ring);
 207
 208        return 0;
 209}
 210
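     /*
      * Main gen6 flush: emit the post-sync workaround sequence above, then
      * one PIPE_CONTROL that flushes the write caches and/or invalidates
      * the read caches and TLBs, with a qword write to the scratch page as
      * the required post-sync operation.
      */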
 211static int
 212gen6_render_ring_flush(struct intel_ring_buffer *ring,
 213                         u32 invalidate_domains, u32 flush_domains)
 214{
 215        u32 flags = 0;
 216        struct pipe_control *pc = ring->private;
 217        u32 scratch_addr = pc->gtt_offset + 128;
 218        int ret;
 219
 220        /* Force SNB workarounds for PIPE_CONTROL flushes */
 221        ret = intel_emit_post_sync_nonzero_flush(ring);
 222        if (ret)
 223                return ret;
 224
 225        /* Just flush everything.  Experiments have shown that reducing the
 226         * number of bits based on the write domains has little performance
 227         * impact.
 228         */
 229        if (flush_domains) {
 230                flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
 231                flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
 232                /*
 233                 * Ensure that any following seqno writes only happen
 234                 * when the render cache is indeed flushed.
 235                 */
 236                flags |= PIPE_CONTROL_CS_STALL;
 237        }
 238        if (invalidate_domains) {
 239                flags |= PIPE_CONTROL_TLB_INVALIDATE;
 240                flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
 241                flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
 242                flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
 243                flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
 244                flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
 245                /*
 246                 * TLB invalidate requires a post-sync write.
 247                 */
 248                flags |= PIPE_CONTROL_QW_WRITE;
 249        }
 250
 251        ret = intel_ring_begin(ring, 4);
 252        if (ret)
 253                return ret;
 254
 255        intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
 256        intel_ring_emit(ring, flags);
 257        intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
 258        intel_ring_emit(ring, 0);
 259        intel_ring_advance(ring);
 260
 261        return 0;
 262}
 263
 264static int
 265gen7_render_ring_cs_stall_wa(struct intel_ring_buffer *ring)
 266{
 267        int ret;
 268
 269        ret = intel_ring_begin(ring, 4);
 270        if (ret)
 271                return ret;
 272
 273        intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
 274        intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
 275                              PIPE_CONTROL_STALL_AT_SCOREBOARD);
 276        intel_ring_emit(ring, 0);
 277        intel_ring_emit(ring, 0);
 278        intel_ring_advance(ring);
 279
 280        return 0;
 281}
 282
 283static int
 284gen7_render_ring_flush(struct intel_ring_buffer *ring,
 285                       u32 invalidate_domains, u32 flush_domains)
 286{
 287        u32 flags = 0;
 288        struct pipe_control *pc = ring->private;
 289        u32 scratch_addr = pc->gtt_offset + 128;
 290        int ret;
 291
 292        /*
 293         * Ensure that any following seqno writes only happen when the render
 294         * cache is indeed flushed.
 295         *
 296         * Workaround: 4th PIPE_CONTROL command (except the ones with only
 297         * read-cache invalidate bits set) must have the CS_STALL bit set. We
 298         * don't try to be clever and just set it unconditionally.
 299         */
 300        flags |= PIPE_CONTROL_CS_STALL;
 301
 302        /* Just flush everything.  Experiments have shown that reducing the
 303         * number of bits based on the write domains has little performance
 304         * impact.
 305         */
 306        if (flush_domains) {
 307                flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
 308                flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
 309        }
 310        if (invalidate_domains) {
 311                flags |= PIPE_CONTROL_TLB_INVALIDATE;
 312                flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
 313                flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
 314                flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
 315                flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
 316                flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
 317                /*
 318                 * TLB invalidate requires a post-sync write.
 319                 */
 320                flags |= PIPE_CONTROL_QW_WRITE;
 321
 322                /* Workaround: we must issue a pipe_control with CS-stall bit
 323                 * set before a pipe_control command that has the state cache
 324                 * invalidate bit set. */
 325                gen7_render_ring_cs_stall_wa(ring);
 326        }
 327
 328        ret = intel_ring_begin(ring, 4);
 329        if (ret)
 330                return ret;
 331
 332        intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
 333        intel_ring_emit(ring, flags);
 334        intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
 335        intel_ring_emit(ring, 0);
 336        intel_ring_advance(ring);
 337
 338        return 0;
 339}
 340
 341static void ring_write_tail(struct intel_ring_buffer *ring,
 342                            u32 value)
 343{
 344        drm_i915_private_t *dev_priv = ring->dev->dev_private;
 345        I915_WRITE_TAIL(ring, value);
 346}
 347
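     /*
      * Read ACTHD, the graphics address the command streamer is currently
      * executing from.  Gen4+ has a per-ring register in the ring's mmio
      * block; older parts only have the single global ACTHD register.
      */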
 348u32 intel_ring_get_active_head(struct intel_ring_buffer *ring)
 349{
 350        drm_i915_private_t *dev_priv = ring->dev->dev_private;
 351        u32 acthd_reg = INTEL_INFO(ring->dev)->gen >= 4 ?
 352                        RING_ACTHD(ring->mmio_base) : ACTHD;
 353
 354        return I915_READ(acthd_reg);
 355}
 356
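     /*
      * (Re)initialise the ring registers: stop the ring, force head and
      * tail back to zero, program the start address and control register,
      * then wait for the hardware to report the ring valid with its head
      * at offset 0.
      */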
 357static int init_ring_common(struct intel_ring_buffer *ring)
 358{
 359        struct drm_device *dev = ring->dev;
 360        drm_i915_private_t *dev_priv = dev->dev_private;
 361        struct drm_i915_gem_object *obj = ring->obj;
 362        int ret = 0;
 363        u32 head;
 364
 365        if (HAS_FORCE_WAKE(dev))
 366                gen6_gt_force_wake_get(dev_priv);
 367
 368        /* Stop the ring if it's running. */
 369        I915_WRITE_CTL(ring, 0);
 370        I915_WRITE_HEAD(ring, 0);
 371        ring->write_tail(ring, 0);
 372
 373        head = I915_READ_HEAD(ring) & HEAD_ADDR;
 374
 375        /* G45 ring initialization fails to reset head to zero */
 376        if (head != 0) {
 377                DRM_DEBUG_KMS("%s head not reset to zero "
 378                              "ctl %08x head %08x tail %08x start %08x\n",
 379                              ring->name,
 380                              I915_READ_CTL(ring),
 381                              I915_READ_HEAD(ring),
 382                              I915_READ_TAIL(ring),
 383                              I915_READ_START(ring));
 384
 385                I915_WRITE_HEAD(ring, 0);
 386
 387                if (I915_READ_HEAD(ring) & HEAD_ADDR) {
 388                        DRM_ERROR("failed to set %s head to zero "
 389                                  "ctl %08x head %08x tail %08x start %08x\n",
 390                                  ring->name,
 391                                  I915_READ_CTL(ring),
 392                                  I915_READ_HEAD(ring),
 393                                  I915_READ_TAIL(ring),
 394                                  I915_READ_START(ring));
 395                }
 396        }
 397
 398        /* Initialize the ring. This must happen _after_ we've cleared the ring
 399         * registers with the above sequence (the readback of the HEAD registers
 400         * also enforces ordering), otherwise the hw might lose the new ring
 401         * register values. */
 402        I915_WRITE_START(ring, obj->gtt_offset);
 403        I915_WRITE_CTL(ring,
 404                        ((ring->size - PAGE_SIZE) & RING_NR_PAGES)
 405                        | RING_VALID);
 406
 407        /* If the head is still not zero, the ring is dead */
 408        if (wait_for((I915_READ_CTL(ring) & RING_VALID) != 0 &&
 409                     I915_READ_START(ring) == obj->gtt_offset &&
 410                     (I915_READ_HEAD(ring) & HEAD_ADDR) == 0, 50)) {
 411                DRM_ERROR("%s initialization failed "
 412                                "ctl %08x head %08x tail %08x start %08x\n",
 413                                ring->name,
 414                                I915_READ_CTL(ring),
 415                                I915_READ_HEAD(ring),
 416                                I915_READ_TAIL(ring),
 417                                I915_READ_START(ring));
 418                ret = -EIO;
 419                goto out;
 420        }
 421
 422        if (!drm_core_check_feature(ring->dev, DRIVER_MODESET))
 423                i915_kernel_lost_context(ring->dev);
 424        else {
 425                ring->head = I915_READ_HEAD(ring);
 426                ring->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
 427                ring->space = ring_space(ring);
 428                ring->last_retired_head = -1;
 429        }
 430
 431out:
 432        if (HAS_FORCE_WAKE(dev))
 433                gen6_gt_force_wake_put(dev_priv);
 434
 435        return ret;
 436}
 437
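     /*
      * Allocate and pin a 4K scratch page for PIPE_CONTROL use: post-sync
      * writes land in it, and pc_render_add_request() stores its seqno in
      * the first dword so that pc_render_get_seqno() can read it back
      * through the kmap'ed cpu_page.
      */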
 438static int
 439init_pipe_control(struct intel_ring_buffer *ring)
 440{
 441        struct pipe_control *pc;
 442        struct drm_i915_gem_object *obj;
 443        int ret;
 444
 445        if (ring->private)
 446                return 0;
 447
 448        pc = kmalloc(sizeof(*pc), GFP_KERNEL);
 449        if (!pc)
 450                return -ENOMEM;
 451
 452        obj = i915_gem_alloc_object(ring->dev, 4096);
 453        if (obj == NULL) {
 454                DRM_ERROR("Failed to allocate seqno page\n");
 455                ret = -ENOMEM;
 456                goto err;
 457        }
 458
 459        i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
 460
 461        ret = i915_gem_object_pin(obj, 4096, true, false);
 462        if (ret)
 463                goto err_unref;
 464
 465        pc->gtt_offset = obj->gtt_offset;
 466        pc->cpu_page =  kmap(sg_page(obj->pages->sgl));
 467        if (pc->cpu_page == NULL)
 468                goto err_unpin;
 469
 470        pc->obj = obj;
 471        ring->private = pc;
 472        return 0;
 473
 474err_unpin:
 475        i915_gem_object_unpin(obj);
 476err_unref:
 477        drm_gem_object_unreference(&obj->base);
 478err:
 479        kfree(pc);
 480        return ret;
 481}
 482
 483static void
 484cleanup_pipe_control(struct intel_ring_buffer *ring)
 485{
 486        struct pipe_control *pc = ring->private;
 487        struct drm_i915_gem_object *obj;
 488
 489        if (!ring->private)
 490                return;
 491
 492        obj = pc->obj;
 493
 494        kunmap(sg_page(obj->pages->sgl));
 495        i915_gem_object_unpin(obj);
 496        drm_gem_object_unreference(&obj->base);
 497
 498        kfree(pc);
 499        ring->private = NULL;
 500}
 501
 502static int init_render_ring(struct intel_ring_buffer *ring)
 503{
 504        struct drm_device *dev = ring->dev;
 505        struct drm_i915_private *dev_priv = dev->dev_private;
 506        int ret = init_ring_common(ring);
 507
 508        if (INTEL_INFO(dev)->gen > 3) {
 509                I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));
 510                if (IS_GEN7(dev))
 511                        I915_WRITE(GFX_MODE_GEN7,
 512                                   _MASKED_BIT_DISABLE(GFX_TLB_INVALIDATE_ALWAYS) |
 513                                   _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
 514        }
 515
 516        if (INTEL_INFO(dev)->gen >= 5) {
 517                ret = init_pipe_control(ring);
 518                if (ret)
 519                        return ret;
 520        }
 521
 522        if (IS_GEN6(dev)) {
 523                /* From the Sandybridge PRM, volume 1 part 3, page 24:
 524                 * "If this bit is set, STCunit will have LRA as replacement
 525                 *  policy. [...] This bit must be reset.  LRA replacement
 526                 *  policy is not supported."
 527                 */
 528                I915_WRITE(CACHE_MODE_0,
 529                           _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
 530
 531                /* This is not explicitly set for GEN6, so read the register.
 532                 * see intel_ring_mi_set_context() for why we care.
 533                 * TODO: consider explicitly setting the bit for GEN5
 534                 */
 535                ring->itlb_before_ctx_switch =
 536                        !!(I915_READ(GFX_MODE) & GFX_TLB_INVALIDATE_ALWAYS);
 537        }
 538
 539        if (INTEL_INFO(dev)->gen >= 6)
 540                I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
 541
 542        if (HAS_L3_GPU_CACHE(dev))
 543                I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR);
 544
 545        return ret;
 546}
 547
 548static void render_ring_cleanup(struct intel_ring_buffer *ring)
 549{
 550        if (!ring->private)
 551                return;
 552
 553        cleanup_pipe_control(ring);
 554}
 555
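     /*
      * Write the given seqno into another ring's semaphore mailbox
      * register.  This is the "signal" half of the inter-ring semaphore;
      * gen6_ring_sync() below implements the matching "wait" half.
      */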
 556static void
 557update_mboxes(struct intel_ring_buffer *ring,
 558            u32 seqno,
 559            u32 mmio_offset)
 560{
 561        intel_ring_emit(ring, MI_SEMAPHORE_MBOX |
 562                              MI_SEMAPHORE_GLOBAL_GTT |
 563                              MI_SEMAPHORE_REGISTER |
 564                              MI_SEMAPHORE_UPDATE);
 565        intel_ring_emit(ring, seqno);
 566        intel_ring_emit(ring, mmio_offset);
 567}
 568
 569/**
 570 * gen6_add_request - Update the semaphore mailbox registers
  571 *
  572 * @ring: ring that is adding a request
  573 * @seqno: pointer filled with the seqno emitted to the ring
  574 *
 575 * Update the mailbox registers in the *other* rings with the current seqno.
 576 * This acts like a signal in the canonical semaphore.
 577 */
 578static int
 579gen6_add_request(struct intel_ring_buffer *ring,
 580                 u32 *seqno)
 581{
 582        u32 mbox1_reg;
 583        u32 mbox2_reg;
 584        int ret;
 585
 586        ret = intel_ring_begin(ring, 10);
 587        if (ret)
 588                return ret;
 589
 590        mbox1_reg = ring->signal_mbox[0];
 591        mbox2_reg = ring->signal_mbox[1];
 592
 593        *seqno = i915_gem_next_request_seqno(ring);
 594
 595        update_mboxes(ring, *seqno, mbox1_reg);
 596        update_mboxes(ring, *seqno, mbox2_reg);
 597        intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
 598        intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
 599        intel_ring_emit(ring, *seqno);
 600        intel_ring_emit(ring, MI_USER_INTERRUPT);
 601        intel_ring_advance(ring);
 602
 603        return 0;
 604}
 605
 606/**
  607 * gen6_ring_sync - sync the waiter to the signaller on seqno
  608 *
  609 * @waiter: ring that is waiting
  610 * @signaller: ring which has signalled, or will signal
  611 * @seqno: seqno which the waiter will block on
 612 */
 613static int
 614gen6_ring_sync(struct intel_ring_buffer *waiter,
 615               struct intel_ring_buffer *signaller,
 616               u32 seqno)
 617{
 618        int ret;
 619        u32 dw1 = MI_SEMAPHORE_MBOX |
 620                  MI_SEMAPHORE_COMPARE |
 621                  MI_SEMAPHORE_REGISTER;
 622
 623        /* Throughout all of the GEM code, seqno passed implies our current
 624         * seqno is >= the last seqno executed. However for hardware the
 625         * comparison is strictly greater than.
 626         */
 627        seqno -= 1;
 628
 629        WARN_ON(signaller->semaphore_register[waiter->id] ==
 630                MI_SEMAPHORE_SYNC_INVALID);
 631
 632        ret = intel_ring_begin(waiter, 4);
 633        if (ret)
 634                return ret;
 635
 636        intel_ring_emit(waiter,
 637                        dw1 | signaller->semaphore_register[waiter->id]);
 638        intel_ring_emit(waiter, seqno);
 639        intel_ring_emit(waiter, 0);
 640        intel_ring_emit(waiter, MI_NOOP);
 641        intel_ring_advance(waiter);
 642
 643        return 0;
 644}
 645
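     /*
      * Emit one depth-stalling PIPE_CONTROL that qword-writes to addr__.
      * pc_render_add_request() issues a run of these, each targeting a
      * different cacheline of the scratch page, to flush its own qword
      * write out to memory before raising the notify interrupt.
      */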
 646#define PIPE_CONTROL_FLUSH(ring__, addr__)                                      \
 647do {                                                                    \
 648        intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |                \
 649                 PIPE_CONTROL_DEPTH_STALL);                             \
 650        intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT);                    \
 651        intel_ring_emit(ring__, 0);                                                     \
 652        intel_ring_emit(ring__, 0);                                                     \
 653} while (0)
 654
 655static int
 656pc_render_add_request(struct intel_ring_buffer *ring,
 657                      u32 *result)
 658{
 659        u32 seqno = i915_gem_next_request_seqno(ring);
 660        struct pipe_control *pc = ring->private;
 661        u32 scratch_addr = pc->gtt_offset + 128;
 662        int ret;
 663
 664        /* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
 665         * incoherent with writes to memory, i.e. completely fubar,
 666         * so we need to use PIPE_NOTIFY instead.
 667         *
 668         * However, we also need to workaround the qword write
 669         * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
 670         * memory before requesting an interrupt.
 671         */
 672        ret = intel_ring_begin(ring, 32);
 673        if (ret)
 674                return ret;
 675
 676        intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
 677                        PIPE_CONTROL_WRITE_FLUSH |
 678                        PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
 679        intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
 680        intel_ring_emit(ring, seqno);
 681        intel_ring_emit(ring, 0);
 682        PIPE_CONTROL_FLUSH(ring, scratch_addr);
 683        scratch_addr += 128; /* write to separate cachelines */
 684        PIPE_CONTROL_FLUSH(ring, scratch_addr);
 685        scratch_addr += 128;
 686        PIPE_CONTROL_FLUSH(ring, scratch_addr);
 687        scratch_addr += 128;
 688        PIPE_CONTROL_FLUSH(ring, scratch_addr);
 689        scratch_addr += 128;
 690        PIPE_CONTROL_FLUSH(ring, scratch_addr);
 691        scratch_addr += 128;
 692        PIPE_CONTROL_FLUSH(ring, scratch_addr);
 693
 694        intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
 695                        PIPE_CONTROL_WRITE_FLUSH |
 696                        PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
 697                        PIPE_CONTROL_NOTIFY);
 698        intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
 699        intel_ring_emit(ring, seqno);
 700        intel_ring_emit(ring, 0);
 701        intel_ring_advance(ring);
 702
 703        *result = seqno;
 704        return 0;
 705}
 706
 707static u32
 708gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
 709{
 710        /* Workaround to force correct ordering between irq and seqno writes on
 711         * ivb (and maybe also on snb) by reading from a CS register (like
 712         * ACTHD) before reading the status page. */
 713        if (!lazy_coherency)
 714                intel_ring_get_active_head(ring);
 715        return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
 716}
 717
 718static u32
 719ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
 720{
 721        return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
 722}
 723
 724static u32
 725pc_render_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
 726{
 727        struct pipe_control *pc = ring->private;
 728        return pc->cpu_page[0];
 729}
 730
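     /*
      * Ring interrupt enable/disable for gen5 (Ironlake): ring interrupts
      * sit behind the GT interrupt mask register (GTIMR), and the refcount
      * lets multiple waiters share a single enable.
      */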
 731static bool
 732gen5_ring_get_irq(struct intel_ring_buffer *ring)
 733{
 734        struct drm_device *dev = ring->dev;
 735        drm_i915_private_t *dev_priv = dev->dev_private;
 736        unsigned long flags;
 737
 738        if (!dev->irq_enabled)
 739                return false;
 740
 741        spin_lock_irqsave(&dev_priv->irq_lock, flags);
 742        if (ring->irq_refcount++ == 0) {
 743                dev_priv->gt_irq_mask &= ~ring->irq_enable_mask;
 744                I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
 745                POSTING_READ(GTIMR);
 746        }
 747        spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
 748
 749        return true;
 750}
 751
 752static void
 753gen5_ring_put_irq(struct intel_ring_buffer *ring)
 754{
 755        struct drm_device *dev = ring->dev;
 756        drm_i915_private_t *dev_priv = dev->dev_private;
 757        unsigned long flags;
 758
 759        spin_lock_irqsave(&dev_priv->irq_lock, flags);
 760        if (--ring->irq_refcount == 0) {
 761                dev_priv->gt_irq_mask |= ring->irq_enable_mask;
 762                I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
 763                POSTING_READ(GTIMR);
 764        }
 765        spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
 766}
 767
 768static bool
 769i9xx_ring_get_irq(struct intel_ring_buffer *ring)
 770{
 771        struct drm_device *dev = ring->dev;
 772        drm_i915_private_t *dev_priv = dev->dev_private;
 773        unsigned long flags;
 774
 775        if (!dev->irq_enabled)
 776                return false;
 777
 778        spin_lock_irqsave(&dev_priv->irq_lock, flags);
 779        if (ring->irq_refcount++ == 0) {
 780                dev_priv->irq_mask &= ~ring->irq_enable_mask;
 781                I915_WRITE(IMR, dev_priv->irq_mask);
 782                POSTING_READ(IMR);
 783        }
 784        spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
 785
 786        return true;
 787}
 788
 789static void
 790i9xx_ring_put_irq(struct intel_ring_buffer *ring)
 791{
 792        struct drm_device *dev = ring->dev;
 793        drm_i915_private_t *dev_priv = dev->dev_private;
 794        unsigned long flags;
 795
 796        spin_lock_irqsave(&dev_priv->irq_lock, flags);
 797        if (--ring->irq_refcount == 0) {
 798                dev_priv->irq_mask |= ring->irq_enable_mask;
 799                I915_WRITE(IMR, dev_priv->irq_mask);
 800                POSTING_READ(IMR);
 801        }
 802        spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
 803}
 804
 805static bool
 806i8xx_ring_get_irq(struct intel_ring_buffer *ring)
 807{
 808        struct drm_device *dev = ring->dev;
 809        drm_i915_private_t *dev_priv = dev->dev_private;
 810        unsigned long flags;
 811
 812        if (!dev->irq_enabled)
 813                return false;
 814
 815        spin_lock_irqsave(&dev_priv->irq_lock, flags);
 816        if (ring->irq_refcount++ == 0) {
 817                dev_priv->irq_mask &= ~ring->irq_enable_mask;
 818                I915_WRITE16(IMR, dev_priv->irq_mask);
 819                POSTING_READ16(IMR);
 820        }
 821        spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
 822
 823        return true;
 824}
 825
 826static void
 827i8xx_ring_put_irq(struct intel_ring_buffer *ring)
 828{
 829        struct drm_device *dev = ring->dev;
 830        drm_i915_private_t *dev_priv = dev->dev_private;
 831        unsigned long flags;
 832
 833        spin_lock_irqsave(&dev_priv->irq_lock, flags);
 834        if (--ring->irq_refcount == 0) {
 835                dev_priv->irq_mask |= ring->irq_enable_mask;
 836                I915_WRITE16(IMR, dev_priv->irq_mask);
 837                POSTING_READ16(IMR);
 838        }
 839        spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
 840}
 841
 842void intel_ring_setup_status_page(struct intel_ring_buffer *ring)
 843{
 844        struct drm_device *dev = ring->dev;
 845        drm_i915_private_t *dev_priv = ring->dev->dev_private;
 846        u32 mmio = 0;
 847
 848        /* The ring status page addresses are no longer next to the rest of
 849         * the ring registers as of gen7.
 850         */
 851        if (IS_GEN7(dev)) {
 852                switch (ring->id) {
 853                case RCS:
 854                        mmio = RENDER_HWS_PGA_GEN7;
 855                        break;
 856                case BCS:
 857                        mmio = BLT_HWS_PGA_GEN7;
 858                        break;
 859                case VCS:
 860                        mmio = BSD_HWS_PGA_GEN7;
 861                        break;
 862                }
 863        } else if (IS_GEN6(ring->dev)) {
 864                mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
 865        } else {
 866                mmio = RING_HWS_PGA(ring->mmio_base);
 867        }
 868
 869        I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
 870        POSTING_READ(mmio);
 871}
 872
 873static int
 874bsd_ring_flush(struct intel_ring_buffer *ring,
 875               u32     invalidate_domains,
 876               u32     flush_domains)
 877{
 878        int ret;
 879
 880        ret = intel_ring_begin(ring, 2);
 881        if (ret)
 882                return ret;
 883
 884        intel_ring_emit(ring, MI_FLUSH);
 885        intel_ring_emit(ring, MI_NOOP);
 886        intel_ring_advance(ring);
 887        return 0;
 888}
 889
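     /*
      * Emit a request: store the new seqno into the hardware status page
      * with MI_STORE_DWORD_INDEX, then raise MI_USER_INTERRUPT to wake up
      * any waiters.
      */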
 890static int
 891i9xx_add_request(struct intel_ring_buffer *ring,
 892                 u32 *result)
 893{
 894        u32 seqno;
 895        int ret;
 896
 897        ret = intel_ring_begin(ring, 4);
 898        if (ret)
 899                return ret;
 900
 901        seqno = i915_gem_next_request_seqno(ring);
 902
 903        intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
 904        intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
 905        intel_ring_emit(ring, seqno);
 906        intel_ring_emit(ring, MI_USER_INTERRUPT);
 907        intel_ring_advance(ring);
 908
 909        *result = seqno;
 910        return 0;
 911}
 912
 913static bool
 914gen6_ring_get_irq(struct intel_ring_buffer *ring)
 915{
 916        struct drm_device *dev = ring->dev;
 917        drm_i915_private_t *dev_priv = dev->dev_private;
 918        unsigned long flags;
 919
 920        if (!dev->irq_enabled)
  921                return false;
 922
 923        /* It looks like we need to prevent the gt from suspending while waiting
  924         * for a notify irq, otherwise irqs seem to get lost on at least the
 925         * blt/bsd rings on ivb. */
 926        gen6_gt_force_wake_get(dev_priv);
 927
 928        spin_lock_irqsave(&dev_priv->irq_lock, flags);
 929        if (ring->irq_refcount++ == 0) {
 930                if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS)
 931                        I915_WRITE_IMR(ring, ~(ring->irq_enable_mask |
 932                                                GEN6_RENDER_L3_PARITY_ERROR));
 933                else
 934                        I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
 935                dev_priv->gt_irq_mask &= ~ring->irq_enable_mask;
 936                I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
 937                POSTING_READ(GTIMR);
 938        }
 939        spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
 940
 941        return true;
 942}
 943
 944static void
 945gen6_ring_put_irq(struct intel_ring_buffer *ring)
 946{
 947        struct drm_device *dev = ring->dev;
 948        drm_i915_private_t *dev_priv = dev->dev_private;
 949        unsigned long flags;
 950
 951        spin_lock_irqsave(&dev_priv->irq_lock, flags);
 952        if (--ring->irq_refcount == 0) {
 953                if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS)
 954                        I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR);
 955                else
 956                        I915_WRITE_IMR(ring, ~0);
 957                dev_priv->gt_irq_mask |= ring->irq_enable_mask;
 958                I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
 959                POSTING_READ(GTIMR);
 960        }
 961        spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
 962
 963        gen6_gt_force_wake_put(dev_priv);
 964}
 965
 966static int
 967i965_dispatch_execbuffer(struct intel_ring_buffer *ring, u32 offset, u32 length)
 968{
 969        int ret;
 970
 971        ret = intel_ring_begin(ring, 2);
 972        if (ret)
 973                return ret;
 974
 975        intel_ring_emit(ring,
 976                        MI_BATCH_BUFFER_START |
 977                        MI_BATCH_GTT |
 978                        MI_BATCH_NON_SECURE_I965);
 979        intel_ring_emit(ring, offset);
 980        intel_ring_advance(ring);
 981
 982        return 0;
 983}
 984
 985static int
 986i830_dispatch_execbuffer(struct intel_ring_buffer *ring,
 987                                u32 offset, u32 len)
 988{
 989        int ret;
 990
 991        ret = intel_ring_begin(ring, 4);
 992        if (ret)
 993                return ret;
 994
 995        intel_ring_emit(ring, MI_BATCH_BUFFER);
 996        intel_ring_emit(ring, offset | MI_BATCH_NON_SECURE);
 997        intel_ring_emit(ring, offset + len - 8);
 998        intel_ring_emit(ring, 0);
 999        intel_ring_advance(ring);
1000
1001        return 0;
1002}
1003
1004static int
1005i915_dispatch_execbuffer(struct intel_ring_buffer *ring,
1006                                u32 offset, u32 len)
1007{
1008        int ret;
1009
1010        ret = intel_ring_begin(ring, 2);
1011        if (ret)
1012                return ret;
1013
1014        intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
1015        intel_ring_emit(ring, offset | MI_BATCH_NON_SECURE);
1016        intel_ring_advance(ring);
1017
1018        return 0;
1019}
1020
1021static void cleanup_status_page(struct intel_ring_buffer *ring)
1022{
1023        struct drm_i915_gem_object *obj;
1024
1025        obj = ring->status_page.obj;
1026        if (obj == NULL)
1027                return;
1028
1029        kunmap(sg_page(obj->pages->sgl));
1030        i915_gem_object_unpin(obj);
1031        drm_gem_object_unreference(&obj->base);
1032        ring->status_page.obj = NULL;
1033}
1034
1035static int init_status_page(struct intel_ring_buffer *ring)
1036{
1037        struct drm_device *dev = ring->dev;
1038        struct drm_i915_gem_object *obj;
1039        int ret;
1040
1041        obj = i915_gem_alloc_object(dev, 4096);
1042        if (obj == NULL) {
1043                DRM_ERROR("Failed to allocate status page\n");
1044                ret = -ENOMEM;
1045                goto err;
1046        }
1047
1048        i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
1049
1050        ret = i915_gem_object_pin(obj, 4096, true, false);
1051        if (ret != 0) {
1052                goto err_unref;
1053        }
1054
1055        ring->status_page.gfx_addr = obj->gtt_offset;
1056        ring->status_page.page_addr = kmap(sg_page(obj->pages->sgl));
1057        if (ring->status_page.page_addr == NULL) {
1058                ret = -ENOMEM;
1059                goto err_unpin;
1060        }
1061        ring->status_page.obj = obj;
1062        memset(ring->status_page.page_addr, 0, PAGE_SIZE);
1063
1064        intel_ring_setup_status_page(ring);
1065        DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
1066                        ring->name, ring->status_page.gfx_addr);
1067
1068        return 0;
1069
1070err_unpin:
1071        i915_gem_object_unpin(obj);
1072err_unref:
1073        drm_gem_object_unreference(&obj->base);
1074err:
1075        return ret;
1076}
1077
1078static int intel_init_ring_buffer(struct drm_device *dev,
1079                                  struct intel_ring_buffer *ring)
1080{
1081        struct drm_i915_gem_object *obj;
1082        struct drm_i915_private *dev_priv = dev->dev_private;
1083        int ret;
1084
1085        ring->dev = dev;
1086        INIT_LIST_HEAD(&ring->active_list);
1087        INIT_LIST_HEAD(&ring->request_list);
1088        ring->size = 32 * PAGE_SIZE;
1089
1090        init_waitqueue_head(&ring->irq_queue);
1091
1092        if (I915_NEED_GFX_HWS(dev)) {
1093                ret = init_status_page(ring);
1094                if (ret)
1095                        return ret;
1096        }
1097
1098        obj = i915_gem_alloc_object(dev, ring->size);
1099        if (obj == NULL) {
1100                DRM_ERROR("Failed to allocate ringbuffer\n");
1101                ret = -ENOMEM;
1102                goto err_hws;
1103        }
1104
1105        ring->obj = obj;
1106
1107        ret = i915_gem_object_pin(obj, PAGE_SIZE, true, false);
1108        if (ret)
1109                goto err_unref;
1110
1111        ret = i915_gem_object_set_to_gtt_domain(obj, true);
1112        if (ret)
1113                goto err_unpin;
1114
1115        ring->virtual_start =
1116                ioremap_wc(dev_priv->mm.gtt->gma_bus_addr + obj->gtt_offset,
1117                           ring->size);
1118        if (ring->virtual_start == NULL) {
1119                DRM_ERROR("Failed to map ringbuffer.\n");
1120                ret = -EINVAL;
1121                goto err_unpin;
1122        }
1123
1124        ret = ring->init(ring);
1125        if (ret)
1126                goto err_unmap;
1127
1128        /* Workaround an erratum on the i830 which causes a hang if
1129         * the TAIL pointer points to within the last 2 cachelines
1130         * of the buffer.
1131         */
1132        ring->effective_size = ring->size;
1133        if (IS_I830(ring->dev) || IS_845G(ring->dev))
1134                ring->effective_size -= 128;
1135
1136        return 0;
1137
1138err_unmap:
1139        iounmap(ring->virtual_start);
1140err_unpin:
1141        i915_gem_object_unpin(obj);
1142err_unref:
1143        drm_gem_object_unreference(&obj->base);
1144        ring->obj = NULL;
1145err_hws:
1146        cleanup_status_page(ring);
1147        return ret;
1148}
1149
1150void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring)
1151{
1152        struct drm_i915_private *dev_priv;
1153        int ret;
1154
1155        if (ring->obj == NULL)
1156                return;
1157
1158        /* Disable the ring buffer. The ring must be idle at this point */
1159        dev_priv = ring->dev->dev_private;
1160        ret = intel_wait_ring_idle(ring);
1161        if (ret)
1162                DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
1163                          ring->name, ret);
1164
1165        I915_WRITE_CTL(ring, 0);
1166
1167        iounmap(ring->virtual_start);
1168
1169        i915_gem_object_unpin(ring->obj);
1170        drm_gem_object_unreference(&ring->obj->base);
1171        ring->obj = NULL;
1172
1173        if (ring->cleanup)
1174                ring->cleanup(ring);
1175
1176        cleanup_status_page(ring);
1177}
1178
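     /*
      * Wrap the ring: once the space up to the end of the buffer is free,
      * fill it with MI_NOOPs and reset the tail to the start.
      */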
1179static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring)
1180{
1181        uint32_t __iomem *virt;
1182        int rem = ring->size - ring->tail;
1183
1184        if (ring->space < rem) {
1185                int ret = intel_wait_ring_buffer(ring, rem);
1186                if (ret)
1187                        return ret;
1188        }
1189
1190        virt = ring->virtual_start + ring->tail;
1191        rem /= 4;
1192        while (rem--)
1193                iowrite32(MI_NOOP, virt++);
1194
1195        ring->tail = 0;
1196        ring->space = ring_space(ring);
1197
1198        return 0;
1199}
1200
1201static int intel_ring_wait_seqno(struct intel_ring_buffer *ring, u32 seqno)
1202{
1203        int ret;
1204
1205        ret = i915_wait_seqno(ring, seqno);
1206        if (!ret)
1207                i915_gem_retire_requests_ring(ring);
1208
1209        return ret;
1210}
1211
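     /*
      * Make room by retiring completed work instead of polling the ring
      * registers: find the oldest request whose completion frees at least
      * n bytes, wait for its seqno, and pick up the new head from
      * last_retired_head.
      */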
1212static int intel_ring_wait_request(struct intel_ring_buffer *ring, int n)
1213{
1214        struct drm_i915_gem_request *request;
1215        u32 seqno = 0;
1216        int ret;
1217
1218        i915_gem_retire_requests_ring(ring);
1219
1220        if (ring->last_retired_head != -1) {
1221                ring->head = ring->last_retired_head;
1222                ring->last_retired_head = -1;
1223                ring->space = ring_space(ring);
1224                if (ring->space >= n)
1225                        return 0;
1226        }
1227
1228        list_for_each_entry(request, &ring->request_list, list) {
1229                int space;
1230
1231                if (request->tail == -1)
1232                        continue;
1233
1234                space = request->tail - (ring->tail + 8);
1235                if (space < 0)
1236                        space += ring->size;
1237                if (space >= n) {
1238                        seqno = request->seqno;
1239                        break;
1240                }
1241
1242                /* Consume this request in case we need more space than
1243                 * is available and so need to prevent a race between
1244                 * updating last_retired_head and direct reads of
1245                 * I915_RING_HEAD. It also provides a nice sanity check.
1246                 */
1247                request->tail = -1;
1248        }
1249
1250        if (seqno == 0)
1251                return -ENOSPC;
1252
1253        ret = intel_ring_wait_seqno(ring, seqno);
1254        if (ret)
1255                return ret;
1256
1257        if (WARN_ON(ring->last_retired_head == -1))
1258                return -ENOSPC;
1259
1260        ring->head = ring->last_retired_head;
1261        ring->last_retired_head = -1;
1262        ring->space = ring_space(ring);
1263        if (WARN_ON(ring->space < n))
1264                return -ENOSPC;
1265
1266        return 0;
1267}
1268
1269int intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n)
1270{
1271        struct drm_device *dev = ring->dev;
1272        struct drm_i915_private *dev_priv = dev->dev_private;
1273        unsigned long end;
1274        int ret;
1275
1276        ret = intel_ring_wait_request(ring, n);
1277        if (ret != -ENOSPC)
1278                return ret;
1279
1280        trace_i915_ring_wait_begin(ring);
 1281        /* With GEM the hangcheck timer should kick us out of the loop;
1282         * leaving it early runs the risk of corrupting GEM state (due
1283         * to running on almost untested codepaths). But on resume
1284         * timers don't work yet, so prevent a complete hang in that
1285         * case by choosing an insanely large timeout. */
1286        end = jiffies + 60 * HZ;
1287
1288        do {
1289                ring->head = I915_READ_HEAD(ring);
1290                ring->space = ring_space(ring);
1291                if (ring->space >= n) {
1292                        trace_i915_ring_wait_end(ring);
1293                        return 0;
1294                }
1295
1296                if (dev->primary->master) {
1297                        struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
1298                        if (master_priv->sarea_priv)
1299                                master_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
1300                }
1301
1302                msleep(1);
1303
1304                ret = i915_gem_check_wedge(dev_priv, dev_priv->mm.interruptible);
1305                if (ret)
1306                        return ret;
1307        } while (!time_after(jiffies, end));
1308        trace_i915_ring_wait_end(ring);
1309        return -EBUSY;
1310}
1311
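     /*
      * Reserve space for num_dwords dwords of commands: bail out if the
      * GPU is wedged, wrap the ring if the request would run past the
      * effective size, and wait until enough space is free before
      * claiming it.
      */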
1312int intel_ring_begin(struct intel_ring_buffer *ring,
1313                     int num_dwords)
1314{
1315        drm_i915_private_t *dev_priv = ring->dev->dev_private;
1316        int n = 4*num_dwords;
1317        int ret;
1318
1319        ret = i915_gem_check_wedge(dev_priv, dev_priv->mm.interruptible);
1320        if (ret)
1321                return ret;
1322
1323        if (unlikely(ring->tail + n > ring->effective_size)) {
1324                ret = intel_wrap_ring_buffer(ring);
1325                if (unlikely(ret))
1326                        return ret;
1327        }
1328
1329        if (unlikely(ring->space < n)) {
1330                ret = intel_wait_ring_buffer(ring, n);
1331                if (unlikely(ret))
1332                        return ret;
1333        }
1334
1335        ring->space -= n;
1336        return 0;
1337}
1338
1339void intel_ring_advance(struct intel_ring_buffer *ring)
1340{
1341        struct drm_i915_private *dev_priv = ring->dev->dev_private;
1342
1343        ring->tail &= ring->size - 1;
1344        if (dev_priv->stop_rings & intel_ring_flag(ring))
1345                return;
1346        ring->write_tail(ring, ring->tail);
1347}
1348
1349
1350static void gen6_bsd_ring_write_tail(struct intel_ring_buffer *ring,
1351                                     u32 value)
1352{
1353        drm_i915_private_t *dev_priv = ring->dev->dev_private;
1354
 1355        /* Every tail move must follow the sequence below */
1356
1357        /* Disable notification that the ring is IDLE. The GT
1358         * will then assume that it is busy and bring it out of rc6.
1359         */
1360        I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
1361                   _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
1362
1363        /* Clear the context id. Here be magic! */
1364        I915_WRITE64(GEN6_BSD_RNCID, 0x0);
1365
1366        /* Wait for the ring not to be idle, i.e. for it to wake up. */
1367        if (wait_for((I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) &
1368                      GEN6_BSD_SLEEP_INDICATOR) == 0,
1369                     50))
1370                DRM_ERROR("timed out waiting for the BSD ring to wake up\n");
1371
1372        /* Now that the ring is fully powered up, update the tail */
1373        I915_WRITE_TAIL(ring, value);
1374        POSTING_READ(RING_TAIL(ring->mmio_base));
1375
1376        /* Let the ring send IDLE messages to the GT again,
1377         * and so let it sleep to conserve power when idle.
1378         */
1379        I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
1380                   _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
1381}
1382
1383static int gen6_ring_flush(struct intel_ring_buffer *ring,
1384                           u32 invalidate, u32 flush)
1385{
1386        uint32_t cmd;
1387        int ret;
1388
1389        ret = intel_ring_begin(ring, 4);
1390        if (ret)
1391                return ret;
1392
1393        cmd = MI_FLUSH_DW;
1394        if (invalidate & I915_GEM_GPU_DOMAINS)
1395                cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
1396        intel_ring_emit(ring, cmd);
1397        intel_ring_emit(ring, 0);
1398        intel_ring_emit(ring, 0);
1399        intel_ring_emit(ring, MI_NOOP);
1400        intel_ring_advance(ring);
1401        return 0;
1402}
1403
1404static int
1405gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
1406                              u32 offset, u32 len)
1407{
1408        int ret;
1409
1410        ret = intel_ring_begin(ring, 2);
1411        if (ret)
1412                return ret;
1413
1414        intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_NON_SECURE_I965);
 1415        /* bits 0-7 of MI_BATCH_BUFFER_START are the DWord length on GEN6+ */
1416        intel_ring_emit(ring, offset);
1417        intel_ring_advance(ring);
1418
1419        return 0;
1420}
1421
1422/* Blitter support (SandyBridge+) */
1423
1424static int blt_ring_flush(struct intel_ring_buffer *ring,
1425                          u32 invalidate, u32 flush)
1426{
1427        uint32_t cmd;
1428        int ret;
1429
1430        ret = intel_ring_begin(ring, 4);
1431        if (ret)
1432                return ret;
1433
1434        cmd = MI_FLUSH_DW;
1435        if (invalidate & I915_GEM_DOMAIN_RENDER)
1436                cmd |= MI_INVALIDATE_TLB;
1437        intel_ring_emit(ring, cmd);
1438        intel_ring_emit(ring, 0);
1439        intel_ring_emit(ring, 0);
1440        intel_ring_emit(ring, MI_NOOP);
1441        intel_ring_advance(ring);
1442        return 0;
1443}
1444
1445int intel_init_render_ring_buffer(struct drm_device *dev)
1446{
1447        drm_i915_private_t *dev_priv = dev->dev_private;
1448        struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
1449
1450        ring->name = "render ring";
1451        ring->id = RCS;
1452        ring->mmio_base = RENDER_RING_BASE;
1453
1454        if (INTEL_INFO(dev)->gen >= 6) {
1455                ring->add_request = gen6_add_request;
1456                ring->flush = gen7_render_ring_flush;
1457                if (INTEL_INFO(dev)->gen == 6)
1458                        ring->flush = gen6_render_ring_flush;
1459                ring->irq_get = gen6_ring_get_irq;
1460                ring->irq_put = gen6_ring_put_irq;
1461                ring->irq_enable_mask = GT_USER_INTERRUPT;
1462                ring->get_seqno = gen6_ring_get_seqno;
1463                ring->sync_to = gen6_ring_sync;
1464                ring->semaphore_register[0] = MI_SEMAPHORE_SYNC_INVALID;
1465                ring->semaphore_register[1] = MI_SEMAPHORE_SYNC_RV;
1466                ring->semaphore_register[2] = MI_SEMAPHORE_SYNC_RB;
1467                ring->signal_mbox[0] = GEN6_VRSYNC;
1468                ring->signal_mbox[1] = GEN6_BRSYNC;
1469        } else if (IS_GEN5(dev)) {
1470                ring->add_request = pc_render_add_request;
1471                ring->flush = gen4_render_ring_flush;
1472                ring->get_seqno = pc_render_get_seqno;
1473                ring->irq_get = gen5_ring_get_irq;
1474                ring->irq_put = gen5_ring_put_irq;
1475                ring->irq_enable_mask = GT_USER_INTERRUPT | GT_PIPE_NOTIFY;
1476        } else {
1477                ring->add_request = i9xx_add_request;
1478                if (INTEL_INFO(dev)->gen < 4)
1479                        ring->flush = gen2_render_ring_flush;
1480                else
1481                        ring->flush = gen4_render_ring_flush;
1482                ring->get_seqno = ring_get_seqno;
1483                if (IS_GEN2(dev)) {
1484                        ring->irq_get = i8xx_ring_get_irq;
1485                        ring->irq_put = i8xx_ring_put_irq;
1486                } else {
1487                        ring->irq_get = i9xx_ring_get_irq;
1488                        ring->irq_put = i9xx_ring_put_irq;
1489                }
1490                ring->irq_enable_mask = I915_USER_INTERRUPT;
1491        }
1492        ring->write_tail = ring_write_tail;
1493        if (INTEL_INFO(dev)->gen >= 6)
1494                ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
1495        else if (INTEL_INFO(dev)->gen >= 4)
1496                ring->dispatch_execbuffer = i965_dispatch_execbuffer;
1497        else if (IS_I830(dev) || IS_845G(dev))
1498                ring->dispatch_execbuffer = i830_dispatch_execbuffer;
1499        else
1500                ring->dispatch_execbuffer = i915_dispatch_execbuffer;
1501        ring->init = init_render_ring;
1502        ring->cleanup = render_ring_cleanup;
1503
1504
1505        if (!I915_NEED_GFX_HWS(dev)) {
1506                ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
1507                memset(ring->status_page.page_addr, 0, PAGE_SIZE);
1508        }
1509
1510        return intel_init_ring_buffer(dev, ring);
1511}
1512
1513int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
1514{
1515        drm_i915_private_t *dev_priv = dev->dev_private;
1516        struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
1517
1518        ring->name = "render ring";
1519        ring->id = RCS;
1520        ring->mmio_base = RENDER_RING_BASE;
1521
1522        if (INTEL_INFO(dev)->gen >= 6) {
1523                /* non-kms not supported on gen6+ */
1524                return -ENODEV;
1525        }
1526
1527        /* Note: gem is not supported on gen5/ilk without kms (the corresponding
1528         * gem_init ioctl returns with -ENODEV). Hence we do not need to set up
1529         * the special gen5 functions. */
1530        ring->add_request = i9xx_add_request;
1531        if (INTEL_INFO(dev)->gen < 4)
1532                ring->flush = gen2_render_ring_flush;
1533        else
1534                ring->flush = gen4_render_ring_flush;
1535        ring->get_seqno = ring_get_seqno;
1536        if (IS_GEN2(dev)) {
1537                ring->irq_get = i8xx_ring_get_irq;
1538                ring->irq_put = i8xx_ring_put_irq;
1539        } else {
1540                ring->irq_get = i9xx_ring_get_irq;
1541                ring->irq_put = i9xx_ring_put_irq;
1542        }
1543        ring->irq_enable_mask = I915_USER_INTERRUPT;
1544        ring->write_tail = ring_write_tail;
1545        if (INTEL_INFO(dev)->gen >= 4)
1546                ring->dispatch_execbuffer = i965_dispatch_execbuffer;
1547        else if (IS_I830(dev) || IS_845G(dev))
1548                ring->dispatch_execbuffer = i830_dispatch_execbuffer;
1549        else
1550                ring->dispatch_execbuffer = i915_dispatch_execbuffer;
1551        ring->init = init_render_ring;
1552        ring->cleanup = render_ring_cleanup;
1553
1554        if (!I915_NEED_GFX_HWS(dev))
1555                ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
1556
1557        ring->dev = dev;
1558        INIT_LIST_HEAD(&ring->active_list);
1559        INIT_LIST_HEAD(&ring->request_list);
1560
1561        ring->size = size;
1562        ring->effective_size = ring->size;
1563        if (IS_I830(ring->dev))
1564                ring->effective_size -= 128;
1565
1566        ring->virtual_start = ioremap_wc(start, size);
1567        if (ring->virtual_start == NULL) {
1568                DRM_ERROR("can not ioremap virtual address for"
1569                          " ring buffer\n");
1570                return -ENOMEM;
1571        }
1572
1573        return 0;
1574}
1575
1576int intel_init_bsd_ring_buffer(struct drm_device *dev)
1577{
1578        drm_i915_private_t *dev_priv = dev->dev_private;
1579        struct intel_ring_buffer *ring = &dev_priv->ring[VCS];
1580
1581        ring->name = "bsd ring";
1582        ring->id = VCS;
1583
1584        ring->write_tail = ring_write_tail;
1585        if (IS_GEN6(dev) || IS_GEN7(dev)) {
1586                ring->mmio_base = GEN6_BSD_RING_BASE;
1587                /* gen6 bsd needs a special wa for tail updates */
1588                if (IS_GEN6(dev))
1589                        ring->write_tail = gen6_bsd_ring_write_tail;
1590                ring->flush = gen6_ring_flush;
1591                ring->add_request = gen6_add_request;
1592                ring->get_seqno = gen6_ring_get_seqno;
1593                ring->irq_enable_mask = GEN6_BSD_USER_INTERRUPT;
1594                ring->irq_get = gen6_ring_get_irq;
1595                ring->irq_put = gen6_ring_put_irq;
1596                ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
1597                ring->sync_to = gen6_ring_sync;
1598                ring->semaphore_register[0] = MI_SEMAPHORE_SYNC_VR;
1599                ring->semaphore_register[1] = MI_SEMAPHORE_SYNC_INVALID;
1600                ring->semaphore_register[2] = MI_SEMAPHORE_SYNC_VB;
1601                ring->signal_mbox[0] = GEN6_RVSYNC;
1602                ring->signal_mbox[1] = GEN6_BVSYNC;
1603        } else {
1604                ring->mmio_base = BSD_RING_BASE;
1605                ring->flush = bsd_ring_flush;
1606                ring->add_request = i9xx_add_request;
1607                ring->get_seqno = ring_get_seqno;
1608                if (IS_GEN5(dev)) {
1609                        ring->irq_enable_mask = GT_BSD_USER_INTERRUPT;
1610                        ring->irq_get = gen5_ring_get_irq;
1611                        ring->irq_put = gen5_ring_put_irq;
1612                } else {
1613                        ring->irq_enable_mask = I915_BSD_USER_INTERRUPT;
1614                        ring->irq_get = i9xx_ring_get_irq;
1615                        ring->irq_put = i9xx_ring_put_irq;
1616                }
1617                ring->dispatch_execbuffer = i965_dispatch_execbuffer;
1618        }
1619        ring->init = init_ring_common;
1620
1621
1622        return intel_init_ring_buffer(dev, ring);
1623}
1624
1625int intel_init_blt_ring_buffer(struct drm_device *dev)
1626{
1627        drm_i915_private_t *dev_priv = dev->dev_private;
1628        struct intel_ring_buffer *ring = &dev_priv->ring[BCS];
1629
1630        ring->name = "blitter ring";
1631        ring->id = BCS;
1632
1633        ring->mmio_base = BLT_RING_BASE;
1634        ring->write_tail = ring_write_tail;
1635        ring->flush = blt_ring_flush;
1636        ring->add_request = gen6_add_request;
1637        ring->get_seqno = gen6_ring_get_seqno;
1638        ring->irq_enable_mask = GEN6_BLITTER_USER_INTERRUPT;
1639        ring->irq_get = gen6_ring_get_irq;
1640        ring->irq_put = gen6_ring_put_irq;
1641        ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
1642        ring->sync_to = gen6_ring_sync;
1643        ring->semaphore_register[0] = MI_SEMAPHORE_SYNC_BR;
1644        ring->semaphore_register[1] = MI_SEMAPHORE_SYNC_BV;
1645        ring->semaphore_register[2] = MI_SEMAPHORE_SYNC_INVALID;
1646        ring->signal_mbox[0] = GEN6_RBSYNC;
1647        ring->signal_mbox[1] = GEN6_VBSYNC;
1648        ring->init = init_ring_common;
1649
1650        return intel_init_ring_buffer(dev, ring);
1651}
1652
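     /*
      * Flush the GPU write domains if earlier rendering left the caches
      * dirty; the invalidate variant below additionally invalidates the
      * read caches before new commands consume freshly written data.
      */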
1653int
1654intel_ring_flush_all_caches(struct intel_ring_buffer *ring)
1655{
1656        int ret;
1657
1658        if (!ring->gpu_caches_dirty)
1659                return 0;
1660
1661        ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS);
1662        if (ret)
1663                return ret;
1664
1665        trace_i915_gem_ring_flush(ring, 0, I915_GEM_GPU_DOMAINS);
1666
1667        ring->gpu_caches_dirty = false;
1668        return 0;
1669}
1670
1671int
1672intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring)
1673{
1674        uint32_t flush_domains;
1675        int ret;
1676
1677        flush_domains = 0;
1678        if (ring->gpu_caches_dirty)
1679                flush_domains = I915_GEM_GPU_DOMAINS;
1680
1681        ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
1682        if (ret)
1683                return ret;
1684
1685        trace_i915_gem_ring_flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
1686
1687        ring->gpu_caches_dirty = false;
1688        return 0;
1689}
1690