linux/drivers/gpu/drm/i915/intel_engine_cs.c
   1/*
   2 * Copyright © 2016 Intel Corporation
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice (including the next
  12 * paragraph) shall be included in all copies or substantial portions of the
  13 * Software.
  14 *
  15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21 * IN THE SOFTWARE.
  22 *
  23 */
  24
  25#include "i915_drv.h"
  26#include "intel_ringbuffer.h"
  27#include "intel_lrc.h"
  28
  29/* Haswell does have the CXT_SIZE register however it does not appear to be
  30 * valid. Now, docs explain in dwords what is in the context object. The full
  31 * size is 70720 bytes, however, the power context and execlist context will
  32 * never be saved (power context is stored elsewhere, and execlists don't work
  33 * on HSW) - so the final size, including the extra state required for the
  34 * Resource Streamer, is 66944 bytes, which rounds to 17 pages.
  35 */
  36#define HSW_CXT_TOTAL_SIZE              (17 * PAGE_SIZE)
  37/* Same as Haswell, but 72064 bytes now. */
  38#define GEN8_CXT_TOTAL_SIZE             (18 * PAGE_SIZE)
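/*
 * For reference, assuming the usual 4096-byte PAGE_SIZE, the rounding quoted
 * above works out as:
 *   HSW:  66944 bytes / 4096 = 16.34 -> rounds up to 17 pages
 *   GEN8: 72064 bytes / 4096 = 17.59 -> rounds up to 18 pages
 */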
  39
  40#define GEN8_LR_CONTEXT_RENDER_SIZE     (20 * PAGE_SIZE)
  41#define GEN9_LR_CONTEXT_RENDER_SIZE     (22 * PAGE_SIZE)
  42
  43#define GEN8_LR_CONTEXT_OTHER_SIZE      ( 2 * PAGE_SIZE)
  44
  45struct engine_class_info {
  46        const char *name;
  47        int (*init_legacy)(struct intel_engine_cs *engine);
  48        int (*init_execlists)(struct intel_engine_cs *engine);
  49};
  50
  51static const struct engine_class_info intel_engine_classes[] = {
  52        [RENDER_CLASS] = {
  53                .name = "rcs",
  54                .init_execlists = logical_render_ring_init,
  55                .init_legacy = intel_init_render_ring_buffer,
  56        },
  57        [COPY_ENGINE_CLASS] = {
  58                .name = "bcs",
  59                .init_execlists = logical_xcs_ring_init,
  60                .init_legacy = intel_init_blt_ring_buffer,
  61        },
  62        [VIDEO_DECODE_CLASS] = {
  63                .name = "vcs",
  64                .init_execlists = logical_xcs_ring_init,
  65                .init_legacy = intel_init_bsd_ring_buffer,
  66        },
  67        [VIDEO_ENHANCEMENT_CLASS] = {
  68                .name = "vecs",
  69                .init_execlists = logical_xcs_ring_init,
  70                .init_legacy = intel_init_vebox_ring_buffer,
  71        },
  72};
  73
  74struct engine_info {
  75        unsigned int hw_id;
  76        unsigned int uabi_id;
  77        u8 class;
  78        u8 instance;
  79        u32 mmio_base;
  80        unsigned irq_shift;
  81};
  82
  83static const struct engine_info intel_engines[] = {
  84        [RCS] = {
  85                .hw_id = RCS_HW,
  86                .uabi_id = I915_EXEC_RENDER,
  87                .class = RENDER_CLASS,
  88                .instance = 0,
  89                .mmio_base = RENDER_RING_BASE,
  90                .irq_shift = GEN8_RCS_IRQ_SHIFT,
  91        },
  92        [BCS] = {
  93                .hw_id = BCS_HW,
  94                .uabi_id = I915_EXEC_BLT,
  95                .class = COPY_ENGINE_CLASS,
  96                .instance = 0,
  97                .mmio_base = BLT_RING_BASE,
  98                .irq_shift = GEN8_BCS_IRQ_SHIFT,
  99        },
 100        [VCS] = {
 101                .hw_id = VCS_HW,
 102                .uabi_id = I915_EXEC_BSD,
 103                .class = VIDEO_DECODE_CLASS,
 104                .instance = 0,
 105                .mmio_base = GEN6_BSD_RING_BASE,
 106                .irq_shift = GEN8_VCS1_IRQ_SHIFT,
 107        },
 108        [VCS2] = {
 109                .hw_id = VCS2_HW,
 110                .uabi_id = I915_EXEC_BSD,
 111                .class = VIDEO_DECODE_CLASS,
 112                .instance = 1,
 113                .mmio_base = GEN8_BSD2_RING_BASE,
 114                .irq_shift = GEN8_VCS2_IRQ_SHIFT,
 115        },
 116        [VECS] = {
 117                .hw_id = VECS_HW,
 118                .uabi_id = I915_EXEC_VEBOX,
 119                .class = VIDEO_ENHANCEMENT_CLASS,
 120                .instance = 0,
 121                .mmio_base = VEBOX_RING_BASE,
 122                .irq_shift = GEN8_VECS_IRQ_SHIFT,
 123        },
 124};
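/*
 * The table above is indexed by enum intel_engine_id; the probe loop in
 * intel_engines_init_mmio() below only instantiates the entries whose bit is
 * set in the device's ring_mask (i.e. for which HAS_ENGINE() is true).
 */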
 125
 126/**
 127 * __intel_engine_context_size() - return the size of the context for an engine
 128 * @dev_priv: i915 device private
 129 * @class: engine class
 130 *
 131 * Each engine class may require a different amount of space for a context
 132 * image.
 133 *
 134 * Return: size (in bytes) of an engine class specific context image
 135 *
 136 * Note: this size includes the HWSP, which is part of the context image
 137 * in LRC mode, but does not include the "shared data page" used with
 138 * GuC submission. The caller should account for this if using the GuC.
 139 */
 140static u32
 141__intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
 142{
 143        u32 cxt_size;
 144
 145        BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE);
 146
 147        switch (class) {
 148        case RENDER_CLASS:
 149                switch (INTEL_GEN(dev_priv)) {
 150                default:
 151                        MISSING_CASE(INTEL_GEN(dev_priv));
 152                case 9:
 153                        return GEN9_LR_CONTEXT_RENDER_SIZE;
 154                case 8:
 155                        return i915.enable_execlists ?
 156                               GEN8_LR_CONTEXT_RENDER_SIZE :
 157                               GEN8_CXT_TOTAL_SIZE;
 158                case 7:
 159                        if (IS_HASWELL(dev_priv))
 160                                return HSW_CXT_TOTAL_SIZE;
 161
 162                        cxt_size = I915_READ(GEN7_CXT_SIZE);
 163                        return round_up(GEN7_CXT_TOTAL_SIZE(cxt_size) * 64,
 164                                        PAGE_SIZE);
 165                case 6:
 166                        cxt_size = I915_READ(CXT_SIZE);
 167                        return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64,
 168                                        PAGE_SIZE);
 169                case 5:
 170                case 4:
 171                case 3:
 172                case 2:
 173                /* For the special day when i810 gets merged. */
 174                case 1:
 175                        return 0;
 176                }
 177                break;
 178        default:
 179                MISSING_CASE(class);
 180        case VIDEO_DECODE_CLASS:
 181        case VIDEO_ENHANCEMENT_CLASS:
 182        case COPY_ENGINE_CLASS:
 183                if (INTEL_GEN(dev_priv) < 8)
 184                        return 0;
 185                return GEN8_LR_CONTEXT_OTHER_SIZE;
 186        }
 187}
 188
 189static int
 190intel_engine_setup(struct drm_i915_private *dev_priv,
 191                   enum intel_engine_id id)
 192{
 193        const struct engine_info *info = &intel_engines[id];
 194        const struct engine_class_info *class_info;
 195        struct intel_engine_cs *engine;
 196
 197        GEM_BUG_ON(info->class >= ARRAY_SIZE(intel_engine_classes));
 198        class_info = &intel_engine_classes[info->class];
 199
 200        GEM_BUG_ON(dev_priv->engine[id]);
 201        engine = kzalloc(sizeof(*engine), GFP_KERNEL);
 202        if (!engine)
 203                return -ENOMEM;
 204
 205        engine->id = id;
 206        engine->i915 = dev_priv;
 207        WARN_ON(snprintf(engine->name, sizeof(engine->name), "%s%u",
 208                         class_info->name, info->instance) >=
 209                sizeof(engine->name));
 210        engine->uabi_id = info->uabi_id;
 211        engine->hw_id = engine->guc_id = info->hw_id;
 212        engine->mmio_base = info->mmio_base;
 213        engine->irq_shift = info->irq_shift;
 214        engine->class = info->class;
 215        engine->instance = info->instance;
 216
 217        engine->context_size = __intel_engine_context_size(dev_priv,
 218                                                           engine->class);
 219        if (WARN_ON(engine->context_size > BIT(20)))
 220                engine->context_size = 0;
 221
 222        /* Nothing to do here, execute in order of dependencies */
 223        engine->schedule = NULL;
 224
 225        ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);
 226
 227        dev_priv->engine[id] = engine;
 228        return 0;
 229}
 230
 231/**
 232 * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
 233 * @dev_priv: i915 device private
 234 *
 235 * Return: non-zero if the initialization failed.
 236 */
 237int intel_engines_init_mmio(struct drm_i915_private *dev_priv)
 238{
 239        struct intel_device_info *device_info = mkwrite_device_info(dev_priv);
 240        const unsigned int ring_mask = INTEL_INFO(dev_priv)->ring_mask;
 241        struct intel_engine_cs *engine;
 242        enum intel_engine_id id;
 243        unsigned int mask = 0;
 244        unsigned int i;
 245        int err;
 246
 247        WARN_ON(ring_mask == 0);
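        /*
         * The GENMASK below covers every bit from I915_NUM_ENGINES upwards,
         * so this warns if the device claims an engine that the
         * intel_engines[] table above cannot describe.
         */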
 248        WARN_ON(ring_mask &
 249                GENMASK(sizeof(mask) * BITS_PER_BYTE - 1, I915_NUM_ENGINES));
 250
 251        for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
 252                if (!HAS_ENGINE(dev_priv, i))
 253                        continue;
 254
 255                err = intel_engine_setup(dev_priv, i);
 256                if (err)
 257                        goto cleanup;
 258
 259                mask |= ENGINE_MASK(i);
 260        }
 261
 262        /*
 263         * Catch failures to update the intel_engines table when new engines
 264         * are added to the driver: warn about them and disable the forgotten
 265         * engines.
 266         */
 267        if (WARN_ON(mask != ring_mask))
 268                device_info->ring_mask = mask;
 269
 270        /* We always presume we have at least RCS available for later probing */
 271        if (WARN_ON(!HAS_ENGINE(dev_priv, RCS))) {
 272                err = -ENODEV;
 273                goto cleanup;
 274        }
 275
 276        device_info->num_rings = hweight32(mask);
 277
 278        return 0;
 279
 280cleanup:
 281        for_each_engine(engine, dev_priv, id)
 282                kfree(engine);
 283        return err;
 284}
 285
 286/**
 287 * intel_engines_init() - init the Engine Command Streamers
 288 * @dev_priv: i915 device private
 289 *
 290 * Return: non-zero if the initialization failed.
 291 */
 292int intel_engines_init(struct drm_i915_private *dev_priv)
 293{
 294        struct intel_device_info *device_info = mkwrite_device_info(dev_priv);
 295        struct intel_engine_cs *engine;
 296        enum intel_engine_id id, err_id;
 297        unsigned int mask = 0;
 298        int err = 0;
 299
 300        for_each_engine(engine, dev_priv, id) {
 301                const struct engine_class_info *class_info =
 302                        &intel_engine_classes[engine->class];
 303                int (*init)(struct intel_engine_cs *engine);
 304
 305                if (i915.enable_execlists)
 306                        init = class_info->init_execlists;
 307                else
 308                        init = class_info->init_legacy;
 309                if (!init) {
 310                        kfree(engine);
 311                        dev_priv->engine[id] = NULL;
 312                        continue;
 313                }
 314
 315                err = init(engine);
 316                if (err) {
 317                        err_id = id;
 318                        goto cleanup;
 319                }
 320
 321                GEM_BUG_ON(!engine->submit_request);
 322                mask |= ENGINE_MASK(id);
 323        }
 324
 325        /*
 326         * Catch failures to update the intel_engines table when new engines
 327         * are added to the driver: warn about them and disable the forgotten
 328         * engines.
 329         */
 330        if (WARN_ON(mask != INTEL_INFO(dev_priv)->ring_mask))
 331                device_info->ring_mask = mask;
 332
 333        device_info->num_rings = hweight32(mask);
 334
 335        return 0;
 336
 337cleanup:
 338        for_each_engine(engine, dev_priv, id) {
 339                if (id >= err_id)
 340                        kfree(engine);
 341                else
 342                        dev_priv->gt.cleanup_engine(engine);
 343        }
 344        return err;
 345}
 346
 347void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno)
 348{
 349        struct drm_i915_private *dev_priv = engine->i915;
 350
 351        GEM_BUG_ON(!intel_engine_is_idle(engine));
 352        GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request));
 353
 354        /* Our semaphore implementation is strictly monotonic (i.e. we proceed
 355         * so long as the semaphore value in the register/page is greater
 356         * than the sync value), so whenever we reset the seqno, as long as we
 357         * also reset the tracking semaphore value to 0 it will always be before
 358         * the next request's seqno. If we don't reset
 359         * the semaphore value, then when the seqno moves backwards all
 360         * future waits will complete instantly (causing rendering corruption).
 361         */
 362        if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) {
 363                I915_WRITE(RING_SYNC_0(engine->mmio_base), 0);
 364                I915_WRITE(RING_SYNC_1(engine->mmio_base), 0);
 365                if (HAS_VEBOX(dev_priv))
 366                        I915_WRITE(RING_SYNC_2(engine->mmio_base), 0);
 367        }
 368        if (dev_priv->semaphore) {
 369                struct page *page = i915_vma_first_page(dev_priv->semaphore);
 370                void *semaphores;
 371
 372                /* Semaphores are in noncoherent memory, flush to be safe */
 373                semaphores = kmap_atomic(page);
 374                memset(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0),
 375                       0, I915_NUM_ENGINES * gen8_semaphore_seqno_size);
 376                drm_clflush_virt_range(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0),
 377                                       I915_NUM_ENGINES * gen8_semaphore_seqno_size);
 378                kunmap_atomic(semaphores);
 379        }
 380
 381        intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
 382        clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
 383
 384        /* After manually advancing the seqno, fake the interrupt in case
 385         * there are any waiters for that seqno.
 386         */
 387        intel_engine_wakeup(engine);
 388
 389        GEM_BUG_ON(intel_engine_get_seqno(engine) != seqno);
 390}
 391
 392static void intel_engine_init_timeline(struct intel_engine_cs *engine)
 393{
 394        engine->timeline = &engine->i915->gt.global_timeline.engine[engine->id];
 395}
 396
 397/**
 398 * intel_engine_setup_common - setup engine state not requiring hw access
 399 * @engine: Engine to setup.
 400 *
 401 * Initializes @engine structure members shared between legacy and execlists
 402 * submission modes which do not require hardware access.
 403 *
 404 * Typically done early in the submission mode specific engine setup stage.
 405 */
 406void intel_engine_setup_common(struct intel_engine_cs *engine)
 407{
 408        engine->execlist_queue = RB_ROOT;
 409        engine->execlist_first = NULL;
 410
 411        intel_engine_init_timeline(engine);
 412        intel_engine_init_hangcheck(engine);
 413        i915_gem_batch_pool_init(engine, &engine->batch_pool);
 414
 415        intel_engine_init_cmd_parser(engine);
 416}
 417
 418int intel_engine_create_scratch(struct intel_engine_cs *engine, int size)
 419{
 420        struct drm_i915_gem_object *obj;
 421        struct i915_vma *vma;
 422        int ret;
 423
 424        WARN_ON(engine->scratch);
 425
 426        obj = i915_gem_object_create_stolen(engine->i915, size);
 427        if (!obj)
 428                obj = i915_gem_object_create_internal(engine->i915, size);
 429        if (IS_ERR(obj)) {
 430                DRM_ERROR("Failed to allocate scratch page\n");
 431                return PTR_ERR(obj);
 432        }
 433
 434        vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL);
 435        if (IS_ERR(vma)) {
 436                ret = PTR_ERR(vma);
 437                goto err_unref;
 438        }
 439
 440        ret = i915_vma_pin(vma, 0, 4096, PIN_GLOBAL | PIN_HIGH);
 441        if (ret)
 442                goto err_unref;
 443
 444        engine->scratch = vma;
 445        DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n",
 446                         engine->name, i915_ggtt_offset(vma));
 447        return 0;
 448
 449err_unref:
 450        i915_gem_object_put(obj);
 451        return ret;
 452}
 453
 454static void intel_engine_cleanup_scratch(struct intel_engine_cs *engine)
 455{
 456        i915_vma_unpin_and_release(&engine->scratch);
 457}
 458
 459/**
 460 * intel_engine_init_common - initialize engine state which might require hw access
 461 * @engine: Engine to initialize.
 462 *
 463 * Initializes @engine structure members shared between legacy and execlists
 464 * submission modes which do require hardware access.
 465 *
 466 * Typically done at later stages of submission mode specific engine setup.
 467 *
 468 * Returns zero on success or an error code on failure.
 469 */
 470int intel_engine_init_common(struct intel_engine_cs *engine)
 471{
 472        struct intel_ring *ring;
 473        int ret;
 474
 475        engine->set_default_submission(engine);
 476
 477        /* We may need to do things with the shrinker which
 478         * require us to immediately switch back to the default
 479         * context. This can cause a problem as pinning the
 480         * default context also requires GTT space which may not
 481         * be available. To avoid this we always pin the default
 482         * context.
 483         */
 484        ring = engine->context_pin(engine, engine->i915->kernel_context);
 485        if (IS_ERR(ring))
 486                return PTR_ERR(ring);
 487
 488        ret = intel_engine_init_breadcrumbs(engine);
 489        if (ret)
 490                goto err_unpin;
 491
 492        ret = i915_gem_render_state_init(engine);
 493        if (ret)
 494                goto err_unpin;
 495
 496        return 0;
 497
 498err_unpin:
 499        engine->context_unpin(engine, engine->i915->kernel_context);
 500        return ret;
 501}
 502
 503/**
 504 * intel_engine_cleanup_common - cleans up the engine state created by
 505 *                               the common initializers.
 506 * @engine: Engine to cleanup.
 507 *
 508 * This cleans up everything created by the common helpers.
 509 */
 510void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 511{
 512        intel_engine_cleanup_scratch(engine);
 513
 514        i915_gem_render_state_fini(engine);
 515        intel_engine_fini_breadcrumbs(engine);
 516        intel_engine_cleanup_cmd_parser(engine);
 517        i915_gem_batch_pool_fini(&engine->batch_pool);
 518
 519        engine->context_unpin(engine, engine->i915->kernel_context);
 520}
 521
 522u64 intel_engine_get_active_head(struct intel_engine_cs *engine)
 523{
 524        struct drm_i915_private *dev_priv = engine->i915;
 525        u64 acthd;
 526
 527        if (INTEL_GEN(dev_priv) >= 8)
 528                acthd = I915_READ64_2x32(RING_ACTHD(engine->mmio_base),
 529                                         RING_ACTHD_UDW(engine->mmio_base));
 530        else if (INTEL_GEN(dev_priv) >= 4)
 531                acthd = I915_READ(RING_ACTHD(engine->mmio_base));
 532        else
 533                acthd = I915_READ(ACTHD);
 534
 535        return acthd;
 536}
 537
 538u64 intel_engine_get_last_batch_head(struct intel_engine_cs *engine)
 539{
 540        struct drm_i915_private *dev_priv = engine->i915;
 541        u64 bbaddr;
 542
 543        if (INTEL_GEN(dev_priv) >= 8)
 544                bbaddr = I915_READ64_2x32(RING_BBADDR(engine->mmio_base),
 545                                          RING_BBADDR_UDW(engine->mmio_base));
 546        else
 547                bbaddr = I915_READ(RING_BBADDR(engine->mmio_base));
 548
 549        return bbaddr;
 550}
 551
 552const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
 553{
 554        switch (type) {
 555        case I915_CACHE_NONE: return " uncached";
 556        case I915_CACHE_LLC: return HAS_LLC(i915) ? " LLC" : " snooped";
 557        case I915_CACHE_L3_LLC: return " L3+LLC";
 558        case I915_CACHE_WT: return " WT";
 559        default: return "";
 560        }
 561}
 562
 563static inline uint32_t
 564read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
 565                  int subslice, i915_reg_t reg)
 566{
 567        uint32_t mcr;
 568        uint32_t ret;
 569        enum forcewake_domains fw_domains;
 570
 571        fw_domains = intel_uncore_forcewake_for_reg(dev_priv, reg,
 572                                                    FW_REG_READ);
 573        fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
 574                                                     GEN8_MCR_SELECTOR,
 575                                                     FW_REG_READ | FW_REG_WRITE);
 576
 577        spin_lock_irq(&dev_priv->uncore.lock);
 578        intel_uncore_forcewake_get__locked(dev_priv, fw_domains);
 579
 580        mcr = I915_READ_FW(GEN8_MCR_SELECTOR);
 581        /*
 582         * The HW expects the slice and subslice selectors to be reset to 0
 583         * after reading out the registers.
 584         */
 585        WARN_ON_ONCE(mcr & (GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK));
 586        mcr &= ~(GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK);
 587        mcr |= GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
 588        I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);
 589
 590        ret = I915_READ_FW(reg);
 591
 592        mcr &= ~(GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK);
 593        I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);
 594
 595        intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
 596        spin_unlock_irq(&dev_priv->uncore.lock);
 597
 598        return ret;
 599}
 600
 601/* NB: please notice the memset */
 602void intel_engine_get_instdone(struct intel_engine_cs *engine,
 603                               struct intel_instdone *instdone)
 604{
 605        struct drm_i915_private *dev_priv = engine->i915;
 606        u32 mmio_base = engine->mmio_base;
 607        int slice;
 608        int subslice;
 609
 610        memset(instdone, 0, sizeof(*instdone));
 611
 612        switch (INTEL_GEN(dev_priv)) {
 613        default:
 614                instdone->instdone = I915_READ(RING_INSTDONE(mmio_base));
 615
 616                if (engine->id != RCS)
 617                        break;
 618
 619                instdone->slice_common = I915_READ(GEN7_SC_INSTDONE);
 620                for_each_instdone_slice_subslice(dev_priv, slice, subslice) {
 621                        instdone->sampler[slice][subslice] =
 622                                read_subslice_reg(dev_priv, slice, subslice,
 623                                                  GEN7_SAMPLER_INSTDONE);
 624                        instdone->row[slice][subslice] =
 625                                read_subslice_reg(dev_priv, slice, subslice,
 626                                                  GEN7_ROW_INSTDONE);
 627                }
 628                break;
 629        case 7:
 630                instdone->instdone = I915_READ(RING_INSTDONE(mmio_base));
 631
 632                if (engine->id != RCS)
 633                        break;
 634
 635                instdone->slice_common = I915_READ(GEN7_SC_INSTDONE);
 636                instdone->sampler[0][0] = I915_READ(GEN7_SAMPLER_INSTDONE);
 637                instdone->row[0][0] = I915_READ(GEN7_ROW_INSTDONE);
 638
 639                break;
 640        case 6:
 641        case 5:
 642        case 4:
 643                instdone->instdone = I915_READ(RING_INSTDONE(mmio_base));
 644
 645                if (engine->id == RCS)
 646                        /* HACK: Using the wrong struct member */
 647                        instdone->slice_common = I915_READ(GEN4_INSTDONE1);
 648                break;
 649        case 3:
 650        case 2:
 651                instdone->instdone = I915_READ(GEN2_INSTDONE);
 652                break;
 653        }
 654}
 655
 656static int wa_add(struct drm_i915_private *dev_priv,
 657                  i915_reg_t addr,
 658                  const u32 mask, const u32 val)
 659{
 660        const u32 idx = dev_priv->workarounds.count;
 661
 662        if (WARN_ON(idx >= I915_MAX_WA_REGS))
 663                return -ENOSPC;
 664
 665        dev_priv->workarounds.reg[idx].addr = addr;
 666        dev_priv->workarounds.reg[idx].value = val;
 667        dev_priv->workarounds.reg[idx].mask = mask;
 668
 669        dev_priv->workarounds.count++;
 670
 671        return 0;
 672}
 673
 674#define WA_REG(addr, mask, val) do { \
 675                const int r = wa_add(dev_priv, (addr), (mask), (val)); \
 676                if (r) \
 677                        return r; \
 678        } while (0)
 679
 680#define WA_SET_BIT_MASKED(addr, mask) \
 681        WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask))
 682
 683#define WA_CLR_BIT_MASKED(addr, mask) \
 684        WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask))
 685
 686#define WA_SET_FIELD_MASKED(addr, mask, value) \
 687        WA_REG(addr, mask, _MASKED_FIELD(mask, value))
 688
 689#define WA_SET_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) | (mask))
 690#define WA_CLR_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) & ~(mask))
 691
 692#define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val)
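/*
 * For illustration: the *_MASKED helpers above target registers whose upper
 * 16 bits act as a write-enable mask for the lower 16, so a hypothetical
 * WA_SET_BIT_MASKED(REG, BIT(0)) records the value
 * _MASKED_BIT_ENABLE(BIT(0)) == (BIT(16) | BIT(0)) for REG via wa_add(),
 * to be replayed later by intel_ring_workarounds_emit().
 */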
 693
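/*
 * Each RING_FORCE_TO_NONPRIV slot programmed with a register offset allows
 * that register to be written from unprivileged (userspace) batch buffers;
 * the helper below simply queues the slot write through the same
 * WA_WRITE()/wa_add() machinery as the other workarounds.
 */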
 694static int wa_ring_whitelist_reg(struct intel_engine_cs *engine,
 695                                 i915_reg_t reg)
 696{
 697        struct drm_i915_private *dev_priv = engine->i915;
 698        struct i915_workarounds *wa = &dev_priv->workarounds;
 699        const uint32_t index = wa->hw_whitelist_count[engine->id];
 700
 701        if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS))
 702                return -EINVAL;
 703
 704        WA_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index),
 705                 i915_mmio_reg_offset(reg));
 706        wa->hw_whitelist_count[engine->id]++;
 707
 708        return 0;
 709}
 710
 711static int gen8_init_workarounds(struct intel_engine_cs *engine)
 712{
 713        struct drm_i915_private *dev_priv = engine->i915;
 714
 715        WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
 716
 717        /* WaDisableAsyncFlipPerfMode:bdw,chv */
 718        WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
 719
 720        /* WaDisablePartialInstShootdown:bdw,chv */
 721        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
 722                          PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
 723
 724        /* Use Force Non-Coherent whenever executing a 3D context. This is a
 725         * workaround for a possible hang in the unlikely event a TLB
 726         * invalidation occurs during a PSD flush.
 727         */
 728        /* WaForceEnableNonCoherent:bdw,chv */
 729        /* WaHdcDisableFetchWhenMasked:bdw,chv */
 730        WA_SET_BIT_MASKED(HDC_CHICKEN0,
 731                          HDC_DONOT_FETCH_MEM_WHEN_MASKED |
 732                          HDC_FORCE_NON_COHERENT);
 733
 734        /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
 735         * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
 736         *  polygons in the same 8x4 pixel/sample area to be processed without
 737         *  stalling waiting for the earlier ones to write to Hierarchical Z
 738         *  buffer."
 739         *
 740         * This optimization is off by default for BDW and CHV; turn it on.
 741         */
 742        WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
 743
 744        /* Wa4x4STCOptimizationDisable:bdw,chv */
 745        WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
 746
 747        /*
 748         * BSpec recommends 8x4 when MSAA is used,
 749         * however in practice 16x4 seems fastest.
 750         *
 751         * Note that PS/WM thread counts depend on the WIZ hashing
 752         * disable bit, which we don't touch here, but it's good
 753         * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
 754         */
 755        WA_SET_FIELD_MASKED(GEN7_GT_MODE,
 756                            GEN6_WIZ_HASHING_MASK,
 757                            GEN6_WIZ_HASHING_16x4);
 758
 759        return 0;
 760}
 761
 762static int bdw_init_workarounds(struct intel_engine_cs *engine)
 763{
 764        struct drm_i915_private *dev_priv = engine->i915;
 765        int ret;
 766
 767        ret = gen8_init_workarounds(engine);
 768        if (ret)
 769                return ret;
 770
 771        /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
 772        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
 773
 774        /* WaDisableDopClockGating:bdw
 775         *
 776         * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
 777         * to disable EUTC clock gating.
 778         */
 779        WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
 780                          DOP_CLOCK_GATING_DISABLE);
 781
 782        WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
 783                          GEN8_SAMPLER_POWER_BYPASS_DIS);
 784
 785        WA_SET_BIT_MASKED(HDC_CHICKEN0,
 786                          /* WaForceContextSaveRestoreNonCoherent:bdw */
 787                          HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
 788                          /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
 789                          (IS_BDW_GT3(dev_priv) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
 790
 791        return 0;
 792}
 793
 794static int chv_init_workarounds(struct intel_engine_cs *engine)
 795{
 796        struct drm_i915_private *dev_priv = engine->i915;
 797        int ret;
 798
 799        ret = gen8_init_workarounds(engine);
 800        if (ret)
 801                return ret;
 802
 803        /* WaDisableThreadStallDopClockGating:chv */
 804        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
 805
 806        /* Improve HiZ throughput on CHV. */
 807        WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
 808
 809        return 0;
 810}
 811
 812static int gen9_init_workarounds(struct intel_engine_cs *engine)
 813{
 814        struct drm_i915_private *dev_priv = engine->i915;
 815        int ret;
 816
 817        /* WaConextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
 818        I915_WRITE(GEN9_CSFE_CHICKEN1_RCS, _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE));
 819
 820        /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
 821        I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
 822                   GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
 823
 824        /* WaDisableKillLogic:bxt,skl,kbl */
 825        if (!IS_COFFEELAKE(dev_priv))
 826                I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
 827                           ECOCHK_DIS_TLB);
 828
 829        /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
 830        /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
 831        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
 832                          FLOW_CONTROL_ENABLE |
 833                          PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
 834
 835        /* Syncing dependencies between camera and graphics:skl,bxt,kbl */
 836        if (!IS_COFFEELAKE(dev_priv))
 837                WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
 838                                  GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);
 839
 840        /* WaDisableDgMirrorFixInHalfSliceChicken5:bxt */
 841        if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1))
 842                WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
 843                                  GEN9_DG_MIRROR_FIX_ENABLE);
 844
 845        /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:bxt */
 846        if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) {
 847                WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1,
 848                                  GEN9_RHWO_OPTIMIZATION_DISABLE);
 849                /*
 850                 * WA also requires GEN9_SLICE_COMMON_ECO_CHICKEN0[14:14] to be set
 851                 * but we do that in per ctx batchbuffer as there is an issue
 852                 * with this register not getting restored on ctx restore
 853                 */
 854        }
 855
 856        /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
 857        /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
 858        WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
 859                          GEN9_ENABLE_YV12_BUGFIX |
 860                          GEN9_ENABLE_GPGPU_PREEMPTION);
 861
 862        /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
 863        /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
 864        WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE |
 865                                         GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE));
 866
 867        /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
 868        WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
 869                          GEN9_CCS_TLB_PREFETCH_ENABLE);
 870
 871        /* WaDisableMaskBasedCammingInRCC:bxt */
 872        if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1))
 873                WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0,
 874                                  PIXEL_MASK_CAMMING_DISABLE);
 875
 876        /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
 877        WA_SET_BIT_MASKED(HDC_CHICKEN0,
 878                          HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
 879                          HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);
 880
 881        /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
 882         * both tied to WaForceContextSaveRestoreNonCoherent
 883         * in some hsds for skl. We keep the tie for all gen9. The
 884         * documentation is a bit hazy and so we want to get common behaviour,
 885         * even though there is no clear evidence we would need both on kbl/bxt.
 886         * This area has been a source of system hangs so we play it safe
 887         * and mimic the skl regardless of what bspec says.
 888         *
 889         * Use Force Non-Coherent whenever executing a 3D context. This
 890         * is a workaround for a possible hang in the unlikely event
 891         * a TLB invalidation occurs during a PSD flush.
 892         */
 893
 894        /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
 895        WA_SET_BIT_MASKED(HDC_CHICKEN0,
 896                          HDC_FORCE_NON_COHERENT);
 897
 898        /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
 899        I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
 900                   BDW_DISABLE_HDC_INVALIDATION);
 901
 902        /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
 903        if (IS_SKYLAKE(dev_priv) ||
 904            IS_KABYLAKE(dev_priv) ||
 905            IS_COFFEELAKE(dev_priv) ||
 906            IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0))
 907                WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
 908                                  GEN8_SAMPLER_POWER_BYPASS_DIS);
 909
 910        /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
 911        WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
 912
 913        /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
 914        I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) |
 915                                    GEN8_LQSC_FLUSH_COHERENT_LINES));
 916
 917        /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
 918        ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG);
 919        if (ret)
 920                return ret;
 921
 922        /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl */
 923        ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1);
 924        if (ret)
 925                return ret;
 926
 927        /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
 928        ret = wa_ring_whitelist_reg(engine, GEN8_HDC_CHICKEN1);
 929        if (ret)
 930                return ret;
 931
 932        return 0;
 933}
 934
 935static int skl_tune_iz_hashing(struct intel_engine_cs *engine)
 936{
 937        struct drm_i915_private *dev_priv = engine->i915;
 938        u8 vals[3] = { 0, 0, 0 };
 939        unsigned int i;
 940
 941        for (i = 0; i < 3; i++) {
 942                u8 ss;
 943
 944                /*
 945                 * Only consider slices where one, and only one, subslice has 7
 946                 * EUs
 947                 */
 948                if (!is_power_of_2(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]))
 949                        continue;
 950
 951                /*
 952                 * subslice_7eu[i] != 0 (because of the check above) and
 953                 * ss_max == 4 (maximum number of subslices possible per slice)
 954                 *
 955                 * ->    0 <= ss <= 3;
 956                 */
 957                ss = ffs(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]) - 1;
 958                vals[i] = 3 - ss;
 959        }
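        /*
         * Worked example with hypothetical sseu values: if slice 0 reports
         * subslice_7eu[0] == BIT(2), then ss = ffs(BIT(2)) - 1 = 2 and
         * vals[0] = 3 - 2 = 1, which is fed to GEN9_IZ_HASHING(0, 1) below.
         */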
 960
 961        if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
 962                return 0;
 963
 964        /* Tune IZ hashing. See intel_device_info_runtime_init() */
 965        WA_SET_FIELD_MASKED(GEN7_GT_MODE,
 966                            GEN9_IZ_HASHING_MASK(2) |
 967                            GEN9_IZ_HASHING_MASK(1) |
 968                            GEN9_IZ_HASHING_MASK(0),
 969                            GEN9_IZ_HASHING(2, vals[2]) |
 970                            GEN9_IZ_HASHING(1, vals[1]) |
 971                            GEN9_IZ_HASHING(0, vals[0]));
 972
 973        return 0;
 974}
 975
 976static int skl_init_workarounds(struct intel_engine_cs *engine)
 977{
 978        struct drm_i915_private *dev_priv = engine->i915;
 979        int ret;
 980
 981        ret = gen9_init_workarounds(engine);
 982        if (ret)
 983                return ret;
 984
 985        /*
 986         * The actual WA is to disable per-context preemption granularity control
 987         * until D0, which is the default case, so this is equivalent to
 988         * !WaDisablePerCtxtPreemptionGranularityControl:skl
 989         */
 990        I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
 991                   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
 992
 993        /* WaEnableGapsTsvCreditFix:skl */
 994        I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
 995                                   GEN9_GAPS_TSV_CREDIT_DISABLE));
 996
 997        /* WaDisableGafsUnitClkGating:skl */
 998        WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
 999
1000        /* WaInPlaceDecompressionHang:skl */
1001        if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER))
1002                WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
1003                           GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
1004
1005        /* WaDisableLSQCROPERFforOCL:skl */
1006        ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
1007        if (ret)
1008                return ret;
1009
1010        return skl_tune_iz_hashing(engine);
1011}
1012
1013static int bxt_init_workarounds(struct intel_engine_cs *engine)
1014{
1015        struct drm_i915_private *dev_priv = engine->i915;
1016        int ret;
1017
1018        ret = gen9_init_workarounds(engine);
1019        if (ret)
1020                return ret;
1021
1022        /* WaStoreMultiplePTEenable:bxt */
1023        /* This is a requirement according to the hardware specification */
1024        if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1))
1025                I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF);
1026
1027        /* WaSetClckGatingDisableMedia:bxt */
1028        if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) {
1029                I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) &
1030                                            ~GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE));
1031        }
1032
1033        /* WaDisableThreadStallDopClockGating:bxt */
1034        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
1035                          STALL_DOP_GATING_DISABLE);
1036
1037        /* WaDisablePooledEuLoadBalancingFix:bxt */
1038        if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) {
1039                WA_SET_BIT_MASKED(FF_SLICE_CS_CHICKEN2,
1040                                  GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
1041        }
1042
1043        /* WaDisableSbeCacheDispatchPortSharing:bxt */
1044        if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) {
1045                WA_SET_BIT_MASKED(
1046                        GEN7_HALF_SLICE_CHICKEN1,
1047                        GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
1048        }
1049
1050        /* WaDisableObjectLevelPreemptionForTrifanOrPolygon:bxt */
1051        /* WaDisableObjectLevelPreemptionForInstancedDraw:bxt */
1052        /* WaDisableObjectLevelPreemtionForInstanceId:bxt */
1053        /* WaDisableLSQCROPERFforOCL:bxt */
1054        if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) {
1055                ret = wa_ring_whitelist_reg(engine, GEN9_CS_DEBUG_MODE1);
1056                if (ret)
1057                        return ret;
1058
1059                ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
1060                if (ret)
1061                        return ret;
1062        }
1063
1064        /* WaProgramL3SqcReg1DefaultForPerf:bxt */
1065        if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER))
1066                I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(62) |
1067                                           L3_HIGH_PRIO_CREDITS(2));
1068
1069        /* WaToEnableHwFixForPushConstHWBug:bxt */
1070        if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
1071                WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
1072                                  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
1073
1074        /* WaInPlaceDecompressionHang:bxt */
1075        if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
1076                WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
1077                           GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
1078
1079        return 0;
1080}
1081
1082static int kbl_init_workarounds(struct intel_engine_cs *engine)
1083{
1084        struct drm_i915_private *dev_priv = engine->i915;
1085        int ret;
1086
1087        ret = gen9_init_workarounds(engine);
1088        if (ret)
1089                return ret;
1090
1091        /* WaEnableGapsTsvCreditFix:kbl */
1092        I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
1093                                   GEN9_GAPS_TSV_CREDIT_DISABLE));
1094
1095        /* WaDisableDynamicCreditSharing:kbl */
1096        if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
1097                WA_SET_BIT(GAMT_CHKN_BIT_REG,
1098                           GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
1099
1100        /* WaDisableFenceDestinationToSLM:kbl (pre-prod) */
1101        if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0))
1102                WA_SET_BIT_MASKED(HDC_CHICKEN0,
1103                                  HDC_FENCE_DEST_SLM_DISABLE);
1104
1105        /* WaToEnableHwFixForPushConstHWBug:kbl */
1106        if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER))
1107                WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
1108                                  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
1109
1110        /* WaDisableGafsUnitClkGating:kbl */
1111        WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
1112
1113        /* WaDisableSbeCacheDispatchPortSharing:kbl */
1114        WA_SET_BIT_MASKED(
1115                GEN7_HALF_SLICE_CHICKEN1,
1116                GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
1117
1118        /* WaInPlaceDecompressionHang:kbl */
1119        WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
1120                   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
1121
1122        /* WaDisableLSQCROPERFforOCL:kbl */
1123        ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
1124        if (ret)
1125                return ret;
1126
1127        return 0;
1128}
1129
1130static int glk_init_workarounds(struct intel_engine_cs *engine)
1131{
1132        struct drm_i915_private *dev_priv = engine->i915;
1133        int ret;
1134
1135        ret = gen9_init_workarounds(engine);
1136        if (ret)
1137                return ret;
1138
1139        /* WaToEnableHwFixForPushConstHWBug:glk */
1140        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
1141                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
1142
1143        return 0;
1144}
1145
1146static int cfl_init_workarounds(struct intel_engine_cs *engine)
1147{
1148        struct drm_i915_private *dev_priv = engine->i915;
1149        int ret;
1150
1151        ret = gen9_init_workarounds(engine);
1152        if (ret)
1153                return ret;
1154
1155        /* WaEnableGapsTsvCreditFix:cfl */
1156        I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
1157                                   GEN9_GAPS_TSV_CREDIT_DISABLE));
1158
1159        /* WaToEnableHwFixForPushConstHWBug:cfl */
1160        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
1161                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
1162
1163        /* WaDisableGafsUnitClkGating:cfl */
1164        WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
1165
1166        /* WaDisableSbeCacheDispatchPortSharing:cfl */
1167        WA_SET_BIT_MASKED(
1168                GEN7_HALF_SLICE_CHICKEN1,
1169                GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
1170
1171        /* WaInPlaceDecompressionHang:cfl */
1172        WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
1173                   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
1174
1175        return 0;
1176}
1177
1178int init_workarounds_ring(struct intel_engine_cs *engine)
1179{
1180        struct drm_i915_private *dev_priv = engine->i915;
1181        int err;
1182
1183        WARN_ON(engine->id != RCS);
1184
1185        dev_priv->workarounds.count = 0;
1186        dev_priv->workarounds.hw_whitelist_count[engine->id] = 0;
1187
1188        if (IS_BROADWELL(dev_priv))
1189                err = bdw_init_workarounds(engine);
1190        else if (IS_CHERRYVIEW(dev_priv))
1191                err = chv_init_workarounds(engine);
1192        else if (IS_SKYLAKE(dev_priv))
1193                err = skl_init_workarounds(engine);
1194        else if (IS_BROXTON(dev_priv))
1195                err = bxt_init_workarounds(engine);
1196        else if (IS_KABYLAKE(dev_priv))
1197                err = kbl_init_workarounds(engine);
1198        else if (IS_GEMINILAKE(dev_priv))
1199                err = glk_init_workarounds(engine);
1200        else if (IS_COFFEELAKE(dev_priv))
1201                err = cfl_init_workarounds(engine);
1202        else
1203                err = 0;
1204        if (err)
1205                return err;
1206
1207        DRM_DEBUG_DRIVER("%s: Number of context specific w/a: %d\n",
1208                         engine->name, dev_priv->workarounds.count);
1209        return 0;
1210}
1211
1212int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
1213{
1214        struct i915_workarounds *w = &req->i915->workarounds;
1215        u32 *cs;
1216        int ret, i;
1217
1218        if (w->count == 0)
1219                return 0;
1220
1221        ret = req->engine->emit_flush(req, EMIT_BARRIER);
1222        if (ret)
1223                return ret;
1224
1225        cs = intel_ring_begin(req, (w->count * 2 + 2));
1226        if (IS_ERR(cs))
1227                return PTR_ERR(cs);
1228
1229        *cs++ = MI_LOAD_REGISTER_IMM(w->count);
1230        for (i = 0; i < w->count; i++) {
1231                *cs++ = i915_mmio_reg_offset(w->reg[i].addr);
1232                *cs++ = w->reg[i].value;
1233        }
1234        *cs++ = MI_NOOP;
1235
1236        intel_ring_advance(req, cs);
1237
1238        ret = req->engine->emit_flush(req, EMIT_BARRIER);
1239        if (ret)
1240                return ret;
1241
1242        return 0;
1243}
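/*
 * For reference, the command stream emitted above for a hypothetical pair of
 * workarounds is a single LRI packet bracketed by the two EMIT_BARRIER
 * flushes:
 *
 *   MI_LOAD_REGISTER_IMM(2)
 *     <offset of reg[0]>  <reg[0].value>
 *     <offset of reg[1]>  <reg[1].value>
 *   MI_NOOP
 */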
1244
1245static bool ring_is_idle(struct intel_engine_cs *engine)
1246{
1247        struct drm_i915_private *dev_priv = engine->i915;
1248        bool idle = true;
1249
1250        intel_runtime_pm_get(dev_priv);
1251
1252        /* First check that no commands are left in the ring */
1253        if ((I915_READ_HEAD(engine) & HEAD_ADDR) !=
1254            (I915_READ_TAIL(engine) & TAIL_ADDR))
1255                idle = false;
1256
1257        /* No bit for gen2, so assume the CS parser is idle */
1258        if (INTEL_GEN(dev_priv) > 2 && !(I915_READ_MODE(engine) & MODE_IDLE))
1259                idle = false;
1260
1261        intel_runtime_pm_put(dev_priv);
1262
1263        return idle;
1264}
1265
1266/**
1267 * intel_engine_is_idle() - Report if the engine has finished processing all work
1268 * @engine: the intel_engine_cs
1269 *
1270 * Return true if there are no requests pending, nothing left to be submitted
1271 * to hardware, and the engine is idle.
1272 */
1273bool intel_engine_is_idle(struct intel_engine_cs *engine)
1274{
1275        struct drm_i915_private *dev_priv = engine->i915;
1276
1277        /* More white lies: if wedged, the HW state is inconsistent */
1278        if (i915_terminally_wedged(&dev_priv->gpu_error))
1279                return true;
1280
1281        /* Any inflight/incomplete requests? */
1282        if (!i915_seqno_passed(intel_engine_get_seqno(engine),
1283                               intel_engine_last_submit(engine)))
1284                return false;
1285
1286        if (I915_SELFTEST_ONLY(engine->breadcrumbs.mock))
1287                return true;
1288
1289        /* Interrupt/tasklet pending? */
1290        if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
1291                return false;
1292
1293        /* Both ports drained, no more ELSP submission? */
1294        if (port_request(&engine->execlist_port[0]))
1295                return false;
1296
1297        /* Ring stopped? */
1298        if (!ring_is_idle(engine))
1299                return false;
1300
1301        return true;
1302}
1303
1304bool intel_engines_are_idle(struct drm_i915_private *dev_priv)
1305{
1306        struct intel_engine_cs *engine;
1307        enum intel_engine_id id;
1308
1309        if (READ_ONCE(dev_priv->gt.active_requests))
1310                return false;
1311
1312        /* If the driver is wedged, HW state may be very inconsistent and
1313         * report that it is still busy, even though we have stopped using it.
1314         */
1315        if (i915_terminally_wedged(&dev_priv->gpu_error))
1316                return true;
1317
1318        for_each_engine(engine, dev_priv, id) {
1319                if (!intel_engine_is_idle(engine))
1320                        return false;
1321        }
1322
1323        return true;
1324}
1325
1326void intel_engines_reset_default_submission(struct drm_i915_private *i915)
1327{
1328        struct intel_engine_cs *engine;
1329        enum intel_engine_id id;
1330
1331        for_each_engine(engine, i915, id)
1332                engine->set_default_submission(engine);
1333}
1334
1335void intel_engines_mark_idle(struct drm_i915_private *i915)
1336{
1337        struct intel_engine_cs *engine;
1338        enum intel_engine_id id;
1339
1340        for_each_engine(engine, i915, id) {
1341                intel_engine_disarm_breadcrumbs(engine);
1342                i915_gem_batch_pool_fini(&engine->batch_pool);
1343                engine->no_priolist = false;
1344        }
1345}
1346
1347#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1348#include "selftests/mock_engine.c"
1349#endif
1350