linux/drivers/gpu/drm/i915/gt/intel_workarounds.c
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2018 Intel Corporation
 */

#include "i915_drv.h"
#include "intel_context.h"
#include "intel_workarounds.h"

/**
 * DOC: Hardware workarounds
 *
 * This file is intended as a central place to implement most [1]_ of the
 * required workarounds for hardware to work as originally intended. They fall
 * into five basic categories depending on how/when they are applied:
 *
 * - Workarounds that touch registers that are saved/restored to/from the HW
 *   context image. The list is emitted (via Load Register Immediate commands)
 *   every time a new context is created.
 * - GT workarounds. The list of these WAs is applied whenever these registers
 *   revert to their default values (on GPU reset, suspend/resume [2]_, etc.).
 * - Display workarounds. The list is applied during display clock-gating
 *   initialization.
 * - Workarounds that whitelist a privileged register, so that UMDs can manage
 *   them directly. This is just a special case of an MMIO workaround (as we
 *   write the list of these to-be-whitelisted registers to some special HW
 *   registers).
 * - Workaround batchbuffers, that get executed automatically by the hardware
 *   on every HW context restore.
 *
 * .. [1] Please notice that there are other WAs that, due to their nature,
 *    cannot be applied from a central place. Those are peppered around the rest
 *    of the code, as needed.
 *
 * .. [2] Technically, some registers are power context saved & restored, so
 *    they survive a suspend/resume. In practice, writing them again is not too
 *    costly and simplifies things. We can revisit this in the future.
 *
 * Layout
 * ~~~~~~
 *
 * Keep things in this file ordered by WA type, as per the above (context, GT,
 * display, register whitelist, batchbuffer). Then, inside each type, keep the
 * following order:
 *
 * - Infrastructure functions and macros
 * - WAs per platform in standard gen/chrono order
 * - Public functions to init or apply the given workaround type.
 */

static void wa_init_start(struct i915_wa_list *wal, const char *name)
{
        wal->name = name;
}

#define WA_LIST_CHUNK (1 << 4)
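
/*
 * Workaround lists grow in chunks of WA_LIST_CHUNK entries, so a count
 * that is a multiple of the chunk size means the backing array is either
 * unallocated or exactly full: _wa_add() uses this to decide when to
 * reallocate, and wa_init_finish() to trim the unused tail.
 */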

static void wa_init_finish(struct i915_wa_list *wal)
{
        /* Trim unused entries. */
        if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
                struct i915_wa *list = kmemdup(wal->list,
                                               wal->count * sizeof(*list),
                                               GFP_KERNEL);

                if (list) {
                        kfree(wal->list);
                        wal->list = list;
                }
        }

        if (!wal->count)
                return;

        DRM_DEBUG_DRIVER("Initialized %u %s workarounds\n",
                         wal->wa_count, wal->name);
}

static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
{
        unsigned int addr = i915_mmio_reg_offset(wa->reg);
        unsigned int start = 0, end = wal->count;
        const unsigned int grow = WA_LIST_CHUNK;
        struct i915_wa *wa_;

        GEM_BUG_ON(!is_power_of_2(grow));

        if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */
                struct i915_wa *list;

                list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa),
                                     GFP_KERNEL);
                if (!list) {
                        DRM_ERROR("No space for workaround init!\n");
                        return;
                }

                if (wal->list)
                        memcpy(list, wal->list, sizeof(*wa) * wal->count);

                wal->list = list;
        }

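        /*
         * Binary search for an existing entry for this register; if one is
         * found, merge the new mask/value into it instead of adding a
         * duplicate (complaining if the new write would clobber bits that
         * were previously set to a different value).
         */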
        while (start < end) {
                unsigned int mid = start + (end - start) / 2;

                if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) {
                        start = mid + 1;
                } else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) {
                        end = mid;
                } else {
                        wa_ = &wal->list[mid];

                        if ((wa->mask & ~wa_->mask) == 0) {
                                DRM_ERROR("Discarding overwritten w/a for reg %04x (mask: %08x, value: %08x)\n",
                                          i915_mmio_reg_offset(wa_->reg),
                                          wa_->mask, wa_->val);

                                wa_->val &= ~wa->mask;
                        }

                        wal->wa_count++;
                        wa_->val |= wa->val;
                        wa_->mask |= wa->mask;
                        wa_->read |= wa->read;
                        return;
                }
        }

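        /*
         * No existing entry: append the new workaround and bubble it down
         * into place, keeping the list sorted by mmio offset so the binary
         * search above stays valid.
         */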
        wal->wa_count++;
        wa_ = &wal->list[wal->count++];
        *wa_ = *wa;

        while (wa_-- > wal->list) {
                GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) ==
                           i915_mmio_reg_offset(wa_[1].reg));
                if (i915_mmio_reg_offset(wa_[1].reg) >
                    i915_mmio_reg_offset(wa_[0].reg))
                        break;

                swap(wa_[1], wa_[0]);
        }
}

static void
wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
                   u32 val)
{
        struct i915_wa wa = {
                .reg  = reg,
                .mask = mask,
                .val  = val,
                .read = mask,
        };

        _wa_add(wal, &wa);
}

static void
wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
        wa_write_masked_or(wal, reg, val, _MASKED_BIT_ENABLE(val));
}

static void
wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
        wa_write_masked_or(wal, reg, ~0, val);
}

static void
wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
        wa_write_masked_or(wal, reg, val, val);
}

static void
ignore_wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val)
{
        struct i915_wa wa = {
                .reg  = reg,
                .mask = mask,
                .val  = val,
                /* Bonkers HW, skip verifying */
        };

        _wa_add(wal, &wa);
}

#define WA_SET_BIT_MASKED(addr, mask) \
        wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask))

#define WA_CLR_BIT_MASKED(addr, mask) \
        wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_DISABLE(mask))

#define WA_SET_FIELD_MASKED(addr, mask, value) \
        wa_write_masked_or(wal, (addr), (mask), _MASKED_FIELD((mask), (value)))

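/*
 * These *_MASKED helpers target self-masking registers, where the upper
 * 16 bits act as a per-bit write enable for the lower 16. Assuming the
 * usual i915_reg.h definitions, _MASKED_BIT_ENABLE(BIT(0)) expands to
 * (BIT(0) << 16) | BIT(0), i.e. "unlock bit 0 and set it", leaving all
 * other bits of the register untouched by the write.
 */
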
static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
                                      struct i915_wa_list *wal)
{
        WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);

        /* WaDisableAsyncFlipPerfMode:bdw,chv */
        WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);

        /* WaDisablePartialInstShootdown:bdw,chv */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
                          PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

        /* Use Force Non-Coherent whenever executing a 3D context. This is a
         * workaround for a possible hang in the unlikely event a TLB
         * invalidation occurs during a PSD flush.
         */
        /* WaForceEnableNonCoherent:bdw,chv */
        /* WaHdcDisableFetchWhenMasked:bdw,chv */
        WA_SET_BIT_MASKED(HDC_CHICKEN0,
                          HDC_DONOT_FETCH_MEM_WHEN_MASKED |
                          HDC_FORCE_NON_COHERENT);

        /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
         * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
         *  polygons in the same 8x4 pixel/sample area to be processed without
         *  stalling waiting for the earlier ones to write to Hierarchical Z
         *  buffer."
         *
         * This optimization is off by default for BDW and CHV; turn it on.
         */
        WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);

        /* Wa4x4STCOptimizationDisable:bdw,chv */
        WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);

        /*
         * BSpec recommends 8x4 when MSAA is used,
         * however in practice 16x4 seems fastest.
         *
         * Note that PS/WM thread counts depend on the WIZ hashing
         * disable bit, which we don't touch here, but it's good
         * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
         */
        WA_SET_FIELD_MASKED(GEN7_GT_MODE,
                            GEN6_WIZ_HASHING_MASK,
                            GEN6_WIZ_HASHING_16x4);
}

static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        struct drm_i915_private *i915 = engine->i915;

        gen8_ctx_workarounds_init(engine, wal);

        /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

        /* WaDisableDopClockGating:bdw
         *
         * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
         * to disable EUTC clock gating.
         */
        WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
                          DOP_CLOCK_GATING_DISABLE);

        WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
                          GEN8_SAMPLER_POWER_BYPASS_DIS);

        WA_SET_BIT_MASKED(HDC_CHICKEN0,
                          /* WaForceContextSaveRestoreNonCoherent:bdw */
                          HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
                          /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
                          (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
}

static void chv_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        gen8_ctx_workarounds_init(engine, wal);

        /* WaDisableThreadStallDopClockGating:chv */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

        /* Improve HiZ throughput on CHV. */
        WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
}

static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
                                      struct i915_wa_list *wal)
{
        struct drm_i915_private *i915 = engine->i915;

        if (HAS_LLC(i915)) {
                /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
                 *
                 * Must match Display Engine. See
                 * WaCompressedResourceDisplayNewHashMode.
                 */
                WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                                  GEN9_PBE_COMPRESSED_HASH_SELECTION);
                WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
                                  GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
        }

        /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
        /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
                          FLOW_CONTROL_ENABLE |
                          PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

        /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
        /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
        WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
                          GEN9_ENABLE_YV12_BUGFIX |
                          GEN9_ENABLE_GPGPU_PREEMPTION);

        /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
        /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
        WA_SET_BIT_MASKED(CACHE_MODE_1,
                          GEN8_4x4_STC_OPTIMIZATION_DISABLE |
                          GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);

        /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
        WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
                          GEN9_CCS_TLB_PREFETCH_ENABLE);

        /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
        WA_SET_BIT_MASKED(HDC_CHICKEN0,
                          HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
                          HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);

        /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
         * both tied to WaForceContextSaveRestoreNonCoherent
         * in some hsds for skl. We keep the tie for all gen9. The
         * documentation is a bit hazy and so we want to get common behaviour,
         * even though there is no clear evidence we would need both on kbl/bxt.
         * This area has been a source of system hangs so we play it safe
         * and mimic the skl behaviour regardless of what bspec says.
         *
         * Use Force Non-Coherent whenever executing a 3D context. This
         * is a workaround for a possible hang in the unlikely event
         * a TLB invalidation occurs during a PSD flush.
         */

        /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
        WA_SET_BIT_MASKED(HDC_CHICKEN0,
                          HDC_FORCE_NON_COHERENT);

        /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
        if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915))
                WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
                                  GEN8_SAMPLER_POWER_BYPASS_DIS);

        /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
        WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);

        /*
         * Supporting preemption with fine-granularity requires changes in the
         * batch buffer programming. Since we can't break old userspace, we
         * need to set our default preemption level to a safe value. Userspace
         * is still able to use more fine-grained preemption levels, since in
         * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
         * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
         * not real HW workarounds, but merely a way to start using preemption
         * while maintaining the old contract with userspace.
         */

        /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
        WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);

        /* WaDisableGPGPUMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
        WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
                            GEN9_PREEMPT_GPGPU_LEVEL_MASK,
                            GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

        /* WaClearHIZ_WM_CHICKEN3:bxt,glk */
        if (IS_GEN9_LP(i915))
                WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
}

static void skl_tune_iz_hashing(struct intel_engine_cs *engine,
                                struct i915_wa_list *wal)
{
        struct drm_i915_private *i915 = engine->i915;
        u8 vals[3] = { 0, 0, 0 };
        unsigned int i;

        for (i = 0; i < 3; i++) {
                u8 ss;

                /*
                 * Only consider slices where one, and only one, subslice has 7
                 * EUs
                 */
                if (!is_power_of_2(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]))
                        continue;

                /*
                 * subslice_7eu[i] != 0 (because of the check above) and
                 * ss_max == 4 (maximum number of subslices possible per slice)
                 *
                 * ->    0 <= ss <= 3;
                 */
                ss = ffs(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]) - 1;
                vals[i] = 3 - ss;
        }

        if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
                return;

        /* Tune IZ hashing. See intel_device_info_runtime_init() */
        WA_SET_FIELD_MASKED(GEN7_GT_MODE,
                            GEN9_IZ_HASHING_MASK(2) |
                            GEN9_IZ_HASHING_MASK(1) |
                            GEN9_IZ_HASHING_MASK(0),
                            GEN9_IZ_HASHING(2, vals[2]) |
                            GEN9_IZ_HASHING(1, vals[1]) |
                            GEN9_IZ_HASHING(0, vals[0]));
}

static void skl_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        gen9_ctx_workarounds_init(engine, wal);
        skl_tune_iz_hashing(engine, wal);
}

static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        gen9_ctx_workarounds_init(engine, wal);

        /* WaDisableThreadStallDopClockGating:bxt */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
                          STALL_DOP_GATING_DISABLE);

        /* WaToEnableHwFixForPushConstHWBug:bxt */
        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
}

static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        struct drm_i915_private *i915 = engine->i915;

        gen9_ctx_workarounds_init(engine, wal);

        /* WaToEnableHwFixForPushConstHWBug:kbl */
        if (IS_KBL_REVID(i915, KBL_REVID_C0, REVID_FOREVER))
                WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                                  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

        /* WaDisableSbeCacheDispatchPortSharing:kbl */
        WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
                          GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
}

static void glk_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        gen9_ctx_workarounds_init(engine, wal);

        /* WaToEnableHwFixForPushConstHWBug:glk */
        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
}

static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        gen9_ctx_workarounds_init(engine, wal);

        /* WaToEnableHwFixForPushConstHWBug:cfl */
        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

        /* WaDisableSbeCacheDispatchPortSharing:cfl */
        WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
                          GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
}

static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        struct drm_i915_private *i915 = engine->i915;

        /* WaForceContextSaveRestoreNonCoherent:cnl */
        WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
                          HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);

        /* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */
        if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
                WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);

        /* WaDisableReplayBufferBankArbitrationOptimization:cnl */
        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

        /* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
        if (IS_CNL_REVID(i915, 0, CNL_REVID_B0))
                WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                                  GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);

        /* WaPushConstantDereferenceHoldDisable:cnl */
        WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);

        /* FtrEnableFastAnisoL1BankingFix:cnl */
        WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);

        /* WaDisable3DMidCmdPreemption:cnl */
        WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);

        /* WaDisableGPGPUMidCmdPreemption:cnl */
        WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
                            GEN9_PREEMPT_GPGPU_LEVEL_MASK,
                            GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

        /* WaDisableEarlyEOT:cnl */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);
}

static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        struct drm_i915_private *i915 = engine->i915;

        /* WaDisableBankHangMode:icl */
        wa_write(wal,
                 GEN8_L3CNTLREG,
                 intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) |
                 GEN8_ERRDETBCTRL);

        /* Wa_1604370585:icl (pre-prod)
         * Formerly known as WaPushConstantDereferenceHoldDisable
         */
        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
                WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
                                  PUSH_CONSTANT_DEREF_DISABLE);

        /* WaForceEnableNonCoherent:icl
         * This is not the same workaround as in early Gen9 platforms, where
         * lacking this could cause system hangs, but coherency performance
         * overhead is high and only a few compute workloads really need it
         * (the register is whitelisted in hardware now, so UMDs can opt in
         * for coherency if they have a good reason).
         */
        WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);

        /* Wa_2006611047:icl (pre-prod)
         * Formerly known as WaDisableImprovedTdlClkGating
         */
        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
                WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
                                  GEN11_TDL_CLOCK_GATING_FIX_DISABLE);

        /* Wa_2006665173:icl (pre-prod) */
        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
                WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
                                  GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC);

        /* WaEnableFloatBlendOptimization:icl */
        wa_write_masked_or(wal,
                           GEN10_CACHE_MODE_SS,
                           0, /* write-only, so skip validation */
                           _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE));

        /* WaDisableGPGPUMidThreadPreemption:icl */
        WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
                            GEN9_PREEMPT_GPGPU_LEVEL_MASK,
                            GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);

        /* allow headerless messages for preemptible GPGPU context */
        WA_SET_BIT_MASKED(GEN10_SAMPLER_MODE,
                          GEN11_SAMPLER_ENABLE_HEADLESS_MSG);
}

static void
__intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
                           struct i915_wa_list *wal,
                           const char *name)
{
        struct drm_i915_private *i915 = engine->i915;

        if (engine->class != RENDER_CLASS)
                return;

        wa_init_start(wal, name);

        if (IS_GEN(i915, 11))
                icl_ctx_workarounds_init(engine, wal);
        else if (IS_CANNONLAKE(i915))
                cnl_ctx_workarounds_init(engine, wal);
        else if (IS_COFFEELAKE(i915))
                cfl_ctx_workarounds_init(engine, wal);
        else if (IS_GEMINILAKE(i915))
                glk_ctx_workarounds_init(engine, wal);
        else if (IS_KABYLAKE(i915))
                kbl_ctx_workarounds_init(engine, wal);
        else if (IS_BROXTON(i915))
                bxt_ctx_workarounds_init(engine, wal);
        else if (IS_SKYLAKE(i915))
                skl_ctx_workarounds_init(engine, wal);
        else if (IS_CHERRYVIEW(i915))
                chv_ctx_workarounds_init(engine, wal);
        else if (IS_BROADWELL(i915))
                bdw_ctx_workarounds_init(engine, wal);
        else if (INTEL_GEN(i915) < 8)
                return;
        else
                MISSING_CASE(INTEL_GEN(i915));

        wa_init_finish(wal);
}

void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
{
        __intel_engine_init_ctx_wa(engine, &engine->ctx_wa_list, "context");
}

int intel_engine_emit_ctx_wa(struct i915_request *rq)
{
        struct i915_wa_list *wal = &rq->engine->ctx_wa_list;
        struct i915_wa *wa;
        unsigned int i;
        u32 *cs;
        int ret;

        if (wal->count == 0)
                return 0;

        ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
        if (ret)
                return ret;

        cs = intel_ring_begin(rq, (wal->count * 2 + 2));
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        *cs++ = MI_LOAD_REGISTER_IMM(wal->count);
        for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
                *cs++ = i915_mmio_reg_offset(wa->reg);
                *cs++ = wa->val;
        }
        *cs++ = MI_NOOP;

        intel_ring_advance(rq, cs);

        ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
        if (ret)
                return ret;

        return 0;
}
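
/*
 * For a two-entry context workaround list, intel_engine_emit_ctx_wa()
 * above produces a ring buffer sequence along these lines (illustrative
 * dword stream, bracketed by the two flushes):
 *
 *   MI_LOAD_REGISTER_IMM(2)
 *   <offset of reg0>  <value for reg0>
 *   <offset of reg1>  <value for reg1>
 *   MI_NOOP
 *
 * which is why the ring reservation is wal->count * 2 + 2 dwords.
 */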

static void
gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        /* WaDisableKillLogic:bxt,skl,kbl */
        if (!IS_COFFEELAKE(i915))
                wa_write_or(wal,
                            GAM_ECOCHK,
                            ECOCHK_DIS_TLB);

        if (HAS_LLC(i915)) {
                /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
                 *
                 * Must match Display Engine. See
                 * WaCompressedResourceDisplayNewHashMode.
                 */
                wa_write_or(wal,
                            MMCD_MISC_CTRL,
                            MMCD_PCLA | MMCD_HOTSPOT_EN);
        }

        /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
        wa_write_or(wal,
                    GAM_ECOCHK,
                    BDW_DISABLE_HDC_INVALIDATION);
}

static void
skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        gen9_gt_workarounds_init(i915, wal);

        /* WaDisableGafsUnitClkGating:skl */
        wa_write_or(wal,
                    GEN7_UCGCTL4,
                    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

        /* WaInPlaceDecompressionHang:skl */
        if (IS_SKL_REVID(i915, SKL_REVID_H0, REVID_FOREVER))
                wa_write_or(wal,
                            GEN9_GAMT_ECO_REG_RW_IA,
                            GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
bxt_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        gen9_gt_workarounds_init(i915, wal);

        /* WaInPlaceDecompressionHang:bxt */
        wa_write_or(wal,
                    GEN9_GAMT_ECO_REG_RW_IA,
                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        gen9_gt_workarounds_init(i915, wal);

        /* WaDisableDynamicCreditSharing:kbl */
        if (IS_KBL_REVID(i915, 0, KBL_REVID_B0))
                wa_write_or(wal,
                            GAMT_CHKN_BIT_REG,
                            GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);

        /* WaDisableGafsUnitClkGating:kbl */
        wa_write_or(wal,
                    GEN7_UCGCTL4,
                    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

        /* WaInPlaceDecompressionHang:kbl */
        wa_write_or(wal,
                    GEN9_GAMT_ECO_REG_RW_IA,
                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
glk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        gen9_gt_workarounds_init(i915, wal);
}

static void
cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        gen9_gt_workarounds_init(i915, wal);

        /* WaDisableGafsUnitClkGating:cfl */
        wa_write_or(wal,
                    GEN7_UCGCTL4,
                    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

        /* WaInPlaceDecompressionHang:cfl */
        wa_write_or(wal,
                    GEN9_GAMT_ECO_REG_RW_IA,
                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
        u32 mcr_slice_subslice_mask;

        /*
         * WaProgramMgsrForL3BankSpecificMmioReads:cnl,icl
         * L3Banks could be fused off in single slice scenario. If that is
         * the case, we might need to program MCR select to a valid L3Bank
         * by default, to make sure we correctly read certain registers
         * later on (in the range 0xB100 - 0xB3FF).
         * This might be incompatible with
         * WaProgramMgsrForCorrectSliceSpecificMmioReads.
         * Fortunately, this should not happen in production hardware, so
         * we only assert that this is the case (instead of implementing
         * something more complex that requires checking the range of every
         * MMIO read).
         */
        if (INTEL_GEN(i915) >= 10 &&
            is_power_of_2(sseu->slice_mask)) {
                /*
                 * read FUSE3 for enabled L3 Bank IDs, if L3 Bank matches
                 * enabled subslice, no need to redirect MCR packet
                 */
                u32 slice = fls(sseu->slice_mask);
                u32 fuse3 =
                        intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3);
                u8 ss_mask = sseu->subslice_mask[slice];

                u8 enabled_mask = (ss_mask | ss_mask >>
                                   GEN10_L3BANK_PAIR_COUNT) & GEN10_L3BANK_MASK;
                u8 disabled_mask = fuse3 & GEN10_L3BANK_MASK;

                /*
                 * Production silicon should have matched L3Bank and
                 * subslice enabled
                 */
                WARN_ON((enabled_mask & disabled_mask) != enabled_mask);
        }

        if (INTEL_GEN(i915) >= 11)
                mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK |
                                          GEN11_MCR_SUBSLICE_MASK;
        else
                mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK |
                                          GEN8_MCR_SUBSLICE_MASK;
        /*
         * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl
         * Before any MMIO read into slice/subslice specific registers, MCR
         * packet control register needs to be programmed to point to any
         * enabled s/ss pair. Otherwise, incorrect values will be returned.
         * This means each subsequent MMIO read will be forwarded to a
         * specific s/ss combination, but this is OK since these registers
         * are consistent across s/ss in almost all cases. On the rare
         * occasions, such as INSTDONE, where the value is dependent on the
         * s/ss combo, the read should be done with read_subslice_reg.
         */
        wa_write_masked_or(wal,
                           GEN8_MCR_SELECTOR,
                           mcr_slice_subslice_mask,
                           intel_calculate_mcr_s_ss_select(i915));
}

static void
cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        wa_init_mcr(i915, wal);

        /* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
        if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
                wa_write_or(wal,
                            GAMT_CHKN_BIT_REG,
                            GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT);

        /* WaInPlaceDecompressionHang:cnl */
        wa_write_or(wal,
                    GEN9_GAMT_ECO_REG_RW_IA,
                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        wa_init_mcr(i915, wal);

        /* WaInPlaceDecompressionHang:icl */
        wa_write_or(wal,
                    GEN9_GAMT_ECO_REG_RW_IA,
                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);

        /* WaModifyGamTlbPartitioning:icl */
        wa_write_masked_or(wal,
                           GEN11_GACB_PERF_CTRL,
                           GEN11_HASH_CTRL_MASK,
                           GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);

        /* Wa_1405766107:icl
         * Formerly known as WaCL2SFHalfMaxAlloc
         */
        wa_write_or(wal,
                    GEN11_LSN_UNSLCVC,
                    GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
                    GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);

        /* Wa_220166154:icl
         * Formerly known as WaDisCtxReload
         */
        wa_write_or(wal,
                    GEN8_GAMW_ECO_DEV_RW_IA,
                    GAMW_ECO_DEV_CTX_RELOAD_DISABLE);

        /* Wa_1405779004:icl (pre-prod) */
        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
                wa_write_or(wal,
                            SLICE_UNIT_LEVEL_CLKGATE,
                            MSCUNIT_CLKGATE_DIS);

        /* Wa_1406680159:icl */
        wa_write_or(wal,
                    SUBSLICE_UNIT_LEVEL_CLKGATE,
                    GWUNIT_CLKGATE_DIS);

        /* Wa_1406838659:icl (pre-prod) */
        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
                wa_write_or(wal,
                            INF_UNIT_LEVEL_CLKGATE,
                            CGPSF_CLKGATE_DIS);

        /* Wa_1406463099:icl
         * Formerly known as WaGamTlbPendError
         */
        wa_write_or(wal,
                    GAMT_CHKN_BIT_REG,
                    GAMT_CHKN_DISABLE_L3_COH_PIPE);
}

static void
gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        if (IS_GEN(i915, 11))
                icl_gt_workarounds_init(i915, wal);
        else if (IS_CANNONLAKE(i915))
                cnl_gt_workarounds_init(i915, wal);
        else if (IS_COFFEELAKE(i915))
                cfl_gt_workarounds_init(i915, wal);
        else if (IS_GEMINILAKE(i915))
                glk_gt_workarounds_init(i915, wal);
        else if (IS_KABYLAKE(i915))
                kbl_gt_workarounds_init(i915, wal);
        else if (IS_BROXTON(i915))
                bxt_gt_workarounds_init(i915, wal);
        else if (IS_SKYLAKE(i915))
                skl_gt_workarounds_init(i915, wal);
        else if (INTEL_GEN(i915) <= 8)
                return;
        else
                MISSING_CASE(INTEL_GEN(i915));
}

void intel_gt_init_workarounds(struct drm_i915_private *i915)
{
        struct i915_wa_list *wal = &i915->gt_wa_list;

        wa_init_start(wal, "GT");
        gt_init_workarounds(i915, wal);
        wa_init_finish(wal);
}

static enum forcewake_domains
wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
{
        enum forcewake_domains fw = 0;
        struct i915_wa *wa;
        unsigned int i;

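        /*
         * Accumulate the union of forcewake domains needed to read and
         * write every register in the list.
         */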
        for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
                fw |= intel_uncore_forcewake_for_reg(uncore,
                                                     wa->reg,
                                                     FW_REG_READ |
                                                     FW_REG_WRITE);

        return fw;
}

static bool
wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from)
{
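        /*
         * A workaround is considered lost if any bit covered by the
         * verification mask (wa->read) differs between the current register
         * value and the value the list would have written.
         */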
        if ((cur ^ wa->val) & wa->read) {
                DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n",
                          name, from, i915_mmio_reg_offset(wa->reg),
                          cur, cur & wa->read,
                          wa->val, wa->mask);

                return false;
        }

        return true;
}

static void
wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal)
{
        enum forcewake_domains fw;
        unsigned long flags;
        struct i915_wa *wa;
        unsigned int i;

        if (!wal->count)
                return;

        fw = wal_get_fw_for_rmw(uncore, wal);

        spin_lock_irqsave(&uncore->lock, flags);
        intel_uncore_forcewake_get__locked(uncore, fw);

        for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
                intel_uncore_rmw_fw(uncore, wa->reg, wa->mask, wa->val);
                if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
                        wa_verify(wa,
                                  intel_uncore_read_fw(uncore, wa->reg),
                                  wal->name, "application");
        }

        intel_uncore_forcewake_put__locked(uncore, fw);
        spin_unlock_irqrestore(&uncore->lock, flags);
}

void intel_gt_apply_workarounds(struct drm_i915_private *i915)
{
        wa_list_apply(&i915->uncore, &i915->gt_wa_list);
}

static bool wa_list_verify(struct intel_uncore *uncore,
                           const struct i915_wa_list *wal,
                           const char *from)
{
        struct i915_wa *wa;
        unsigned int i;
        bool ok = true;

        for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
                ok &= wa_verify(wa,
                                intel_uncore_read(uncore, wa->reg),
                                wal->name, from);

        return ok;
}

bool intel_gt_verify_workarounds(struct drm_i915_private *i915,
                                 const char *from)
{
        return wa_list_verify(&i915->uncore, &i915->gt_wa_list, from);
}

static void
whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
{
        struct i915_wa wa = {
                .reg = reg
        };

        if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
                return;

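        /*
         * The access-mode flags live in bits of the NONPRIV slot that are
         * not used by the register offset itself; fold them in here so
         * intel_engine_apply_whitelist() can write the entry verbatim.
         */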
        wa.reg.reg |= flags;
        _wa_add(wal, &wa);
}

static void
whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
{
        whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_RW);
}

static void gen9_whitelist_build(struct i915_wa_list *w)
{
        /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
        whitelist_reg(w, GEN9_CTX_PREEMPT_REG);

        /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
        whitelist_reg(w, GEN8_CS_CHICKEN1);

        /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
        whitelist_reg(w, GEN8_HDC_CHICKEN1);
}

static void skl_whitelist_build(struct intel_engine_cs *engine)
{
        struct i915_wa_list *w = &engine->whitelist;

        if (engine->class != RENDER_CLASS)
                return;

        gen9_whitelist_build(w);

        /* WaDisableLSQCROPERFforOCL:skl */
        whitelist_reg(w, GEN8_L3SQCREG4);
}

static void bxt_whitelist_build(struct intel_engine_cs *engine)
{
        if (engine->class != RENDER_CLASS)
                return;

        gen9_whitelist_build(&engine->whitelist);
}

static void kbl_whitelist_build(struct intel_engine_cs *engine)
{
        struct i915_wa_list *w = &engine->whitelist;

        if (engine->class != RENDER_CLASS)
                return;

        gen9_whitelist_build(w);

        /* WaDisableLSQCROPERFforOCL:kbl */
        whitelist_reg(w, GEN8_L3SQCREG4);
}

static void glk_whitelist_build(struct intel_engine_cs *engine)
{
        struct i915_wa_list *w = &engine->whitelist;

        if (engine->class != RENDER_CLASS)
                return;

        gen9_whitelist_build(w);

        /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
        whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
}

static void cfl_whitelist_build(struct intel_engine_cs *engine)
{
        struct i915_wa_list *w = &engine->whitelist;

        if (engine->class != RENDER_CLASS)
                return;

        gen9_whitelist_build(w);

        /*
         * WaAllowPMDepthAndInvocationCountAccessFromUMD:cfl,whl,cml,aml
         *
         * This covers 4 registers that are next to one another:
         *   - PS_INVOCATION_COUNT
         *   - PS_INVOCATION_COUNT_UDW
         *   - PS_DEPTH_COUNT
         *   - PS_DEPTH_COUNT_UDW
         */
        whitelist_reg_ext(w, PS_INVOCATION_COUNT,
                          RING_FORCE_TO_NONPRIV_RD |
                          RING_FORCE_TO_NONPRIV_RANGE_4);
}

static void cnl_whitelist_build(struct intel_engine_cs *engine)
{
        struct i915_wa_list *w = &engine->whitelist;

        if (engine->class != RENDER_CLASS)
                return;

        /* WaEnablePreemptionGranularityControlByUMD:cnl */
        whitelist_reg(w, GEN8_CS_CHICKEN1);
}

static void icl_whitelist_build(struct intel_engine_cs *engine)
{
        struct i915_wa_list *w = &engine->whitelist;

        switch (engine->class) {
        case RENDER_CLASS:
                /* WaAllowUMDToModifyHalfSliceChicken7:icl */
                whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7);

                /* WaAllowUMDToModifySamplerMode:icl */
                whitelist_reg(w, GEN10_SAMPLER_MODE);

                /* WaEnableStateCacheRedirectToCS:icl */
                whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);

                /*
                 * WaAllowPMDepthAndInvocationCountAccessFromUMD:icl
                 *
                 * This covers 4 registers that are next to one another:
                 *   - PS_INVOCATION_COUNT
                 *   - PS_INVOCATION_COUNT_UDW
                 *   - PS_DEPTH_COUNT
                 *   - PS_DEPTH_COUNT_UDW
                 */
                whitelist_reg_ext(w, PS_INVOCATION_COUNT,
                                  RING_FORCE_TO_NONPRIV_RD |
                                  RING_FORCE_TO_NONPRIV_RANGE_4);
                break;

        case VIDEO_DECODE_CLASS:
                /* hucStatusRegOffset */
                whitelist_reg_ext(w, _MMIO(0x2000 + engine->mmio_base),
                                  RING_FORCE_TO_NONPRIV_RD);
                /* hucUKernelHdrInfoRegOffset */
                whitelist_reg_ext(w, _MMIO(0x2014 + engine->mmio_base),
                                  RING_FORCE_TO_NONPRIV_RD);
                /* hucStatus2RegOffset */
                whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base),
                                  RING_FORCE_TO_NONPRIV_RD);
                break;

        default:
                break;
        }
}

void intel_engine_init_whitelist(struct intel_engine_cs *engine)
{
        struct drm_i915_private *i915 = engine->i915;
        struct i915_wa_list *w = &engine->whitelist;

        wa_init_start(w, "whitelist");

        if (IS_GEN(i915, 11))
                icl_whitelist_build(engine);
        else if (IS_CANNONLAKE(i915))
                cnl_whitelist_build(engine);
        else if (IS_COFFEELAKE(i915))
                cfl_whitelist_build(engine);
        else if (IS_GEMINILAKE(i915))
                glk_whitelist_build(engine);
        else if (IS_KABYLAKE(i915))
                kbl_whitelist_build(engine);
        else if (IS_BROXTON(i915))
                bxt_whitelist_build(engine);
        else if (IS_SKYLAKE(i915))
                skl_whitelist_build(engine);
        else if (INTEL_GEN(i915) <= 8)
                return;
        else
                MISSING_CASE(INTEL_GEN(i915));

        wa_init_finish(w);
}

void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
{
        const struct i915_wa_list *wal = &engine->whitelist;
        struct intel_uncore *uncore = engine->uncore;
        const u32 base = engine->mmio_base;
        struct i915_wa *wa;
        unsigned int i;

        if (!wal->count)
                return;

        for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
                intel_uncore_write(uncore,
                                   RING_FORCE_TO_NONPRIV(base, i),
                                   i915_mmio_reg_offset(wa->reg));

        /* And clear the rest just in case of garbage */
        for (; i < RING_MAX_NONPRIV_SLOTS; i++)
                intel_uncore_write(uncore,
                                   RING_FORCE_TO_NONPRIV(base, i),
                                   i915_mmio_reg_offset(RING_NOPID(base)));
}

static void
rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
        struct drm_i915_private *i915 = engine->i915;

        if (IS_GEN(i915, 11)) {
                /* This is not a Wa. Enable for better image quality */
                wa_masked_en(wal,
                             _3D_CHICKEN3,
                             _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);

                /* WaPipelineFlushCoherentLines:icl */
                ignore_wa_write_or(wal,
                                   GEN8_L3SQCREG4,
                                   GEN8_LQSC_FLUSH_COHERENT_LINES,
                                   GEN8_LQSC_FLUSH_COHERENT_LINES);

                /*
                 * Wa_1405543622:icl
                 * Formerly known as WaGAPZPriorityScheme
                 */
                wa_write_or(wal,
                            GEN8_GARBCNTL,
                            GEN11_ARBITRATION_PRIO_ORDER_MASK);

                /*
                 * Wa_1604223664:icl
                 * Formerly known as WaL3BankAddressHashing
                 */
                wa_write_masked_or(wal,
                                   GEN8_GARBCNTL,
                                   GEN11_HASH_CTRL_EXCL_MASK,
                                   GEN11_HASH_CTRL_EXCL_BIT0);
                wa_write_masked_or(wal,
                                   GEN11_GLBLINVL,
                                   GEN11_BANK_HASH_ADDR_EXCL_MASK,
                                   GEN11_BANK_HASH_ADDR_EXCL_BIT0);

                /*
                 * Wa_1405733216:icl
                 * Formerly known as WaDisableCleanEvicts
                 */
                ignore_wa_write_or(wal,
                                   GEN8_L3SQCREG4,
                                   GEN11_LQSC_CLEAN_EVICT_DISABLE,
                                   GEN11_LQSC_CLEAN_EVICT_DISABLE);

                /* WaForwardProgressSoftReset:icl */
                wa_write_or(wal,
                            GEN10_SCRATCH_LNCF2,
                            PMFLUSHDONE_LNICRSDROP |
                            PMFLUSH_GAPL3UNBLOCK |
                            PMFLUSHDONE_LNEBLK);

                /* Wa_1406609255:icl (pre-prod) */
                if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
                        wa_write_or(wal,
                                    GEN7_SARCHKMD,
                                    GEN7_DISABLE_DEMAND_PREFETCH);

                /* Wa_1606682166:icl */
                wa_write_or(wal,
                            GEN7_SARCHKMD,
                            GEN7_DISABLE_SAMPLER_PREFETCH);
        }

        if (IS_GEN_RANGE(i915, 9, 11)) {
                /* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl */
                wa_masked_en(wal,
                             GEN7_FF_SLICE_CS_CHICKEN1,
                             GEN9_FFSC_PERCTX_PREEMPT_CTRL);
        }

        if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915)) {
                /* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
                wa_write_or(wal,
                            GEN8_GARBCNTL,
                            GEN9_GAPS_TSV_CREDIT_DISABLE);
        }

        if (IS_BROXTON(i915)) {
                /* WaDisablePooledEuLoadBalancingFix:bxt */
                wa_masked_en(wal,
                             FF_SLICE_CS_CHICKEN2,
                             GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
        }

        if (IS_GEN(i915, 9)) {
                /* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
                wa_masked_en(wal,
                             GEN9_CSFE_CHICKEN1_RCS,
                             GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);

                /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
                wa_write_or(wal,
                            BDW_SCRATCH1,
                            GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);

                /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
                if (IS_GEN9_LP(i915))
                        wa_write_masked_or(wal,
                                           GEN8_L3SQCREG1,
                                           L3_PRIO_CREDITS_MASK,
                                           L3_GENERAL_PRIO_CREDITS(62) |
                                           L3_HIGH_PRIO_CREDITS(2));

                /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
                wa_write_or(wal,
                            GEN8_L3SQCREG4,
                            GEN8_LQSC_FLUSH_COHERENT_LINES);
        }
}

static void
xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
        struct drm_i915_private *i915 = engine->i915;

        /* WaKBLVECSSemaphoreWaitPoll:kbl */
        if (IS_KBL_REVID(i915, KBL_REVID_A0, KBL_REVID_E0)) {
                wa_write(wal,
                         RING_SEMA_WAIT_POLL(engine->mmio_base),
                         1);
        }
}

static void
engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
        if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 8))
                return;

        if (engine->id == RCS0)
                rcs_engine_wa_init(engine, wal);
        else
                xcs_engine_wa_init(engine, wal);
}

void intel_engine_init_workarounds(struct intel_engine_cs *engine)
{
        struct i915_wa_list *wal = &engine->wa_list;

        if (GEM_WARN_ON(INTEL_GEN(engine->i915) < 8))
                return;

        wa_init_start(wal, engine->name);
        engine_init_workarounds(engine, wal);
        wa_init_finish(wal);
}

void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
{
        wa_list_apply(engine->uncore, &engine->wa_list);
}

static struct i915_vma *
create_scratch(struct i915_address_space *vm, int count)
{
        struct drm_i915_gem_object *obj;
        struct i915_vma *vma;
        unsigned int size;
        int err;

        size = round_up(count * sizeof(u32), PAGE_SIZE);
        obj = i915_gem_object_create_internal(vm->i915, size);
        if (IS_ERR(obj))
                return ERR_CAST(obj);

        i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

        vma = i915_vma_instance(obj, vm, NULL);
        if (IS_ERR(vma)) {
                err = PTR_ERR(vma);
                goto err_obj;
        }

        err = i915_vma_pin(vma, 0, 0,
                           i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
        if (err)
                goto err_obj;

        return vma;

err_obj:
        i915_gem_object_put(obj);
        return ERR_PTR(err);
}

static int
wa_list_srm(struct i915_request *rq,
            const struct i915_wa_list *wal,
            struct i915_vma *vma)
{
        const struct i915_wa *wa;
        unsigned int i;
        u32 srm, *cs;

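        /* MI_STORE_REGISTER_MEM with a global GTT destination; on gen8+
         * the command takes a 64-bit address, so bump the length field by
         * one dword.
         */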
        srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
        if (INTEL_GEN(rq->i915) >= 8)
                srm++;

        cs = intel_ring_begin(rq, 4 * wal->count);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
                *cs++ = srm;
                *cs++ = i915_mmio_reg_offset(wa->reg);
                *cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i;
                *cs++ = 0;
        }
        intel_ring_advance(rq, cs);

        return 0;
}
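
/*
 * Each workaround thus becomes a four-dword packet: the SRM command, the
 * register to sample, and the GGTT address of the scratch slot receiving
 * the value (low then high dword on gen8+; on older gens the trailing
 * zero is simply padding). engine_wa_list_verify() below reads the
 * scratch page back and compares it against the expected values.
 */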

static int engine_wa_list_verify(struct intel_context *ce,
                                 const struct i915_wa_list * const wal,
                                 const char *from)
{
        const struct i915_wa *wa;
        struct i915_request *rq;
        struct i915_vma *vma;
        unsigned int i;
        u32 *results;
        int err;

        if (!wal->count)
                return 0;

        vma = create_scratch(&ce->engine->i915->ggtt.vm, wal->count);
        if (IS_ERR(vma))
                return PTR_ERR(vma);

        rq = intel_context_create_request(ce);
        if (IS_ERR(rq)) {
                err = PTR_ERR(rq);
                goto err_vma;
        }

        err = wa_list_srm(rq, wal, vma);
        if (err)
                goto err_vma;

        i915_request_add(rq);
        if (i915_request_wait(rq, 0, HZ / 5) < 0) {
                err = -ETIME;
                goto err_vma;
        }

        results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
        if (IS_ERR(results)) {
                err = PTR_ERR(results);
                goto err_vma;
        }

        err = 0;
        for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
                if (!wa_verify(wa, results[i], wal->name, from))
                        err = -ENXIO;

        i915_gem_object_unpin_map(vma->obj);

err_vma:
        i915_vma_unpin(vma);
        i915_vma_put(vma);
        return err;
}

int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
                                    const char *from)
{
        return engine_wa_list_verify(engine->kernel_context,
                                     &engine->wa_list,
                                     from);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_workarounds.c"
#endif