linux/drivers/gpu/drm/i915/gt/intel_workarounds.c
   1/*
   2 * SPDX-License-Identifier: MIT
   3 *
   4 * Copyright © 2014-2018 Intel Corporation
   5 */
   6
   7#include "i915_drv.h"
   8#include "intel_context.h"
   9#include "intel_gt.h"
  10#include "intel_ring.h"
  11#include "intel_workarounds.h"
  12
  13/**
  14 * DOC: Hardware workarounds
  15 *
  16 * This file is intended as a central place to implement most [1]_ of the
  17 * required workarounds for hardware to work as originally intended. They fall
   18 * into five basic categories depending on how/when they are applied:
  19 *
  20 * - Workarounds that touch registers that are saved/restored to/from the HW
  21 *   context image. The list is emitted (via Load Register Immediate commands)
   22 *   every time a new context is created.
  23 * - GT workarounds. The list of these WAs is applied whenever these registers
  24 *   revert to default values (on GPU reset, suspend/resume [2]_, etc..).
  25 * - Display workarounds. The list is applied during display clock-gating
  26 *   initialization.
  27 * - Workarounds that whitelist a privileged register, so that UMDs can manage
   28 *   them directly. This is just a special case of an MMIO workaround (as we
   29 *   write the list of these to-be-whitelisted registers to some special HW
  30 *   registers).
  31 * - Workaround batchbuffers, that get executed automatically by the hardware
  32 *   on every HW context restore.
  33 *
  34 * .. [1] Please notice that there are other WAs that, due to their nature,
  35 *    cannot be applied from a central place. Those are peppered around the rest
  36 *    of the code, as needed.
  37 *
  38 * .. [2] Technically, some registers are powercontext saved & restored, so they
  39 *    survive a suspend/resume. In practice, writing them again is not too
  40 *    costly and simplifies things. We can revisit this in the future.
  41 *
  42 * Layout
  43 * ~~~~~~
  44 *
  45 * Keep things in this file ordered by WA type, as per the above (context, GT,
  46 * display, register whitelist, batchbuffer). Then, inside each type, keep the
  47 * following order:
  48 *
  49 * - Infrastructure functions and macros
  50 * - WAs per platform in standard gen/chrono order
  51 * - Public functions to init or apply the given workaround type.
  52 */
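/*
 * Illustrative sketch (not driver code): the typical lifecycle of a
 * workaround list built with the helpers below. The registers chosen here
 * are arbitrary examples; the helper names are the ones defined in this
 * file, and "uncore" stands for whatever intel_uncore the list targets.
 *
 *	struct i915_wa_list wal = {};
 *
 *	wa_init_start(&wal, "example", "rcs0");
 *	wa_write_or(&wal, GAM_ECOCHK, ECOCHK_DIS_TLB);
 *	wa_masked_en(&wal, INSTPM, INSTPM_FORCE_ORDERING);
 *	wa_init_finish(&wal);
 *
 *	wa_list_apply(uncore, &wal);	// read-modify-write each register
 */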
  53
  54static void wa_init_start(struct i915_wa_list *wal, const char *name, const char *engine_name)
  55{
  56        wal->name = name;
  57        wal->engine_name = engine_name;
  58}
  59
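/*
 * Workaround lists grow in chunks of WA_LIST_CHUNK entries (see _wa_add()
 * below); wa_init_finish() trims any unused tail of the final allocation.
 */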
  60#define WA_LIST_CHUNK (1 << 4)
  61
  62static void wa_init_finish(struct i915_wa_list *wal)
  63{
  64        /* Trim unused entries. */
  65        if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
  66                struct i915_wa *list = kmemdup(wal->list,
  67                                               wal->count * sizeof(*list),
  68                                               GFP_KERNEL);
  69
  70                if (list) {
  71                        kfree(wal->list);
  72                        wal->list = list;
  73                }
  74        }
  75
  76        if (!wal->count)
  77                return;
  78
  79        DRM_DEBUG_DRIVER("Initialized %u %s workarounds on %s\n",
  80                         wal->wa_count, wal->name, wal->engine_name);
  81}
  82
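/*
 * _wa_add() keeps wal->list sorted by mmio offset. If the register is
 * already present, the new mask/value are OR'd into the existing entry; if
 * the new mask falls entirely within the old one, the overlapping value
 * bits are discarded first (with an error message) so the later workaround
 * wins.
 */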
  83static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
  84{
  85        unsigned int addr = i915_mmio_reg_offset(wa->reg);
  86        unsigned int start = 0, end = wal->count;
  87        const unsigned int grow = WA_LIST_CHUNK;
  88        struct i915_wa *wa_;
  89
  90        GEM_BUG_ON(!is_power_of_2(grow));
  91
  92        if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */
  93                struct i915_wa *list;
  94
  95                list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa),
  96                                     GFP_KERNEL);
  97                if (!list) {
  98                        DRM_ERROR("No space for workaround init!\n");
  99                        return;
 100                }
 101
 102                if (wal->list)
 103                        memcpy(list, wal->list, sizeof(*wa) * wal->count);
 104
 105                wal->list = list;
 106        }
 107
 108        while (start < end) {
 109                unsigned int mid = start + (end - start) / 2;
 110
 111                if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) {
 112                        start = mid + 1;
 113                } else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) {
 114                        end = mid;
 115                } else {
 116                        wa_ = &wal->list[mid];
 117
 118                        if ((wa->mask & ~wa_->mask) == 0) {
 119                                DRM_ERROR("Discarding overwritten w/a for reg %04x (mask: %08x, value: %08x)\n",
 120                                          i915_mmio_reg_offset(wa_->reg),
 121                                          wa_->mask, wa_->val);
 122
 123                                wa_->val &= ~wa->mask;
 124                        }
 125
 126                        wal->wa_count++;
 127                        wa_->val |= wa->val;
 128                        wa_->mask |= wa->mask;
 129                        wa_->read |= wa->read;
 130                        return;
 131                }
 132        }
 133
 134        wal->wa_count++;
 135        wa_ = &wal->list[wal->count++];
 136        *wa_ = *wa;
 137
 138        while (wa_-- > wal->list) {
 139                GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) ==
 140                           i915_mmio_reg_offset(wa_[1].reg));
 141                if (i915_mmio_reg_offset(wa_[1].reg) >
 142                    i915_mmio_reg_offset(wa_[0].reg))
 143                        break;
 144
 145                swap(wa_[1], wa_[0]);
 146        }
 147}
 148
 149static void
 150wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
 151                   u32 val)
 152{
 153        struct i915_wa wa = {
 154                .reg  = reg,
 155                .mask = mask,
 156                .val  = val,
 157                .read = mask,
 158        };
 159
 160        _wa_add(wal, &wa);
 161}
 162
 163static void
 164wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
 165{
 166        wa_write_masked_or(wal, reg, val, _MASKED_BIT_ENABLE(val));
 167}
 168
 169static void
 170wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
 171{
 172        wa_write_masked_or(wal, reg, ~0, val);
 173}
 174
 175static void
 176wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
 177{
 178        wa_write_masked_or(wal, reg, val, val);
 179}
 180
 181#define WA_SET_BIT_MASKED(addr, mask) \
 182        wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask))
 183
 184#define WA_CLR_BIT_MASKED(addr, mask) \
 185        wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_DISABLE(mask))
 186
 187#define WA_SET_FIELD_MASKED(addr, mask, value) \
 188        wa_write_masked_or(wal, (addr), (mask), _MASKED_FIELD((mask), (value)))
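/*
 * Note on the *_MASKED helpers above: they target "masked" registers, where
 * the upper 16 bits of the written value select which of the lower 16 bits
 * take effect. Assuming the usual i915 definitions, _MASKED_BIT_ENABLE(bit)
 * expands to (bit << 16 | bit) and _MASKED_BIT_DISABLE(bit) to (bit << 16),
 * so a single write can set or clear individual bits without a
 * read-modify-write cycle.
 */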
 189
 190static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
 191                                      struct i915_wa_list *wal)
 192{
 193        WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
 194
 195        /* WaDisableAsyncFlipPerfMode:bdw,chv */
 196        WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
 197
 198        /* WaDisablePartialInstShootdown:bdw,chv */
 199        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
 200                          PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
 201
 202        /* Use Force Non-Coherent whenever executing a 3D context. This is a
  203         * workaround for a possible hang in the unlikely event a TLB
 204         * invalidation occurs during a PSD flush.
 205         */
 206        /* WaForceEnableNonCoherent:bdw,chv */
 207        /* WaHdcDisableFetchWhenMasked:bdw,chv */
 208        WA_SET_BIT_MASKED(HDC_CHICKEN0,
 209                          HDC_DONOT_FETCH_MEM_WHEN_MASKED |
 210                          HDC_FORCE_NON_COHERENT);
 211
 212        /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
 213         * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
 214         *  polygons in the same 8x4 pixel/sample area to be processed without
 215         *  stalling waiting for the earlier ones to write to Hierarchical Z
 216         *  buffer."
 217         *
 218         * This optimization is off by default for BDW and CHV; turn it on.
 219         */
 220        WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
 221
 222        /* Wa4x4STCOptimizationDisable:bdw,chv */
 223        WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
 224
 225        /*
  226         * BSpec recommends 8x4 when MSAA is used;
  227         * however, in practice 16x4 seems fastest.
 228         *
 229         * Note that PS/WM thread counts depend on the WIZ hashing
 230         * disable bit, which we don't touch here, but it's good
 231         * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
 232         */
 233        WA_SET_FIELD_MASKED(GEN7_GT_MODE,
 234                            GEN6_WIZ_HASHING_MASK,
 235                            GEN6_WIZ_HASHING_16x4);
 236}
 237
 238static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
 239                                     struct i915_wa_list *wal)
 240{
 241        struct drm_i915_private *i915 = engine->i915;
 242
 243        gen8_ctx_workarounds_init(engine, wal);
 244
 245        /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
 246        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
 247
 248        /* WaDisableDopClockGating:bdw
 249         *
 250         * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
 251         * to disable EUTC clock gating.
 252         */
 253        WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
 254                          DOP_CLOCK_GATING_DISABLE);
 255
 256        WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
 257                          GEN8_SAMPLER_POWER_BYPASS_DIS);
 258
 259        WA_SET_BIT_MASKED(HDC_CHICKEN0,
 260                          /* WaForceContextSaveRestoreNonCoherent:bdw */
 261                          HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
 262                          /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
 263                          (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
 264}
 265
 266static void chv_ctx_workarounds_init(struct intel_engine_cs *engine,
 267                                     struct i915_wa_list *wal)
 268{
 269        gen8_ctx_workarounds_init(engine, wal);
 270
 271        /* WaDisableThreadStallDopClockGating:chv */
 272        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
 273
 274        /* Improve HiZ throughput on CHV. */
 275        WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
 276}
 277
 278static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
 279                                      struct i915_wa_list *wal)
 280{
 281        struct drm_i915_private *i915 = engine->i915;
 282
 283        if (HAS_LLC(i915)) {
 284                /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
 285                 *
 286                 * Must match Display Engine. See
 287                 * WaCompressedResourceDisplayNewHashMode.
 288                 */
 289                WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
 290                                  GEN9_PBE_COMPRESSED_HASH_SELECTION);
 291                WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
 292                                  GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
 293        }
 294
 295        /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
 296        /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
 297        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
 298                          FLOW_CONTROL_ENABLE |
 299                          PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
 300
 301        /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
 302        /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
 303        WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
 304                          GEN9_ENABLE_YV12_BUGFIX |
 305                          GEN9_ENABLE_GPGPU_PREEMPTION);
 306
 307        /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
 308        /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
 309        WA_SET_BIT_MASKED(CACHE_MODE_1,
 310                          GEN8_4x4_STC_OPTIMIZATION_DISABLE |
 311                          GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);
 312
 313        /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
 314        WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
 315                          GEN9_CCS_TLB_PREFETCH_ENABLE);
 316
 317        /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
 318        WA_SET_BIT_MASKED(HDC_CHICKEN0,
 319                          HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
 320                          HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);
 321
 322        /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
 323         * both tied to WaForceContextSaveRestoreNonCoherent
 324         * in some hsds for skl. We keep the tie for all gen9. The
 325         * documentation is a bit hazy and so we want to get common behaviour,
 326         * even though there is no clear evidence we would need both on kbl/bxt.
  327         * This area has been a source of system hangs so we play it safe
  328         * and mimic skl behaviour regardless of what bspec says.
 329         *
 330         * Use Force Non-Coherent whenever executing a 3D context. This
 331         * is a workaround for a possible hang in the unlikely event
 332         * a TLB invalidation occurs during a PSD flush.
 333         */
 334
 335        /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
 336        WA_SET_BIT_MASKED(HDC_CHICKEN0,
 337                          HDC_FORCE_NON_COHERENT);
 338
 339        /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
 340        if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915))
 341                WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
 342                                  GEN8_SAMPLER_POWER_BYPASS_DIS);
 343
 344        /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
 345        WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
 346
 347        /*
 348         * Supporting preemption with fine-granularity requires changes in the
 349         * batch buffer programming. Since we can't break old userspace, we
  350         * need to set our default preemption level to a safe value. Userspace is
 351         * still able to use more fine-grained preemption levels, since in
 352         * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
 353         * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
 354         * not real HW workarounds, but merely a way to start using preemption
  355         * while maintaining the old contract with userspace.
 356         */
 357
 358        /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
 359        WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
 360
  361        /* WaDisableGPGPUMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
 362        WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
 363                            GEN9_PREEMPT_GPGPU_LEVEL_MASK,
 364                            GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
 365
 366        /* WaClearHIZ_WM_CHICKEN3:bxt,glk */
 367        if (IS_GEN9_LP(i915))
 368                WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
 369}
 370
 371static void skl_tune_iz_hashing(struct intel_engine_cs *engine,
 372                                struct i915_wa_list *wal)
 373{
 374        struct drm_i915_private *i915 = engine->i915;
 375        u8 vals[3] = { 0, 0, 0 };
 376        unsigned int i;
 377
 378        for (i = 0; i < 3; i++) {
 379                u8 ss;
 380
 381                /*
 382                 * Only consider slices where one, and only one, subslice has 7
 383                 * EUs
 384                 */
 385                if (!is_power_of_2(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]))
 386                        continue;
 387
 388                /*
 389                 * subslice_7eu[i] != 0 (because of the check above) and
 390                 * ss_max == 4 (maximum number of subslices possible per slice)
 391                 *
 392                 * ->    0 <= ss <= 3;
 393                 */
 394                ss = ffs(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]) - 1;
 395                vals[i] = 3 - ss;
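                /* e.g. (hypothetical): subslice_7eu[i] == BIT(2) -> ss == 2 -> vals[i] == 1 */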
 396        }
 397
 398        if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
 399                return;
 400
 401        /* Tune IZ hashing. See intel_device_info_runtime_init() */
 402        WA_SET_FIELD_MASKED(GEN7_GT_MODE,
 403                            GEN9_IZ_HASHING_MASK(2) |
 404                            GEN9_IZ_HASHING_MASK(1) |
 405                            GEN9_IZ_HASHING_MASK(0),
 406                            GEN9_IZ_HASHING(2, vals[2]) |
 407                            GEN9_IZ_HASHING(1, vals[1]) |
 408                            GEN9_IZ_HASHING(0, vals[0]));
 409}
 410
 411static void skl_ctx_workarounds_init(struct intel_engine_cs *engine,
 412                                     struct i915_wa_list *wal)
 413{
 414        gen9_ctx_workarounds_init(engine, wal);
 415        skl_tune_iz_hashing(engine, wal);
 416}
 417
 418static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine,
 419                                     struct i915_wa_list *wal)
 420{
 421        gen9_ctx_workarounds_init(engine, wal);
 422
 423        /* WaDisableThreadStallDopClockGating:bxt */
 424        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
 425                          STALL_DOP_GATING_DISABLE);
 426
 427        /* WaToEnableHwFixForPushConstHWBug:bxt */
 428        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
 429                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
 430}
 431
 432static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine,
 433                                     struct i915_wa_list *wal)
 434{
 435        struct drm_i915_private *i915 = engine->i915;
 436
 437        gen9_ctx_workarounds_init(engine, wal);
 438
 439        /* WaToEnableHwFixForPushConstHWBug:kbl */
 440        if (IS_KBL_REVID(i915, KBL_REVID_C0, REVID_FOREVER))
 441                WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
 442                                  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
 443
 444        /* WaDisableSbeCacheDispatchPortSharing:kbl */
 445        WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
 446                          GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
 447}
 448
 449static void glk_ctx_workarounds_init(struct intel_engine_cs *engine,
 450                                     struct i915_wa_list *wal)
 451{
 452        gen9_ctx_workarounds_init(engine, wal);
 453
 454        /* WaToEnableHwFixForPushConstHWBug:glk */
 455        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
 456                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
 457}
 458
 459static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
 460                                     struct i915_wa_list *wal)
 461{
 462        gen9_ctx_workarounds_init(engine, wal);
 463
 464        /* WaToEnableHwFixForPushConstHWBug:cfl */
 465        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
 466                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
 467
 468        /* WaDisableSbeCacheDispatchPortSharing:cfl */
 469        WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
 470                          GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
 471}
 472
 473static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine,
 474                                     struct i915_wa_list *wal)
 475{
 476        struct drm_i915_private *i915 = engine->i915;
 477
 478        /* WaForceContextSaveRestoreNonCoherent:cnl */
 479        WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
 480                          HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);
 481
 482        /* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */
 483        if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
 484                WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);
 485
 486        /* WaDisableReplayBufferBankArbitrationOptimization:cnl */
 487        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
 488                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
 489
 490        /* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
 491        if (IS_CNL_REVID(i915, 0, CNL_REVID_B0))
 492                WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
 493                                  GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);
 494
 495        /* WaPushConstantDereferenceHoldDisable:cnl */
 496        WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);
 497
 498        /* FtrEnableFastAnisoL1BankingFix:cnl */
 499        WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);
 500
 501        /* WaDisable3DMidCmdPreemption:cnl */
 502        WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
 503
 504        /* WaDisableGPGPUMidCmdPreemption:cnl */
 505        WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
 506                            GEN9_PREEMPT_GPGPU_LEVEL_MASK,
 507                            GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
 508
 509        /* WaDisableEarlyEOT:cnl */
 510        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);
 511}
 512
 513static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
 514                                     struct i915_wa_list *wal)
 515{
 516        struct drm_i915_private *i915 = engine->i915;
 517
 518        /* WaDisableBankHangMode:icl */
 519        wa_write(wal,
 520                 GEN8_L3CNTLREG,
 521                 intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) |
 522                 GEN8_ERRDETBCTRL);
 523
 524        /* Wa_1604370585:icl (pre-prod)
 525         * Formerly known as WaPushConstantDereferenceHoldDisable
 526         */
 527        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
 528                WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
 529                                  PUSH_CONSTANT_DEREF_DISABLE);
 530
 531        /* WaForceEnableNonCoherent:icl
 532         * This is not the same workaround as in early Gen9 platforms, where
 533         * lacking this could cause system hangs, but coherency performance
 534         * overhead is high and only a few compute workloads really need it
 535         * (the register is whitelisted in hardware now, so UMDs can opt in
 536         * for coherency if they have a good reason).
 537         */
 538        WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);
 539
 540        /* Wa_2006611047:icl (pre-prod)
 541         * Formerly known as WaDisableImprovedTdlClkGating
 542         */
 543        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
 544                WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
 545                                  GEN11_TDL_CLOCK_GATING_FIX_DISABLE);
 546
 547        /* Wa_2006665173:icl (pre-prod) */
 548        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
 549                WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
 550                                  GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC);
 551
 552        /* WaEnableFloatBlendOptimization:icl */
 553        wa_write_masked_or(wal,
 554                           GEN10_CACHE_MODE_SS,
 555                           0, /* write-only, so skip validation */
 556                           _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE));
 557
 558        /* WaDisableGPGPUMidThreadPreemption:icl */
 559        WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
 560                            GEN9_PREEMPT_GPGPU_LEVEL_MASK,
 561                            GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
 562
 563        /* allow headerless messages for preemptible GPGPU context */
 564        WA_SET_BIT_MASKED(GEN10_SAMPLER_MODE,
 565                          GEN11_SAMPLER_ENABLE_HEADLESS_MSG);
 566}
 567
 568static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine,
 569                                     struct i915_wa_list *wal)
 570{
 571        /* Wa_1409142259:tgl */
 572        WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
 573                          GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
 574}
 575
 576static void
 577__intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
 578                           struct i915_wa_list *wal,
 579                           const char *name)
 580{
 581        struct drm_i915_private *i915 = engine->i915;
 582
 583        if (engine->class != RENDER_CLASS)
 584                return;
 585
 586        wa_init_start(wal, name, engine->name);
 587
 588        if (IS_GEN(i915, 12))
 589                tgl_ctx_workarounds_init(engine, wal);
 590        else if (IS_GEN(i915, 11))
 591                icl_ctx_workarounds_init(engine, wal);
 592        else if (IS_CANNONLAKE(i915))
 593                cnl_ctx_workarounds_init(engine, wal);
 594        else if (IS_COFFEELAKE(i915))
 595                cfl_ctx_workarounds_init(engine, wal);
 596        else if (IS_GEMINILAKE(i915))
 597                glk_ctx_workarounds_init(engine, wal);
 598        else if (IS_KABYLAKE(i915))
 599                kbl_ctx_workarounds_init(engine, wal);
 600        else if (IS_BROXTON(i915))
 601                bxt_ctx_workarounds_init(engine, wal);
 602        else if (IS_SKYLAKE(i915))
 603                skl_ctx_workarounds_init(engine, wal);
 604        else if (IS_CHERRYVIEW(i915))
 605                chv_ctx_workarounds_init(engine, wal);
 606        else if (IS_BROADWELL(i915))
 607                bdw_ctx_workarounds_init(engine, wal);
 608        else if (INTEL_GEN(i915) < 8)
 609                return;
 610        else
 611                MISSING_CASE(INTEL_GEN(i915));
 612
 613        wa_init_finish(wal);
 614}
 615
 616void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
 617{
 618        __intel_engine_init_ctx_wa(engine, &engine->ctx_wa_list, "context");
 619}
 620
 621int intel_engine_emit_ctx_wa(struct i915_request *rq)
 622{
 623        struct i915_wa_list *wal = &rq->engine->ctx_wa_list;
 624        struct i915_wa *wa;
 625        unsigned int i;
 626        u32 *cs;
 627        int ret;
 628
 629        if (wal->count == 0)
 630                return 0;
 631
 632        ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
 633        if (ret)
 634                return ret;
 635
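        /*
         * Ring space: one MI_LOAD_REGISTER_IMM header dword, two dwords
         * (offset, value) per workaround, plus a trailing MI_NOOP.
         */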
 636        cs = intel_ring_begin(rq, (wal->count * 2 + 2));
 637        if (IS_ERR(cs))
 638                return PTR_ERR(cs);
 639
 640        *cs++ = MI_LOAD_REGISTER_IMM(wal->count);
 641        for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
 642                *cs++ = i915_mmio_reg_offset(wa->reg);
 643                *cs++ = wa->val;
 644        }
 645        *cs++ = MI_NOOP;
 646
 647        intel_ring_advance(rq, cs);
 648
 649        ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
 650        if (ret)
 651                return ret;
 652
 653        return 0;
 654}
 655
 656static void
 657gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
 658{
 659        /* WaDisableKillLogic:bxt,skl,kbl */
 660        if (!IS_COFFEELAKE(i915))
 661                wa_write_or(wal,
 662                            GAM_ECOCHK,
 663                            ECOCHK_DIS_TLB);
 664
 665        if (HAS_LLC(i915)) {
 666                /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
 667                 *
 668                 * Must match Display Engine. See
 669                 * WaCompressedResourceDisplayNewHashMode.
 670                 */
 671                wa_write_or(wal,
 672                            MMCD_MISC_CTRL,
 673                            MMCD_PCLA | MMCD_HOTSPOT_EN);
 674        }
 675
 676        /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
 677        wa_write_or(wal,
 678                    GAM_ECOCHK,
 679                    BDW_DISABLE_HDC_INVALIDATION);
 680}
 681
 682static void
 683skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
 684{
 685        gen9_gt_workarounds_init(i915, wal);
 686
 687        /* WaDisableGafsUnitClkGating:skl */
 688        wa_write_or(wal,
 689                    GEN7_UCGCTL4,
 690                    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
 691
 692        /* WaInPlaceDecompressionHang:skl */
 693        if (IS_SKL_REVID(i915, SKL_REVID_H0, REVID_FOREVER))
 694                wa_write_or(wal,
 695                            GEN9_GAMT_ECO_REG_RW_IA,
 696                            GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
 697}
 698
 699static void
 700bxt_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
 701{
 702        gen9_gt_workarounds_init(i915, wal);
 703
 704        /* WaInPlaceDecompressionHang:bxt */
 705        wa_write_or(wal,
 706                    GEN9_GAMT_ECO_REG_RW_IA,
 707                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
 708}
 709
 710static void
 711kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
 712{
 713        gen9_gt_workarounds_init(i915, wal);
 714
 715        /* WaDisableDynamicCreditSharing:kbl */
 716        if (IS_KBL_REVID(i915, 0, KBL_REVID_B0))
 717                wa_write_or(wal,
 718                            GAMT_CHKN_BIT_REG,
 719                            GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
 720
 721        /* WaDisableGafsUnitClkGating:kbl */
 722        wa_write_or(wal,
 723                    GEN7_UCGCTL4,
 724                    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
 725
 726        /* WaInPlaceDecompressionHang:kbl */
 727        wa_write_or(wal,
 728                    GEN9_GAMT_ECO_REG_RW_IA,
 729                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
 730}
 731
 732static void
 733glk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
 734{
 735        gen9_gt_workarounds_init(i915, wal);
 736}
 737
 738static void
 739cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
 740{
 741        gen9_gt_workarounds_init(i915, wal);
 742
 743        /* WaDisableGafsUnitClkGating:cfl */
 744        wa_write_or(wal,
 745                    GEN7_UCGCTL4,
 746                    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
 747
 748        /* WaInPlaceDecompressionHang:cfl */
 749        wa_write_or(wal,
 750                    GEN9_GAMT_ECO_REG_RW_IA,
 751                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
 752}
 753
 754static void
 755wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
 756{
 757        const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
 758        unsigned int slice, subslice;
 759        u32 l3_en, mcr, mcr_mask;
 760
 761        GEM_BUG_ON(INTEL_GEN(i915) < 10);
 762
 763        /*
 764         * WaProgramMgsrForL3BankSpecificMmioReads: cnl,icl
  765         * L3 banks could be fused off in a single-slice scenario. If that is
 766         * the case, we might need to program MCR select to a valid L3Bank
 767         * by default, to make sure we correctly read certain registers
 768         * later on (in the range 0xB100 - 0xB3FF).
 769         *
 770         * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl
 771         * Before any MMIO read into slice/subslice specific registers, MCR
 772         * packet control register needs to be programmed to point to any
 773         * enabled s/ss pair. Otherwise, incorrect values will be returned.
  774         * This means each subsequent MMIO read will be forwarded to a
  775         * specific s/ss combination, but this is OK since these registers
  776         * are consistent across s/ss in almost all cases. On the rare
  777         * occasions, such as INSTDONE, where the value depends on the
  778         * s/ss combo, the read should be done with read_subslice_reg.
 779         *
  780         * Since GEN8_MCR_SELECTOR contains dual-purpose bits that select both
  781         * the subslice and the L3 bank to which the respective mmio reads
  782         * will go, we have to find a common index that works for both
  783         * accesses.
 784         *
  785         * The case where we cannot find a common index fortunately should not
 786         * happen in production hardware, so we only emit a warning instead of
 787         * implementing something more complex that requires checking the range
 788         * of every MMIO read.
 789         */
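        /*
         * Hypothetical example: if the chosen slice has subslices 0-3
         * (mask 0x0f) and l3_en has bit 0 cleared, fls(l3_en & 0x0f) ==
         * fls(0x0e) == 4, so the code below settles on common index 3.
         */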
 790
 791        if (INTEL_GEN(i915) >= 10 && is_power_of_2(sseu->slice_mask)) {
 792                u32 l3_fuse =
 793                        intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3) &
 794                        GEN10_L3BANK_MASK;
 795
 796                DRM_DEBUG_DRIVER("L3 fuse = %x\n", l3_fuse);
 797                l3_en = ~(l3_fuse << GEN10_L3BANK_PAIR_COUNT | l3_fuse);
 798        } else {
 799                l3_en = ~0;
 800        }
 801
 802        slice = fls(sseu->slice_mask) - 1;
 803        subslice = fls(l3_en & intel_sseu_get_subslices(sseu, slice));
 804        if (!subslice) {
 805                DRM_WARN("No common index found between subslice mask %x and L3 bank mask %x!\n",
 806                         intel_sseu_get_subslices(sseu, slice), l3_en);
 807                subslice = fls(l3_en);
 808                WARN_ON(!subslice);
 809        }
 810        subslice--;
 811
 812        if (INTEL_GEN(i915) >= 11) {
 813                mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice);
 814                mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
 815        } else {
 816                mcr = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
 817                mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
 818        }
 819
 820        DRM_DEBUG_DRIVER("MCR slice/subslice = %x\n", mcr);
 821
 822        wa_write_masked_or(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr);
 823}
 824
 825static void
 826cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
 827{
 828        wa_init_mcr(i915, wal);
 829
 830        /* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
 831        if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
 832                wa_write_or(wal,
 833                            GAMT_CHKN_BIT_REG,
 834                            GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT);
 835
 836        /* WaInPlaceDecompressionHang:cnl */
 837        wa_write_or(wal,
 838                    GEN9_GAMT_ECO_REG_RW_IA,
 839                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
 840}
 841
 842static void
 843icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
 844{
 845        wa_init_mcr(i915, wal);
 846
 847        /* WaInPlaceDecompressionHang:icl */
 848        wa_write_or(wal,
 849                    GEN9_GAMT_ECO_REG_RW_IA,
 850                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
 851
 852        /* WaModifyGamTlbPartitioning:icl */
 853        wa_write_masked_or(wal,
 854                           GEN11_GACB_PERF_CTRL,
 855                           GEN11_HASH_CTRL_MASK,
 856                           GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);
 857
 858        /* Wa_1405766107:icl
 859         * Formerly known as WaCL2SFHalfMaxAlloc
 860         */
 861        wa_write_or(wal,
 862                    GEN11_LSN_UNSLCVC,
 863                    GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
 864                    GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);
 865
 866        /* Wa_220166154:icl
 867         * Formerly known as WaDisCtxReload
 868         */
 869        wa_write_or(wal,
 870                    GEN8_GAMW_ECO_DEV_RW_IA,
 871                    GAMW_ECO_DEV_CTX_RELOAD_DISABLE);
 872
 873        /* Wa_1405779004:icl (pre-prod) */
 874        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
 875                wa_write_or(wal,
 876                            SLICE_UNIT_LEVEL_CLKGATE,
 877                            MSCUNIT_CLKGATE_DIS);
 878
 879        /* Wa_1406680159:icl */
 880        wa_write_or(wal,
 881                    SUBSLICE_UNIT_LEVEL_CLKGATE,
 882                    GWUNIT_CLKGATE_DIS);
 883
 884        /* Wa_1406838659:icl (pre-prod) */
 885        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
 886                wa_write_or(wal,
 887                            INF_UNIT_LEVEL_CLKGATE,
 888                            CGPSF_CLKGATE_DIS);
 889
 890        /* Wa_1406463099:icl
 891         * Formerly known as WaGamTlbPendError
 892         */
 893        wa_write_or(wal,
 894                    GAMT_CHKN_BIT_REG,
 895                    GAMT_CHKN_DISABLE_L3_COH_PIPE);
 896
 897        /* Wa_1607087056:icl */
 898        wa_write_or(wal,
 899                    SLICE_UNIT_LEVEL_CLKGATE,
 900                    L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
 901}
 902
 903static void
 904tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
 905{
 906        /* Wa_1409420604:tgl */
 907        if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0))
 908                wa_write_or(wal,
 909                            SUBSLICE_UNIT_LEVEL_CLKGATE2,
 910                            CPSSUNIT_CLKGATE_DIS);
 911
 912        /* Wa_1409180338:tgl */
 913        if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0))
 914                wa_write_or(wal,
 915                            SLICE_UNIT_LEVEL_CLKGATE,
 916                            L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
 917}
 918
 919static void
 920gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
 921{
 922        if (IS_GEN(i915, 12))
 923                tgl_gt_workarounds_init(i915, wal);
 924        else if (IS_GEN(i915, 11))
 925                icl_gt_workarounds_init(i915, wal);
 926        else if (IS_CANNONLAKE(i915))
 927                cnl_gt_workarounds_init(i915, wal);
 928        else if (IS_COFFEELAKE(i915))
 929                cfl_gt_workarounds_init(i915, wal);
 930        else if (IS_GEMINILAKE(i915))
 931                glk_gt_workarounds_init(i915, wal);
 932        else if (IS_KABYLAKE(i915))
 933                kbl_gt_workarounds_init(i915, wal);
 934        else if (IS_BROXTON(i915))
 935                bxt_gt_workarounds_init(i915, wal);
 936        else if (IS_SKYLAKE(i915))
 937                skl_gt_workarounds_init(i915, wal);
 938        else if (INTEL_GEN(i915) <= 8)
 939                return;
 940        else
 941                MISSING_CASE(INTEL_GEN(i915));
 942}
 943
 944void intel_gt_init_workarounds(struct drm_i915_private *i915)
 945{
 946        struct i915_wa_list *wal = &i915->gt_wa_list;
 947
 948        wa_init_start(wal, "GT", "global");
 949        gt_init_workarounds(i915, wal);
 950        wa_init_finish(wal);
 951}
 952
 953static enum forcewake_domains
 954wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
 955{
 956        enum forcewake_domains fw = 0;
 957        struct i915_wa *wa;
 958        unsigned int i;
 959
 960        for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
 961                fw |= intel_uncore_forcewake_for_reg(uncore,
 962                                                     wa->reg,
 963                                                     FW_REG_READ |
 964                                                     FW_REG_WRITE);
 965
 966        return fw;
 967}
 968
 969static bool
 970wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from)
 971{
 972        if ((cur ^ wa->val) & wa->read) {
 973                DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n",
 974                          name, from, i915_mmio_reg_offset(wa->reg),
 975                          cur, cur & wa->read,
 976                          wa->val, wa->mask);
 977
 978                return false;
 979        }
 980
 981        return true;
 982}
 983
 984static void
 985wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal)
 986{
 987        enum forcewake_domains fw;
 988        unsigned long flags;
 989        struct i915_wa *wa;
 990        unsigned int i;
 991
 992        if (!wal->count)
 993                return;
 994
 995        fw = wal_get_fw_for_rmw(uncore, wal);
 996
 997        spin_lock_irqsave(&uncore->lock, flags);
 998        intel_uncore_forcewake_get__locked(uncore, fw);
 999
1000        for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
1001                intel_uncore_rmw_fw(uncore, wa->reg, wa->mask, wa->val);
1002                if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
1003                        wa_verify(wa,
1004                                  intel_uncore_read_fw(uncore, wa->reg),
1005                                  wal->name, "application");
1006        }
1007
1008        intel_uncore_forcewake_put__locked(uncore, fw);
1009        spin_unlock_irqrestore(&uncore->lock, flags);
1010}
1011
1012void intel_gt_apply_workarounds(struct intel_gt *gt)
1013{
1014        wa_list_apply(gt->uncore, &gt->i915->gt_wa_list);
1015}
1016
1017static bool wa_list_verify(struct intel_uncore *uncore,
1018                           const struct i915_wa_list *wal,
1019                           const char *from)
1020{
1021        struct i915_wa *wa;
1022        unsigned int i;
1023        bool ok = true;
1024
1025        for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
1026                ok &= wa_verify(wa,
1027                                intel_uncore_read(uncore, wa->reg),
1028                                wal->name, from);
1029
1030        return ok;
1031}
1032
1033bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from)
1034{
1035        return wa_list_verify(gt->uncore, &gt->i915->gt_wa_list, from);
1036}
1037
1038static inline bool is_nonpriv_flags_valid(u32 flags)
1039{
1040        /* Check only valid flag bits are set */
1041        if (flags & ~RING_FORCE_TO_NONPRIV_MASK_VALID)
1042                return false;
1043
1044        /* NB: Only 3 out of 4 enum values are valid for access field */
1045        if ((flags & RING_FORCE_TO_NONPRIV_ACCESS_MASK) ==
1046            RING_FORCE_TO_NONPRIV_ACCESS_INVALID)
1047                return false;
1048
1049        return true;
1050}
1051
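/*
 * Note: the RING_FORCE_TO_NONPRIV access/range flags are assumed to live
 * outside the dword-aligned mmio offset bits of a whitelist entry, which is
 * why whitelist_reg_ext() below can simply OR them into the register value
 * after is_nonpriv_flags_valid() has vetted them.
 */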
1052static void
1053whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
1054{
1055        struct i915_wa wa = {
1056                .reg = reg
1057        };
1058
1059        if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
1060                return;
1061
1062        if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags)))
1063                return;
1064
1065        wa.reg.reg |= flags;
1066        _wa_add(wal, &wa);
1067}
1068
1069static void
1070whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
1071{
1072        whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
1073}
1074
1075static void gen9_whitelist_build(struct i915_wa_list *w)
1076{
1077        /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
1078        whitelist_reg(w, GEN9_CTX_PREEMPT_REG);
1079
1080        /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
1081        whitelist_reg(w, GEN8_CS_CHICKEN1);
1082
1083        /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
1084        whitelist_reg(w, GEN8_HDC_CHICKEN1);
1085
1086        /* WaSendPushConstantsFromMMIO:skl,bxt */
1087        whitelist_reg(w, COMMON_SLICE_CHICKEN2);
1088}
1089
1090static void skl_whitelist_build(struct intel_engine_cs *engine)
1091{
1092        struct i915_wa_list *w = &engine->whitelist;
1093
1094        if (engine->class != RENDER_CLASS)
1095                return;
1096
1097        gen9_whitelist_build(w);
1098
1099        /* WaDisableLSQCROPERFforOCL:skl */
1100        whitelist_reg(w, GEN8_L3SQCREG4);
1101}
1102
1103static void bxt_whitelist_build(struct intel_engine_cs *engine)
1104{
1105        if (engine->class != RENDER_CLASS)
1106                return;
1107
1108        gen9_whitelist_build(&engine->whitelist);
1109}
1110
1111static void kbl_whitelist_build(struct intel_engine_cs *engine)
1112{
1113        struct i915_wa_list *w = &engine->whitelist;
1114
1115        if (engine->class != RENDER_CLASS)
1116                return;
1117
1118        gen9_whitelist_build(w);
1119
1120        /* WaDisableLSQCROPERFforOCL:kbl */
1121        whitelist_reg(w, GEN8_L3SQCREG4);
1122}
1123
1124static void glk_whitelist_build(struct intel_engine_cs *engine)
1125{
1126        struct i915_wa_list *w = &engine->whitelist;
1127
1128        if (engine->class != RENDER_CLASS)
1129                return;
1130
1131        gen9_whitelist_build(w);
1132
1133        /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
1134        whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
1135}
1136
1137static void cfl_whitelist_build(struct intel_engine_cs *engine)
1138{
1139        struct i915_wa_list *w = &engine->whitelist;
1140
1141        if (engine->class != RENDER_CLASS)
1142                return;
1143
1144        gen9_whitelist_build(w);
1145
1146        /*
1147         * WaAllowPMDepthAndInvocationCountAccessFromUMD:cfl,whl,cml,aml
1148         *
 1149         * This covers 4 registers which are next to one another:
1150         *   - PS_INVOCATION_COUNT
1151         *   - PS_INVOCATION_COUNT_UDW
1152         *   - PS_DEPTH_COUNT
1153         *   - PS_DEPTH_COUNT_UDW
1154         */
1155        whitelist_reg_ext(w, PS_INVOCATION_COUNT,
1156                          RING_FORCE_TO_NONPRIV_ACCESS_RD |
1157                          RING_FORCE_TO_NONPRIV_RANGE_4);
1158}
1159
1160static void cnl_whitelist_build(struct intel_engine_cs *engine)
1161{
1162        struct i915_wa_list *w = &engine->whitelist;
1163
1164        if (engine->class != RENDER_CLASS)
1165                return;
1166
1167        /* WaEnablePreemptionGranularityControlByUMD:cnl */
1168        whitelist_reg(w, GEN8_CS_CHICKEN1);
1169}
1170
1171static void icl_whitelist_build(struct intel_engine_cs *engine)
1172{
1173        struct i915_wa_list *w = &engine->whitelist;
1174
1175        switch (engine->class) {
1176        case RENDER_CLASS:
1177                /* WaAllowUMDToModifyHalfSliceChicken7:icl */
1178                whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7);
1179
1180                /* WaAllowUMDToModifySamplerMode:icl */
1181                whitelist_reg(w, GEN10_SAMPLER_MODE);
1182
1183                /* WaEnableStateCacheRedirectToCS:icl */
1184                whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
1185
1186                /*
1187                 * WaAllowPMDepthAndInvocationCountAccessFromUMD:icl
1188                 *
 1189                 * This covers 4 registers which are next to one another:
1190                 *   - PS_INVOCATION_COUNT
1191                 *   - PS_INVOCATION_COUNT_UDW
1192                 *   - PS_DEPTH_COUNT
1193                 *   - PS_DEPTH_COUNT_UDW
1194                 */
1195                whitelist_reg_ext(w, PS_INVOCATION_COUNT,
1196                                  RING_FORCE_TO_NONPRIV_ACCESS_RD |
1197                                  RING_FORCE_TO_NONPRIV_RANGE_4);
1198                break;
1199
1200        case VIDEO_DECODE_CLASS:
1201                /* hucStatusRegOffset */
1202                whitelist_reg_ext(w, _MMIO(0x2000 + engine->mmio_base),
1203                                  RING_FORCE_TO_NONPRIV_ACCESS_RD);
1204                /* hucUKernelHdrInfoRegOffset */
1205                whitelist_reg_ext(w, _MMIO(0x2014 + engine->mmio_base),
1206                                  RING_FORCE_TO_NONPRIV_ACCESS_RD);
1207                /* hucStatus2RegOffset */
1208                whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base),
1209                                  RING_FORCE_TO_NONPRIV_ACCESS_RD);
1210                break;
1211
1212        default:
1213                break;
1214        }
1215}
1216
1217static void tgl_whitelist_build(struct intel_engine_cs *engine)
1218{
1219        struct i915_wa_list *w = &engine->whitelist;
1220
1221        switch (engine->class) {
1222        case RENDER_CLASS:
1223                /*
1224                 * WaAllowPMDepthAndInvocationCountAccessFromUMD:tgl
1225                 *
 1226                 * This covers 4 registers which are next to one another:
1227                 *   - PS_INVOCATION_COUNT
1228                 *   - PS_INVOCATION_COUNT_UDW
1229                 *   - PS_DEPTH_COUNT
1230                 *   - PS_DEPTH_COUNT_UDW
1231                 */
1232                whitelist_reg_ext(w, PS_INVOCATION_COUNT,
1233                                  RING_FORCE_TO_NONPRIV_ACCESS_RD |
1234                                  RING_FORCE_TO_NONPRIV_RANGE_4);
1235                break;
1236        default:
1237                break;
1238        }
1239}
1240
1241void intel_engine_init_whitelist(struct intel_engine_cs *engine)
1242{
1243        struct drm_i915_private *i915 = engine->i915;
1244        struct i915_wa_list *w = &engine->whitelist;
1245
1246        wa_init_start(w, "whitelist", engine->name);
1247
1248        if (IS_GEN(i915, 12))
1249                tgl_whitelist_build(engine);
1250        else if (IS_GEN(i915, 11))
1251                icl_whitelist_build(engine);
1252        else if (IS_CANNONLAKE(i915))
1253                cnl_whitelist_build(engine);
1254        else if (IS_COFFEELAKE(i915))
1255                cfl_whitelist_build(engine);
1256        else if (IS_GEMINILAKE(i915))
1257                glk_whitelist_build(engine);
1258        else if (IS_KABYLAKE(i915))
1259                kbl_whitelist_build(engine);
1260        else if (IS_BROXTON(i915))
1261                bxt_whitelist_build(engine);
1262        else if (IS_SKYLAKE(i915))
1263                skl_whitelist_build(engine);
1264        else if (INTEL_GEN(i915) <= 8)
1265                return;
1266        else
1267                MISSING_CASE(INTEL_GEN(i915));
1268
1269        wa_init_finish(w);
1270}
1271
1272void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
1273{
1274        const struct i915_wa_list *wal = &engine->whitelist;
1275        struct intel_uncore *uncore = engine->uncore;
1276        const u32 base = engine->mmio_base;
1277        struct i915_wa *wa;
1278        unsigned int i;
1279
1280        if (!wal->count)
1281                return;
1282
1283        for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
1284                intel_uncore_write(uncore,
1285                                   RING_FORCE_TO_NONPRIV(base, i),
1286                                   i915_mmio_reg_offset(wa->reg));
1287
1288        /* And clear the rest just in case of garbage */
1289        for (; i < RING_MAX_NONPRIV_SLOTS; i++)
1290                intel_uncore_write(uncore,
1291                                   RING_FORCE_TO_NONPRIV(base, i),
1292                                   i915_mmio_reg_offset(RING_NOPID(base)));
1293}
1294
1295static void
1296rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
1297{
1298        struct drm_i915_private *i915 = engine->i915;
1299
1300        if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) {
1301                /* Wa_1606700617:tgl */
1302                wa_masked_en(wal,
1303                             GEN9_CS_DEBUG_MODE1,
1304                             FF_DOP_CLOCK_GATE_DISABLE);
1305
1306                /* Wa_1607138336:tgl */
1307                wa_write_or(wal,
1308                            GEN9_CTX_PREEMPT_REG,
1309                            GEN12_DISABLE_POSH_BUSY_FF_DOP_CG);
1310
1311                /* Wa_1607030317:tgl */
1312                /* Wa_1607186500:tgl */
1313                /* Wa_1607297627:tgl */
1314                wa_masked_en(wal,
1315                             GEN6_RC_SLEEP_PSMI_CONTROL,
1316                             GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
1317                             GEN8_RC_SEMA_IDLE_MSG_DISABLE);
1318        }
1319
1320        if (IS_GEN(i915, 11)) {
 1321                /* This is not a Wa. Enable for better image quality */
1322                wa_masked_en(wal,
1323                             _3D_CHICKEN3,
1324                             _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);
1325
1326                /* WaPipelineFlushCoherentLines:icl */
1327                wa_write_or(wal,
1328                            GEN8_L3SQCREG4,
1329                            GEN8_LQSC_FLUSH_COHERENT_LINES);
1330
1331                /*
1332                 * Wa_1405543622:icl
1333                 * Formerly known as WaGAPZPriorityScheme
1334                 */
1335                wa_write_or(wal,
1336                            GEN8_GARBCNTL,
1337                            GEN11_ARBITRATION_PRIO_ORDER_MASK);
1338
1339                /*
1340                 * Wa_1604223664:icl
1341                 * Formerly known as WaL3BankAddressHashing
1342                 */
1343                wa_write_masked_or(wal,
1344                                   GEN8_GARBCNTL,
1345                                   GEN11_HASH_CTRL_EXCL_MASK,
1346                                   GEN11_HASH_CTRL_EXCL_BIT0);
1347                wa_write_masked_or(wal,
1348                                   GEN11_GLBLINVL,
1349                                   GEN11_BANK_HASH_ADDR_EXCL_MASK,
1350                                   GEN11_BANK_HASH_ADDR_EXCL_BIT0);
1351
1352                /*
1353                 * Wa_1405733216:icl
1354                 * Formerly known as WaDisableCleanEvicts
1355                 */
1356                wa_write_or(wal,
1357                            GEN8_L3SQCREG4,
1358                            GEN11_LQSC_CLEAN_EVICT_DISABLE);
1359
1360                /* WaForwardProgressSoftReset:icl */
1361                wa_write_or(wal,
1362                            GEN10_SCRATCH_LNCF2,
1363                            PMFLUSHDONE_LNICRSDROP |
1364                            PMFLUSH_GAPL3UNBLOCK |
1365                            PMFLUSHDONE_LNEBLK);
1366
1367                /* Wa_1406609255:icl (pre-prod) */
1368                if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
1369                        wa_write_or(wal,
1370                                    GEN7_SARCHKMD,
1371                                    GEN7_DISABLE_DEMAND_PREFETCH);
1372
1373                /* Wa_1606682166:icl */
1374                wa_write_or(wal,
1375                            GEN7_SARCHKMD,
1376                            GEN7_DISABLE_SAMPLER_PREFETCH);
1377
1378                /* Wa_1409178092:icl */
1379                wa_write_masked_or(wal,
1380                                   GEN11_SCRATCH2,
1381                                   GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE,
1382                                   0);
1383        }
1384
1385        if (IS_GEN_RANGE(i915, 9, 11)) {
1386                /* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl */
1387                wa_masked_en(wal,
1388                             GEN7_FF_SLICE_CS_CHICKEN1,
1389                             GEN9_FFSC_PERCTX_PREEMPT_CTRL);
1390        }
1391
1392        if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915)) {
1393                /* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
1394                wa_write_or(wal,
1395                            GEN8_GARBCNTL,
1396                            GEN9_GAPS_TSV_CREDIT_DISABLE);
1397        }
1398
1399        if (IS_BROXTON(i915)) {
1400                /* WaDisablePooledEuLoadBalancingFix:bxt */
1401                wa_masked_en(wal,
1402                             FF_SLICE_CS_CHICKEN2,
1403                             GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
1404        }
1405
1406        if (IS_GEN(i915, 9)) {
1407                /* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
1408                wa_masked_en(wal,
1409                             GEN9_CSFE_CHICKEN1_RCS,
1410                             GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);
1411
1412                /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
1413                wa_write_or(wal,
1414                            BDW_SCRATCH1,
1415                            GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
1416
1417                /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
1418                if (IS_GEN9_LP(i915))
1419                        wa_write_masked_or(wal,
1420                                           GEN8_L3SQCREG1,
1421                                           L3_PRIO_CREDITS_MASK,
1422                                           L3_GENERAL_PRIO_CREDITS(62) |
1423                                           L3_HIGH_PRIO_CREDITS(2));
1424
1425                /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
1426                wa_write_or(wal,
1427                            GEN8_L3SQCREG4,
1428                            GEN8_LQSC_FLUSH_COHERENT_LINES);
1429        }
1430}
1431
1432static void
1433xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
1434{
1435        struct drm_i915_private *i915 = engine->i915;
1436
1437        /* WaKBLVECSSemaphoreWaitPoll:kbl */
1438        if (IS_KBL_REVID(i915, KBL_REVID_A0, KBL_REVID_E0)) {
1439                wa_write(wal,
1440                         RING_SEMA_WAIT_POLL(engine->mmio_base),
1441                         1);
1442        }
1443}
1444
1445static void
1446engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
1447{
1448        if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 8))
1449                return;
1450
1451        if (engine->class == RENDER_CLASS)
1452                rcs_engine_wa_init(engine, wal);
1453        else
1454                xcs_engine_wa_init(engine, wal);
1455}
1456
1457void intel_engine_init_workarounds(struct intel_engine_cs *engine)
1458{
1459        struct i915_wa_list *wal = &engine->wa_list;
1460
1461        if (INTEL_GEN(engine->i915) < 8)
1462                return;
1463
1464        wa_init_start(wal, "engine", engine->name);
1465        engine_init_workarounds(engine, wal);
1466        wa_init_finish(wal);
1467}
1468
1469void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
1470{
1471        wa_list_apply(engine->uncore, &engine->wa_list);
1472}
1473
1474static struct i915_vma *
1475create_scratch(struct i915_address_space *vm, int count)
1476{
1477        struct drm_i915_gem_object *obj;
1478        struct i915_vma *vma;
1479        unsigned int size;
1480        int err;
1481
1482        size = round_up(count * sizeof(u32), PAGE_SIZE);
1483        obj = i915_gem_object_create_internal(vm->i915, size);
1484        if (IS_ERR(obj))
1485                return ERR_CAST(obj);
1486
1487        i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
1488
1489        vma = i915_vma_instance(obj, vm, NULL);
1490        if (IS_ERR(vma)) {
1491                err = PTR_ERR(vma);
1492                goto err_obj;
1493        }
1494
1495        err = i915_vma_pin(vma, 0, 0,
1496                           i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
1497        if (err)
1498                goto err_obj;
1499
1500        return vma;
1501
1502err_obj:
1503        i915_gem_object_put(obj);
1504        return ERR_PTR(err);
1505}
1506
1507static bool mcr_range(struct drm_i915_private *i915, u32 offset)
1508{
1509        /*
1510         * Registers in this range are affected by the MCR selector
1511         * which only controls CPU initiated MMIO. Routing does not
1512         * work for CS access so we cannot verify them on this path.
1513         */
1514        if (INTEL_GEN(i915) >= 8 && (offset >= 0xb000 && offset <= 0xb4ff))
1515                return true;
1516
1517        return false;
1518}
1519
1520static int
1521wa_list_srm(struct i915_request *rq,
1522            const struct i915_wa_list *wal,
1523            struct i915_vma *vma)
1524{
1525        struct drm_i915_private *i915 = rq->i915;
1526        unsigned int i, count = 0;
1527        const struct i915_wa *wa;
1528        u32 srm, *cs;
1529
1530        srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
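        /*
         * On gen8+ the SRM command carries a 64-bit address, making it one
         * dword longer; incrementing the opcode below bumps its length
         * field accordingly.
         */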
1531        if (INTEL_GEN(i915) >= 8)
1532                srm++;
1533
1534        for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
1535                if (!mcr_range(i915, i915_mmio_reg_offset(wa->reg)))
1536                        count++;
1537        }
1538
1539        cs = intel_ring_begin(rq, 4 * count);
1540        if (IS_ERR(cs))
1541                return PTR_ERR(cs);
1542
1543        for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
1544                u32 offset = i915_mmio_reg_offset(wa->reg);
1545
1546                if (mcr_range(i915, offset))
1547                        continue;
1548
1549                *cs++ = srm;
1550                *cs++ = offset;
1551                *cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i;
1552                *cs++ = 0;
1553        }
1554        intel_ring_advance(rq, cs);
1555
1556        return 0;
1557}
1558
1559static int engine_wa_list_verify(struct intel_context *ce,
1560                                 const struct i915_wa_list * const wal,
1561                                 const char *from)
1562{
1563        const struct i915_wa *wa;
1564        struct i915_request *rq;
1565        struct i915_vma *vma;
1566        unsigned int i;
1567        u32 *results;
1568        int err;
1569
1570        if (!wal->count)
1571                return 0;
1572
1573        vma = create_scratch(&ce->engine->gt->ggtt->vm, wal->count);
1574        if (IS_ERR(vma))
1575                return PTR_ERR(vma);
1576
1577        rq = intel_context_create_request(ce);
1578        if (IS_ERR(rq)) {
1579                err = PTR_ERR(rq);
1580                goto err_vma;
1581        }
1582
1583        err = wa_list_srm(rq, wal, vma);
1584        if (err)
1585                goto err_vma;
1586
1587        i915_request_add(rq);
1588        if (i915_request_wait(rq, 0, HZ / 5) < 0) {
1589                err = -ETIME;
1590                goto err_vma;
1591        }
1592
1593        results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
1594        if (IS_ERR(results)) {
1595                err = PTR_ERR(results);
1596                goto err_vma;
1597        }
1598
1599        err = 0;
1600        for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
1601                if (mcr_range(rq->i915, i915_mmio_reg_offset(wa->reg)))
1602                        continue;
1603
1604                if (!wa_verify(wa, results[i], wal->name, from))
1605                        err = -ENXIO;
1606        }
1607
1608        i915_gem_object_unpin_map(vma->obj);
1609
1610err_vma:
1611        i915_vma_unpin(vma);
1612        i915_vma_put(vma);
1613        return err;
1614}
1615
1616int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
1617                                    const char *from)
1618{
1619        return engine_wa_list_verify(engine->kernel_context,
1620                                     &engine->wa_list,
1621                                     from);
1622}
1623
1624#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1625#include "selftest_workarounds.c"
1626#endif
1627