linux/drivers/gpu/drm/i915/gt/intel_rc6.c
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include <linux/pm_runtime.h>

#include "i915_drv.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
#include "intel_rc6.h"
#include "intel_sideband.h"

/**
 * DOC: RC6
 *
 * RC6 is a special power stage which allows the GPU to enter a very
 * low-voltage mode when idle, drawing as little as 0V in this stage. This
 * stage is entered automatically when the GPU is idle and RC6 support is
 * enabled, and as soon as a new workload arrives the GPU wakes up
 * automatically as well.
 *
 * There are different RC6 modes available on Intel GPUs, which differ in
 * the latency required to enter and leave RC6, and in the voltage consumed
 * by the GPU in the different states.
 *
 * The combination of the following flags defines which states the GPU is
 * allowed to enter: RC6 is the normal RC6 state, RC6p is the deep RC6, and
 * RC6pp is the deepest RC6. Their support by hardware varies according to
 * the GPU, BIOS, chipset and platform. RC6 is usually the safest one and
 * the one which brings the most power savings; deeper states save more
 * power, but require a higher latency to switch to and wake up.
 */

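/*
 * As an illustrative sketch only (the real enable sequences live in the
 * *_rc6_enable() functions below), allowing the deeper states on hardware
 * that supports them amounts to OR'ing their control bits together:
 *
 *      u32 mask = GEN6_RC_CTL_RC6_ENABLE;
 *
 *      if (HAS_RC6p(i915))
 *              mask |= GEN6_RC_CTL_RC6p_ENABLE;
 *      if (HAS_RC6pp(i915))
 *              mask |= GEN6_RC_CTL_RC6pp_ENABLE;
 *      set(uncore, GEN6_RC_CONTROL,
 *          mask | GEN6_RC_CTL_EI_MODE(1) | GEN6_RC_CTL_HW_ENABLE);
 *
 * gen6_rc6_enable() below does exactly this.
 */
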
static struct intel_gt *rc6_to_gt(struct intel_rc6 *rc6)
{
        return container_of(rc6, struct intel_gt, rc6);
}

static struct intel_uncore *rc6_to_uncore(struct intel_rc6 *rc)
{
        return rc6_to_gt(rc)->uncore;
}

static struct drm_i915_private *rc6_to_i915(struct intel_rc6 *rc)
{
        return rc6_to_gt(rc)->i915;
}

static inline void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
{
        intel_uncore_write_fw(uncore, reg, val);
}

static void gen11_rc6_enable(struct intel_rc6 *rc6)
{
        struct intel_uncore *uncore = rc6_to_uncore(rc6);
        struct intel_engine_cs *engine;
        enum intel_engine_id id;

        /* 2b: Program RC6 thresholds. */
        set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
        set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150);

        set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
        set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
        for_each_engine(engine, rc6_to_gt(rc6), id)
                set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);

        set(uncore, GUC_MAX_IDLE_COUNT, 0xA);

        set(uncore, GEN6_RC_SLEEP, 0);

        set(uncore, GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */

        /*
         * 2c: Program Coarse Power Gating Policies.
         *
         * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
         * use instead is a more conservative estimate for the maximum time
         * it takes us to service a CS interrupt and submit a new ELSP - that
         * is the time which the GPU is idle waiting for the CPU to select the
         * next request to execute. If the idle hysteresis is less than that
         * interrupt service latency, the hardware will automatically gate
         * the power well and we will then incur the wake up cost on top of
         * the service latency. A similar guide from plane_state is that we
         * do not want the enable hysteresis to be less than the wakeup latency.
         *
         * igt/gem_exec_nop/sequential provides a rough estimate for the
         * service latency, and puts it around 10us for Broadwell (and other
         * big core) and around 40us for Broxton (and other low power cores).
         * [Note that for legacy ringbuffer submission, this is less than 1us!]
         * However, the wakeup latency on Broxton is closer to 100us. To be
         * conservative, we have to factor in a context switch on top (due
         * to ksoftirqd).
         */
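        /* 250 * 1280ns = 320us, comfortably above the ~100us worst case noted above. */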
        set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
        set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);

        /* 3a: Enable RC6 */
        set(uncore, GEN6_RC_CONTROL,
            GEN6_RC_CTL_HW_ENABLE |
            GEN6_RC_CTL_RC6_ENABLE |
            GEN6_RC_CTL_EI_MODE(1));

        set(uncore, GEN9_PG_ENABLE,
            GEN9_RENDER_PG_ENABLE |
            GEN9_MEDIA_PG_ENABLE |
            GEN11_MEDIA_SAMPLER_PG_ENABLE);
}

static void gen9_rc6_enable(struct intel_rc6 *rc6)
{
        struct intel_uncore *uncore = rc6_to_uncore(rc6);
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        u32 rc6_mode;

        /* 2b: Program RC6 thresholds. */
        if (INTEL_GEN(rc6_to_i915(rc6)) >= 10) {
                set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
                set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
        } else if (IS_SKYLAKE(rc6_to_i915(rc6))) {
                /*
                 * WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only
                 * when CPG is enabled
                 */
                set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
        } else {
                set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
        }

        set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
        set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
        for_each_engine(engine, rc6_to_gt(rc6), id)
                set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);

        set(uncore, GUC_MAX_IDLE_COUNT, 0xA);

        set(uncore, GEN6_RC_SLEEP, 0);

        /*
         * 2c: Program Coarse Power Gating Policies.
         *
         * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
         * use instead is a more conservative estimate for the maximum time
         * it takes us to service a CS interrupt and submit a new ELSP - that
         * is the time which the GPU is idle waiting for the CPU to select the
         * next request to execute. If the idle hysteresis is less than that
         * interrupt service latency, the hardware will automatically gate
         * the power well and we will then incur the wake up cost on top of
         * the service latency. A similar guide from plane_state is that we
         * do not want the enable hysteresis to be less than the wakeup latency.
         *
         * igt/gem_exec_nop/sequential provides a rough estimate for the
         * service latency, and puts it around 10us for Broadwell (and other
         * big core) and around 40us for Broxton (and other low power cores).
         * [Note that for legacy ringbuffer submission, this is less than 1us!]
         * However, the wakeup latency on Broxton is closer to 100us. To be
         * conservative, we have to factor in a context switch on top (due
         * to ksoftirqd).
         */
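        /* 250 * 1280ns = 320us, comfortably above the ~100us worst case noted above. */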
        set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
        set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);

        /* 3a: Enable RC6 */
        set(uncore, GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */

        /* WaRsUseTimeoutMode:cnl (pre-prod) */
        if (IS_CNL_REVID(rc6_to_i915(rc6), CNL_REVID_A0, CNL_REVID_C0))
                rc6_mode = GEN7_RC_CTL_TO_MODE;
        else
                rc6_mode = GEN6_RC_CTL_EI_MODE(1);

        set(uncore, GEN6_RC_CONTROL,
            GEN6_RC_CTL_HW_ENABLE |
            GEN6_RC_CTL_RC6_ENABLE |
            rc6_mode);

        /*
         * WaRsDisableCoarsePowerGating:skl,cnl
         *   - Render/Media PG need to be disabled with RC6.
         */
        if (!NEEDS_WaRsDisableCoarsePowerGating(rc6_to_i915(rc6)))
                set(uncore, GEN9_PG_ENABLE,
                    GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE);
}

static void gen8_rc6_enable(struct intel_rc6 *rc6)
{
        struct intel_uncore *uncore = rc6_to_uncore(rc6);
        struct intel_engine_cs *engine;
        enum intel_engine_id id;

        /* 2b: Program RC6 thresholds. */
        set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
        set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
        set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
        for_each_engine(engine, rc6_to_gt(rc6), id)
                set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
        set(uncore, GEN6_RC_SLEEP, 0);
        set(uncore, GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */

        /* 3: Enable RC6 */
        set(uncore, GEN6_RC_CONTROL,
            GEN6_RC_CTL_HW_ENABLE |
            GEN7_RC_CTL_TO_MODE |
            GEN6_RC_CTL_RC6_ENABLE);
}

static void gen6_rc6_enable(struct intel_rc6 *rc6)
{
        struct intel_uncore *uncore = rc6_to_uncore(rc6);
        struct drm_i915_private *i915 = rc6_to_i915(rc6);
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        u32 rc6vids, rc6_mask;
        int ret;

        set(uncore, GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
        set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
        set(uncore, GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
        set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000);
        set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25);

        for_each_engine(engine, rc6_to_gt(rc6), id)
                set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);

        set(uncore, GEN6_RC_SLEEP, 0);
        set(uncore, GEN6_RC1e_THRESHOLD, 1000);
        if (IS_IVYBRIDGE(i915))
                set(uncore, GEN6_RC6_THRESHOLD, 125000);
        else
                set(uncore, GEN6_RC6_THRESHOLD, 50000);
        set(uncore, GEN6_RC6p_THRESHOLD, 150000);
        set(uncore, GEN6_RC6pp_THRESHOLD, 64000); /* unused */

        /* We don't use those on Haswell */
        rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
        if (HAS_RC6p(i915))
                rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
        if (HAS_RC6pp(i915))
                rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
        set(uncore, GEN6_RC_CONTROL,
            rc6_mask |
            GEN6_RC_CTL_EI_MODE(1) |
            GEN6_RC_CTL_HW_ENABLE);

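        /*
         * The VID encoding used by GEN6_ENCODE_RC6_VID()/GEN6_DECODE_RC6_VID()
         * is mV = vid * 5 + 245 (see i915_reg.h), so the 450mV floor enforced
         * below corresponds to a VID of 41.
         */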
        rc6vids = 0;
        ret = sandybridge_pcode_read(i915, GEN6_PCODE_READ_RC6VIDS,
                                     &rc6vids, NULL);
        if (IS_GEN(i915, 6) && ret) {
                DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
        } else if (IS_GEN(i915, 6) &&
                   (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
                DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
                                 GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
                rc6vids &= 0xffff00;
                rc6vids |= GEN6_ENCODE_RC6_VID(450);
                ret = sandybridge_pcode_write(i915, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
                if (ret)
                        DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
        }
}

/* Check that the pcbr address is not empty. */
static int chv_rc6_init(struct intel_rc6 *rc6)
{
        struct intel_uncore *uncore = rc6_to_uncore(rc6);
        resource_size_t pctx_paddr, paddr;
        resource_size_t pctx_size = 32 * SZ_1K;
        u32 pcbr;

        pcbr = intel_uncore_read(uncore, VLV_PCBR);
        if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
                DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
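                /* Carve the 32K context out of the top of stolen memory (DSM), aligned down to 4K. */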
                paddr = rc6_to_i915(rc6)->dsm.end + 1 - pctx_size;
                GEM_BUG_ON(paddr > U32_MAX);

                pctx_paddr = (paddr & ~4095);
                intel_uncore_write(uncore, VLV_PCBR, pctx_paddr);
        }

        return 0;
}

static int vlv_rc6_init(struct intel_rc6 *rc6)
{
        struct drm_i915_private *i915 = rc6_to_i915(rc6);
        struct intel_uncore *uncore = rc6_to_uncore(rc6);
        struct drm_i915_gem_object *pctx;
        resource_size_t pctx_paddr;
        resource_size_t pctx_size = 24 * SZ_1K;
        u32 pcbr;

        pcbr = intel_uncore_read(uncore, VLV_PCBR);
        if (pcbr) {
                /* BIOS set it up already, grab the pre-alloc'd space */
                resource_size_t pcbr_offset;

                pcbr_offset = (pcbr & ~4095) - i915->dsm.start;
                pctx = i915_gem_object_create_stolen_for_preallocated(i915,
                                                                      pcbr_offset,
                                                                      I915_GTT_OFFSET_NONE,
                                                                      pctx_size);
                if (IS_ERR(pctx))
                        return PTR_ERR(pctx);

                goto out;
        }

        DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");

        /*
         * From the Gunit register HAS:
         * The Gfx driver is expected to program this register and ensure
         * proper allocation within Gfx stolen memory.  For example, this
         * register should be programmed such that the PCBR range does not
         * overlap with other ranges, such as the frame buffer, protected
         * memory, or any other relevant ranges.
         */
        pctx = i915_gem_object_create_stolen(i915, pctx_size);
        if (IS_ERR(pctx)) {
                DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
                return PTR_ERR(pctx);
        }

        GEM_BUG_ON(range_overflows_t(u64,
                                     i915->dsm.start,
                                     pctx->stolen->start,
                                     U32_MAX));
        pctx_paddr = i915->dsm.start + pctx->stolen->start;
        intel_uncore_write(uncore, VLV_PCBR, pctx_paddr);

out:
        rc6->pctx = pctx;
        return 0;
}

static void chv_rc6_enable(struct intel_rc6 *rc6)
{
        struct intel_uncore *uncore = rc6_to_uncore(rc6);
        struct intel_engine_cs *engine;
        enum intel_engine_id id;

        /* 2a: Program RC6 thresholds. */
        set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
        set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
        set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */

        for_each_engine(engine, rc6_to_gt(rc6), id)
                set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
        set(uncore, GEN6_RC_SLEEP, 0);

        /* TO threshold set to 500 us (0x186 * 1.28 us) */
        set(uncore, GEN6_RC6_THRESHOLD, 0x186);

        /* Allows RC6 residency counter to work */
        set(uncore, VLV_COUNTER_CONTROL,
            _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
                               VLV_MEDIA_RC6_COUNT_EN |
                               VLV_RENDER_RC6_COUNT_EN));

        /* 3: Enable RC6 */
        set(uncore, GEN6_RC_CONTROL, GEN7_RC_CTL_TO_MODE);
}

static void vlv_rc6_enable(struct intel_rc6 *rc6)
{
        struct intel_uncore *uncore = rc6_to_uncore(rc6);
        struct intel_engine_cs *engine;
        enum intel_engine_id id;

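        /* 0x00280000 is 40 << 16, the same wake rate limit as chv/gen8 above. */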
        set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
        set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000);
        set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25);

        for_each_engine(engine, rc6_to_gt(rc6), id)
                set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);

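        /*
         * TO threshold: 0x557 * 1.28us ~= 1.75ms, assuming the same 1.28us
         * units as the chv comment above.
         */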
        set(uncore, GEN6_RC6_THRESHOLD, 0x557);

        /* Allows RC6 residency counter to work */
        set(uncore, VLV_COUNTER_CONTROL,
            _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
                               VLV_MEDIA_RC0_COUNT_EN |
                               VLV_RENDER_RC0_COUNT_EN |
                               VLV_MEDIA_RC6_COUNT_EN |
                               VLV_RENDER_RC6_COUNT_EN));

        set(uncore, GEN6_RC_CONTROL,
            GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL);
}

static bool bxt_check_bios_rc6_setup(struct intel_rc6 *rc6)
{
        struct intel_uncore *uncore = rc6_to_uncore(rc6);
        struct drm_i915_private *i915 = rc6_to_i915(rc6);
        u32 rc6_ctx_base, rc_ctl, rc_sw_target;
        bool enable_rc6 = true;

        rc_ctl = intel_uncore_read(uncore, GEN6_RC_CONTROL);
        rc_sw_target = intel_uncore_read(uncore, GEN6_RC_STATE);
        rc_sw_target &= RC_SW_TARGET_STATE_MASK;
        rc_sw_target >>= RC_SW_TARGET_STATE_SHIFT;
        DRM_DEBUG_DRIVER("BIOS enabled RC states: "
                         "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n",
                         onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE),
                         onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE),
                         rc_sw_target);

        if (!(intel_uncore_read(uncore, RC6_LOCATION) & RC6_CTX_IN_DRAM)) {
                DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n");
                enable_rc6 = false;
        }

        /*
         * The exact context size is not known for BXT, so assume a page size
         * for this check.
         */
        rc6_ctx_base =
                intel_uncore_read(uncore, RC6_CTX_BASE) & RC6_CTX_BASE_MASK;
        if (!(rc6_ctx_base >= i915->dsm_reserved.start &&
              rc6_ctx_base + PAGE_SIZE < i915->dsm_reserved.end)) {
                DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n");
                enable_rc6 = false;
        }

        if (!((intel_uncore_read(uncore, PWRCTX_MAXCNT_RCSUNIT) & IDLE_TIME_MASK) > 1 &&
              (intel_uncore_read(uncore, PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1 &&
              (intel_uncore_read(uncore, PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1 &&
              (intel_uncore_read(uncore, PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1)) {
                DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n");
                enable_rc6 = false;
        }

        if (!intel_uncore_read(uncore, GEN8_PUSHBUS_CONTROL) ||
            !intel_uncore_read(uncore, GEN8_PUSHBUS_ENABLE) ||
            !intel_uncore_read(uncore, GEN8_PUSHBUS_SHIFT)) {
                DRM_DEBUG_DRIVER("Pushbus not setup properly.\n");
                enable_rc6 = false;
        }

        if (!intel_uncore_read(uncore, GEN6_GFXPAUSE)) {
                DRM_DEBUG_DRIVER("GFX pause not setup properly.\n");
                enable_rc6 = false;
        }

        if (!intel_uncore_read(uncore, GEN8_MISC_CTRL0)) {
                DRM_DEBUG_DRIVER("GPM control not setup properly.\n");
                enable_rc6 = false;
        }

        return enable_rc6;
}

static bool rc6_supported(struct intel_rc6 *rc6)
{
        struct drm_i915_private *i915 = rc6_to_i915(rc6);

        if (!HAS_RC6(i915))
                return false;

        if (intel_vgpu_active(i915))
                return false;

        if (is_mock_gt(rc6_to_gt(rc6)))
                return false;

        if (IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(rc6)) {
                dev_notice(i915->drm.dev,
                           "RC6 and powersaving disabled by BIOS\n");
                return false;
        }

        return true;
}

static void rpm_get(struct intel_rc6 *rc6)
{
        GEM_BUG_ON(rc6->wakeref);
        pm_runtime_get_sync(&rc6_to_i915(rc6)->drm.pdev->dev);
        rc6->wakeref = true;
}

static void rpm_put(struct intel_rc6 *rc6)
{
        GEM_BUG_ON(!rc6->wakeref);
        pm_runtime_put(&rc6_to_i915(rc6)->drm.pdev->dev);
        rc6->wakeref = false;
}

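/*
 * A zero read of GEN8_RC6_CTX_INFO is taken to mean that the saved RC6
 * context image has been lost; that is the corruption the CTX WA below
 * guards against.
 */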
static bool intel_rc6_ctx_corrupted(struct intel_rc6 *rc6)
{
        return !intel_uncore_read(rc6_to_uncore(rc6), GEN8_RC6_CTX_INFO);
}

static void intel_rc6_ctx_wa_init(struct intel_rc6 *rc6)
{
        struct drm_i915_private *i915 = rc6_to_i915(rc6);

        if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915))
                return;

        if (intel_rc6_ctx_corrupted(rc6)) {
                DRM_INFO("RC6 context corrupted, disabling runtime power management\n");
                rc6->ctx_corrupted = true;
        }
}

/**
 * intel_rc6_ctx_wa_resume - system resume sequence for the RC6 CTX WA
 * @rc6: rc6 state
 *
 * Perform any steps needed to re-init the RC6 CTX WA after system resume.
 */
void intel_rc6_ctx_wa_resume(struct intel_rc6 *rc6)
{
        if (rc6->ctx_corrupted && !intel_rc6_ctx_corrupted(rc6)) {
                DRM_INFO("RC6 context restored, re-enabling runtime power management\n");
                rc6->ctx_corrupted = false;
        }
}

/**
 * intel_rc6_ctx_wa_check - check for a new RC6 CTX corruption
 * @rc6: rc6 state
 *
 * Check if an RC6 CTX corruption has happened since the last check and if so
 * disable RC6 and runtime power management.
 */
void intel_rc6_ctx_wa_check(struct intel_rc6 *rc6)
{
        struct drm_i915_private *i915 = rc6_to_i915(rc6);

        if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915))
                return;

        if (rc6->ctx_corrupted)
                return;

        if (!intel_rc6_ctx_corrupted(rc6))
                return;

        DRM_NOTE("RC6 context corruption, disabling runtime power management\n");

        intel_rc6_disable(rc6);
        rc6->ctx_corrupted = true;
}

static void __intel_rc6_disable(struct intel_rc6 *rc6)
{
        struct drm_i915_private *i915 = rc6_to_i915(rc6);
        struct intel_uncore *uncore = rc6_to_uncore(rc6);

        intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
        if (INTEL_GEN(i915) >= 9)
                set(uncore, GEN9_PG_ENABLE, 0);
        set(uncore, GEN6_RC_CONTROL, 0);
        set(uncore, GEN6_RC_STATE, 0);
        intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
}

void intel_rc6_init(struct intel_rc6 *rc6)
{
        struct drm_i915_private *i915 = rc6_to_i915(rc6);
        int err;

        /* Disable runtime-pm until we can save the GPU state with rc6 pctx */
        rpm_get(rc6);

        if (!rc6_supported(rc6))
                return;

        intel_rc6_ctx_wa_init(rc6);

        if (IS_CHERRYVIEW(i915))
                err = chv_rc6_init(rc6);
        else if (IS_VALLEYVIEW(i915))
                err = vlv_rc6_init(rc6);
        else
                err = 0;

        /* Sanitize rc6, ensure it is disabled before we are ready. */
        __intel_rc6_disable(rc6);

        rc6->supported = err == 0;
}

void intel_rc6_sanitize(struct intel_rc6 *rc6)
{
        if (rc6->enabled) { /* unbalanced suspend/resume */
                rpm_get(rc6);
                rc6->enabled = false;
        }

        if (rc6->supported)
                __intel_rc6_disable(rc6);
}

void intel_rc6_enable(struct intel_rc6 *rc6)
{
        struct drm_i915_private *i915 = rc6_to_i915(rc6);
        struct intel_uncore *uncore = rc6_to_uncore(rc6);

        if (!rc6->supported)
                return;

        GEM_BUG_ON(rc6->enabled);

        if (rc6->ctx_corrupted)
                return;

        intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);

        if (IS_CHERRYVIEW(i915))
                chv_rc6_enable(rc6);
        else if (IS_VALLEYVIEW(i915))
                vlv_rc6_enable(rc6);
        else if (INTEL_GEN(i915) >= 11)
                gen11_rc6_enable(rc6);
        else if (INTEL_GEN(i915) >= 9)
                gen9_rc6_enable(rc6);
        else if (IS_BROADWELL(i915))
                gen8_rc6_enable(rc6);
        else if (INTEL_GEN(i915) >= 6)
                gen6_rc6_enable(rc6);

        intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);

        /* rc6 is ready, runtime-pm is go! */
        rpm_put(rc6);
        rc6->enabled = true;
}

void intel_rc6_disable(struct intel_rc6 *rc6)
{
        if (!rc6->enabled)
                return;

        rpm_get(rc6);
        rc6->enabled = false;

        __intel_rc6_disable(rc6);
}

void intel_rc6_fini(struct intel_rc6 *rc6)
{
        struct drm_i915_gem_object *pctx;

        intel_rc6_disable(rc6);

        pctx = fetch_and_zero(&rc6->pctx);
        if (pctx)
                i915_gem_object_put(pctx);

        if (rc6->wakeref)
                rpm_put(rc6);
}

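/*
 * A rough lifecycle sketch, not a verbatim call chain (the actual callers
 * live in the GT power management code):
 *
 *      intel_rc6_init(rc6);    // reserve pctx, hold an rpm wakeref
 *      intel_rc6_enable(rc6);  // program thresholds, drop the wakeref
 *      ...
 *      intel_rc6_disable(rc6); // retake the wakeref, clear GEN6_RC_CONTROL
 *      intel_rc6_fini(rc6);    // release pctx and any held wakeref
 */
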
static u64 vlv_residency_raw(struct intel_uncore *uncore, const i915_reg_t reg)
{
        u32 lower, upper, tmp;
        int loop = 2;

        /*
         * The registers accessed do not need forcewake. We borrow the
         * uncore lock to prevent concurrent access to the range-select
         * control and counter registers.
         */
        lockdep_assert_held(&uncore->lock);

        /*
         * vlv and chv residency counters are 40 bits in width.
         * With a control bit, we can choose between upper or lower
         * 32bit window into this counter.
         *
         * Although we always use the counter in high-range mode elsewhere,
         * userspace may attempt to read the value before rc6 is initialised,
         * before we have set the default VLV_COUNTER_CONTROL value. So always
         * set the high bit to be safe.
         */
        set(uncore, VLV_COUNTER_CONTROL,
            _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
        upper = intel_uncore_read_fw(uncore, reg);
        do {
                tmp = upper;

                set(uncore, VLV_COUNTER_CONTROL,
                    _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH));
                lower = intel_uncore_read_fw(uncore, reg);

                set(uncore, VLV_COUNTER_CONTROL,
                    _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
                upper = intel_uncore_read_fw(uncore, reg);
        } while (upper != tmp && --loop);

        /*
         * Everywhere else we always use VLV_COUNTER_CONTROL with the
         * VLV_COUNT_RANGE_HIGH bit set - so it is safe to leave it set
         * now.
         */

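        /*
         * The high window holds bits 39:8 and the low window bits 31:0 of
         * the 40bit counter; the retry loop above keeps the two reads
         * consistent over their overlapping bits, so OR'ing them together
         * reconstructs the full value.
         */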
        return lower | (u64)upper << 8;
}

u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, const i915_reg_t reg)
{
        struct drm_i915_private *i915 = rc6_to_i915(rc6);
        struct intel_uncore *uncore = rc6_to_uncore(rc6);
        u64 time_hw, prev_hw, overflow_hw;
        unsigned int fw_domains;
        unsigned long flags;
        unsigned int i;
        u32 mul, div;

        if (!rc6->supported)
                return 0;

        /*
         * Store previous hw counter values for counter wrap-around handling.
         *
         * There are only four interesting registers and they live next to each
         * other so we can use the relative address, compared to the smallest
         * one as the index into driver storage.
         */
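        /* E.g. under the natural LOCKED/RC6/RC6p/RC6pp layout, GEN6_GT_GFX_RC6p yields i == 2. */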
        i = (i915_mmio_reg_offset(reg) -
             i915_mmio_reg_offset(GEN6_GT_GFX_RC6_LOCKED)) / sizeof(u32);
        if (WARN_ON_ONCE(i >= ARRAY_SIZE(rc6->cur_residency)))
                return 0;

        fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, FW_REG_READ);

        spin_lock_irqsave(&uncore->lock, flags);
        intel_uncore_forcewake_get__locked(uncore, fw_domains);

        /* On VLV and CHV, residency time is in CZ units rather than 1.28us */
        if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
                mul = 1000000;
                div = i915->czclk_freq;
                overflow_hw = BIT_ULL(40);
                time_hw = vlv_residency_raw(uncore, reg);
        } else {
                /* 833.33ns units on Gen9LP, 1.28us elsewhere. */
                if (IS_GEN9_LP(i915)) {
                        mul = 10000;
                        div = 12;
                } else {
                        mul = 1280;
                        div = 1;
                }

                overflow_hw = BIT_ULL(32);
                time_hw = intel_uncore_read_fw(uncore, reg);
        }

        /*
         * Counter wrap handling.
         *
         * This relies on the counters being queried sufficiently frequently;
         * if more than one wrap occurs between two queries, the extra
         * wrap-arounds are lost.
         */
        prev_hw = rc6->prev_hw_residency[i];
        rc6->prev_hw_residency[i] = time_hw;

        /* RC6 delta from last sample. */
        if (time_hw >= prev_hw)
                time_hw -= prev_hw;
        else
                time_hw += overflow_hw - prev_hw;

        /* Add delta to RC6 extended raw driver copy. */
        time_hw += rc6->cur_residency[i];
        rc6->cur_residency[i] = time_hw;

        intel_uncore_forcewake_put__locked(uncore, fw_domains);
        spin_unlock_irqrestore(&uncore->lock, flags);

        return mul_u64_u32_div(time_hw, mul, div);
}

u64 intel_rc6_residency_us(struct intel_rc6 *rc6, i915_reg_t reg)
{
        return DIV_ROUND_UP_ULL(intel_rc6_residency_ns(rc6, reg), 1000);
}