linux/drivers/gpu/drm/i915/gt/intel_rc6.c
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include <linux/pm_runtime.h>

#include "i915_drv.h"
#include "i915_vgpu.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
#include "intel_rc6.h"
#include "intel_sideband.h"

/**
 * DOC: RC6
 *
 * RC6 is a special power stage which allows the GPU to enter a very
 * low-voltage mode when idle, drawing as little as 0V while in this stage.
 * The stage is entered automatically when the GPU is idle and RC6 support is
 * enabled; as soon as a new workload arrives, the GPU wakes up just as
 * automatically.
 *
 * There are different RC6 modes available on Intel GPUs, which differ in the
 * latency required to enter and leave RC6, and in the voltage consumed by
 * the GPU in the different states.
 *
 * The combination of the following flags defines which states the GPU is
 * allowed to enter: RC6 is the normal RC6 state, RC6p is the deep RC6, and
 * RC6pp is the deepest RC6. Their support by hardware varies according to
 * the GPU, BIOS, chipset and platform. RC6 is usually the safest one and the
 * one which brings the most power savings; deeper states save more power,
 * but require higher latency to switch to and wake up.
 */
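
/*
 * As a quick sanity check from userspace (assuming the kernel exposes the
 * standard i915 sysfs nodes backed by the residency helpers at the bottom
 * of this file):
 *
 *   $ cat /sys/class/drm/card0/power/rc6_residency_ms
 *
 * A value that keeps rising while the GPU is idle confirms that RC6 was
 * actually enabled.
 */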

static struct intel_gt *rc6_to_gt(struct intel_rc6 *rc6)
{
        return container_of(rc6, struct intel_gt, rc6);
}

static struct intel_uncore *rc6_to_uncore(struct intel_rc6 *rc)
{
        return rc6_to_gt(rc)->uncore;
}

static struct drm_i915_private *rc6_to_i915(struct intel_rc6 *rc)
{
        return rc6_to_gt(rc)->i915;
}

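/*
 * set() deliberately uses the raw _fw register write, which skips the
 * automatic forcewake handling. The enable/disable paths instead take an
 * explicit FORCEWAKE_ALL reference around their batches of writes (see
 * intel_rc6_enable() and __intel_rc6_disable()), so the cheaper accessor
 * is sufficient here.
 */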
static inline void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
{
        intel_uncore_write_fw(uncore, reg, val);
}

static void gen11_rc6_enable(struct intel_rc6 *rc6)
{
        struct intel_uncore *uncore = rc6_to_uncore(rc6);
        struct intel_engine_cs *engine;
        enum intel_engine_id id;

        /* 2b: Program RC6 thresholds. */
        set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
        set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150);

        set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
        set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
        for_each_engine(engine, rc6_to_gt(rc6), id)
                set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);

        set(uncore, GUC_MAX_IDLE_COUNT, 0xA);

        set(uncore, GEN6_RC_SLEEP, 0);

        set(uncore, GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */

        /*
         * 2c: Program Coarse Power Gating Policies.
         *
         * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
         * use instead is a more conservative estimate for the maximum time
         * it takes us to service a CS interrupt and submit a new ELSP - that
         * is the time which the GPU is idle waiting for the CPU to select the
         * next request to execute. If the idle hysteresis is less than that
         * interrupt service latency, the hardware will automatically gate
         * the power well and we will then incur the wake up cost on top of
         * the service latency. A similar guide from plane_state is that we
         * do not want the enable hysteresis to be less than the wakeup
         * latency.
         *
         * igt/gem_exec_nop/sequential provides a rough estimate for the
         * service latency, and puts it under 10us for Icelake, similar to
         * Broadwell+. To be conservative, we want to factor in a context
         * switch on top (due to ksoftirqd).
         */
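        /*
         * In the 1280ns units quoted above, 60 gives a 76.8us idle
         * hysteresis - comfortably above the ~10us service latency
         * estimate, even with a context switch on top.
         */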
        set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 60);
        set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 60);

        /* 3a: Enable RC6 */
        rc6->ctl_enable =
                GEN6_RC_CTL_HW_ENABLE |
                GEN6_RC_CTL_RC6_ENABLE |
                GEN6_RC_CTL_EI_MODE(1);

        set(uncore, GEN9_PG_ENABLE,
            GEN9_RENDER_PG_ENABLE |
            GEN9_MEDIA_PG_ENABLE |
            GEN11_MEDIA_SAMPLER_PG_ENABLE);
}

static void gen9_rc6_enable(struct intel_rc6 *rc6)
{
        struct intel_uncore *uncore = rc6_to_uncore(rc6);
        struct intel_engine_cs *engine;
        enum intel_engine_id id;

        /* 2b: Program RC6 thresholds. */
        if (INTEL_GEN(rc6_to_i915(rc6)) >= 10) {
                set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
                set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
        } else if (IS_SKYLAKE(rc6_to_i915(rc6))) {
                /*
                 * WaRsDoubleRc6WrlWithCoarsePowerGating:skl - double the
                 * WRL only when CPG is enabled.
                 */
                set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
        } else {
                set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
        }

        set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
        set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
        for_each_engine(engine, rc6_to_gt(rc6), id)
                set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);

        set(uncore, GUC_MAX_IDLE_COUNT, 0xA);

        set(uncore, GEN6_RC_SLEEP, 0);

        /*
         * 2c: Program Coarse Power Gating Policies.
         *
         * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
         * use instead is a more conservative estimate for the maximum time
         * it takes us to service a CS interrupt and submit a new ELSP - that
         * is the time which the GPU is idle waiting for the CPU to select the
         * next request to execute. If the idle hysteresis is less than that
         * interrupt service latency, the hardware will automatically gate
         * the power well and we will then incur the wake up cost on top of
         * the service latency. A similar guide from plane_state is that we
         * do not want the enable hysteresis to be less than the wakeup
         * latency.
         *
         * igt/gem_exec_nop/sequential provides a rough estimate for the
         * service latency, and puts it around 10us for Broadwell (and other
         * big cores) and around 40us for Broxton (and other low power cores).
         * [Note that for legacy ringbuffer submission, this is less than 1us!]
         * However, the wakeup latency on Broxton is closer to 100us. To be
         * conservative, we have to factor in a context switch on top (due
         * to ksoftirqd).
         */
        set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
        set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);
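        /*
         * In 1280ns units, 250 gives a 320us idle hysteresis - sized
         * against Broxton's ~40us service latency plus its ~100us wakeup
         * latency, with room to spare for the context switch.
         */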

        /* 3a: Enable RC6 */
        set(uncore, GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */

        rc6->ctl_enable =
                GEN6_RC_CTL_HW_ENABLE |
                GEN6_RC_CTL_RC6_ENABLE |
                GEN6_RC_CTL_EI_MODE(1);

        /*
         * WaRsDisableCoarsePowerGating:skl,cnl
         *   - Render/Media PG need to be disabled with RC6.
         */
        if (!NEEDS_WaRsDisableCoarsePowerGating(rc6_to_i915(rc6)))
                set(uncore, GEN9_PG_ENABLE,
                    GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE);
}

static void gen8_rc6_enable(struct intel_rc6 *rc6)
{
        struct intel_uncore *uncore = rc6_to_uncore(rc6);
        struct intel_engine_cs *engine;
        enum intel_engine_id id;

        /* 2b: Program RC6 thresholds. */
        set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
        set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
        set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
        for_each_engine(engine, rc6_to_gt(rc6), id)
                set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
        set(uncore, GEN6_RC_SLEEP, 0);
        set(uncore, GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */

        /* 3: Enable RC6 */
        rc6->ctl_enable =
            GEN6_RC_CTL_HW_ENABLE |
            GEN7_RC_CTL_TO_MODE |
            GEN6_RC_CTL_RC6_ENABLE;
}

static void gen6_rc6_enable(struct intel_rc6 *rc6)
{
        struct intel_uncore *uncore = rc6_to_uncore(rc6);
        struct drm_i915_private *i915 = rc6_to_i915(rc6);
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        u32 rc6vids, rc6_mask;
        int ret;

        set(uncore, GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
        set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
        set(uncore, GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
        set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000);
        set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25);

        for_each_engine(engine, rc6_to_gt(rc6), id)
                set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);

        set(uncore, GEN6_RC_SLEEP, 0);
        set(uncore, GEN6_RC1e_THRESHOLD, 1000);
        set(uncore, GEN6_RC6_THRESHOLD, 50000);
        set(uncore, GEN6_RC6p_THRESHOLD, 150000);
        set(uncore, GEN6_RC6pp_THRESHOLD, 64000); /* unused */

        /* We don't use those on Haswell */
        rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
        if (HAS_RC6p(i915))
                rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
        if (HAS_RC6pp(i915))
                rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
        rc6->ctl_enable =
            rc6_mask |
            GEN6_RC_CTL_EI_MODE(1) |
            GEN6_RC_CTL_HW_ENABLE;

        rc6vids = 0;
        ret = sandybridge_pcode_read(i915, GEN6_PCODE_READ_RC6VIDS,
                                     &rc6vids, NULL);
        if (IS_GEN(i915, 6) && ret) {
                drm_dbg(&i915->drm, "Couldn't check for BIOS workaround\n");
        } else if (IS_GEN(i915, 6) &&
                   (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
                drm_dbg(&i915->drm,
                        "You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
                        GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
                rc6vids &= 0xffff00;
                rc6vids |= GEN6_ENCODE_RC6_VID(450);
                ret = sandybridge_pcode_write(i915, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
                if (ret)
                        drm_err(&i915->drm,
                                "Couldn't fix incorrect rc6 voltage\n");
        }
}
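
/*
 * For reference, the RC6 VID encoding used above is linear in millivolts:
 * GEN6_DECODE_RC6_VID(vids) = vids * 5 + 245, so the 450mV floor written
 * back corresponds to GEN6_ENCODE_RC6_VID(450) = 41, and any raw VID byte
 * below 41 is what triggers the BIOS warning.
 */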

/* Check that the PCBR address is not empty. */
static int chv_rc6_init(struct intel_rc6 *rc6)
{
        struct intel_uncore *uncore = rc6_to_uncore(rc6);
        struct drm_i915_private *i915 = rc6_to_i915(rc6);
        resource_size_t pctx_paddr, paddr;
        resource_size_t pctx_size = 32 * SZ_1K;
        u32 pcbr;

        pcbr = intel_uncore_read(uncore, VLV_PCBR);
        if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
                drm_dbg(&i915->drm, "BIOS didn't set up PCBR, fixing up\n");
                paddr = i915->dsm.end + 1 - pctx_size;
                GEM_BUG_ON(paddr > U32_MAX);

                pctx_paddr = (paddr & ~4095);
                intel_uncore_write(uncore, VLV_PCBR, pctx_paddr);
        }

        return 0;
}

static int vlv_rc6_init(struct intel_rc6 *rc6)
{
        struct drm_i915_private *i915 = rc6_to_i915(rc6);
        struct intel_uncore *uncore = rc6_to_uncore(rc6);
        struct drm_i915_gem_object *pctx;
        resource_size_t pctx_paddr;
        resource_size_t pctx_size = 24 * SZ_1K;
        u32 pcbr;

        pcbr = intel_uncore_read(uncore, VLV_PCBR);
        if (pcbr) {
                /* BIOS set it up already, grab the pre-alloc'd space */
                resource_size_t pcbr_offset;

                pcbr_offset = (pcbr & ~4095) - i915->dsm.start;
                pctx = i915_gem_object_create_stolen_for_preallocated(i915,
                                                                      pcbr_offset,
                                                                      pctx_size);
                if (IS_ERR(pctx))
                        return PTR_ERR(pctx);

                goto out;
        }

        drm_dbg(&i915->drm, "BIOS didn't set up PCBR, fixing up\n");

        /*
         * From the Gunit register HAS:
         * The Gfx driver is expected to program this register and ensure
         * proper allocation within Gfx stolen memory.  For example, this
         * register should be programmed such that the PCBR range does not
         * overlap with other ranges, such as the frame buffer, protected
         * memory, or any other relevant ranges.
         */
        pctx = i915_gem_object_create_stolen(i915, pctx_size);
        if (IS_ERR(pctx)) {
                drm_dbg(&i915->drm,
                        "not enough stolen space for PCTX, disabling\n");
                return PTR_ERR(pctx);
        }

        GEM_BUG_ON(range_overflows_end_t(u64,
                                         i915->dsm.start,
                                         pctx->stolen->start,
                                         U32_MAX));
        pctx_paddr = i915->dsm.start + pctx->stolen->start;
        intel_uncore_write(uncore, VLV_PCBR, pctx_paddr);

out:
        rc6->pctx = pctx;
        return 0;
}
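
/*
 * A note on the address math in the two init paths above: VLV_PCBR holds a
 * 4K-aligned physical address, while stolen objects are tracked as offsets
 * from the start of the data-stolen-memory region. Hence the BIOS path
 * converts with (pcbr & ~4095) - i915->dsm.start, and the fallback path
 * goes the other way with i915->dsm.start + pctx->stolen->start.
 */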

static void chv_rc6_enable(struct intel_rc6 *rc6)
{
        struct intel_uncore *uncore = rc6_to_uncore(rc6);
        struct intel_engine_cs *engine;
        enum intel_engine_id id;

        /* 2a: Program RC6 thresholds. */
        set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
        set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
        set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */

        for_each_engine(engine, rc6_to_gt(rc6), id)
                set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
        set(uncore, GEN6_RC_SLEEP, 0);

        /* TO threshold set to 500 us (0x186 * 1.28 us) */
        set(uncore, GEN6_RC6_THRESHOLD, 0x186);

        /* Allows RC6 residency counter to work */
        set(uncore, VLV_COUNTER_CONTROL,
            _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
                               VLV_MEDIA_RC6_COUNT_EN |
                               VLV_RENDER_RC6_COUNT_EN));

        /* 3: Enable RC6 */
        rc6->ctl_enable = GEN7_RC_CTL_TO_MODE;
}

static void vlv_rc6_enable(struct intel_rc6 *rc6)
{
        struct intel_uncore *uncore = rc6_to_uncore(rc6);
        struct intel_engine_cs *engine;
        enum intel_engine_id id;

        set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
        set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000);
        set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25);

        for_each_engine(engine, rc6_to_gt(rc6), id)
                set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);

        set(uncore, GEN6_RC6_THRESHOLD, 0x557);

        /* Allows RC6 residency counter to work */
        set(uncore, VLV_COUNTER_CONTROL,
            _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
                               VLV_MEDIA_RC0_COUNT_EN |
                               VLV_RENDER_RC0_COUNT_EN |
                               VLV_MEDIA_RC6_COUNT_EN |
                               VLV_RENDER_RC6_COUNT_EN));

        rc6->ctl_enable =
            GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
}

static bool bxt_check_bios_rc6_setup(struct intel_rc6 *rc6)
{
        struct intel_uncore *uncore = rc6_to_uncore(rc6);
        struct drm_i915_private *i915 = rc6_to_i915(rc6);
        u32 rc6_ctx_base, rc_ctl, rc_sw_target;
        bool enable_rc6 = true;

        rc_ctl = intel_uncore_read(uncore, GEN6_RC_CONTROL);
        rc_sw_target = intel_uncore_read(uncore, GEN6_RC_STATE);
        rc_sw_target &= RC_SW_TARGET_STATE_MASK;
        rc_sw_target >>= RC_SW_TARGET_STATE_SHIFT;
        drm_dbg(&i915->drm, "BIOS enabled RC states: "
                         "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n",
                         onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE),
                         onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE),
                         rc_sw_target);

        if (!(intel_uncore_read(uncore, RC6_LOCATION) & RC6_CTX_IN_DRAM)) {
                drm_dbg(&i915->drm, "RC6 Base location not set properly.\n");
                enable_rc6 = false;
        }

        /*
         * The exact context size is not known for BXT, so assume a page size
         * for this check.
         */
        rc6_ctx_base =
                intel_uncore_read(uncore, RC6_CTX_BASE) & RC6_CTX_BASE_MASK;
        if (!(rc6_ctx_base >= i915->dsm_reserved.start &&
              rc6_ctx_base + PAGE_SIZE < i915->dsm_reserved.end)) {
                drm_dbg(&i915->drm, "RC6 Base address not as expected.\n");
                enable_rc6 = false;
        }

        if (!((intel_uncore_read(uncore, PWRCTX_MAXCNT_RCSUNIT) & IDLE_TIME_MASK) > 1 &&
              (intel_uncore_read(uncore, PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1 &&
              (intel_uncore_read(uncore, PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1 &&
              (intel_uncore_read(uncore, PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1)) {
                drm_dbg(&i915->drm,
                        "Engine Idle wait time not set properly.\n");
                enable_rc6 = false;
        }

        if (!intel_uncore_read(uncore, GEN8_PUSHBUS_CONTROL) ||
            !intel_uncore_read(uncore, GEN8_PUSHBUS_ENABLE) ||
            !intel_uncore_read(uncore, GEN8_PUSHBUS_SHIFT)) {
                drm_dbg(&i915->drm, "Pushbus not set up properly.\n");
                enable_rc6 = false;
        }

        if (!intel_uncore_read(uncore, GEN6_GFXPAUSE)) {
                drm_dbg(&i915->drm, "GFX pause not set up properly.\n");
                enable_rc6 = false;
        }

        if (!intel_uncore_read(uncore, GEN8_MISC_CTRL0)) {
                drm_dbg(&i915->drm, "GPM control not set up properly.\n");
                enable_rc6 = false;
        }

        return enable_rc6;
}

static bool rc6_supported(struct intel_rc6 *rc6)
{
        struct drm_i915_private *i915 = rc6_to_i915(rc6);

        if (!HAS_RC6(i915))
                return false;

        if (intel_vgpu_active(i915))
                return false;

        if (is_mock_gt(rc6_to_gt(rc6)))
                return false;

        if (IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(rc6)) {
                drm_notice(&i915->drm,
                           "RC6 and powersaving disabled by BIOS\n");
                return false;
        }

        return true;
}

static void rpm_get(struct intel_rc6 *rc6)
{
        GEM_BUG_ON(rc6->wakeref);
        pm_runtime_get_sync(&rc6_to_i915(rc6)->drm.pdev->dev);
        rc6->wakeref = true;
}

static void rpm_put(struct intel_rc6 *rc6)
{
        GEM_BUG_ON(!rc6->wakeref);
        pm_runtime_put(&rc6_to_i915(rc6)->drm.pdev->dev);
        rc6->wakeref = false;
}

static bool pctx_corrupted(struct intel_rc6 *rc6)
{
        struct drm_i915_private *i915 = rc6_to_i915(rc6);

        if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915))
                return false;

        if (intel_uncore_read(rc6_to_uncore(rc6), GEN8_RC6_CTX_INFO))
                return false;

        drm_notice(&i915->drm,
                   "RC6 context corruption, disabling runtime power management\n");
        return true;
}

static void __intel_rc6_disable(struct intel_rc6 *rc6)
{
        struct drm_i915_private *i915 = rc6_to_i915(rc6);
        struct intel_uncore *uncore = rc6_to_uncore(rc6);

        intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
        if (INTEL_GEN(i915) >= 9)
                set(uncore, GEN9_PG_ENABLE, 0);
        set(uncore, GEN6_RC_CONTROL, 0);
        set(uncore, GEN6_RC_STATE, 0);
        intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
}

void intel_rc6_init(struct intel_rc6 *rc6)
{
        struct drm_i915_private *i915 = rc6_to_i915(rc6);
        int err;

        /* Disable runtime-pm until we can save the GPU state with rc6 pctx */
        rpm_get(rc6);

        if (!rc6_supported(rc6))
                return;

        if (IS_CHERRYVIEW(i915))
                err = chv_rc6_init(rc6);
        else if (IS_VALLEYVIEW(i915))
                err = vlv_rc6_init(rc6);
        else
                err = 0;

        /* Sanitize rc6, ensure it is disabled before we are ready. */
        __intel_rc6_disable(rc6);

        rc6->supported = err == 0;
}
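
/*
 * The wakeref taken in intel_rc6_init() implements the handshake described
 * in the comment above: runtime-pm stays blocked from init onwards until
 * intel_rc6_enable() succeeds and calls rpm_put(). intel_rc6_disable() and
 * an unbalanced intel_rc6_sanitize() re-take the reference, and
 * intel_rc6_fini() drops whatever is still held.
 */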

void intel_rc6_sanitize(struct intel_rc6 *rc6)
{
        memset(rc6->prev_hw_residency, 0, sizeof(rc6->prev_hw_residency));

        if (rc6->enabled) { /* unbalanced suspend/resume */
                rpm_get(rc6);
                rc6->enabled = false;
        }

        if (rc6->supported)
                __intel_rc6_disable(rc6);
}

void intel_rc6_enable(struct intel_rc6 *rc6)
{
        struct drm_i915_private *i915 = rc6_to_i915(rc6);
        struct intel_uncore *uncore = rc6_to_uncore(rc6);

        if (!rc6->supported)
                return;

        GEM_BUG_ON(rc6->enabled);

        intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);

        if (IS_CHERRYVIEW(i915))
                chv_rc6_enable(rc6);
        else if (IS_VALLEYVIEW(i915))
                vlv_rc6_enable(rc6);
        else if (INTEL_GEN(i915) >= 11)
                gen11_rc6_enable(rc6);
        else if (INTEL_GEN(i915) >= 9)
                gen9_rc6_enable(rc6);
        else if (IS_BROADWELL(i915))
                gen8_rc6_enable(rc6);
        else if (INTEL_GEN(i915) >= 6)
                gen6_rc6_enable(rc6);

        rc6->manual = rc6->ctl_enable & GEN6_RC_CTL_RC6_ENABLE;
        if (NEEDS_RC6_CTX_CORRUPTION_WA(i915))
                rc6->ctl_enable = 0;

        intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);

        if (unlikely(pctx_corrupted(rc6)))
                return;

        /* rc6 is ready, runtime-pm is go! */
        rpm_put(rc6);
        rc6->enabled = true;
}

void intel_rc6_unpark(struct intel_rc6 *rc6)
{
        struct intel_uncore *uncore = rc6_to_uncore(rc6);

        if (!rc6->enabled)
                return;

        /* Restore HW timers for automatic RC6 entry while busy */
        set(uncore, GEN6_RC_CONTROL, rc6->ctl_enable);
}

void intel_rc6_park(struct intel_rc6 *rc6)
{
        struct intel_uncore *uncore = rc6_to_uncore(rc6);
        unsigned int target;

        if (!rc6->enabled)
                return;

        if (unlikely(pctx_corrupted(rc6))) {
                intel_rc6_disable(rc6);
                return;
        }

        if (!rc6->manual)
                return;

        /* Turn off the HW timers and go directly to rc6 */
        set(uncore, GEN6_RC_CONTROL, GEN6_RC_CTL_RC6_ENABLE);

        if (HAS_RC6pp(rc6_to_i915(rc6)))
                target = 0x6; /* deepest rc6 */
        else if (HAS_RC6p(rc6_to_i915(rc6)))
                target = 0x5; /* deep rc6 */
        else
                target = 0x4; /* normal rc6 */
        set(uncore, GEN6_RC_STATE, target << RC_SW_TARGET_STATE_SHIFT);
}

void intel_rc6_disable(struct intel_rc6 *rc6)
{
        if (!rc6->enabled)
                return;

        rpm_get(rc6);
        rc6->enabled = false;

        __intel_rc6_disable(rc6);
}

void intel_rc6_fini(struct intel_rc6 *rc6)
{
        struct drm_i915_gem_object *pctx;

        intel_rc6_disable(rc6);

        pctx = fetch_and_zero(&rc6->pctx);
        if (pctx)
                i915_gem_object_put(pctx);

        if (rc6->wakeref)
                rpm_put(rc6);
}

static u64 vlv_residency_raw(struct intel_uncore *uncore, const i915_reg_t reg)
{
        u32 lower, upper, tmp;
        int loop = 2;

        /*
         * The registers accessed do not need forcewake. We borrow the
         * uncore lock to prevent concurrent access to the range selector
         * (VLV_COUNTER_CONTROL).
         */
        lockdep_assert_held(&uncore->lock);

        /*
         * vlv and chv residency counters are 40 bits in width.
         * With a control bit, we can choose between the upper or lower
         * 32-bit window into this counter.
         *
         * Although we always use the counter in high-range mode elsewhere,
         * userspace may attempt to read the value before rc6 is initialised,
         * before we have set the default VLV_COUNTER_CONTROL value. So always
         * set the high bit to be safe.
         */
        set(uncore, VLV_COUNTER_CONTROL,
            _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
        upper = intel_uncore_read_fw(uncore, reg);
        do {
                tmp = upper;

                set(uncore, VLV_COUNTER_CONTROL,
                    _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH));
                lower = intel_uncore_read_fw(uncore, reg);

                set(uncore, VLV_COUNTER_CONTROL,
                    _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
                upper = intel_uncore_read_fw(uncore, reg);
        } while (upper != tmp && --loop);

        /*
         * Everywhere else we always use VLV_COUNTER_CONTROL with the
         * VLV_COUNT_RANGE_HIGH bit set - so it is safe to leave it set
         * now.
         */

        return lower | (u64)upper << 8;
}
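
/*
 * Layout implied by the combine step above: the low window returns counter
 * bits [31:0] and the high window bits [39:8], so the two reads overlap in
 * 24 bits and the full 40-bit value is lower | upper << 8. The retry loop
 * re-reads the high window until it is stable across the low read, guarding
 * against a carry into bit 8 mid-sequence.
 */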

u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, const i915_reg_t reg)
{
        struct drm_i915_private *i915 = rc6_to_i915(rc6);
        struct intel_uncore *uncore = rc6_to_uncore(rc6);
        u64 time_hw, prev_hw, overflow_hw;
        unsigned int fw_domains;
        unsigned long flags;
        unsigned int i;
        u32 mul, div;

        if (!rc6->supported)
                return 0;

        /*
         * Store previous hw counter values for counter wrap-around handling.
         *
         * There are only four interesting registers and they live next to each
         * other so we can use the relative address, compared to the smallest
         * one as the index into driver storage.
         */
        i = (i915_mmio_reg_offset(reg) -
             i915_mmio_reg_offset(GEN6_GT_GFX_RC6_LOCKED)) / sizeof(u32);
        if (drm_WARN_ON_ONCE(&i915->drm, i >= ARRAY_SIZE(rc6->cur_residency)))
                return 0;

        fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, FW_REG_READ);

        spin_lock_irqsave(&uncore->lock, flags);
        intel_uncore_forcewake_get__locked(uncore, fw_domains);

        /* On VLV and CHV, residency time is in CZ units rather than 1.28us */
        if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
                mul = 1000000;
                div = i915->czclk_freq;
                overflow_hw = BIT_ULL(40);
                time_hw = vlv_residency_raw(uncore, reg);
        } else {
                /* 833.33ns units on Gen9LP, 1.28us elsewhere. */
                if (IS_GEN9_LP(i915)) {
                        mul = 10000;
                        div = 12;
                } else {
                        mul = 1280;
                        div = 1;
                }

                overflow_hw = BIT_ULL(32);
                time_hw = intel_uncore_read_fw(uncore, reg);
        }
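
        /*
         * The mul/div pairs convert raw ticks to nanoseconds. Assuming
         * czclk_freq is stored in kHz (which the arithmetic requires),
         * ticks * 1000000 / czclk_freq yields ns on VLV/CHV; 10000/12
         * is the 833.33ns Gen9LP tick; and 1280/1 is the 1.28us tick
         * used everywhere else.
         */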

        /*
         * Counter wrap handling.
         *
         * This relies on a sufficient frequency of queries; otherwise the
         * counters can still wrap undetected between two samples.
         */
        prev_hw = rc6->prev_hw_residency[i];
        rc6->prev_hw_residency[i] = time_hw;

        /* RC6 delta from last sample. */
        if (time_hw >= prev_hw)
                time_hw -= prev_hw;
        else
                time_hw += overflow_hw - prev_hw;
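        /*
         * e.g. with a 32-bit counter (overflow_hw = 2^32), prev_hw of
         * 0xfffffff0 and a fresh read of 0x10 yields
         * 0x10 + 2^32 - 0xfffffff0 = 0x20 ticks, the correct delta
         * across the wrap.
         */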

        /* Add delta to RC6 extended raw driver copy. */
        time_hw += rc6->cur_residency[i];
        rc6->cur_residency[i] = time_hw;

        intel_uncore_forcewake_put__locked(uncore, fw_domains);
        spin_unlock_irqrestore(&uncore->lock, flags);

        return mul_u64_u32_div(time_hw, mul, div);
}

u64 intel_rc6_residency_us(struct intel_rc6 *rc6, i915_reg_t reg)
{
        return DIV_ROUND_UP_ULL(intel_rc6_residency_ns(rc6, reg), 1000);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_rc6.c"
#endif