linux/drivers/gpu/drm/i915/intel_pm.c
/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eugeni Dodonov <eugeni.dodonov@intel.com>
 *
 */

#include <linux/cpufreq.h>
#include <drm/drm_plane_helper.h>
#include "i915_drv.h"
#include "intel_drv.h"
#include "../../../platform/x86/intel_ips.h"
#include <linux/module.h>
#include <drm/drm_atomic_helper.h>

/**
 * DOC: RC6
 *
 * RC6 is a special power stage which allows the GPU to enter a very
 * low-voltage mode when idle, using down to 0V while at this stage. This
 * stage is entered automatically when the GPU is idle when RC6 support is
 * enabled, and as soon as a new workload arises the GPU wakes up
 * automatically as well.
 *
 * There are different RC6 modes available in Intel GPUs, which differ from
 * each other in the latency required to enter and leave RC6 and in the
 * voltage consumed by the GPU in different states.
 *
 * The combination of the following flags defines which states the GPU is
 * allowed to enter, while RC6 is the normal RC6 state, RC6p is the deep RC6,
 * and RC6pp is the deepest RC6. Their support by hardware varies according
 * to the GPU, BIOS, chipset and platform. RC6 is usually the safest one and
 * the one which brings the most power savings; deeper states save more
 * power, but require higher latency to switch to and wake up.
 */

static void gen9_init_clock_gating(struct drm_i915_private *dev_priv)
{
        if (HAS_LLC(dev_priv)) {
                /*
                 * WaCompressedResourceDisplayNewHashMode:skl,kbl
                 * Display WA #0390: skl,kbl
                 *
                 * Must match Sampler, Pixel Back End, and Media. See
                 * WaCompressedResourceSamplerPbeMediaNewHashMode.
                 */
                I915_WRITE(CHICKEN_PAR1_1,
                           I915_READ(CHICKEN_PAR1_1) |
                           SKL_DE_COMPRESSED_HASH_MODE);
        }

        /* See Bspec note for PSR2_CTL bit 31, Wa#828:skl,bxt,kbl,cfl */
        I915_WRITE(CHICKEN_PAR1_1,
                   I915_READ(CHICKEN_PAR1_1) | SKL_EDP_PSR_FIX_RDWRAP);

        /* WaEnableChickenDCPR:skl,bxt,kbl,glk,cfl */
        I915_WRITE(GEN8_CHICKEN_DCPR_1,
                   I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM);

        /* WaFbcTurnOffFbcWatermark:skl,bxt,kbl,cfl */
        /* WaFbcWakeMemOn:skl,bxt,kbl,glk,cfl */
        I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
                   DISP_FBC_WM_DIS |
                   DISP_FBC_MEMORY_WAKE);

        /* WaFbcHighMemBwCorruptionAvoidance:skl,bxt,kbl,cfl */
        I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
                   ILK_DPFC_DISABLE_DUMMY0);

        if (IS_SKYLAKE(dev_priv)) {
                /* WaDisableDopClockGating */
                I915_WRITE(GEN7_MISCCPCTL, I915_READ(GEN7_MISCCPCTL)
                           & ~GEN7_DOP_CLOCK_GATE_ENABLE);
        }
}

static void bxt_init_clock_gating(struct drm_i915_private *dev_priv)
{
        gen9_init_clock_gating(dev_priv);

        /* WaDisableSDEUnitClockGating:bxt */
        I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
                   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);

        /*
         * FIXME:
         * GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only.
         */
        I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
                   GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ);

        /*
         * Wa: Backlight PWM may stop in the asserted state, causing backlight
         * to stay fully on.
         */
        I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
                   PWM1_GATING_DIS | PWM2_GATING_DIS);
}

static void glk_init_clock_gating(struct drm_i915_private *dev_priv)
{
        gen9_init_clock_gating(dev_priv);

        /*
         * WaDisablePWMClockGating:glk
         * Backlight PWM may stop in the asserted state, causing backlight
         * to stay fully on.
         */
        I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
                   PWM1_GATING_DIS | PWM2_GATING_DIS);

        /* WaDDIIOTimeout:glk */
        if (IS_GLK_REVID(dev_priv, 0, GLK_REVID_A1)) {
                u32 val = I915_READ(CHICKEN_MISC_2);
                val &= ~(GLK_CL0_PWR_DOWN |
                         GLK_CL1_PWR_DOWN |
                         GLK_CL2_PWR_DOWN);
                I915_WRITE(CHICKEN_MISC_2, val);
        }
}

static void i915_pineview_get_mem_freq(struct drm_i915_private *dev_priv)
{
        u32 tmp;

        tmp = I915_READ(CLKCFG);

        switch (tmp & CLKCFG_FSB_MASK) {
        case CLKCFG_FSB_533:
                dev_priv->fsb_freq = 533; /* 133*4 */
                break;
        case CLKCFG_FSB_800:
                dev_priv->fsb_freq = 800; /* 200*4 */
                break;
        case CLKCFG_FSB_667:
                dev_priv->fsb_freq = 667; /* 167*4 */
                break;
        case CLKCFG_FSB_400:
                dev_priv->fsb_freq = 400; /* 100*4 */
                break;
        }

        switch (tmp & CLKCFG_MEM_MASK) {
        case CLKCFG_MEM_533:
                dev_priv->mem_freq = 533;
                break;
        case CLKCFG_MEM_667:
                dev_priv->mem_freq = 667;
                break;
        case CLKCFG_MEM_800:
                dev_priv->mem_freq = 800;
                break;
        }

        /* detect pineview DDR3 setting */
        tmp = I915_READ(CSHRDDR3CTL);
        dev_priv->is_ddr3 = (tmp & CSHRDDR3CTL_DDR3) ? 1 : 0;
}

static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv)
{
        u16 ddrpll, csipll;

        ddrpll = I915_READ16(DDRMPLL1);
        csipll = I915_READ16(CSIPLL0);

        switch (ddrpll & 0xff) {
        case 0xc:
                dev_priv->mem_freq = 800;
                break;
        case 0x10:
                dev_priv->mem_freq = 1066;
                break;
        case 0x14:
                dev_priv->mem_freq = 1333;
                break;
        case 0x18:
                dev_priv->mem_freq = 1600;
                break;
        default:
                DRM_DEBUG_DRIVER("unknown memory frequency 0x%02x\n",
                                 ddrpll & 0xff);
                dev_priv->mem_freq = 0;
                break;
        }

        dev_priv->ips.r_t = dev_priv->mem_freq;

        switch (csipll & 0x3ff) {
        case 0x00c:
                dev_priv->fsb_freq = 3200;
                break;
        case 0x00e:
                dev_priv->fsb_freq = 3733;
                break;
        case 0x010:
                dev_priv->fsb_freq = 4266;
                break;
        case 0x012:
                dev_priv->fsb_freq = 4800;
                break;
        case 0x014:
                dev_priv->fsb_freq = 5333;
                break;
        case 0x016:
                dev_priv->fsb_freq = 5866;
                break;
        case 0x018:
                dev_priv->fsb_freq = 6400;
                break;
        default:
                DRM_DEBUG_DRIVER("unknown fsb frequency 0x%04x\n",
                                 csipll & 0x3ff);
                dev_priv->fsb_freq = 0;
                break;
        }

        if (dev_priv->fsb_freq == 3200) {
                dev_priv->ips.c_m = 0;
        } else if (dev_priv->fsb_freq > 3200 && dev_priv->fsb_freq <= 4800) {
                dev_priv->ips.c_m = 1;
        } else {
                dev_priv->ips.c_m = 2;
        }
}

static const struct cxsr_latency cxsr_latency_table[] = {
        {1, 0, 800, 400, 3382, 33382, 3983, 33983},    /* DDR2-400 SC */
        {1, 0, 800, 667, 3354, 33354, 3807, 33807},    /* DDR2-667 SC */
        {1, 0, 800, 800, 3347, 33347, 3763, 33763},    /* DDR2-800 SC */
        {1, 1, 800, 667, 6420, 36420, 6873, 36873},    /* DDR3-667 SC */
        {1, 1, 800, 800, 5902, 35902, 6318, 36318},    /* DDR3-800 SC */

        {1, 0, 667, 400, 3400, 33400, 4021, 34021},    /* DDR2-400 SC */
        {1, 0, 667, 667, 3372, 33372, 3845, 33845},    /* DDR2-667 SC */
        {1, 0, 667, 800, 3386, 33386, 3822, 33822},    /* DDR2-800 SC */
        {1, 1, 667, 667, 6438, 36438, 6911, 36911},    /* DDR3-667 SC */
        {1, 1, 667, 800, 5941, 35941, 6377, 36377},    /* DDR3-800 SC */

        {1, 0, 400, 400, 3472, 33472, 4173, 34173},    /* DDR2-400 SC */
        {1, 0, 400, 667, 3443, 33443, 3996, 33996},    /* DDR2-667 SC */
        {1, 0, 400, 800, 3430, 33430, 3946, 33946},    /* DDR2-800 SC */
        {1, 1, 400, 667, 6509, 36509, 7062, 37062},    /* DDR3-667 SC */
        {1, 1, 400, 800, 5985, 35985, 6501, 36501},    /* DDR3-800 SC */

        {0, 0, 800, 400, 3438, 33438, 4065, 34065},    /* DDR2-400 SC */
        {0, 0, 800, 667, 3410, 33410, 3889, 33889},    /* DDR2-667 SC */
        {0, 0, 800, 800, 3403, 33403, 3845, 33845},    /* DDR2-800 SC */
        {0, 1, 800, 667, 6476, 36476, 6955, 36955},    /* DDR3-667 SC */
        {0, 1, 800, 800, 5958, 35958, 6400, 36400},    /* DDR3-800 SC */

        {0, 0, 667, 400, 3456, 33456, 4103, 34106},    /* DDR2-400 SC */
        {0, 0, 667, 667, 3428, 33428, 3927, 33927},    /* DDR2-667 SC */
        {0, 0, 667, 800, 3443, 33443, 3905, 33905},    /* DDR2-800 SC */
        {0, 1, 667, 667, 6494, 36494, 6993, 36993},    /* DDR3-667 SC */
        {0, 1, 667, 800, 5998, 35998, 6460, 36460},    /* DDR3-800 SC */

        {0, 0, 400, 400, 3528, 33528, 4255, 34255},    /* DDR2-400 SC */
        {0, 0, 400, 667, 3500, 33500, 4079, 34079},    /* DDR2-667 SC */
        {0, 0, 400, 800, 3487, 33487, 4029, 34029},    /* DDR2-800 SC */
        {0, 1, 400, 667, 6566, 36566, 7145, 37145},    /* DDR3-667 SC */
        {0, 1, 400, 800, 6042, 36042, 6584, 36584},    /* DDR3-800 SC */
};

static const struct cxsr_latency *intel_get_cxsr_latency(bool is_desktop,
                                                         bool is_ddr3,
                                                         int fsb,
                                                         int mem)
{
        const struct cxsr_latency *latency;
        int i;

        if (fsb == 0 || mem == 0)
                return NULL;

        for (i = 0; i < ARRAY_SIZE(cxsr_latency_table); i++) {
                latency = &cxsr_latency_table[i];
                if (is_desktop == latency->is_desktop &&
                    is_ddr3 == latency->is_ddr3 &&
                    fsb == latency->fsb_freq && mem == latency->mem_freq)
                        return latency;
        }

        DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");

        return NULL;
}
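
/*
 * Illustrative lookup (not used by the driver itself): a desktop part
 * (is_desktop == true) with DDR3, fsb == 800 and mem == 667 matches the
 * {1, 1, 800, 667, 6420, 36420, 6873, 36873} row above, and the four
 * latency values from that row (the SR and HPLL-off variants) are what
 * the Pineview watermark code below feeds into intel_calculate_wm().
 * Any FSB/MEM combination missing from cxsr_latency_table makes this
 * return NULL, and the callers then disable CxSR outright.
 */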

static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable)
{
        u32 val;

        mutex_lock(&dev_priv->pcu_lock);

        val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
        if (enable)
                val &= ~FORCE_DDR_HIGH_FREQ;
        else
                val |= FORCE_DDR_HIGH_FREQ;
        val &= ~FORCE_DDR_LOW_FREQ;
        val |= FORCE_DDR_FREQ_REQ_ACK;
        vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);

        if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
                      FORCE_DDR_FREQ_REQ_ACK) == 0, 3))
                DRM_ERROR("timed out waiting for Punit DDR DVFS request\n");

        mutex_unlock(&dev_priv->pcu_lock);
}

static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable)
{
        u32 val;

        mutex_lock(&dev_priv->pcu_lock);

        val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
        if (enable)
                val |= DSP_MAXFIFO_PM5_ENABLE;
        else
                val &= ~DSP_MAXFIFO_PM5_ENABLE;
        vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val);

        mutex_unlock(&dev_priv->pcu_lock);
}

#define FW_WM(value, plane) \
        (((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK)
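
/*
 * Sketch of how FW_WM() expands (assuming the usual DSPFW_*_SHIFT and
 * DSPFW_*_MASK definitions from i915_reg.h):
 *
 *   FW_WM(wm, SR) == ((wm << DSPFW_SR_SHIFT) & DSPFW_SR_MASK)
 *
 * i.e. the watermark value is shifted into its register field and any
 * bits that overflow the field are masked off rather than corrupting
 * the neighbouring fields.
 */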

static bool _intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
{
        bool was_enabled;
        u32 val;

        if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
                was_enabled = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
                I915_WRITE(FW_BLC_SELF_VLV, enable ? FW_CSPWRDWNEN : 0);
                POSTING_READ(FW_BLC_SELF_VLV);
        } else if (IS_G4X(dev_priv) || IS_I965GM(dev_priv)) {
                was_enabled = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
                I915_WRITE(FW_BLC_SELF, enable ? FW_BLC_SELF_EN : 0);
                POSTING_READ(FW_BLC_SELF);
        } else if (IS_PINEVIEW(dev_priv)) {
                val = I915_READ(DSPFW3);
                was_enabled = val & PINEVIEW_SELF_REFRESH_EN;
                if (enable)
                        val |= PINEVIEW_SELF_REFRESH_EN;
                else
                        val &= ~PINEVIEW_SELF_REFRESH_EN;
                I915_WRITE(DSPFW3, val);
                POSTING_READ(DSPFW3);
        } else if (IS_I945G(dev_priv) || IS_I945GM(dev_priv)) {
                was_enabled = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
                val = enable ? _MASKED_BIT_ENABLE(FW_BLC_SELF_EN) :
                               _MASKED_BIT_DISABLE(FW_BLC_SELF_EN);
                I915_WRITE(FW_BLC_SELF, val);
                POSTING_READ(FW_BLC_SELF);
        } else if (IS_I915GM(dev_priv)) {
                /*
                 * FIXME can't find a bit like this for 915G,
                 * and yet it does have the related watermark in
                 * FW_BLC_SELF. What's going on?
                 */
                was_enabled = I915_READ(INSTPM) & INSTPM_SELF_EN;
                val = enable ? _MASKED_BIT_ENABLE(INSTPM_SELF_EN) :
                               _MASKED_BIT_DISABLE(INSTPM_SELF_EN);
                I915_WRITE(INSTPM, val);
                POSTING_READ(INSTPM);
        } else {
                return false;
        }

        trace_intel_memory_cxsr(dev_priv, was_enabled, enable);

        DRM_DEBUG_KMS("memory self-refresh is %s (was %s)\n",
                      enableddisabled(enable),
                      enableddisabled(was_enabled));

        return was_enabled;
}

/**
 * intel_set_memory_cxsr - Configure CxSR state
 * @dev_priv: i915 device
 * @enable: Allow vs. disallow CxSR
 *
 * Allow or disallow the system to enter a special CxSR
 * (C-state self refresh) state. What typically happens in CxSR mode
 * is that several display FIFOs may get combined into a single larger
 * FIFO for a particular plane (so called max FIFO mode) to allow the
 * system to defer memory fetches longer, and the memory will enter
 * self refresh.
 *
 * Note that enabling CxSR does not guarantee that the system enters
 * this special mode, nor does it guarantee that the system stays
 * in that mode once entered. So this just allows/disallows the system
 * to autonomously utilize the CxSR mode. Other factors such as core
 * C-states will affect when/if the system actually enters/exits the
 * CxSR mode.
 *
 * Note that on VLV/CHV this actually only controls the max FIFO mode,
 * and the system is free to enter/exit memory self refresh at any time
 * even when the use of CxSR has been disallowed.
 *
 * While the system is actually in the CxSR/max FIFO mode, some plane
 * control registers will not get latched on vblank. Thus in order to
 * guarantee the system will respond to changes in the plane registers
 * we must always disallow CxSR prior to making changes to those registers.
 * Unfortunately the system will re-evaluate the CxSR conditions at
 * frame start which happens after vblank start (which is when the plane
 * registers would get latched), so we can't proceed with the plane update
 * during the same frame where we disallowed CxSR.
 *
 * Certain platforms also have a deeper HPLL SR mode. Fortunately the
 * HPLL SR mode depends on CxSR itself, so we don't have to hand hold
 * the hardware w.r.t. HPLL SR when writing to plane registers.
 * Disallowing just CxSR is sufficient.
 */
bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
{
        bool ret;

        mutex_lock(&dev_priv->wm.wm_mutex);
        ret = _intel_set_memory_cxsr(dev_priv, enable);
        if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
                dev_priv->wm.vlv.cxsr = enable;
        else if (IS_G4X(dev_priv))
                dev_priv->wm.g4x.cxsr = enable;
        mutex_unlock(&dev_priv->wm.wm_mutex);

        return ret;
}
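
/*
 * Illustrative call pattern (a simplified sketch, not the actual atomic
 * commit path, which drives this through the intermediate/optimal
 * watermark states): per the comments above, CxSR has to be disallowed
 * and a vblank must pass before plane registers may be written:
 *
 *   intel_set_memory_cxsr(dev_priv, false);
 *   ... wait for the next vblank so the disallow takes effect ...
 *   ... write the plane registers ...
 *   intel_set_memory_cxsr(dev_priv, true);
 */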

/*
 * Latency for FIFO fetches is dependent on several factors:
 *   - memory configuration (speed, channels)
 *   - chipset
 *   - current MCH state
 * It can be fairly high in some situations, so here we assume a fairly
 * pessimal value.  It's a tradeoff between extra memory fetches (if we
 * set this value too high, the FIFO will fetch frequently to stay full)
 * and power consumption (set it too low to save power and we might see
 * FIFO underruns and display "flicker").
 *
 * A value of 5us seems to be a good balance; safe for very low end
 * platforms but not overly aggressive on lower latency configs.
 */
static const int pessimal_latency_ns = 5000;

#define VLV_FIFO_START(dsparb, dsparb2, lo_shift, hi_shift) \
        ((((dsparb) >> (lo_shift)) & 0xff) | ((((dsparb2) >> (hi_shift)) & 0x1) << 8))
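
/*
 * Worked example (illustrative): for pipe A's sprite0 the low 8 bits
 * of the FIFO start offset live in DSPARB[7:0] and the 9th bit in
 * DSPARB2[0], so
 *
 *   VLV_FIFO_START(dsparb, dsparb2, 0, 0)
 *     == ((dsparb >> 0) & 0xff) | (((dsparb2 >> 0) & 0x1) << 8)
 *
 * which reassembles a 9-bit (0-511) FIFO start from the two split
 * register fields, matching the per-pipe shift pairs used below.
 */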

static void vlv_get_fifo_size(struct intel_crtc_state *crtc_state)
{
        struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
        struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
        struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state;
        enum pipe pipe = crtc->pipe;
        int sprite0_start, sprite1_start;

        switch (pipe) {
                uint32_t dsparb, dsparb2, dsparb3;
        case PIPE_A:
                dsparb = I915_READ(DSPARB);
                dsparb2 = I915_READ(DSPARB2);
                sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 0, 0);
                sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 8, 4);
                break;
        case PIPE_B:
                dsparb = I915_READ(DSPARB);
                dsparb2 = I915_READ(DSPARB2);
                sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 16, 8);
                sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 24, 12);
                break;
        case PIPE_C:
                dsparb2 = I915_READ(DSPARB2);
                dsparb3 = I915_READ(DSPARB3);
                sprite0_start = VLV_FIFO_START(dsparb3, dsparb2, 0, 16);
                sprite1_start = VLV_FIFO_START(dsparb3, dsparb2, 8, 20);
                break;
        default:
                MISSING_CASE(pipe);
                return;
        }

        fifo_state->plane[PLANE_PRIMARY] = sprite0_start;
        fifo_state->plane[PLANE_SPRITE0] = sprite1_start - sprite0_start;
        fifo_state->plane[PLANE_SPRITE1] = 511 - sprite1_start;
        fifo_state->plane[PLANE_CURSOR] = 63;
}

static int i9xx_get_fifo_size(struct drm_i915_private *dev_priv,
                              enum i9xx_plane_id i9xx_plane)
{
        uint32_t dsparb = I915_READ(DSPARB);
        int size;

        size = dsparb & 0x7f;
        if (i9xx_plane == PLANE_B)
                size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) - size;

        DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n",
                      dsparb, plane_name(i9xx_plane), size);

        return size;
}
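
/*
 * Illustrative decode (hypothetical register value, and assuming
 * DSPARB_CSTART_SHIFT == 7): with DSPARB == 0x3030, plane A's FIFO is
 * 0x3030 & 0x7f = 48 entries starting at 0, and plane B gets
 * ((0x3030 >> 7) & 0x7f) - 48 = 96 - 48 = 48 entries; i.e. the
 * register stores plane B's start offset, not its size.
 */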

static int i830_get_fifo_size(struct drm_i915_private *dev_priv,
                              enum i9xx_plane_id i9xx_plane)
{
        uint32_t dsparb = I915_READ(DSPARB);
        int size;

        size = dsparb & 0x1ff;
        if (i9xx_plane == PLANE_B)
                size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) - size;
        size >>= 1; /* Convert to cachelines */

        DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n",
                      dsparb, plane_name(i9xx_plane), size);

        return size;
}

static int i845_get_fifo_size(struct drm_i915_private *dev_priv,
                              enum i9xx_plane_id i9xx_plane)
{
        uint32_t dsparb = I915_READ(DSPARB);
        int size;

        size = dsparb & 0x7f;
        size >>= 2; /* Convert to cachelines */

        DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n",
                      dsparb, plane_name(i9xx_plane), size);

        return size;
}

/* Pineview has different values for various configs */
static const struct intel_watermark_params pineview_display_wm = {
        .fifo_size = PINEVIEW_DISPLAY_FIFO,
        .max_wm = PINEVIEW_MAX_WM,
        .default_wm = PINEVIEW_DFT_WM,
        .guard_size = PINEVIEW_GUARD_WM,
        .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params pineview_display_hplloff_wm = {
        .fifo_size = PINEVIEW_DISPLAY_FIFO,
        .max_wm = PINEVIEW_MAX_WM,
        .default_wm = PINEVIEW_DFT_HPLLOFF_WM,
        .guard_size = PINEVIEW_GUARD_WM,
        .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params pineview_cursor_wm = {
        .fifo_size = PINEVIEW_CURSOR_FIFO,
        .max_wm = PINEVIEW_CURSOR_MAX_WM,
        .default_wm = PINEVIEW_CURSOR_DFT_WM,
        .guard_size = PINEVIEW_CURSOR_GUARD_WM,
        .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params pineview_cursor_hplloff_wm = {
        .fifo_size = PINEVIEW_CURSOR_FIFO,
        .max_wm = PINEVIEW_CURSOR_MAX_WM,
        .default_wm = PINEVIEW_CURSOR_DFT_WM,
        .guard_size = PINEVIEW_CURSOR_GUARD_WM,
        .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i965_cursor_wm_info = {
        .fifo_size = I965_CURSOR_FIFO,
        .max_wm = I965_CURSOR_MAX_WM,
        .default_wm = I965_CURSOR_DFT_WM,
        .guard_size = 2,
        .cacheline_size = I915_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i945_wm_info = {
        .fifo_size = I945_FIFO_SIZE,
        .max_wm = I915_MAX_WM,
        .default_wm = 1,
        .guard_size = 2,
        .cacheline_size = I915_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i915_wm_info = {
        .fifo_size = I915_FIFO_SIZE,
        .max_wm = I915_MAX_WM,
        .default_wm = 1,
        .guard_size = 2,
        .cacheline_size = I915_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i830_a_wm_info = {
        .fifo_size = I855GM_FIFO_SIZE,
        .max_wm = I915_MAX_WM,
        .default_wm = 1,
        .guard_size = 2,
        .cacheline_size = I830_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i830_bc_wm_info = {
        .fifo_size = I855GM_FIFO_SIZE,
        .max_wm = I915_MAX_WM/2,
        .default_wm = 1,
        .guard_size = 2,
        .cacheline_size = I830_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i845_wm_info = {
        .fifo_size = I830_FIFO_SIZE,
        .max_wm = I915_MAX_WM,
        .default_wm = 1,
        .guard_size = 2,
        .cacheline_size = I830_FIFO_LINE_SIZE,
};

/**
 * intel_wm_method1 - Method 1 / "small buffer" watermark formula
 * @pixel_rate: Pipe pixel rate in kHz
 * @cpp: Plane bytes per pixel
 * @latency: Memory wakeup latency in 0.1us units
 *
 * Compute the watermark using the method 1 or "small buffer"
 * formula. The caller may additionally add extra cachelines
 * to account for TLB misses and clock crossings.
 *
 * This method is concerned with the short term drain rate
 * of the FIFO, ie. it does not account for blanking periods
 * which would effectively reduce the average drain rate across
 * a longer period. The name "small" refers to the fact the
 * FIFO is relatively small compared to the amount of data
 * fetched.
 *
 * The FIFO level vs. time graph might look something like:
 *
 *   |\   |\
 *   | \  | \
 * __---__---__ (- plane active, _ blanking)
 * -> time
 *
 * or perhaps like this:
 *
 *   |\|\  |\|\
 * __----__----__ (- plane active, _ blanking)
 * -> time
 *
 * Returns:
 * The watermark in bytes
 */
static unsigned int intel_wm_method1(unsigned int pixel_rate,
                                     unsigned int cpp,
                                     unsigned int latency)
{
        uint64_t ret;

        ret = (uint64_t) pixel_rate * cpp * latency;
        ret = DIV_ROUND_UP_ULL(ret, 10000);

        return ret;
}
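
/*
 * Worked example (illustrative): a 1920x1080@60 mode has a pixel rate
 * of about 148500 kHz. With cpp == 4 and a 5 usec wakeup latency
 * (latency == 50 in 0.1us units):
 *
 *   ret = 148500 * 4 * 50 = 29700000
 *   ret = DIV_ROUND_UP_ULL(29700000, 10000) = 2970
 *
 * i.e. roughly 2970 bytes (~47 64-byte cachelines) drain from the FIFO
 * while memory wakes up, so the fetch must be issued before the FIFO
 * level drops below that.
 */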

/**
 * intel_wm_method2 - Method 2 / "large buffer" watermark formula
 * @pixel_rate: Pipe pixel rate in kHz
 * @htotal: Pipe horizontal total
 * @width: Plane width in pixels
 * @cpp: Plane bytes per pixel
 * @latency: Memory wakeup latency in 0.1us units
 *
 * Compute the watermark using the method 2 or "large buffer"
 * formula. The caller may additionally add extra cachelines
 * to account for TLB misses and clock crossings.
 *
 * This method is concerned with the long term drain rate
 * of the FIFO, ie. it does account for blanking periods
 * which effectively reduce the average drain rate across
 * a longer period. The name "large" refers to the fact the
 * FIFO is relatively large compared to the amount of data
 * fetched.
 *
 * The FIFO level vs. time graph might look something like:
 *
 *    |\___       |\___
 *    |    \___   |    \___
 *    |        \  |        \
 * __ --__--__--__--__--__--__ (- plane active, _ blanking)
 * -> time
 *
 * Returns:
 * The watermark in bytes
 */
static unsigned int intel_wm_method2(unsigned int pixel_rate,
                                     unsigned int htotal,
                                     unsigned int width,
                                     unsigned int cpp,
                                     unsigned int latency)
{
        unsigned int ret;

        /*
         * FIXME remove once all users are computing
         * watermarks in the correct place.
         */
        if (WARN_ON_ONCE(htotal == 0))
                htotal = 1;

        ret = (latency * pixel_rate) / (htotal * 10000);
        ret = (ret + 1) * width * cpp;

        return ret;
}
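
/*
 * Worked example (illustrative): the same 148500 kHz mode with
 * htotal == 2200, a 1920 pixel wide plane, cpp == 4 and a 12 usec
 * latency (latency == 120):
 *
 *   ret = (120 * 148500) / (2200 * 10000) = 0   (integer division)
 *   ret = (0 + 1) * 1920 * 4 = 7680
 *
 * The 12 usec latency is shorter than one line time (~14.8 usec), so
 * zero whole lines drain during the wakeup and the +1 rounds up to a
 * single line's worth of data (7680 bytes).
 */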

/**
 * intel_calculate_wm - calculate watermark level
 * @pixel_rate: pixel clock
 * @wm: chip FIFO params
 * @fifo_size: size of the FIFO buffer
 * @cpp: bytes per pixel
 * @latency_ns: memory latency for the platform
 *
 * Calculate the watermark level (the level at which the display plane will
 * start fetching from memory again).  Each chip has a different display
 * FIFO size and allocation, so the caller needs to figure that out and pass
 * in the correct intel_watermark_params structure.
 *
 * As the pixel clock runs, the FIFO will be drained at a rate that depends
 * on the pixel size.  When it reaches the watermark level, it'll start
 * fetching FIFO line sized chunks from memory until the FIFO fills
 * past the watermark point.  If the FIFO drains completely, a FIFO underrun
 * will occur, and a display engine hang could result.
 */
static unsigned int intel_calculate_wm(int pixel_rate,
                                       const struct intel_watermark_params *wm,
                                       int fifo_size, int cpp,
                                       unsigned int latency_ns)
{
        int entries, wm_size;

        /*
         * Note: we need to make sure we don't overflow for various clock &
         * latency values.
         * clocks go from a few thousand to several hundred thousand.
         * latency is usually a few thousand
         */
        entries = intel_wm_method1(pixel_rate, cpp,
                                   latency_ns / 100);
        entries = DIV_ROUND_UP(entries, wm->cacheline_size) +
                wm->guard_size;
        DRM_DEBUG_KMS("FIFO entries required for mode: %d\n", entries);

        wm_size = fifo_size - entries;
        DRM_DEBUG_KMS("FIFO watermark level: %d\n", wm_size);

        /* Don't promote wm_size to unsigned... */
        if (wm_size > wm->max_wm)
                wm_size = wm->max_wm;
        if (wm_size <= 0)
                wm_size = wm->default_wm;

        /*
         * Bspec seems to indicate that the value shouldn't be lower than
         * 'burst size + 1'. Certainly 830 is quite unhappy with low values.
         * Let's go for 8 which is the burst size since certain platforms
         * already use a hardcoded 8 (which is what the spec says should be
         * done).
         */
        if (wm_size <= 8)
                wm_size = 8;

        return wm_size;
}
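
/*
 * Worked example (illustrative, assuming the usual 512 entry Pineview
 * display FIFO with 64 byte cachelines and a guard of 2): pixel_rate ==
 * 148500, cpp == 4, latency_ns == 5000 gives method 1 entries of
 * 148500 * 4 * 50 / 10000 = 2970 bytes, i.e. DIV_ROUND_UP(2970, 64) =
 * 47 cachelines, plus the guard -> 49. The returned watermark level
 * would then be 512 - 49 = 463, subject to the max_wm clamp, the
 * default_wm fallback and the minimum of 8 below.
 */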

static bool is_disabling(int old, int new, int threshold)
{
        return old >= threshold && new < threshold;
}

static bool is_enabling(int old, int new, int threshold)
{
        return old < threshold && new >= threshold;
}

static int intel_wm_num_levels(struct drm_i915_private *dev_priv)
{
        return dev_priv->wm.max_level + 1;
}

static bool intel_wm_plane_visible(const struct intel_crtc_state *crtc_state,
                                   const struct intel_plane_state *plane_state)
{
        struct intel_plane *plane = to_intel_plane(plane_state->base.plane);

        /* FIXME check the 'enable' instead */
        if (!crtc_state->base.active)
                return false;

        /*
         * Treat cursor with fb as always visible since cursor updates
         * can happen faster than the vrefresh rate, and the current
         * watermark code doesn't handle that correctly. Cursor updates
         * which set/clear the fb or change the cursor size are going
         * to get throttled by intel_legacy_cursor_update() to work
         * around this problem with the watermark code.
         */
        if (plane->id == PLANE_CURSOR)
                return plane_state->base.fb != NULL;
        else
                return plane_state->base.visible;
}

static struct intel_crtc *single_enabled_crtc(struct drm_i915_private *dev_priv)
{
        struct intel_crtc *crtc, *enabled = NULL;

        for_each_intel_crtc(&dev_priv->drm, crtc) {
                if (intel_crtc_active(crtc)) {
                        if (enabled)
                                return NULL;
                        enabled = crtc;
                }
        }

        return enabled;
}

static void pineview_update_wm(struct intel_crtc *unused_crtc)
{
        struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
        struct intel_crtc *crtc;
        const struct cxsr_latency *latency;
        u32 reg;
        unsigned int wm;

        latency = intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv),
                                         dev_priv->is_ddr3,
                                         dev_priv->fsb_freq,
                                         dev_priv->mem_freq);
        if (!latency) {
                DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
                intel_set_memory_cxsr(dev_priv, false);
                return;
        }

        crtc = single_enabled_crtc(dev_priv);
        if (crtc) {
                const struct drm_display_mode *adjusted_mode =
                        &crtc->config->base.adjusted_mode;
                const struct drm_framebuffer *fb =
                        crtc->base.primary->state->fb;
                int cpp = fb->format->cpp[0];
                int clock = adjusted_mode->crtc_clock;

                /* Display SR */
                wm = intel_calculate_wm(clock, &pineview_display_wm,
                                        pineview_display_wm.fifo_size,
                                        cpp, latency->display_sr);
                reg = I915_READ(DSPFW1);
                reg &= ~DSPFW_SR_MASK;
                reg |= FW_WM(wm, SR);
                I915_WRITE(DSPFW1, reg);
                DRM_DEBUG_KMS("DSPFW1 register is %x\n", reg);

                /* cursor SR */
                wm = intel_calculate_wm(clock, &pineview_cursor_wm,
                                        pineview_display_wm.fifo_size,
                                        4, latency->cursor_sr);
                reg = I915_READ(DSPFW3);
                reg &= ~DSPFW_CURSOR_SR_MASK;
                reg |= FW_WM(wm, CURSOR_SR);
                I915_WRITE(DSPFW3, reg);

                /* Display HPLL off SR */
                wm = intel_calculate_wm(clock, &pineview_display_hplloff_wm,
                                        pineview_display_hplloff_wm.fifo_size,
                                        cpp, latency->display_hpll_disable);
                reg = I915_READ(DSPFW3);
                reg &= ~DSPFW_HPLL_SR_MASK;
                reg |= FW_WM(wm, HPLL_SR);
                I915_WRITE(DSPFW3, reg);

                /* cursor HPLL off SR */
                wm = intel_calculate_wm(clock, &pineview_cursor_hplloff_wm,
                                        pineview_display_hplloff_wm.fifo_size,
                                        4, latency->cursor_hpll_disable);
                reg = I915_READ(DSPFW3);
                reg &= ~DSPFW_HPLL_CURSOR_MASK;
                reg |= FW_WM(wm, HPLL_CURSOR);
                I915_WRITE(DSPFW3, reg);
                DRM_DEBUG_KMS("DSPFW3 register is %x\n", reg);

                intel_set_memory_cxsr(dev_priv, true);
        } else {
                intel_set_memory_cxsr(dev_priv, false);
        }
}

/*
 * Documentation says:
 * "If the line size is small, the TLB fetches can get in the way of the
 *  data fetches, causing some lag in the pixel data return which is not
 *  accounted for in the above formulas. The following adjustment only
 *  needs to be applied if eight whole lines fit in the buffer at once.
 *  The WM is adjusted upwards by the difference between the FIFO size
 *  and the size of 8 whole lines. This adjustment is always performed
 *  in the actual pixel depth regardless of whether FBC is enabled or not."
 */
static unsigned int g4x_tlb_miss_wa(int fifo_size, int width, int cpp)
{
        int tlb_miss = fifo_size * 64 - width * cpp * 8;

        return max(0, tlb_miss);
}
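
/*
 * Worked example (illustrative): at the SR level the primary plane
 * FIFO is 511 cachelines (511 * 64 = 32704 bytes). A 1920 pixel wide
 * 4 bpp plane needs 1920 * 4 * 8 = 61440 bytes for eight whole lines,
 * so eight lines don't fit and no adjustment applies (tlb_miss < 0).
 * A 960 pixel wide plane needs only 30720 bytes, so the watermark is
 * bumped by 32704 - 30720 = 1984 bytes.
 */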

static void g4x_write_wm_values(struct drm_i915_private *dev_priv,
                                const struct g4x_wm_values *wm)
{
        enum pipe pipe;

        for_each_pipe(dev_priv, pipe)
                trace_g4x_wm(intel_get_crtc_for_pipe(dev_priv, pipe), wm);

        I915_WRITE(DSPFW1,
                   FW_WM(wm->sr.plane, SR) |
                   FW_WM(wm->pipe[PIPE_B].plane[PLANE_CURSOR], CURSORB) |
                   FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY], PLANEB) |
                   FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY], PLANEA));
        I915_WRITE(DSPFW2,
                   (wm->fbc_en ? DSPFW_FBC_SR_EN : 0) |
                   FW_WM(wm->sr.fbc, FBC_SR) |
                   FW_WM(wm->hpll.fbc, FBC_HPLL_SR) |
                   FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEB) |
                   FW_WM(wm->pipe[PIPE_A].plane[PLANE_CURSOR], CURSORA) |
                   FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0], SPRITEA));
        I915_WRITE(DSPFW3,
                   (wm->hpll_en ? DSPFW_HPLL_SR_EN : 0) |
                   FW_WM(wm->sr.cursor, CURSOR_SR) |
                   FW_WM(wm->hpll.cursor, HPLL_CURSOR) |
                   FW_WM(wm->hpll.plane, HPLL_SR));

        POSTING_READ(DSPFW1);
}

#define FW_WM_VLV(value, plane) \
        (((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK_VLV)

static void vlv_write_wm_values(struct drm_i915_private *dev_priv,
                                const struct vlv_wm_values *wm)
{
        enum pipe pipe;

        for_each_pipe(dev_priv, pipe) {
                trace_vlv_wm(intel_get_crtc_for_pipe(dev_priv, pipe), wm);

                I915_WRITE(VLV_DDL(pipe),
                           (wm->ddl[pipe].plane[PLANE_CURSOR] << DDL_CURSOR_SHIFT) |
                           (wm->ddl[pipe].plane[PLANE_SPRITE1] << DDL_SPRITE_SHIFT(1)) |
                           (wm->ddl[pipe].plane[PLANE_SPRITE0] << DDL_SPRITE_SHIFT(0)) |
                           (wm->ddl[pipe].plane[PLANE_PRIMARY] << DDL_PLANE_SHIFT));
        }

        /*
         * Zero the (unused) WM1 watermarks, and also clear all the
         * high order bits so that there are no out of bounds values
         * present in the registers during the reprogramming.
         */
        I915_WRITE(DSPHOWM, 0);
        I915_WRITE(DSPHOWM1, 0);
        I915_WRITE(DSPFW4, 0);
        I915_WRITE(DSPFW5, 0);
        I915_WRITE(DSPFW6, 0);

        I915_WRITE(DSPFW1,
                   FW_WM(wm->sr.plane, SR) |
                   FW_WM(wm->pipe[PIPE_B].plane[PLANE_CURSOR], CURSORB) |
                   FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_PRIMARY], PLANEB) |
                   FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_PRIMARY], PLANEA));
        I915_WRITE(DSPFW2,
                   FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_SPRITE1], SPRITEB) |
                   FW_WM(wm->pipe[PIPE_A].plane[PLANE_CURSOR], CURSORA) |
                   FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_SPRITE0], SPRITEA));
        I915_WRITE(DSPFW3,
                   FW_WM(wm->sr.cursor, CURSOR_SR));

        if (IS_CHERRYVIEW(dev_priv)) {
                I915_WRITE(DSPFW7_CHV,
                           FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE1], SPRITED) |
                           FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEC));
                I915_WRITE(DSPFW8_CHV,
                           FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_SPRITE1], SPRITEF) |
                           FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_SPRITE0], SPRITEE));
                I915_WRITE(DSPFW9_CHV,
                           FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_PRIMARY], PLANEC) |
                           FW_WM(wm->pipe[PIPE_C].plane[PLANE_CURSOR], CURSORC));
                I915_WRITE(DSPHOWM,
                           FW_WM(wm->sr.plane >> 9, SR_HI) |
                           FW_WM(wm->pipe[PIPE_C].plane[PLANE_SPRITE1] >> 8, SPRITEF_HI) |
                           FW_WM(wm->pipe[PIPE_C].plane[PLANE_SPRITE0] >> 8, SPRITEE_HI) |
                           FW_WM(wm->pipe[PIPE_C].plane[PLANE_PRIMARY] >> 8, PLANEC_HI) |
                           FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE1] >> 8, SPRITED_HI) |
                           FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0] >> 8, SPRITEC_HI) |
                           FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY] >> 8, PLANEB_HI) |
                           FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE1] >> 8, SPRITEB_HI) |
                           FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0] >> 8, SPRITEA_HI) |
                           FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY] >> 8, PLANEA_HI));
        } else {
                I915_WRITE(DSPFW7,
                           FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE1], SPRITED) |
                           FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEC));
                I915_WRITE(DSPHOWM,
                           FW_WM(wm->sr.plane >> 9, SR_HI) |
                           FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE1] >> 8, SPRITED_HI) |
                           FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0] >> 8, SPRITEC_HI) |
                           FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY] >> 8, PLANEB_HI) |
                           FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE1] >> 8, SPRITEB_HI) |
                           FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0] >> 8, SPRITEA_HI) |
                           FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY] >> 8, PLANEA_HI));
        }

        POSTING_READ(DSPFW1);
}

#undef FW_WM_VLV

static void g4x_setup_wm_latency(struct drm_i915_private *dev_priv)
{
        /* all latencies in usec */
        dev_priv->wm.pri_latency[G4X_WM_LEVEL_NORMAL] = 5;
        dev_priv->wm.pri_latency[G4X_WM_LEVEL_SR] = 12;
        dev_priv->wm.pri_latency[G4X_WM_LEVEL_HPLL] = 35;

        dev_priv->wm.max_level = G4X_WM_LEVEL_HPLL;
}

static int g4x_plane_fifo_size(enum plane_id plane_id, int level)
{
        /*
         * DSPCNTR[13] supposedly controls whether the
         * primary plane can use the FIFO space otherwise
         * reserved for the sprite plane. It's not 100% clear
         * what the actual FIFO size is, but it looks like we
         * can happily set both primary and sprite watermarks
         * up to 127 cachelines. So that would seem to mean
         * that either DSPCNTR[13] doesn't do anything, or that
         * the total FIFO is >= 256 cachelines in size. Either
         * way, we don't seem to have to worry about this
         * repartitioning as the maximum watermark value the
         * register can hold for each plane is lower than the
         * minimum FIFO size.
         */
        switch (plane_id) {
        case PLANE_CURSOR:
                return 63;
        case PLANE_PRIMARY:
                return level == G4X_WM_LEVEL_NORMAL ? 127 : 511;
        case PLANE_SPRITE0:
                return level == G4X_WM_LEVEL_NORMAL ? 127 : 0;
        default:
                MISSING_CASE(plane_id);
                return 0;
        }
}

static int g4x_fbc_fifo_size(int level)
{
        switch (level) {
        case G4X_WM_LEVEL_SR:
                return 7;
        case G4X_WM_LEVEL_HPLL:
                return 15;
        default:
                MISSING_CASE(level);
                return 0;
        }
}

static uint16_t g4x_compute_wm(const struct intel_crtc_state *crtc_state,
                               const struct intel_plane_state *plane_state,
                               int level)
{
        struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
        struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
        const struct drm_display_mode *adjusted_mode =
                &crtc_state->base.adjusted_mode;
        unsigned int latency = dev_priv->wm.pri_latency[level] * 10;
        unsigned int clock, htotal, cpp, width, wm;

        if (latency == 0)
                return USHRT_MAX;

        if (!intel_wm_plane_visible(crtc_state, plane_state))
                return 0;

        /*
         * Not 100% sure which way ELK should go here as the
         * spec only says CL/CTG should assume 32bpp and BW
         * doesn't need to. But as these things followed the
         * mobile vs. desktop lines on gen3 as well, let's
         * assume ELK doesn't need this.
         *
         * The spec also fails to list such a restriction for
         * the HPLL watermark, which seems a little strange.
         * Let's use 32bpp for the HPLL watermark as well.
         */
        if (IS_GM45(dev_priv) && plane->id == PLANE_PRIMARY &&
            level != G4X_WM_LEVEL_NORMAL)
                cpp = 4;
        else
                cpp = plane_state->base.fb->format->cpp[0];

        clock = adjusted_mode->crtc_clock;
        htotal = adjusted_mode->crtc_htotal;

        if (plane->id == PLANE_CURSOR)
                width = plane_state->base.crtc_w;
        else
                width = drm_rect_width(&plane_state->base.dst);

        if (plane->id == PLANE_CURSOR) {
                wm = intel_wm_method2(clock, htotal, width, cpp, latency);
        } else if (plane->id == PLANE_PRIMARY &&
                   level == G4X_WM_LEVEL_NORMAL) {
                wm = intel_wm_method1(clock, cpp, latency);
        } else {
                unsigned int small, large;

                small = intel_wm_method1(clock, cpp, latency);
                large = intel_wm_method2(clock, htotal, width, cpp, latency);

                wm = min(small, large);
        }

        wm += g4x_tlb_miss_wa(g4x_plane_fifo_size(plane->id, level),
                              width, cpp);

        wm = DIV_ROUND_UP(wm, 64) + 2;

        return min_t(unsigned int, wm, USHRT_MAX);
}
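
/*
 * Illustrative effect of the min(small, large) selection above
 * (hypothetical numbers): with clock == 148500, cpp == 4 and the SR
 * latency of 12 usec (latency == 120), method 1 gives
 * 148500 * 4 * 120 / 10000 = 7128 bytes while method 2 (htotal 2200,
 * width 1920) gives 7680 bytes, so the smaller method 1 value wins.
 * Whichever model is more optimistic for the mode at hand sets the
 * level, and the cacheline conversion then adds the usual +2.
 */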
1154
1155static bool g4x_raw_plane_wm_set(struct intel_crtc_state *crtc_state,
1156                                 int level, enum plane_id plane_id, u16 value)
1157{
1158        struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1159        bool dirty = false;
1160
1161        for (; level < intel_wm_num_levels(dev_priv); level++) {
1162                struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1163
1164                dirty |= raw->plane[plane_id] != value;
1165                raw->plane[plane_id] = value;
1166        }
1167
1168        return dirty;
1169}
1170
1171static bool g4x_raw_fbc_wm_set(struct intel_crtc_state *crtc_state,
1172                               int level, u16 value)
1173{
1174        struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1175        bool dirty = false;
1176
1177        /* NORMAL level doesn't have an FBC watermark */
1178        level = max(level, G4X_WM_LEVEL_SR);
1179
1180        for (; level < intel_wm_num_levels(dev_priv); level++) {
1181                struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1182
1183                dirty |= raw->fbc != value;
1184                raw->fbc = value;
1185        }
1186
1187        return dirty;
1188}
1189
1190static uint32_t ilk_compute_fbc_wm(const struct intel_crtc_state *cstate,
1191                                   const struct intel_plane_state *pstate,
1192                                   uint32_t pri_val);
1193
1194static bool g4x_raw_plane_wm_compute(struct intel_crtc_state *crtc_state,
1195                                     const struct intel_plane_state *plane_state)
1196{
1197        struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
1198        int num_levels = intel_wm_num_levels(to_i915(plane->base.dev));
1199        enum plane_id plane_id = plane->id;
1200        bool dirty = false;
1201        int level;
1202
1203        if (!intel_wm_plane_visible(crtc_state, plane_state)) {
1204                dirty |= g4x_raw_plane_wm_set(crtc_state, 0, plane_id, 0);
1205                if (plane_id == PLANE_PRIMARY)
1206                        dirty |= g4x_raw_fbc_wm_set(crtc_state, 0, 0);
1207                goto out;
1208        }
1209
1210        for (level = 0; level < num_levels; level++) {
1211                struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1212                int wm, max_wm;
1213
1214                wm = g4x_compute_wm(crtc_state, plane_state, level);
1215                max_wm = g4x_plane_fifo_size(plane_id, level);
1216
1217                if (wm > max_wm)
1218                        break;
1219
1220                dirty |= raw->plane[plane_id] != wm;
1221                raw->plane[plane_id] = wm;
1222
1223                if (plane_id != PLANE_PRIMARY ||
1224                    level == G4X_WM_LEVEL_NORMAL)
1225                        continue;
1226
1227                wm = ilk_compute_fbc_wm(crtc_state, plane_state,
1228                                        raw->plane[plane_id]);
1229                max_wm = g4x_fbc_fifo_size(level);
1230
1231                /*
1232                 * FBC wm is not mandatory as we
1233                 * can always just disable its use.
1234                 */
1235                if (wm > max_wm)
1236                        wm = USHRT_MAX;
1237
1238                dirty |= raw->fbc != wm;
1239                raw->fbc = wm;
1240        }
1241
1242        /* mark watermarks as invalid */
1243        dirty |= g4x_raw_plane_wm_set(crtc_state, level, plane_id, USHRT_MAX);
1244
1245        if (plane_id == PLANE_PRIMARY)
1246                dirty |= g4x_raw_fbc_wm_set(crtc_state, level, USHRT_MAX);
1247
1248 out:
1249        if (dirty) {
1250                DRM_DEBUG_KMS("%s watermarks: normal=%d, SR=%d, HPLL=%d\n",
1251                              plane->base.name,
1252                              crtc_state->wm.g4x.raw[G4X_WM_LEVEL_NORMAL].plane[plane_id],
1253                              crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].plane[plane_id],
1254                              crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].plane[plane_id]);
1255
1256                if (plane_id == PLANE_PRIMARY)
1257                        DRM_DEBUG_KMS("FBC watermarks: SR=%d, HPLL=%d\n",
1258                                      crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].fbc,
1259                                      crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].fbc);
1260        }
1261
1262        return dirty;
1263}
1264
1265static bool g4x_raw_plane_wm_is_valid(const struct intel_crtc_state *crtc_state,
1266                                      enum plane_id plane_id, int level)
1267{
1268        const struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1269
1270        return raw->plane[plane_id] <= g4x_plane_fifo_size(plane_id, level);
1271}
1272
1273static bool g4x_raw_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state,
1274                                     int level)
1275{
1276        struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1277
1278        if (level > dev_priv->wm.max_level)
1279                return false;
1280
1281        return g4x_raw_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) &&
1282                g4x_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) &&
1283                g4x_raw_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level);
1284}
1285
1286/* mark all levels starting from 'level' as invalid */
1287static void g4x_invalidate_wms(struct intel_crtc *crtc,
1288                               struct g4x_wm_state *wm_state, int level)
1289{
1290        if (level <= G4X_WM_LEVEL_NORMAL) {
1291                enum plane_id plane_id;
1292
1293                for_each_plane_id_on_crtc(crtc, plane_id)
1294                        wm_state->wm.plane[plane_id] = USHRT_MAX;
1295        }
1296
1297        if (level <= G4X_WM_LEVEL_SR) {
1298                wm_state->cxsr = false;
1299                wm_state->sr.cursor = USHRT_MAX;
1300                wm_state->sr.plane = USHRT_MAX;
1301                wm_state->sr.fbc = USHRT_MAX;
1302        }
1303
1304        if (level <= G4X_WM_LEVEL_HPLL) {
1305                wm_state->hpll_en = false;
1306                wm_state->hpll.cursor = USHRT_MAX;
1307                wm_state->hpll.plane = USHRT_MAX;
1308                wm_state->hpll.fbc = USHRT_MAX;
1309        }
1310}
1311
1312static int g4x_compute_pipe_wm(struct intel_crtc_state *crtc_state)
1313{
1314        struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1315        struct intel_atomic_state *state =
1316                to_intel_atomic_state(crtc_state->base.state);
1317        struct g4x_wm_state *wm_state = &crtc_state->wm.g4x.optimal;
1318        int num_active_planes = hweight32(crtc_state->active_planes &
1319                                          ~BIT(PLANE_CURSOR));
1320        const struct g4x_pipe_wm *raw;
1321        const struct intel_plane_state *old_plane_state;
1322        const struct intel_plane_state *new_plane_state;
1323        struct intel_plane *plane;
1324        enum plane_id plane_id;
1325        int i, level;
1326        unsigned int dirty = 0;
1327
1328        for_each_oldnew_intel_plane_in_state(state, plane,
1329                                             old_plane_state,
1330                                             new_plane_state, i) {
1331                if (new_plane_state->base.crtc != &crtc->base &&
1332                    old_plane_state->base.crtc != &crtc->base)
1333                        continue;
1334
1335                if (g4x_raw_plane_wm_compute(crtc_state, new_plane_state))
1336                        dirty |= BIT(plane->id);
1337        }
1338
1339        if (!dirty)
1340                return 0;
1341
1342        level = G4X_WM_LEVEL_NORMAL;
1343        if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
1344                goto out;
1345
1346        raw = &crtc_state->wm.g4x.raw[level];
1347        for_each_plane_id_on_crtc(crtc, plane_id)
1348                wm_state->wm.plane[plane_id] = raw->plane[plane_id];
1349
1350        level = G4X_WM_LEVEL_SR;
1351
1352        if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
1353                goto out;
1354
1355        raw = &crtc_state->wm.g4x.raw[level];
1356        wm_state->sr.plane = raw->plane[PLANE_PRIMARY];
1357        wm_state->sr.cursor = raw->plane[PLANE_CURSOR];
1358        wm_state->sr.fbc = raw->fbc;
1359
1360        wm_state->cxsr = num_active_planes == 1;
1361
1362        level = G4X_WM_LEVEL_HPLL;
1363
1364        if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
1365                goto out;
1366
1367        raw = &crtc_state->wm.g4x.raw[level];
1368        wm_state->hpll.plane = raw->plane[PLANE_PRIMARY];
1369        wm_state->hpll.cursor = raw->plane[PLANE_CURSOR];
1370        wm_state->hpll.fbc = raw->fbc;
1371
1372        wm_state->hpll_en = wm_state->cxsr;
1373
1374        level++;
1375
1376 out:
1377        if (level == G4X_WM_LEVEL_NORMAL)
1378                return -EINVAL;
1379
1380        /* invalidate the higher levels */
1381        g4x_invalidate_wms(crtc, wm_state, level);
1382
1383        /*
1384         * Determine if the FBC watermark(s) can be used. If
1385         * this isn't the case we prefer to disable the FBC
1386         * watermark(s) rather than disable the SR/HPLL
1387         * level(s) entirely.
1388         */
1389        wm_state->fbc_en = level > G4X_WM_LEVEL_NORMAL;
1390
1391        if (level >= G4X_WM_LEVEL_SR &&
1392            wm_state->sr.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_SR))
1393                wm_state->fbc_en = false;
1394        else if (level >= G4X_WM_LEVEL_HPLL &&
1395                 wm_state->hpll.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_HPLL))
1396                wm_state->fbc_en = false;
1397
1398        return 0;
1399}
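/*
 * Control-flow note, by example: if the SR level check fails, 'level'
 * is left at G4X_WM_LEVEL_SR, so g4x_invalidate_wms() wipes the SR and
 * HPLL levels while the already-filled NORMAL level survives; only if
 * even the NORMAL level fails validation does the function bail out
 * with -EINVAL.
 */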
1400
1401static int g4x_compute_intermediate_wm(struct drm_device *dev,
1402                                       struct intel_crtc *crtc,
1403                                       struct intel_crtc_state *new_crtc_state)
1404{
1405        struct g4x_wm_state *intermediate = &new_crtc_state->wm.g4x.intermediate;
1406        const struct g4x_wm_state *optimal = &new_crtc_state->wm.g4x.optimal;
1407        struct intel_atomic_state *intel_state =
1408                to_intel_atomic_state(new_crtc_state->base.state);
1409        const struct intel_crtc_state *old_crtc_state =
1410                intel_atomic_get_old_crtc_state(intel_state, crtc);
1411        const struct g4x_wm_state *active = &old_crtc_state->wm.g4x.optimal;
1412        enum plane_id plane_id;
1413
1414        if (!new_crtc_state->base.active || drm_atomic_crtc_needs_modeset(&new_crtc_state->base)) {
1415                *intermediate = *optimal;
1416
1417                intermediate->cxsr = false;
1418                intermediate->hpll_en = false;
1419                goto out;
1420        }
1421
1422        intermediate->cxsr = optimal->cxsr && active->cxsr &&
1423                !new_crtc_state->disable_cxsr;
1424        intermediate->hpll_en = optimal->hpll_en && active->hpll_en &&
1425                !new_crtc_state->disable_cxsr;
1426        intermediate->fbc_en = optimal->fbc_en && active->fbc_en;
1427
1428        for_each_plane_id_on_crtc(crtc, plane_id) {
1429                intermediate->wm.plane[plane_id] =
1430                        max(optimal->wm.plane[plane_id],
1431                            active->wm.plane[plane_id]);
1432
1433                WARN_ON(intermediate->wm.plane[plane_id] >
1434                        g4x_plane_fifo_size(plane_id, G4X_WM_LEVEL_NORMAL));
1435        }
1436
1437        intermediate->sr.plane = max(optimal->sr.plane,
1438                                     active->sr.plane);
1439        intermediate->sr.cursor = max(optimal->sr.cursor,
1440                                      active->sr.cursor);
1441        intermediate->sr.fbc = max(optimal->sr.fbc,
1442                                   active->sr.fbc);
1443
1444        intermediate->hpll.plane = max(optimal->hpll.plane,
1445                                       active->hpll.plane);
1446        intermediate->hpll.cursor = max(optimal->hpll.cursor,
1447                                        active->hpll.cursor);
1448        intermediate->hpll.fbc = max(optimal->hpll.fbc,
1449                                     active->hpll.fbc);
1450
1451        WARN_ON((intermediate->sr.plane >
1452                 g4x_plane_fifo_size(PLANE_PRIMARY, G4X_WM_LEVEL_SR) ||
1453                 intermediate->sr.cursor >
1454                 g4x_plane_fifo_size(PLANE_CURSOR, G4X_WM_LEVEL_SR)) &&
1455                intermediate->cxsr);
1456        WARN_ON((intermediate->sr.plane >
1457                 g4x_plane_fifo_size(PLANE_PRIMARY, G4X_WM_LEVEL_HPLL) ||
1458                 intermediate->sr.cursor >
1459                 g4x_plane_fifo_size(PLANE_CURSOR, G4X_WM_LEVEL_HPLL)) &&
1460                intermediate->hpll_en);
1461
1462        WARN_ON(intermediate->sr.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_SR) &&
1463                intermediate->fbc_en && intermediate->cxsr);
1464        WARN_ON(intermediate->hpll.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_HPLL) &&
1465                intermediate->fbc_en && intermediate->hpll_en);
1466
1467out:
1468        /*
1469         * If our intermediate WM are identical to the final WM, then we can
1470         * omit the post-vblank programming; only update if it's different.
1471         */
1472        if (memcmp(intermediate, optimal, sizeof(*intermediate)) != 0)
1473                new_crtc_state->wm.need_postvbl_update = true;
1474
1475        return 0;
1476}
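/*
 * Worked example of the max() merge above (illustrative numbers, not
 * from bspec): if the old ("active") state needed a primary watermark
 * of 40 cachelines and the new ("optimal") state needs 25, the
 * intermediate value is max(40, 25) = 40, which is safe both for the
 * frame still being scanned out and for the one being flipped in.
 * Only after the vblank does g4x_optimize_watermarks() drop to 25.
 */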
1477
1478static void g4x_merge_wm(struct drm_i915_private *dev_priv,
1479                         struct g4x_wm_values *wm)
1480{
1481        struct intel_crtc *crtc;
1482        int num_active_crtcs = 0;
1483
1484        wm->cxsr = true;
1485        wm->hpll_en = true;
1486        wm->fbc_en = true;
1487
1488        for_each_intel_crtc(&dev_priv->drm, crtc) {
1489                const struct g4x_wm_state *wm_state = &crtc->wm.active.g4x;
1490
1491                if (!crtc->active)
1492                        continue;
1493
1494                if (!wm_state->cxsr)
1495                        wm->cxsr = false;
1496                if (!wm_state->hpll_en)
1497                        wm->hpll_en = false;
1498                if (!wm_state->fbc_en)
1499                        wm->fbc_en = false;
1500
1501                num_active_crtcs++;
1502        }
1503
1504        if (num_active_crtcs != 1) {
1505                wm->cxsr = false;
1506                wm->hpll_en = false;
1507                wm->fbc_en = false;
1508        }
1509
1510        for_each_intel_crtc(&dev_priv->drm, crtc) {
1511                const struct g4x_wm_state *wm_state = &crtc->wm.active.g4x;
1512                enum pipe pipe = crtc->pipe;
1513
1514                wm->pipe[pipe] = wm_state->wm;
1515                if (crtc->active && wm->cxsr)
1516                        wm->sr = wm_state->sr;
1517                if (crtc->active && wm->hpll_en)
1518                        wm->hpll = wm_state->hpll;
1519        }
1520}
1521
1522static void g4x_program_watermarks(struct drm_i915_private *dev_priv)
1523{
1524        struct g4x_wm_values *old_wm = &dev_priv->wm.g4x;
1525        struct g4x_wm_values new_wm = {};
1526
1527        g4x_merge_wm(dev_priv, &new_wm);
1528
1529        if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0)
1530                return;
1531
1532        if (is_disabling(old_wm->cxsr, new_wm.cxsr, true))
1533                _intel_set_memory_cxsr(dev_priv, false);
1534
1535        g4x_write_wm_values(dev_priv, &new_wm);
1536
1537        if (is_enabling(old_wm->cxsr, new_wm.cxsr, true))
1538                _intel_set_memory_cxsr(dev_priv, true);
1539
1540        *old_wm = new_wm;
1541}
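/*
 * The ordering above matters; a sketch of the two transitions,
 * assuming is_disabling()/is_enabling() compare the old and new
 * values against the given threshold:
 *
 *   cxsr on -> off: disable cxsr first, then write the new watermarks
 *   cxsr off -> on: write the new watermarks first, then enable cxsr
 *
 * Either way the hardware never operates in cxsr with watermarks that
 * were not computed for it.
 */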
1542
1543static void g4x_initial_watermarks(struct intel_atomic_state *state,
1544                                   struct intel_crtc_state *crtc_state)
1545{
1546        struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1547        struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1548
1549        mutex_lock(&dev_priv->wm.wm_mutex);
1550        crtc->wm.active.g4x = crtc_state->wm.g4x.intermediate;
1551        g4x_program_watermarks(dev_priv);
1552        mutex_unlock(&dev_priv->wm.wm_mutex);
1553}
1554
1555static void g4x_optimize_watermarks(struct intel_atomic_state *state,
1556                                    struct intel_crtc_state *crtc_state)
1557{
1558        struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1559        struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
1560
1561        if (!crtc_state->wm.need_postvbl_update)
1562                return;
1563
1564        mutex_lock(&dev_priv->wm.wm_mutex);
1565        intel_crtc->wm.active.g4x = crtc_state->wm.g4x.optimal;
1566        g4x_program_watermarks(dev_priv);
1567        mutex_unlock(&dev_priv->wm.wm_mutex);
1568}
1569
1570/* latency must be in 0.1us units. */
1571static unsigned int vlv_wm_method2(unsigned int pixel_rate,
1572                                   unsigned int htotal,
1573                                   unsigned int width,
1574                                   unsigned int cpp,
1575                                   unsigned int latency)
1576{
1577        unsigned int ret;
1578
1579        ret = intel_wm_method2(pixel_rate, htotal,
1580                               width, cpp, latency);
1581        ret = DIV_ROUND_UP(ret, 64);
1582
1583        return ret;
1584}
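/*
 * Worked example (illustrative 1080p timings): pixel_rate = 148500
 * (kHz), htotal = 2200, width = 1920, cpp = 4, latency = 300 (30 us
 * in 0.1 us units). intel_wm_method2() rounds the latency up to whole
 * lines, roughly:
 *   lines = 300 * 148500 / (2200 * 10000) = 2 (integer division)
 *   bytes = (2 + 1) * 1920 * 4 = 23040
 * and the DIV_ROUND_UP(..., 64) above turns that into 360 cachelines
 * of FIFO needed to ride out the memory latency.
 */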
1585
1586static void vlv_setup_wm_latency(struct drm_i915_private *dev_priv)
1587{
1588        /* all latencies in usec */
1589        dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM2] = 3;
1590
1591        dev_priv->wm.max_level = VLV_WM_LEVEL_PM2;
1592
1593        if (IS_CHERRYVIEW(dev_priv)) {
1594                dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM5] = 12;
1595                dev_priv->wm.pri_latency[VLV_WM_LEVEL_DDR_DVFS] = 33;
1596
1597                dev_priv->wm.max_level = VLV_WM_LEVEL_DDR_DVFS;
1598        }
1599}
1600
1601static uint16_t vlv_compute_wm_level(const struct intel_crtc_state *crtc_state,
1602                                     const struct intel_plane_state *plane_state,
1603                                     int level)
1604{
1605        struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
1606        struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
1607        const struct drm_display_mode *adjusted_mode =
1608                &crtc_state->base.adjusted_mode;
1609        unsigned int clock, htotal, cpp, width, wm;
1610
1611        if (dev_priv->wm.pri_latency[level] == 0)
1612                return USHRT_MAX;
1613
1614        if (!intel_wm_plane_visible(crtc_state, plane_state))
1615                return 0;
1616
1617        cpp = plane_state->base.fb->format->cpp[0];
1618        clock = adjusted_mode->crtc_clock;
1619        htotal = adjusted_mode->crtc_htotal;
1620        width = crtc_state->pipe_src_w;
1621
1622        if (plane->id == PLANE_CURSOR) {
1623                /*
1624                 * FIXME the formula gives values that are
1625                 * too big for the cursor FIFO, and hence we
1626                 * would never be able to use cursors. For
1627                 * now just hardcode the watermark.
1628                 */
1629                wm = 63;
1630        } else {
1631                wm = vlv_wm_method2(clock, htotal, width, cpp,
1632                                    dev_priv->wm.pri_latency[level] * 10);
1633        }
1634
1635        return min_t(unsigned int, wm, USHRT_MAX);
1636}
1637
1638static bool vlv_need_sprite0_fifo_workaround(unsigned int active_planes)
1639{
1640        return (active_planes & (BIT(PLANE_SPRITE0) |
1641                                 BIT(PLANE_SPRITE1))) == BIT(PLANE_SPRITE1);
1642}
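/*
 * The mask trick above matches exactly the "sprite1 without sprite0"
 * case. E.g. active_planes = BIT(PLANE_PRIMARY) | BIT(PLANE_SPRITE1)
 * masks down to BIT(PLANE_SPRITE1) and compares equal, so the
 * workaround kicks in; once sprite0 is also enabled the masked value
 * gains the sprite0 bit and the comparison fails.
 */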
1643
1644static int vlv_compute_fifo(struct intel_crtc_state *crtc_state)
1645{
1646        struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1647        const struct g4x_pipe_wm *raw =
1648                &crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2];
1649        struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state;
1650        unsigned int active_planes = crtc_state->active_planes & ~BIT(PLANE_CURSOR);
1651        int num_active_planes = hweight32(active_planes);
1652        const int fifo_size = 511;
1653        int fifo_extra, fifo_left = fifo_size;
1654        int sprite0_fifo_extra = 0;
1655        unsigned int total_rate;
1656        enum plane_id plane_id;
1657
1658        /*
1659         * When enabling sprite0 after sprite1 has already been enabled
1660         * we tend to get an underrun unless sprite0 already has some
1661         * FIFO space allocated. Hence we always allocate at least one
1662         * cacheline for sprite0 whenever sprite1 is enabled.
1663         *
1664         * All other plane enable sequences appear immune to this problem.
1665         */
1666        if (vlv_need_sprite0_fifo_workaround(active_planes))
1667                sprite0_fifo_extra = 1;
1668
1669        total_rate = raw->plane[PLANE_PRIMARY] +
1670                raw->plane[PLANE_SPRITE0] +
1671                raw->plane[PLANE_SPRITE1] +
1672                sprite0_fifo_extra;
1673
1674        if (total_rate > fifo_size)
1675                return -EINVAL;
1676
1677        if (total_rate == 0)
1678                total_rate = 1;
1679
1680        for_each_plane_id_on_crtc(crtc, plane_id) {
1681                unsigned int rate;
1682
1683                if ((active_planes & BIT(plane_id)) == 0) {
1684                        fifo_state->plane[plane_id] = 0;
1685                        continue;
1686                }
1687
1688                rate = raw->plane[plane_id];
1689                fifo_state->plane[plane_id] = fifo_size * rate / total_rate;
1690                fifo_left -= fifo_state->plane[plane_id];
1691        }
1692
1693        fifo_state->plane[PLANE_SPRITE0] += sprite0_fifo_extra;
1694        fifo_left -= sprite0_fifo_extra;
1695
1696        fifo_state->plane[PLANE_CURSOR] = 63;
1697
1698        fifo_extra = DIV_ROUND_UP(fifo_left, num_active_planes ?: 1);
1699
1700        /* spread the remainder evenly */
1701        for_each_plane_id_on_crtc(crtc, plane_id) {
1702                int plane_extra;
1703
1704                if (fifo_left == 0)
1705                        break;
1706
1707                if ((active_planes & BIT(plane_id)) == 0)
1708                        continue;
1709
1710                plane_extra = min(fifo_extra, fifo_left);
1711                fifo_state->plane[plane_id] += plane_extra;
1712                fifo_left -= plane_extra;
1713        }
1714
1715        WARN_ON(active_planes != 0 && fifo_left != 0);
1716
1717        /* give it all to the first plane if none are active */
1718        if (active_planes == 0) {
1719                WARN_ON(fifo_left != fifo_size);
1720                fifo_state->plane[PLANE_PRIMARY] = fifo_left;
1721        }
1722
1723        return 0;
1724}
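/*
 * Worked example of the split above (made-up rates): with raw level 0
 * requirements primary = 100, sprite0 = 50, sprite1 = 0 and only
 * primary + sprite0 active, total_rate = 150 and the 511-entry FIFO
 * is first divided proportionally:
 *   primary: 511 * 100 / 150 = 340, sprite0: 511 * 50 / 150 = 170
 * leaving fifo_left = 1, which the "spread the remainder" loop hands
 * to the first active plane, so the allocations sum to exactly 511.
 */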
1725
1726/* mark all levels starting from 'level' as invalid */
1727static void vlv_invalidate_wms(struct intel_crtc *crtc,
1728                               struct vlv_wm_state *wm_state, int level)
1729{
1730        struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
1731
1732        for (; level < intel_wm_num_levels(dev_priv); level++) {
1733                enum plane_id plane_id;
1734
1735                for_each_plane_id_on_crtc(crtc, plane_id)
1736                        wm_state->wm[level].plane[plane_id] = USHRT_MAX;
1737
1738                wm_state->sr[level].cursor = USHRT_MAX;
1739                wm_state->sr[level].plane = USHRT_MAX;
1740        }
1741}
1742
1743static u16 vlv_invert_wm_value(u16 wm, u16 fifo_size)
1744{
1745        if (wm > fifo_size)
1746                return USHRT_MAX;
1747        else
1748                return fifo_size - wm;
1749}
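/*
 * Example: the raw values are "FIFO entries needed" while the
 * hardware register wants the inverted form. With a 256-entry
 * allocation and a raw value of 40 the programmed value is
 * 256 - 40 = 216; a raw value larger than the allocation can't be
 * represented and becomes USHRT_MAX, to be rejected as invalid later.
 */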
1750
1751/*
1752 * Starting from 'level', set that level and all
1753 * higher levels to 'value' in the "raw" watermarks.
1754 */
1755static bool vlv_raw_plane_wm_set(struct intel_crtc_state *crtc_state,
1756                                 int level, enum plane_id plane_id, u16 value)
1757{
1758        struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1759        int num_levels = intel_wm_num_levels(dev_priv);
1760        bool dirty = false;
1761
1762        for (; level < num_levels; level++) {
1763                struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
1764
1765                dirty |= raw->plane[plane_id] != value;
1766                raw->plane[plane_id] = value;
1767        }
1768
1769        return dirty;
1770}
1771
1772static bool vlv_raw_plane_wm_compute(struct intel_crtc_state *crtc_state,
1773                                     const struct intel_plane_state *plane_state)
1774{
1775        struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
1776        enum plane_id plane_id = plane->id;
1777        int num_levels = intel_wm_num_levels(to_i915(plane->base.dev));
1778        int level;
1779        bool dirty = false;
1780
1781        if (!intel_wm_plane_visible(crtc_state, plane_state)) {
1782                dirty |= vlv_raw_plane_wm_set(crtc_state, 0, plane_id, 0);
1783                goto out;
1784        }
1785
1786        for (level = 0; level < num_levels; level++) {
1787                struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
1788                int wm = vlv_compute_wm_level(crtc_state, plane_state, level);
1789                int max_wm = plane_id == PLANE_CURSOR ? 63 : 511;
1790
1791                if (wm > max_wm)
1792                        break;
1793
1794                dirty |= raw->plane[plane_id] != wm;
1795                raw->plane[plane_id] = wm;
1796        }
1797
1798        /* mark all higher levels as invalid */
1799        dirty |= vlv_raw_plane_wm_set(crtc_state, level, plane_id, USHRT_MAX);
1800
1801out:
1802        if (dirty)
1803                DRM_DEBUG_KMS("%s watermarks: PM2=%d, PM5=%d, DDR DVFS=%d\n",
1804                              plane->base.name,
1805                              crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2].plane[plane_id],
1806                              crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM5].plane[plane_id],
1807                              crtc_state->wm.vlv.raw[VLV_WM_LEVEL_DDR_DVFS].plane[plane_id]);
1808
1809        return dirty;
1810}
1811
1812static bool vlv_raw_plane_wm_is_valid(const struct intel_crtc_state *crtc_state,
1813                                      enum plane_id plane_id, int level)
1814{
1815        const struct g4x_pipe_wm *raw =
1816                &crtc_state->wm.vlv.raw[level];
1817        const struct vlv_fifo_state *fifo_state =
1818                &crtc_state->wm.vlv.fifo_state;
1819
1820        return raw->plane[plane_id] <= fifo_state->plane[plane_id];
1821}
1822
1823static bool vlv_raw_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state, int level)
1824{
1825        return vlv_raw_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) &&
1826                vlv_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) &&
1827                vlv_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE1, level) &&
1828                vlv_raw_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level);
1829}
1830
1831static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state)
1832{
1833        struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1834        struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
1835        struct intel_atomic_state *state =
1836                to_intel_atomic_state(crtc_state->base.state);
1837        struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal;
1838        const struct vlv_fifo_state *fifo_state =
1839                &crtc_state->wm.vlv.fifo_state;
1840        int num_active_planes = hweight32(crtc_state->active_planes &
1841                                          ~BIT(PLANE_CURSOR));
1842        bool needs_modeset = drm_atomic_crtc_needs_modeset(&crtc_state->base);
1843        const struct intel_plane_state *old_plane_state;
1844        const struct intel_plane_state *new_plane_state;
1845        struct intel_plane *plane;
1846        enum plane_id plane_id;
1847        int level, ret, i;
1848        unsigned int dirty = 0;
1849
1850        for_each_oldnew_intel_plane_in_state(state, plane,
1851                                             old_plane_state,
1852                                             new_plane_state, i) {
1853                if (new_plane_state->base.crtc != &crtc->base &&
1854                    old_plane_state->base.crtc != &crtc->base)
1855                        continue;
1856
1857                if (vlv_raw_plane_wm_compute(crtc_state, new_plane_state))
1858                        dirty |= BIT(plane->id);
1859        }
1860
1861        /*
1862         * DSPARB registers may have been reset due to the
1863         * power well being turned off. Make sure we restore
1864         * them to a consistent state even if no primary/sprite
1865         * planes are initially active.
1866         */
1867        if (needs_modeset)
1868                crtc_state->fifo_changed = true;
1869
1870        if (!dirty)
1871                return 0;
1872
1873        /* cursor changes don't warrant a FIFO recompute */
1874        if (dirty & ~BIT(PLANE_CURSOR)) {
1875                const struct intel_crtc_state *old_crtc_state =
1876                        intel_atomic_get_old_crtc_state(state, crtc);
1877                const struct vlv_fifo_state *old_fifo_state =
1878                        &old_crtc_state->wm.vlv.fifo_state;
1879
1880                ret = vlv_compute_fifo(crtc_state);
1881                if (ret)
1882                        return ret;
1883
1884                if (needs_modeset ||
1885                    memcmp(old_fifo_state, fifo_state,
1886                           sizeof(*fifo_state)) != 0)
1887                        crtc_state->fifo_changed = true;
1888        }
1889
1890        /* initially allow all levels */
1891        wm_state->num_levels = intel_wm_num_levels(dev_priv);
1892        /*
1893         * Note that enabling cxsr with no primary/sprite planes
1894         * enabled can wedge the pipe. Hence we only allow cxsr
1895         * with exactly one enabled primary/sprite plane.
1896         */
1897        wm_state->cxsr = crtc->pipe != PIPE_C && num_active_planes == 1;
1898
1899        for (level = 0; level < wm_state->num_levels; level++) {
1900                const struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
1901                const int sr_fifo_size = INTEL_INFO(dev_priv)->num_pipes * 512 - 1;
1902
1903                if (!vlv_raw_crtc_wm_is_valid(crtc_state, level))
1904                        break;
1905
1906                for_each_plane_id_on_crtc(crtc, plane_id) {
1907                        wm_state->wm[level].plane[plane_id] =
1908                                vlv_invert_wm_value(raw->plane[plane_id],
1909                                                    fifo_state->plane[plane_id]);
1910                }
1911
1912                wm_state->sr[level].plane =
1913                        vlv_invert_wm_value(max3(raw->plane[PLANE_PRIMARY],
1914                                                 raw->plane[PLANE_SPRITE0],
1915                                                 raw->plane[PLANE_SPRITE1]),
1916                                            sr_fifo_size);
1917
1918                wm_state->sr[level].cursor =
1919                        vlv_invert_wm_value(raw->plane[PLANE_CURSOR],
1920                                            63);
1921        }
1922
1923        if (level == 0)
1924                return -EINVAL;
1925
1926        /* limit to only levels we can actually handle */
1927        wm_state->num_levels = level;
1928
1929        /* invalidate the higher levels */
1930        vlv_invalidate_wms(crtc, wm_state, level);
1931
1932        return 0;
1933}
1934
1935#define VLV_FIFO(plane, value) \
1936        (((value) << DSPARB_ ## plane ## _SHIFT_VLV) & DSPARB_ ## plane ## _MASK_VLV)
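/*
 * Expansion example: VLV_FIFO(SPRITEB, sprite1_start) becomes
 * ((sprite1_start) << DSPARB_SPRITEB_SHIFT_VLV) &
 * DSPARB_SPRITEB_MASK_VLV, i.e. the token-pasted plane name picks the
 * matching shift/mask pair and the mask clips the value to the bits
 * that field actually owns.
 */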
1937
1938static void vlv_atomic_update_fifo(struct intel_atomic_state *state,
1939                                   struct intel_crtc_state *crtc_state)
1940{
1941        struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1942        struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
1943        const struct vlv_fifo_state *fifo_state =
1944                &crtc_state->wm.vlv.fifo_state;
1945        int sprite0_start, sprite1_start, fifo_size;
1946
1947        if (!crtc_state->fifo_changed)
1948                return;
1949
1950        sprite0_start = fifo_state->plane[PLANE_PRIMARY];
1951        sprite1_start = fifo_state->plane[PLANE_SPRITE0] + sprite0_start;
1952        fifo_size = fifo_state->plane[PLANE_SPRITE1] + sprite1_start;
1953
1954        WARN_ON(fifo_state->plane[PLANE_CURSOR] != 63);
1955        WARN_ON(fifo_size != 511);
1956
1957        trace_vlv_fifo_size(crtc, sprite0_start, sprite1_start, fifo_size);
1958
1959        /*
1960         * uncore.lock serves a double purpose here. It allows us to
1961         * use the less expensive I915_{READ,WRITE}_FW() functions, and
1962         * it protects the DSPARB registers from getting clobbered by
1963         * parallel updates from multiple pipes.
1964         *
1965         * intel_pipe_update_start() has already disabled interrupts
1966         * for us, so a plain spin_lock() is sufficient here.
1967         */
1968        spin_lock(&dev_priv->uncore.lock);
1969
1970        switch (crtc->pipe) {
1971                uint32_t dsparb, dsparb2, dsparb3;
1972        case PIPE_A:
1973                dsparb = I915_READ_FW(DSPARB);
1974                dsparb2 = I915_READ_FW(DSPARB2);
1975
1976                dsparb &= ~(VLV_FIFO(SPRITEA, 0xff) |
1977                            VLV_FIFO(SPRITEB, 0xff));
1978                dsparb |= (VLV_FIFO(SPRITEA, sprite0_start) |
1979                           VLV_FIFO(SPRITEB, sprite1_start));
1980
1981                dsparb2 &= ~(VLV_FIFO(SPRITEA_HI, 0x1) |
1982                             VLV_FIFO(SPRITEB_HI, 0x1));
1983                dsparb2 |= (VLV_FIFO(SPRITEA_HI, sprite0_start >> 8) |
1984                           VLV_FIFO(SPRITEB_HI, sprite1_start >> 8));
1985
1986                I915_WRITE_FW(DSPARB, dsparb);
1987                I915_WRITE_FW(DSPARB2, dsparb2);
1988                break;
1989        case PIPE_B:
1990                dsparb = I915_READ_FW(DSPARB);
1991                dsparb2 = I915_READ_FW(DSPARB2);
1992
1993                dsparb &= ~(VLV_FIFO(SPRITEC, 0xff) |
1994                            VLV_FIFO(SPRITED, 0xff));
1995                dsparb |= (VLV_FIFO(SPRITEC, sprite0_start) |
1996                           VLV_FIFO(SPRITED, sprite1_start));
1997
1998                dsparb2 &= ~(VLV_FIFO(SPRITEC_HI, 0xff) |
1999                             VLV_FIFO(SPRITED_HI, 0xff));
2000                dsparb2 |= (VLV_FIFO(SPRITEC_HI, sprite0_start >> 8) |
2001                           VLV_FIFO(SPRITED_HI, sprite1_start >> 8));
2002
2003                I915_WRITE_FW(DSPARB, dsparb);
2004                I915_WRITE_FW(DSPARB2, dsparb2);
2005                break;
2006        case PIPE_C:
2007                dsparb3 = I915_READ_FW(DSPARB3);
2008                dsparb2 = I915_READ_FW(DSPARB2);
2009
2010                dsparb3 &= ~(VLV_FIFO(SPRITEE, 0xff) |
2011                             VLV_FIFO(SPRITEF, 0xff));
2012                dsparb3 |= (VLV_FIFO(SPRITEE, sprite0_start) |
2013                            VLV_FIFO(SPRITEF, sprite1_start));
2014
2015                dsparb2 &= ~(VLV_FIFO(SPRITEE_HI, 0xff) |
2016                             VLV_FIFO(SPRITEF_HI, 0xff));
2017                dsparb2 |= (VLV_FIFO(SPRITEE_HI, sprite0_start >> 8) |
2018                           VLV_FIFO(SPRITEF_HI, sprite1_start >> 8));
2019
2020                I915_WRITE_FW(DSPARB3, dsparb3);
2021                I915_WRITE_FW(DSPARB2, dsparb2);
2022                break;
2023        default:
2024                break;
2025        }
2026
2027        POSTING_READ_FW(DSPARB);
2028
2029        spin_unlock(&dev_priv->uncore.lock);
2030}
2031
2032#undef VLV_FIFO
2033
2034static int vlv_compute_intermediate_wm(struct drm_device *dev,
2035                                       struct intel_crtc *crtc,
2036                                       struct intel_crtc_state *new_crtc_state)
2037{
2038        struct vlv_wm_state *intermediate = &new_crtc_state->wm.vlv.intermediate;
2039        const struct vlv_wm_state *optimal = &new_crtc_state->wm.vlv.optimal;
2040        struct intel_atomic_state *intel_state =
2041                to_intel_atomic_state(new_crtc_state->base.state);
2042        const struct intel_crtc_state *old_crtc_state =
2043                intel_atomic_get_old_crtc_state(intel_state, crtc);
2044        const struct vlv_wm_state *active = &old_crtc_state->wm.vlv.optimal;
2045        int level;
2046
2047        if (!new_crtc_state->base.active || drm_atomic_crtc_needs_modeset(&new_crtc_state->base)) {
2048                *intermediate = *optimal;
2049
2050                intermediate->cxsr = false;
2051                goto out;
2052        }
2053
2054        intermediate->num_levels = min(optimal->num_levels, active->num_levels);
2055        intermediate->cxsr = optimal->cxsr && active->cxsr &&
2056                !new_crtc_state->disable_cxsr;
2057
2058        for (level = 0; level < intermediate->num_levels; level++) {
2059                enum plane_id plane_id;
2060
2061                for_each_plane_id_on_crtc(crtc, plane_id) {
2062                        intermediate->wm[level].plane[plane_id] =
2063                                min(optimal->wm[level].plane[plane_id],
2064                                    active->wm[level].plane[plane_id]);
2065                }
2066
2067                intermediate->sr[level].plane = min(optimal->sr[level].plane,
2068                                                    active->sr[level].plane);
2069                intermediate->sr[level].cursor = min(optimal->sr[level].cursor,
2070                                                     active->sr[level].cursor);
2071        }
2072
2073        vlv_invalidate_wms(crtc, intermediate, level);
2074
2075out:
2076        /*
2077         * If our intermediate WM are identical to the final WM, then we can
2078         * omit the post-vblank programming; only update if it's different.
2079         */
2080        if (memcmp(intermediate, optimal, sizeof(*intermediate)) != 0)
2081                new_crtc_state->wm.need_postvbl_update = true;
2082
2083        return 0;
2084}
2085
2086static void vlv_merge_wm(struct drm_i915_private *dev_priv,
2087                         struct vlv_wm_values *wm)
2088{
2089        struct intel_crtc *crtc;
2090        int num_active_crtcs = 0;
2091
2092        wm->level = dev_priv->wm.max_level;
2093        wm->cxsr = true;
2094
2095        for_each_intel_crtc(&dev_priv->drm, crtc) {
2096                const struct vlv_wm_state *wm_state = &crtc->wm.active.vlv;
2097
2098                if (!crtc->active)
2099                        continue;
2100
2101                if (!wm_state->cxsr)
2102                        wm->cxsr = false;
2103
2104                num_active_crtcs++;
2105                wm->level = min_t(int, wm->level, wm_state->num_levels - 1);
2106        }
2107
2108        if (num_active_crtcs != 1)
2109                wm->cxsr = false;
2110
2111        if (num_active_crtcs > 1)
2112                wm->level = VLV_WM_LEVEL_PM2;
2113
2114        for_each_intel_crtc(&dev_priv->drm, crtc) {
2115                const struct vlv_wm_state *wm_state = &crtc->wm.active.vlv;
2116                enum pipe pipe = crtc->pipe;
2117
2118                wm->pipe[pipe] = wm_state->wm[wm->level];
2119                if (crtc->active && wm->cxsr)
2120                        wm->sr = wm_state->sr[wm->level];
2121
2122                wm->ddl[pipe].plane[PLANE_PRIMARY] = DDL_PRECISION_HIGH | 2;
2123                wm->ddl[pipe].plane[PLANE_SPRITE0] = DDL_PRECISION_HIGH | 2;
2124                wm->ddl[pipe].plane[PLANE_SPRITE1] = DDL_PRECISION_HIGH | 2;
2125                wm->ddl[pipe].plane[PLANE_CURSOR] = DDL_PRECISION_HIGH | 2;
2126        }
2127}
2128
2129static void vlv_program_watermarks(struct drm_i915_private *dev_priv)
2130{
2131        struct vlv_wm_values *old_wm = &dev_priv->wm.vlv;
2132        struct vlv_wm_values new_wm = {};
2133
2134        vlv_merge_wm(dev_priv, &new_wm);
2135
2136        if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0)
2137                return;
2138
2139        if (is_disabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_DDR_DVFS))
2140                chv_set_memory_dvfs(dev_priv, false);
2141
2142        if (is_disabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_PM5))
2143                chv_set_memory_pm5(dev_priv, false);
2144
2145        if (is_disabling(old_wm->cxsr, new_wm.cxsr, true))
2146                _intel_set_memory_cxsr(dev_priv, false);
2147
2148        vlv_write_wm_values(dev_priv, &new_wm);
2149
2150        if (is_enabling(old_wm->cxsr, new_wm.cxsr, true))
2151                _intel_set_memory_cxsr(dev_priv, true);
2152
2153        if (is_enabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_PM5))
2154                chv_set_memory_pm5(dev_priv, true);
2155
2156        if (is_enabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_DDR_DVFS))
2157                chv_set_memory_dvfs(dev_priv, true);
2158
2159        *old_wm = new_wm;
2160}
2161
2162static void vlv_initial_watermarks(struct intel_atomic_state *state,
2163                                   struct intel_crtc_state *crtc_state)
2164{
2165        struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
2166        struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
2167
2168        mutex_lock(&dev_priv->wm.wm_mutex);
2169        crtc->wm.active.vlv = crtc_state->wm.vlv.intermediate;
2170        vlv_program_watermarks(dev_priv);
2171        mutex_unlock(&dev_priv->wm.wm_mutex);
2172}
2173
2174static void vlv_optimize_watermarks(struct intel_atomic_state *state,
2175                                    struct intel_crtc_state *crtc_state)
2176{
2177        struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
2178        struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
2179
2180        if (!crtc_state->wm.need_postvbl_update)
2181                return;
2182
2183        mutex_lock(&dev_priv->wm.wm_mutex);
2184        intel_crtc->wm.active.vlv = crtc_state->wm.vlv.optimal;
2185        vlv_program_watermarks(dev_priv);
2186        mutex_unlock(&dev_priv->wm.wm_mutex);
2187}
2188
2189static void i965_update_wm(struct intel_crtc *unused_crtc)
2190{
2191        struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
2192        struct intel_crtc *crtc;
2193        int srwm = 1;
2194        int cursor_sr = 16;
2195        bool cxsr_enabled;
2196
2197        /* Calc SR entries for single-plane configs */
2198        crtc = single_enabled_crtc(dev_priv);
2199        if (crtc) {
2200                /* self-refresh has much higher latency */
2201                static const int sr_latency_ns = 12000;
2202                const struct drm_display_mode *adjusted_mode =
2203                        &crtc->config->base.adjusted_mode;
2204                const struct drm_framebuffer *fb =
2205                        crtc->base.primary->state->fb;
2206                int clock = adjusted_mode->crtc_clock;
2207                int htotal = adjusted_mode->crtc_htotal;
2208                int hdisplay = crtc->config->pipe_src_w;
2209                int cpp = fb->format->cpp[0];
2210                int entries;
2211
2212                entries = intel_wm_method2(clock, htotal,
2213                                           hdisplay, cpp, sr_latency_ns / 100);
2214                entries = DIV_ROUND_UP(entries, I915_FIFO_LINE_SIZE);
2215                srwm = I965_FIFO_SIZE - entries;
2216                if (srwm < 0)
2217                        srwm = 1;
2218                srwm &= 0x1ff;
2219                DRM_DEBUG_KMS("self-refresh entries: %d, wm: %d\n",
2220                              entries, srwm);
2221
2222                entries = intel_wm_method2(clock, htotal,
2223                                           crtc->base.cursor->state->crtc_w, 4,
2224                                           sr_latency_ns / 100);
2225                entries = DIV_ROUND_UP(entries,
2226                                       i965_cursor_wm_info.cacheline_size) +
2227                        i965_cursor_wm_info.guard_size;
2228
2229                cursor_sr = i965_cursor_wm_info.fifo_size - entries;
2230                if (cursor_sr > i965_cursor_wm_info.max_wm)
2231                        cursor_sr = i965_cursor_wm_info.max_wm;
2232
2233                DRM_DEBUG_KMS("self-refresh watermark: display plane %d "
2234                              "cursor %d\n", srwm, cursor_sr);
2235
2236                cxsr_enabled = true;
2237        } else {
2238                cxsr_enabled = false;
2239                /* Turn off self refresh if both pipes are enabled */
2240                intel_set_memory_cxsr(dev_priv, false);
2241        }
2242
2243        DRM_DEBUG_KMS("Setting FIFO watermarks - A: 8, B: 8, C: 8, SR %d\n",
2244                      srwm);
2245
2246        /* 965 has limitations... */
2247        I915_WRITE(DSPFW1, FW_WM(srwm, SR) |
2248                   FW_WM(8, CURSORB) |
2249                   FW_WM(8, PLANEB) |
2250                   FW_WM(8, PLANEA));
2251        I915_WRITE(DSPFW2, FW_WM(8, CURSORA) |
2252                   FW_WM(8, PLANEC_OLD));
2253        /* update cursor SR watermark */
2254        I915_WRITE(DSPFW3, FW_WM(cursor_sr, CURSOR_SR));
2255
2256        if (cxsr_enabled)
2257                intel_set_memory_cxsr(dev_priv, true);
2258}
2259
2260#undef FW_WM
2261
2262static void i9xx_update_wm(struct intel_crtc *unused_crtc)
2263{
2264        struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
2265        const struct intel_watermark_params *wm_info;
2266        uint32_t fwater_lo;
2267        uint32_t fwater_hi;
2268        int cwm, srwm = 1;
2269        int fifo_size;
2270        int planea_wm, planeb_wm;
2271        struct intel_crtc *crtc, *enabled = NULL;
2272
2273        if (IS_I945GM(dev_priv))
2274                wm_info = &i945_wm_info;
2275        else if (!IS_GEN2(dev_priv))
2276                wm_info = &i915_wm_info;
2277        else
2278                wm_info = &i830_a_wm_info;
2279
2280        fifo_size = dev_priv->display.get_fifo_size(dev_priv, PLANE_A);
2281        crtc = intel_get_crtc_for_plane(dev_priv, PLANE_A);
2282        if (intel_crtc_active(crtc)) {
2283                const struct drm_display_mode *adjusted_mode =
2284                        &crtc->config->base.adjusted_mode;
2285                const struct drm_framebuffer *fb =
2286                        crtc->base.primary->state->fb;
2287                int cpp;
2288
2289                if (IS_GEN2(dev_priv))
2290                        cpp = 4;
2291                else
2292                        cpp = fb->format->cpp[0];
2293
2294                planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
2295                                               wm_info, fifo_size, cpp,
2296                                               pessimal_latency_ns);
2297                enabled = crtc;
2298        } else {
2299                planea_wm = fifo_size - wm_info->guard_size;
2300                if (planea_wm > (long)wm_info->max_wm)
2301                        planea_wm = wm_info->max_wm;
2302        }
2303
2304        if (IS_GEN2(dev_priv))
2305                wm_info = &i830_bc_wm_info;
2306
2307        fifo_size = dev_priv->display.get_fifo_size(dev_priv, PLANE_B);
2308        crtc = intel_get_crtc_for_plane(dev_priv, PLANE_B);
2309        if (intel_crtc_active(crtc)) {
2310                const struct drm_display_mode *adjusted_mode =
2311                        &crtc->config->base.adjusted_mode;
2312                const struct drm_framebuffer *fb =
2313                        crtc->base.primary->state->fb;
2314                int cpp;
2315
2316                if (IS_GEN2(dev_priv))
2317                        cpp = 4;
2318                else
2319                        cpp = fb->format->cpp[0];
2320
2321                planeb_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
2322                                               wm_info, fifo_size, cpp,
2323                                               pessimal_latency_ns);
2324                if (enabled == NULL)
2325                        enabled = crtc;
2326                else
2327                        enabled = NULL;
2328        } else {
2329                planeb_wm = fifo_size - wm_info->guard_size;
2330                if (planeb_wm > (long)wm_info->max_wm)
2331                        planeb_wm = wm_info->max_wm;
2332        }
2333
2334        DRM_DEBUG_KMS("FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm);
2335
2336        if (IS_I915GM(dev_priv) && enabled) {
2337                struct drm_i915_gem_object *obj;
2338
2339                obj = intel_fb_obj(enabled->base.primary->state->fb);
2340
2341                /* self-refresh seems busted with untiled */
2342                if (!i915_gem_object_is_tiled(obj))
2343                        enabled = NULL;
2344        }
2345
2346        /*
2347         * Overlay gets an aggressive default since video jitter is bad.
2348         */
2349        cwm = 2;
2350
2351        /* Play safe and disable self-refresh before adjusting watermarks. */
2352        intel_set_memory_cxsr(dev_priv, false);
2353
2354        /* Calc SR entries for single-plane configs */
2355        if (HAS_FW_BLC(dev_priv) && enabled) {
2356                /* self-refresh has much higher latency */
2357                static const int sr_latency_ns = 6000;
2358                const struct drm_display_mode *adjusted_mode =
2359                        &enabled->config->base.adjusted_mode;
2360                const struct drm_framebuffer *fb =
2361                        enabled->base.primary->state->fb;
2362                int clock = adjusted_mode->crtc_clock;
2363                int htotal = adjusted_mode->crtc_htotal;
2364                int hdisplay = enabled->config->pipe_src_w;
2365                int cpp;
2366                int entries;
2367
2368                if (IS_I915GM(dev_priv) || IS_I945GM(dev_priv))
2369                        cpp = 4;
2370                else
2371                        cpp = fb->format->cpp[0];
2372
2373                entries = intel_wm_method2(clock, htotal, hdisplay, cpp,
2374                                           sr_latency_ns / 100);
2375                entries = DIV_ROUND_UP(entries, wm_info->cacheline_size);
2376                DRM_DEBUG_KMS("self-refresh entries: %d\n", entries);
2377                srwm = wm_info->fifo_size - entries;
2378                if (srwm < 0)
2379                        srwm = 1;
2380
2381                if (IS_I945G(dev_priv) || IS_I945GM(dev_priv))
2382                        I915_WRITE(FW_BLC_SELF,
2383                                   FW_BLC_SELF_FIFO_MASK | (srwm & 0xff));
2384                else
2385                        I915_WRITE(FW_BLC_SELF, srwm & 0x3f);
2386        }
2387
2388        DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n",
2389                      planea_wm, planeb_wm, cwm, srwm);
2390
2391        fwater_lo = ((planeb_wm & 0x3f) << 16) | (planea_wm & 0x3f);
2392        fwater_hi = (cwm & 0x1f);
2393
2394        /* Set request length to 8 cachelines per fetch */
2395        fwater_lo = fwater_lo | (1 << 24) | (1 << 8);
2396        fwater_hi = fwater_hi | (1 << 8);
2397
2398        I915_WRITE(FW_BLC, fwater_lo);
2399        I915_WRITE(FW_BLC2, fwater_hi);
2400
2401        if (enabled)
2402                intel_set_memory_cxsr(dev_priv, true);
2403}
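/*
 * Layout sketch of the FW_BLC packing above: fwater_lo carries the
 * 6-bit plane B watermark in bits 21:16 and plane A in bits 5:0, with
 * bits 24 and 8 selecting the 8-cacheline burst length; fwater_hi
 * carries the 5-bit cursor watermark in bits 4:0 plus its burst bit 8.
 */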
2404
2405static void i845_update_wm(struct intel_crtc *unused_crtc)
2406{
2407        struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
2408        struct intel_crtc *crtc;
2409        const struct drm_display_mode *adjusted_mode;
2410        uint32_t fwater_lo;
2411        int planea_wm;
2412
2413        crtc = single_enabled_crtc(dev_priv);
2414        if (crtc == NULL)
2415                return;
2416
2417        adjusted_mode = &crtc->config->base.adjusted_mode;
2418        planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
2419                                       &i845_wm_info,
2420                                       dev_priv->display.get_fifo_size(dev_priv, PLANE_A),
2421                                       4, pessimal_latency_ns);
2422        fwater_lo = I915_READ(FW_BLC) & ~0xfff;
2423        fwater_lo |= (3<<8) | planea_wm;
2424
2425        DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d\n", planea_wm);
2426
2427        I915_WRITE(FW_BLC, fwater_lo);
2428}
2429
2430/* latency must be in 0.1us units. */
2431static unsigned int ilk_wm_method1(unsigned int pixel_rate,
2432                                   unsigned int cpp,
2433                                   unsigned int latency)
2434{
2435        unsigned int ret;
2436
2437        ret = intel_wm_method1(pixel_rate, cpp, latency);
2438        ret = DIV_ROUND_UP(ret, 64) + 2;
2439
2440        return ret;
2441}
2442
2443/* latency must be in 0.1us units. */
2444static unsigned int ilk_wm_method2(unsigned int pixel_rate,
2445                                   unsigned int htotal,
2446                                   unsigned int width,
2447                                   unsigned int cpp,
2448                                   unsigned int latency)
2449{
2450        unsigned int ret;
2451
2452        ret = intel_wm_method2(pixel_rate, htotal,
2453                               width, cpp, latency);
2454        ret = DIV_ROUND_UP(ret, 64) + 2;
2455
2456        return ret;
2457}
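/*
 * Worked comparison (illustrative numbers): with pixel_rate = 148500
 * (kHz), cpp = 4 and latency = 50 (5 us), method1 charges for
 * latency-time worth of pixels, roughly:
 *   DIV_ROUND_UP(148500 * 4 * 50, 10000) = 2970 bytes
 *   -> DIV_ROUND_UP(2970, 64) + 2 = 49 cachelines
 * while method2 (htotal = 2200, width = 1920) rounds the latency up
 * to whole lines first:
 *   (0 + 1) * 1920 * 4 = 7680 bytes -> 120 + 2 = 122 cachelines
 * and the LP plane watermark code below takes min(49, 122).
 */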
2458
2459static uint32_t ilk_wm_fbc(uint32_t pri_val, uint32_t horiz_pixels,
2460                           uint8_t cpp)
2461{
2462        /*
2463         * Neither of these should be possible since this function shouldn't be
2464         * called if the CRTC is off or the plane is invisible.  But let's be
2465         * extra paranoid to avoid a potential divide-by-zero if we screw up
2466         * elsewhere in the driver.
2467         */
2468        if (WARN_ON(!cpp))
2469                return 0;
2470        if (WARN_ON(!horiz_pixels))
2471                return 0;
2472
2473        return DIV_ROUND_UP(pri_val * 64, horiz_pixels * cpp) + 2;
2474}
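/*
 * Worked example (made-up values): for pri_val = 120 cachelines on a
 * 1920 pixel wide, 4 bytes/pixel plane:
 *   DIV_ROUND_UP(120 * 64, 1920 * 4) + 2 = 1 + 2 = 3
 * i.e. the primary watermark converted from cachelines into full
 * lines of the plane, plus the usual +2 guard.
 */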
2475
2476struct ilk_wm_maximums {
2477        uint16_t pri;
2478        uint16_t spr;
2479        uint16_t cur;
2480        uint16_t fbc;
2481};
2482
2483/*
2484 * For both WM_PIPE and WM_LP.
2485 * mem_value must be in 0.1us units.
2486 */
2487static uint32_t ilk_compute_pri_wm(const struct intel_crtc_state *cstate,
2488                                   const struct intel_plane_state *pstate,
2489                                   uint32_t mem_value,
2490                                   bool is_lp)
2491{
2492        uint32_t method1, method2;
2493        int cpp;
2494
2495        if (!intel_wm_plane_visible(cstate, pstate))
2496                return 0;
2497
2498        cpp = pstate->base.fb->format->cpp[0];
2499
2500        method1 = ilk_wm_method1(cstate->pixel_rate, cpp, mem_value);
2501
2502        if (!is_lp)
2503                return method1;
2504
2505        method2 = ilk_wm_method2(cstate->pixel_rate,
2506                                 cstate->base.adjusted_mode.crtc_htotal,
2507                                 drm_rect_width(&pstate->base.dst),
2508                                 cpp, mem_value);
2509
2510        return min(method1, method2);
2511}
2512
2513/*
2514 * For both WM_PIPE and WM_LP.
2515 * mem_value must be in 0.1us units.
2516 */
2517static uint32_t ilk_compute_spr_wm(const struct intel_crtc_state *cstate,
2518                                   const struct intel_plane_state *pstate,
2519                                   uint32_t mem_value)
2520{
2521        uint32_t method1, method2;
2522        int cpp;
2523
2524        if (!intel_wm_plane_visible(cstate, pstate))
2525                return 0;
2526
2527        cpp = pstate->base.fb->format->cpp[0];
2528
2529        method1 = ilk_wm_method1(cstate->pixel_rate, cpp, mem_value);
2530        method2 = ilk_wm_method2(cstate->pixel_rate,
2531                                 cstate->base.adjusted_mode.crtc_htotal,
2532                                 drm_rect_width(&pstate->base.dst),
2533                                 cpp, mem_value);
2534        return min(method1, method2);
2535}
2536
2537/*
2538 * For both WM_PIPE and WM_LP.
2539 * mem_value must be in 0.1us units.
2540 */
2541static uint32_t ilk_compute_cur_wm(const struct intel_crtc_state *cstate,
2542                                   const struct intel_plane_state *pstate,
2543                                   uint32_t mem_value)
2544{
2545        int cpp;
2546
2547        if (!intel_wm_plane_visible(cstate, pstate))
2548                return 0;
2549
2550        cpp = pstate->base.fb->format->cpp[0];
2551
2552        return ilk_wm_method2(cstate->pixel_rate,
2553                              cstate->base.adjusted_mode.crtc_htotal,
2554                              pstate->base.crtc_w, cpp, mem_value);
2555}
2556
2557/* Only for WM_LP. */
2558static uint32_t ilk_compute_fbc_wm(const struct intel_crtc_state *cstate,
2559                                   const struct intel_plane_state *pstate,
2560                                   uint32_t pri_val)
2561{
2562        int cpp;
2563
2564        if (!intel_wm_plane_visible(cstate, pstate))
2565                return 0;
2566
2567        cpp = pstate->base.fb->format->cpp[0];
2568
2569        return ilk_wm_fbc(pri_val, drm_rect_width(&pstate->base.dst), cpp);
2570}
2571
2572static unsigned int
2573ilk_display_fifo_size(const struct drm_i915_private *dev_priv)
2574{
2575        if (INTEL_GEN(dev_priv) >= 8)
2576                return 3072;
2577        else if (INTEL_GEN(dev_priv) >= 7)
2578                return 768;
2579        else
2580                return 512;
2581}
2582
2583static unsigned int
2584ilk_plane_wm_reg_max(const struct drm_i915_private *dev_priv,
2585                     int level, bool is_sprite)
2586{
2587        if (INTEL_GEN(dev_priv) >= 8)
2588                /* BDW primary/sprite plane watermarks */
2589                return level == 0 ? 255 : 2047;
2590        else if (INTEL_GEN(dev_priv) >= 7)
2591                /* IVB/HSW primary/sprite plane watermarks */
2592                return level == 0 ? 127 : 1023;
2593        else if (!is_sprite)
2594                /* ILK/SNB primary plane watermarks */
2595                return level == 0 ? 127 : 511;
2596        else
2597                /* ILK/SNB sprite plane watermarks */
2598                return level == 0 ? 63 : 255;
2599}
2600
2601static unsigned int
2602ilk_cursor_wm_reg_max(const struct drm_i915_private *dev_priv, int level)
2603{
2604        if (INTEL_GEN(dev_priv) >= 7)
2605                return level == 0 ? 63 : 255;
2606        else
2607                return level == 0 ? 31 : 63;
2608}
2609
2610static unsigned int ilk_fbc_wm_reg_max(const struct drm_i915_private *dev_priv)
2611{
2612        if (INTEL_GEN(dev_priv) >= 8)
2613                return 31;
2614        else
2615                return 15;
2616}
2617
2618/* Calculate the maximum primary/sprite plane watermark */
2619static unsigned int ilk_plane_wm_max(const struct drm_device *dev,
2620                                     int level,
2621                                     const struct intel_wm_config *config,
2622                                     enum intel_ddb_partitioning ddb_partitioning,
2623                                     bool is_sprite)
2624{
2625        struct drm_i915_private *dev_priv = to_i915(dev);
2626        unsigned int fifo_size = ilk_display_fifo_size(dev_priv);
2627
2628        /* if sprites aren't enabled, sprites get nothing */
2629        if (is_sprite && !config->sprites_enabled)
2630                return 0;
2631
2632        /* HSW allows LP1+ watermarks even with multiple pipes */
2633        if (level == 0 || config->num_pipes_active > 1) {
2634                fifo_size /= INTEL_INFO(dev_priv)->num_pipes;
2635
2636                /*
2637                 * For some reason the non self refresh
2638                 * FIFO size is only half of the self
2639                 * refresh FIFO size on ILK/SNB.
2640                 */
2641                if (INTEL_GEN(dev_priv) <= 6)
2642                        fifo_size /= 2;
2643        }
2644
2645        if (config->sprites_enabled) {
2646                /* level 0 is always calculated with 1:1 split */
2647                if (level > 0 && ddb_partitioning == INTEL_DDB_PART_5_6) {
2648                        if (is_sprite)
2649                                fifo_size *= 5;
2650                        fifo_size /= 6;
2651                } else {
2652                        fifo_size /= 2;
2653                }
2654        }
2655
2656        /* clamp to max that the registers can hold */
2657        return min(fifo_size, ilk_plane_wm_reg_max(dev_priv, level, is_sprite));
2658}
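/*
 * Partitioning example (IVB-style 768-entry FIFO, single active pipe,
 * sprites enabled): at LP1+ with INTEL_DDB_PART_5_6 the sprite gets
 * 768 * 5 / 6 = 640 entries and the primary 768 / 6 = 128, versus
 * 384/384 for the default 1:1 split; level 0 always uses 1:1.
 */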
2659
2660/* Calculate the maximum cursor plane watermark */
2661static unsigned int ilk_cursor_wm_max(const struct drm_device *dev,
2662                                      int level,
2663                                      const struct intel_wm_config *config)
2664{
2665        /* HSW LP1+ watermarks w/ multiple pipes */
2666        if (level > 0 && config->num_pipes_active > 1)
2667                return 64;
2668
2669        /* otherwise just report max that registers can hold */
2670        return ilk_cursor_wm_reg_max(to_i915(dev), level);
2671}
2672
2673static void ilk_compute_wm_maximums(const struct drm_device *dev,
2674                                    int level,
2675                                    const struct intel_wm_config *config,
2676                                    enum intel_ddb_partitioning ddb_partitioning,
2677                                    struct ilk_wm_maximums *max)
2678{
2679        max->pri = ilk_plane_wm_max(dev, level, config, ddb_partitioning, false);
2680        max->spr = ilk_plane_wm_max(dev, level, config, ddb_partitioning, true);
2681        max->cur = ilk_cursor_wm_max(dev, level, config);
2682        max->fbc = ilk_fbc_wm_reg_max(to_i915(dev));
2683}
2684
2685static void ilk_compute_wm_reg_maximums(const struct drm_i915_private *dev_priv,
2686                                        int level,
2687                                        struct ilk_wm_maximums *max)
2688{
2689        max->pri = ilk_plane_wm_reg_max(dev_priv, level, false);
2690        max->spr = ilk_plane_wm_reg_max(dev_priv, level, true);
2691        max->cur = ilk_cursor_wm_reg_max(dev_priv, level);
2692        max->fbc = ilk_fbc_wm_reg_max(dev_priv);
2693}
2694
2695static bool ilk_validate_wm_level(int level,
2696                                  const struct ilk_wm_maximums *max,
2697                                  struct intel_wm_level *result)
2698{
2699        bool ret;
2700
2701        /* already determined to be invalid? */
2702        if (!result->enable)
2703                return false;
2704
2705        result->enable = result->pri_val <= max->pri &&
2706                         result->spr_val <= max->spr &&
2707                         result->cur_val <= max->cur;
2708
2709        ret = result->enable;
2710
2711        /*
2712         * HACK until we can pre-compute everything,
2713         * and thus fail gracefully if LP0 watermarks
2714         * are exceeded...
2715         */
2716        if (level == 0 && !result->enable) {
2717                if (result->pri_val > max->pri)
2718                        DRM_DEBUG_KMS("Primary WM%d too large %u (max %u)\n",
2719                                      level, result->pri_val, max->pri);
2720                if (result->spr_val > max->spr)
2721                        DRM_DEBUG_KMS("Sprite WM%d too large %u (max %u)\n",
2722                                      level, result->spr_val, max->spr);
2723                if (result->cur_val > max->cur)
2724                        DRM_DEBUG_KMS("Cursor WM%d too large %u (max %u)\n",
2725                                      level, result->cur_val, max->cur);
2726
2727                result->pri_val = min_t(uint32_t, result->pri_val, max->pri);
2728                result->spr_val = min_t(uint32_t, result->spr_val, max->spr);
2729                result->cur_val = min_t(uint32_t, result->cur_val, max->cur);
2730                result->enable = true;
2731        }
2732
2733        return ret;
2734}
2735
2736static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv,
2737                                 const struct intel_crtc *intel_crtc,
2738                                 int level,
2739                                 struct intel_crtc_state *cstate,
2740                                 const struct intel_plane_state *pristate,
2741                                 const struct intel_plane_state *sprstate,
2742                                 const struct intel_plane_state *curstate,
2743                                 struct intel_wm_level *result)
2744{
2745        uint16_t pri_latency = dev_priv->wm.pri_latency[level];
2746        uint16_t spr_latency = dev_priv->wm.spr_latency[level];
2747        uint16_t cur_latency = dev_priv->wm.cur_latency[level];
2748
2749        /* WM1+ latency values stored in 0.5us units */
2750        if (level > 0) {
2751                pri_latency *= 5;
2752                spr_latency *= 5;
2753                cur_latency *= 5;
2754        }
2755
2756        if (pristate) {
2757                result->pri_val = ilk_compute_pri_wm(cstate, pristate,
2758                                                     pri_latency, level);
2759                result->fbc_val = ilk_compute_fbc_wm(cstate, pristate, result->pri_val);
2760        }
2761
2762        if (sprstate)
2763                result->spr_val = ilk_compute_spr_wm(cstate, sprstate, spr_latency);
2764
2765        if (curstate)
2766                result->cur_val = ilk_compute_cur_wm(cstate, curstate, cur_latency);
2767
2768        result->enable = true;
2769}
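
/*
 * Unit sketch (raw value assumed): the stored latencies are in 0.1us
 * units for WM0 (cf. the ILK WM0 value of 7 == 700 ns set in
 * intel_read_wm_latency()) and 0.5us units for WM1+, so a raw WM1
 * latency of 4 (2 us) becomes 20 after the *5 scaling above, back in
 * 0.1us units for the ilk_compute_*_wm() helpers.
 */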
2770
2771static uint32_t
2772hsw_compute_linetime_wm(const struct intel_crtc_state *cstate)
2773{
2774        const struct intel_atomic_state *intel_state =
2775                to_intel_atomic_state(cstate->base.state);
2776        const struct drm_display_mode *adjusted_mode =
2777                &cstate->base.adjusted_mode;
2778        u32 linetime, ips_linetime;
2779
2780        if (!cstate->base.active)
2781                return 0;
2782        if (WARN_ON(adjusted_mode->crtc_clock == 0))
2783                return 0;
2784        if (WARN_ON(intel_state->cdclk.logical.cdclk == 0))
2785                return 0;
2786
2787        /* The watermarks are computed based on how long it takes to fill a
2788         * single row at the given clock rate, multiplied by 8.
2789         */
2790        linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
2791                                     adjusted_mode->crtc_clock);
2792        ips_linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
2793                                         intel_state->cdclk.logical.cdclk);
2794
2795        return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
2796               PIPE_WM_LINETIME_TIME(linetime);
2797}
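
/*
 * Worked example (mode timings assumed): a 1920x1080@60 mode with
 * crtc_htotal = 2200 and crtc_clock = 148500 kHz fills one row in
 * 2200 / 148500 kHz ~= 14.8 us, so linetime =
 * DIV_ROUND_CLOSEST(2200 * 1000 * 8, 148500) = 119 in 1/8 us units;
 * ips_linetime repeats the computation against the logical cdclk.
 */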
2798
2799static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
2800                                  uint16_t wm[8])
2801{
2802        if (INTEL_GEN(dev_priv) >= 9) {
2803                uint32_t val;
2804                int ret, i;
2805                int level, max_level = ilk_wm_max_level(dev_priv);
2806
2807                /* read the first set of memory latencies[0:3] */
2808                val = 0; /* data0 to be programmed to 0 for first set */
2809                mutex_lock(&dev_priv->pcu_lock);
2810                ret = sandybridge_pcode_read(dev_priv,
2811                                             GEN9_PCODE_READ_MEM_LATENCY,
2812                                             &val);
2813                mutex_unlock(&dev_priv->pcu_lock);
2814
2815                if (ret) {
2816                        DRM_ERROR("SKL Mailbox read error = %d\n", ret);
2817                        return;
2818                }
2819
2820                wm[0] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
2821                wm[1] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
2822                                GEN9_MEM_LATENCY_LEVEL_MASK;
2823                wm[2] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
2824                                GEN9_MEM_LATENCY_LEVEL_MASK;
2825                wm[3] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
2826                                GEN9_MEM_LATENCY_LEVEL_MASK;
2827
2828                /* read the second set of memory latencies[4:7] */
2829                val = 1; /* data0 to be programmed to 1 for second set */
2830                mutex_lock(&dev_priv->pcu_lock);
2831                ret = sandybridge_pcode_read(dev_priv,
2832                                             GEN9_PCODE_READ_MEM_LATENCY,
2833                                             &val);
2834                mutex_unlock(&dev_priv->pcu_lock);
2835                if (ret) {
2836                        DRM_ERROR("SKL Mailbox read error = %d\n", ret);
2837                        return;
2838                }
2839
2840                wm[4] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
2841                wm[5] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
2842                                GEN9_MEM_LATENCY_LEVEL_MASK;
2843                wm[6] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
2844                                GEN9_MEM_LATENCY_LEVEL_MASK;
2845                wm[7] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
2846                                GEN9_MEM_LATENCY_LEVEL_MASK;
2847
2848                /*
2849                 * If a level n (n > 1) has a 0us latency, all levels m (m >= n)
2850                 * need to be disabled. We make sure to sanitize the values out
2851                 * of the punit to satisfy this requirement.
2852                 */
2853                for (level = 1; level <= max_level; level++) {
2854                        if (wm[level] == 0) {
2855                                for (i = level + 1; i <= max_level; i++)
2856                                        wm[i] = 0;
2857                                break;
2858                        }
2859                }
2860
2861                /*
2862                 * WaWmMemoryReadLatency:skl+,glk
2863                 *
2864         * punit doesn't take the read latency into account, so we need
2865         * to add 2us to the various latency levels we retrieve from the
2866         * punit when the level 0 response data is 0us.
2867                 */
2868                if (wm[0] == 0) {
2869                        wm[0] += 2;
2870                        for (level = 1; level <= max_level; level++) {
2871                                if (wm[level] == 0)
2872                                        break;
2873                                wm[level] += 2;
2874                        }
2875                }
2876
2877        } else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
2878                uint64_t sskpd = I915_READ64(MCH_SSKPD);
2879
2880                wm[0] = (sskpd >> 56) & 0xFF;
2881                if (wm[0] == 0)
2882                        wm[0] = sskpd & 0xF;
2883                wm[1] = (sskpd >> 4) & 0xFF;
2884                wm[2] = (sskpd >> 12) & 0xFF;
2885                wm[3] = (sskpd >> 20) & 0x1FF;
2886                wm[4] = (sskpd >> 32) & 0x1FF;
2887        } else if (INTEL_GEN(dev_priv) >= 6) {
2888                uint32_t sskpd = I915_READ(MCH_SSKPD);
2889
2890                wm[0] = (sskpd >> SSKPD_WM0_SHIFT) & SSKPD_WM_MASK;
2891                wm[1] = (sskpd >> SSKPD_WM1_SHIFT) & SSKPD_WM_MASK;
2892                wm[2] = (sskpd >> SSKPD_WM2_SHIFT) & SSKPD_WM_MASK;
2893                wm[3] = (sskpd >> SSKPD_WM3_SHIFT) & SSKPD_WM_MASK;
2894        } else if (INTEL_GEN(dev_priv) >= 5) {
2895                uint32_t mltr = I915_READ(MLTR_ILK);
2896
2897                /* ILK primary LP0 latency is 700 ns */
2898                wm[0] = 7;
2899                wm[1] = (mltr >> MLTR_WM1_SHIFT) & ILK_SRLT_MASK;
2900                wm[2] = (mltr >> MLTR_WM2_SHIFT) & ILK_SRLT_MASK;
2901        } else {
2902                MISSING_CASE(INTEL_DEVID(dev_priv));
2903        }
2904}
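
/*
 * Example decode (mailbox value assumed): a first-read result of
 * val = 0x0a080604 unpacks to wm[0] = 0x04, wm[1] = 0x06,
 * wm[2] = 0x08 and wm[3] = 0x0a microseconds; the second read with
 * data0 = 1 fills wm[4..7] the same way.
 */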
2905
2906static void intel_fixup_spr_wm_latency(struct drm_i915_private *dev_priv,
2907                                       uint16_t wm[5])
2908{
2909        /* ILK sprite LP0 latency is 1300 ns */
2910        if (IS_GEN5(dev_priv))
2911                wm[0] = 13;
2912}
2913
2914static void intel_fixup_cur_wm_latency(struct drm_i915_private *dev_priv,
2915                                       uint16_t wm[5])
2916{
2917        /* ILK cursor LP0 latency is 1300 ns */
2918        if (IS_GEN5(dev_priv))
2919                wm[0] = 13;
2920}
2921
2922int ilk_wm_max_level(const struct drm_i915_private *dev_priv)
2923{
2924        /* how many WM levels are we expecting */
2925        if (INTEL_GEN(dev_priv) >= 9)
2926                return 7;
2927        else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
2928                return 4;
2929        else if (INTEL_GEN(dev_priv) >= 6)
2930                return 3;
2931        else
2932                return 2;
2933}
2934
2935static void intel_print_wm_latency(struct drm_i915_private *dev_priv,
2936                                   const char *name,
2937                                   const uint16_t wm[8])
2938{
2939        int level, max_level = ilk_wm_max_level(dev_priv);
2940
2941        for (level = 0; level <= max_level; level++) {
2942                unsigned int latency = wm[level];
2943
2944                if (latency == 0) {
2945                        DRM_ERROR("%s WM%d latency not provided\n",
2946                                  name, level);
2947                        continue;
2948                }
2949
2950                /*
2951                 * - latencies are in us on gen9.
2952                 * - before then, WM1+ latency values are in 0.5us units
2953                 */
2954                if (INTEL_GEN(dev_priv) >= 9)
2955                        latency *= 10;
2956                else if (level > 0)
2957                        latency *= 5;
2958
2959                DRM_DEBUG_KMS("%s WM%d latency %u (%u.%u usec)\n",
2960                              name, level, wm[level],
2961                              latency / 10, latency % 10);
2962        }
2963}
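
/*
 * Print example (raw value assumed): a pre-gen9 WM2 value of 3 is in
 * 0.5us units, so latency = 3 * 5 = 15 and the message reads
 * "WM2 latency 3 (1.5 usec)"; on gen9+ the raw microsecond value is
 * scaled by 10 instead.
 */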
2964
2965static bool ilk_increase_wm_latency(struct drm_i915_private *dev_priv,
2966                                    uint16_t wm[5], uint16_t min)
2967{
2968        int level, max_level = ilk_wm_max_level(dev_priv);
2969
2970        if (wm[0] >= min)
2971                return false;
2972
2973        wm[0] = max(wm[0], min);
2974        for (level = 1; level <= max_level; level++)
2975                wm[level] = max_t(uint16_t, wm[level], DIV_ROUND_UP(min, 5));
2976
2977        return true;
2978}
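
/*
 * Quirk sketch (raw values assumed): with wm[0] == 7 (0.7 us) and
 * min == 12, WM0 is raised to 12 (1.2 us) and every WM1+ level to at
 * least DIV_ROUND_UP(12, 5) == 3, i.e. 1.5 us in WM1+'s 0.5us units.
 */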
2979
2980static void snb_wm_latency_quirk(struct drm_i915_private *dev_priv)
2981{
2982        bool changed;
2983
2984        /*
2985         * The BIOS provided WM memory latency values are often
2986         * inadequate for high resolution displays. Adjust them.
2987         */
2988        changed = ilk_increase_wm_latency(dev_priv, dev_priv->wm.pri_latency, 12) |
2989                ilk_increase_wm_latency(dev_priv, dev_priv->wm.spr_latency, 12) |
2990                ilk_increase_wm_latency(dev_priv, dev_priv->wm.cur_latency, 12);
2991
2992        if (!changed)
2993                return;
2994
2995        DRM_DEBUG_KMS("WM latency values increased to avoid potential underruns\n");
2996        intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency);
2997        intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency);
2998        intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
2999}
3000
3001static void ilk_setup_wm_latency(struct drm_i915_private *dev_priv)
3002{
3003        intel_read_wm_latency(dev_priv, dev_priv->wm.pri_latency);
3004
3005        memcpy(dev_priv->wm.spr_latency, dev_priv->wm.pri_latency,
3006               sizeof(dev_priv->wm.pri_latency));
3007        memcpy(dev_priv->wm.cur_latency, dev_priv->wm.pri_latency,
3008               sizeof(dev_priv->wm.pri_latency));
3009
3010        intel_fixup_spr_wm_latency(dev_priv, dev_priv->wm.spr_latency);
3011        intel_fixup_cur_wm_latency(dev_priv, dev_priv->wm.cur_latency);
3012
3013        intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency);
3014        intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency);
3015        intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
3016
3017        if (IS_GEN6(dev_priv))
3018                snb_wm_latency_quirk(dev_priv);
3019}
3020
3021static void skl_setup_wm_latency(struct drm_i915_private *dev_priv)
3022{
3023        intel_read_wm_latency(dev_priv, dev_priv->wm.skl_latency);
3024        intel_print_wm_latency(dev_priv, "Gen9 Plane", dev_priv->wm.skl_latency);
3025}
3026
3027static bool ilk_validate_pipe_wm(struct drm_device *dev,
3028                                 struct intel_pipe_wm *pipe_wm)
3029{
3030        /* LP0 watermark maximums depend on this pipe alone */
3031        const struct intel_wm_config config = {
3032                .num_pipes_active = 1,
3033                .sprites_enabled = pipe_wm->sprites_enabled,
3034                .sprites_scaled = pipe_wm->sprites_scaled,
3035        };
3036        struct ilk_wm_maximums max;
3037
3038        /* LP0 watermarks always use 1/2 DDB partitioning */
3039        ilk_compute_wm_maximums(dev, 0, &config, INTEL_DDB_PART_1_2, &max);
3040
3041        /* At least LP0 must be valid */
3042        if (!ilk_validate_wm_level(0, &max, &pipe_wm->wm[0])) {
3043                DRM_DEBUG_KMS("LP0 watermark invalid\n");
3044                return false;
3045        }
3046
3047        return true;
3048}
3049
3050/* Compute new watermarks for the pipe */
3051static int ilk_compute_pipe_wm(struct intel_crtc_state *cstate)
3052{
3053        struct drm_atomic_state *state = cstate->base.state;
3054        struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
3055        struct intel_pipe_wm *pipe_wm;
3056        struct drm_device *dev = state->dev;
3057        const struct drm_i915_private *dev_priv = to_i915(dev);
3058        struct drm_plane *plane;
3059        const struct drm_plane_state *plane_state;
3060        const struct intel_plane_state *pristate = NULL;
3061        const struct intel_plane_state *sprstate = NULL;
3062        const struct intel_plane_state *curstate = NULL;
3063        int level, max_level = ilk_wm_max_level(dev_priv), usable_level;
3064        struct ilk_wm_maximums max;
3065
3066        pipe_wm = &cstate->wm.ilk.optimal;
3067
3068        drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, &cstate->base) {
3069                const struct intel_plane_state *ps = to_intel_plane_state(plane_state);
3070
3071                if (plane->type == DRM_PLANE_TYPE_PRIMARY)
3072                        pristate = ps;
3073                else if (plane->type == DRM_PLANE_TYPE_OVERLAY)
3074                        sprstate = ps;
3075                else if (plane->type == DRM_PLANE_TYPE_CURSOR)
3076                        curstate = ps;
3077        }
3078
3079        pipe_wm->pipe_enabled = cstate->base.active;
3080        if (sprstate) {
3081                pipe_wm->sprites_enabled = sprstate->base.visible;
3082                pipe_wm->sprites_scaled = sprstate->base.visible &&
3083                        (drm_rect_width(&sprstate->base.dst) != drm_rect_width(&sprstate->base.src) >> 16 ||
3084                         drm_rect_height(&sprstate->base.dst) != drm_rect_height(&sprstate->base.src) >> 16);
3085        }
3086
3087        usable_level = max_level;
3088
3089        /* ILK/SNB: LP2+ watermarks only w/o sprites */
3090        if (INTEL_GEN(dev_priv) <= 6 && pipe_wm->sprites_enabled)
3091                usable_level = 1;
3092
3093        /* ILK/SNB/IVB: LP1+ watermarks only w/o scaling */
3094        if (pipe_wm->sprites_scaled)
3095                usable_level = 0;
3096
3097        memset(&pipe_wm->wm, 0, sizeof(pipe_wm->wm));
3098        ilk_compute_wm_level(dev_priv, intel_crtc, 0, cstate,
3099                             pristate, sprstate, curstate, &pipe_wm->wm[0]);
3100
3101        if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
3102                pipe_wm->linetime = hsw_compute_linetime_wm(cstate);
3103
3104        if (!ilk_validate_pipe_wm(dev, pipe_wm))
3105                return -EINVAL;
3106
3107        ilk_compute_wm_reg_maximums(dev_priv, 1, &max);
3108
3109        for (level = 1; level <= usable_level; level++) {
3110                struct intel_wm_level *wm = &pipe_wm->wm[level];
3111
3112                ilk_compute_wm_level(dev_priv, intel_crtc, level, cstate,
3113                                     pristate, sprstate, curstate, wm);
3114
3115                /*
3116                 * Disable any watermark level that exceeds the
3117                 * register maximums since such watermarks are
3118                 * always invalid.
3119                 */
3120                if (!ilk_validate_wm_level(level, &max, wm)) {
3121                        memset(wm, 0, sizeof(*wm));
3122                        break;
3123                }
3124        }
3125
3126        return 0;
3127}
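
/*
 * Level-trim example (configuration assumed): on ILK/SNB a visible
 * sprite caps usable_level at 1 (WM0 plus LP1 only), and a scaled
 * sprite drops it to 0 on all platforms, so only WM0 is computed and
 * the higher levels stay zeroed and disabled.
 */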
3128
3129/*
3130 * Build a set of 'intermediate' watermark values that satisfy both the old
3131 * state and the new state.  These can be programmed to the hardware
3132 * immediately.
3133 */
3134static int ilk_compute_intermediate_wm(struct drm_device *dev,
3135                                       struct intel_crtc *intel_crtc,
3136                                       struct intel_crtc_state *newstate)
3137{
3138        struct intel_pipe_wm *a = &newstate->wm.ilk.intermediate;
3139        struct intel_atomic_state *intel_state =
3140                to_intel_atomic_state(newstate->base.state);
3141        const struct intel_crtc_state *oldstate =
3142                intel_atomic_get_old_crtc_state(intel_state, intel_crtc);
3143        const struct intel_pipe_wm *b = &oldstate->wm.ilk.optimal;
3144        int level, max_level = ilk_wm_max_level(to_i915(dev));
3145
3146        /*
3147         * Start with the final, target watermarks, then combine with the
3148         * currently active watermarks to get values that are safe both before
3149         * and after the vblank.
3150         */
3151        *a = newstate->wm.ilk.optimal;
3152        if (!newstate->base.active || drm_atomic_crtc_needs_modeset(&newstate->base))
3153                return 0;
3154
3155        a->pipe_enabled |= b->pipe_enabled;
3156        a->sprites_enabled |= b->sprites_enabled;
3157        a->sprites_scaled |= b->sprites_scaled;
3158
3159        for (level = 0; level <= max_level; level++) {
3160                struct intel_wm_level *a_wm = &a->wm[level];
3161                const struct intel_wm_level *b_wm = &b->wm[level];
3162
3163                a_wm->enable &= b_wm->enable;
3164                a_wm->pri_val = max(a_wm->pri_val, b_wm->pri_val);
3165                a_wm->spr_val = max(a_wm->spr_val, b_wm->spr_val);
3166                a_wm->cur_val = max(a_wm->cur_val, b_wm->cur_val);
3167                a_wm->fbc_val = max(a_wm->fbc_val, b_wm->fbc_val);
3168        }
3169
3170        /*
3171         * We need to make sure that these merged watermark values are
3172         * actually a valid configuration themselves.  If they're not,
3173         * there's no safe way to transition from the old state to
3174         * the new state, so we need to fail the atomic transaction.
3175         */
3176        if (!ilk_validate_pipe_wm(dev, a))
3177                return -EINVAL;
3178
3179        /*
3180         * If our intermediate WM are identical to the final WM, then we can
3181         * omit the post-vblank programming; only update if it's different.
3182         */
3183        if (memcmp(a, &newstate->wm.ilk.optimal, sizeof(*a)) != 0)
3184                newstate->wm.need_postvbl_update = true;
3185
3186        return 0;
3187}
3188
3189/*
3190 * Merge the watermarks from all active pipes for a specific level.
3191 */
3192static void ilk_merge_wm_level(struct drm_device *dev,
3193                               int level,
3194                               struct intel_wm_level *ret_wm)
3195{
3196        const struct intel_crtc *intel_crtc;
3197
3198        ret_wm->enable = true;
3199
3200        for_each_intel_crtc(dev, intel_crtc) {
3201                const struct intel_pipe_wm *active = &intel_crtc->wm.active.ilk;
3202                const struct intel_wm_level *wm = &active->wm[level];
3203
3204                if (!active->pipe_enabled)
3205                        continue;
3206
3207                /*
3208                 * The watermark values may have been used in the past,
3209                 * so we must maintain them in the registers for some
3210                 * time even if the level is now disabled.
3211                 */
3212                if (!wm->enable)
3213                        ret_wm->enable = false;
3214
3215                ret_wm->pri_val = max(ret_wm->pri_val, wm->pri_val);
3216                ret_wm->spr_val = max(ret_wm->spr_val, wm->spr_val);
3217                ret_wm->cur_val = max(ret_wm->cur_val, wm->cur_val);
3218                ret_wm->fbc_val = max(ret_wm->fbc_val, wm->fbc_val);
3219        }
3220}
3221
3222/*
3223 * Merge all low power watermarks for all active pipes.
3224 */
3225static void ilk_wm_merge(struct drm_device *dev,
3226                         const struct intel_wm_config *config,
3227                         const struct ilk_wm_maximums *max,
3228                         struct intel_pipe_wm *merged)
3229{
3230        struct drm_i915_private *dev_priv = to_i915(dev);
3231        int level, max_level = ilk_wm_max_level(dev_priv);
3232        int last_enabled_level = max_level;
3233
3234        /* ILK/SNB/IVB: LP1+ watermarks only w/ single pipe */
3235        if ((INTEL_GEN(dev_priv) <= 6 || IS_IVYBRIDGE(dev_priv)) &&
3236            config->num_pipes_active > 1)
3237                last_enabled_level = 0;
3238
3239        /* ILK: FBC WM must be disabled always */
3240        merged->fbc_wm_enabled = INTEL_GEN(dev_priv) >= 6;
3241
3242        /* merge each WM1+ level */
3243        for (level = 1; level <= max_level; level++) {
3244                struct intel_wm_level *wm = &merged->wm[level];
3245
3246                ilk_merge_wm_level(dev, level, wm);
3247
3248                if (level > last_enabled_level)
3249                        wm->enable = false;
3250                else if (!ilk_validate_wm_level(level, max, wm))
3251                        /* make sure all following levels get disabled */
3252                        last_enabled_level = level - 1;
3253
3254                /*
3255                 * The spec says it is preferred to disable
3256                 * FBC WMs instead of disabling a WM level.
3257                 */
3258                if (wm->fbc_val > max->fbc) {
3259                        if (wm->enable)
3260                                merged->fbc_wm_enabled = false;
3261                        wm->fbc_val = 0;
3262                }
3263        }
3264
3265        /* ILK: LP2+ must be disabled when FBC WM is disabled but FBC enabled */
3266        /*
3267         * FIXME this is racy. FBC might get enabled later.
3268         * What we should check here is whether FBC can be
3269         * enabled sometime later.
3270         */
3271        if (IS_GEN5(dev_priv) && !merged->fbc_wm_enabled &&
3272            intel_fbc_is_active(dev_priv)) {
3273                for (level = 2; level <= max_level; level++) {
3274                        struct intel_wm_level *wm = &merged->wm[level];
3275
3276                        wm->enable = false;
3277                }
3278        }
3279}
3280
3281static int ilk_wm_lp_to_level(int wm_lp, const struct intel_pipe_wm *pipe_wm)
3282{
3283        /* LP1,LP2,LP3 levels are either 1,2,3 or 1,3,4 */
3284        return wm_lp + (wm_lp >= 2 && pipe_wm->wm[4].enable);
3285}
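
/*
 * Mapping sketch: on HSW/BDW (max_level == 4) the three LP registers
 * carry levels 1, 3 and 4 when wm[4] is enabled, skipping level 2;
 * otherwise, as on ILK/SNB/IVB, they carry levels 1, 2 and 3.
 */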
3286
3287/* The value we need to program into the WM_LPx latency field */
3288static unsigned int ilk_wm_lp_latency(struct drm_device *dev, int level)
3289{
3290        struct drm_i915_private *dev_priv = to_i915(dev);
3291
3292        if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
3293                return 2 * level;
3294        else
3295                return dev_priv->wm.pri_latency[level];
3296}
3297
3298static void ilk_compute_wm_results(struct drm_device *dev,
3299                                   const struct intel_pipe_wm *merged,
3300                                   enum intel_ddb_partitioning partitioning,
3301                                   struct ilk_wm_values *results)
3302{
3303        struct drm_i915_private *dev_priv = to_i915(dev);
3304        struct intel_crtc *intel_crtc;
3305        int level, wm_lp;
3306
3307        results->enable_fbc_wm = merged->fbc_wm_enabled;
3308        results->partitioning = partitioning;
3309
3310        /* LP1+ register values */
3311        for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
3312                const struct intel_wm_level *r;
3313
3314                level = ilk_wm_lp_to_level(wm_lp, merged);
3315
3316                r = &merged->wm[level];
3317
3318                /*
3319                 * Maintain the watermark values even if the level is
3320                 * disabled. Doing otherwise could cause underruns.
3321                 */
3322                results->wm_lp[wm_lp - 1] =
3323                        (ilk_wm_lp_latency(dev, level) << WM1_LP_LATENCY_SHIFT) |
3324                        (r->pri_val << WM1_LP_SR_SHIFT) |
3325                        r->cur_val;
3326
3327                if (r->enable)
3328                        results->wm_lp[wm_lp - 1] |= WM1_LP_SR_EN;
3329
3330                if (INTEL_GEN(dev_priv) >= 8)
3331                        results->wm_lp[wm_lp - 1] |=
3332                                r->fbc_val << WM1_LP_FBC_SHIFT_BDW;
3333                else
3334                        results->wm_lp[wm_lp - 1] |=
3335                                r->fbc_val << WM1_LP_FBC_SHIFT;
3336
3337                /*
3338                 * Always set WM1S_LP_EN when spr_val != 0, even if the
3339                 * level is disabled. Doing otherwise could cause underruns.
3340                 */
3341                if (INTEL_GEN(dev_priv) <= 6 && r->spr_val) {
3342                        WARN_ON(wm_lp != 1);
3343                        results->wm_lp_spr[wm_lp - 1] = WM1S_LP_EN | r->spr_val;
3344                } else
3345                        results->wm_lp_spr[wm_lp - 1] = r->spr_val;
3346        }
3347
3348        /* LP0 register values */
3349        for_each_intel_crtc(dev, intel_crtc) {
3350                enum pipe pipe = intel_crtc->pipe;
3351                const struct intel_wm_level *r =
3352                        &intel_crtc->wm.active.ilk.wm[0];
3353
3354                if (WARN_ON(!r->enable))
3355                        continue;
3356
3357                results->wm_linetime[pipe] = intel_crtc->wm.active.ilk.linetime;
3358
3359                results->wm_pipe[pipe] =
3360                        (r->pri_val << WM0_PIPE_PLANE_SHIFT) |
3361                        (r->spr_val << WM0_PIPE_SPRITE_SHIFT) |
3362                        r->cur_val;
3363        }
3364}
3365
3366/* Find the result with the highest level enabled. Check for enable_fbc_wm in
3367 * case both are at the same level. Prefer r1 in case they're the same. */
3368static struct intel_pipe_wm *ilk_find_best_result(struct drm_device *dev,
3369                                                  struct intel_pipe_wm *r1,
3370                                                  struct intel_pipe_wm *r2)
3371{
3372        int level, max_level = ilk_wm_max_level(to_i915(dev));
3373        int level1 = 0, level2 = 0;
3374
3375        for (level = 1; level <= max_level; level++) {
3376                if (r1->wm[level].enable)
3377                        level1 = level;
3378                if (r2->wm[level].enable)
3379                        level2 = level;
3380        }
3381
3382        if (level1 == level2) {
3383                if (r2->fbc_wm_enabled && !r1->fbc_wm_enabled)
3384                        return r2;
3385                else
3386                        return r1;
3387        } else if (level1 > level2) {
3388                return r1;
3389        } else {
3390                return r2;
3391        }
3392}
3393
3394/* dirty bits used to track which watermarks need changes */
3395#define WM_DIRTY_PIPE(pipe) (1 << (pipe))
3396#define WM_DIRTY_LINETIME(pipe) (1 << (8 + (pipe)))
3397#define WM_DIRTY_LP(wm_lp) (1 << (15 + (wm_lp)))
3398#define WM_DIRTY_LP_ALL (WM_DIRTY_LP(1) | WM_DIRTY_LP(2) | WM_DIRTY_LP(3))
3399#define WM_DIRTY_FBC (1 << 24)
3400#define WM_DIRTY_DDB (1 << 25)
3401
3402static unsigned int ilk_compute_wm_dirty(struct drm_i915_private *dev_priv,
3403                                         const struct ilk_wm_values *old,
3404                                         const struct ilk_wm_values *new)
3405{
3406        unsigned int dirty = 0;
3407        enum pipe pipe;
3408        int wm_lp;
3409
3410        for_each_pipe(dev_priv, pipe) {
3411                if (old->wm_linetime[pipe] != new->wm_linetime[pipe]) {
3412                        dirty |= WM_DIRTY_LINETIME(pipe);
3413                        /* Must disable LP1+ watermarks too */
3414                        dirty |= WM_DIRTY_LP_ALL;
3415                }
3416
3417                if (old->wm_pipe[pipe] != new->wm_pipe[pipe]) {
3418                        dirty |= WM_DIRTY_PIPE(pipe);
3419                        /* Must disable LP1+ watermarks too */
3420                        dirty |= WM_DIRTY_LP_ALL;
3421                }
3422        }
3423
3424        if (old->enable_fbc_wm != new->enable_fbc_wm) {
3425                dirty |= WM_DIRTY_FBC;
3426                /* Must disable LP1+ watermarks too */
3427                dirty |= WM_DIRTY_LP_ALL;
3428        }
3429
3430        if (old->partitioning != new->partitioning) {
3431                dirty |= WM_DIRTY_DDB;
3432                /* Must disable LP1+ watermarks too */
3433                dirty |= WM_DIRTY_LP_ALL;
3434        }
3435
3436        /* LP1+ watermarks already deemed dirty, no need to continue */
3437        if (dirty & WM_DIRTY_LP_ALL)
3438                return dirty;
3439
3440        /* Find the lowest numbered LP1+ watermark in need of an update... */
3441        for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
3442                if (old->wm_lp[wm_lp - 1] != new->wm_lp[wm_lp - 1] ||
3443                    old->wm_lp_spr[wm_lp - 1] != new->wm_lp_spr[wm_lp - 1])
3444                        break;
3445        }
3446
3447        /* ...and mark it and all higher numbered LP1+ watermarks as dirty */
3448        for (; wm_lp <= 3; wm_lp++)
3449                dirty |= WM_DIRTY_LP(wm_lp);
3450
3451        return dirty;
3452}
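
/*
 * Dirty-bit sketch (changed fields assumed): if the LP2 register value
 * is the first difference found, the tail loop above marks both
 * WM_DIRTY_LP(2) and WM_DIRTY_LP(3), leaving LP1 untouched.
 */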
3453
3454static bool _ilk_disable_lp_wm(struct drm_i915_private *dev_priv,
3455                               unsigned int dirty)
3456{
3457        struct ilk_wm_values *previous = &dev_priv->wm.hw;
3458        bool changed = false;
3459
3460        if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] & WM1_LP_SR_EN) {
3461                previous->wm_lp[2] &= ~WM1_LP_SR_EN;
3462                I915_WRITE(WM3_LP_ILK, previous->wm_lp[2]);
3463                changed = true;
3464        }
3465        if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] & WM1_LP_SR_EN) {
3466                previous->wm_lp[1] &= ~WM1_LP_SR_EN;
3467                I915_WRITE(WM2_LP_ILK, previous->wm_lp[1]);
3468                changed = true;
3469        }
3470        if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] & WM1_LP_SR_EN) {
3471                previous->wm_lp[0] &= ~WM1_LP_SR_EN;
3472                I915_WRITE(WM1_LP_ILK, previous->wm_lp[0]);
3473                changed = true;
3474        }
3475
3476        /*
3477         * Don't touch WM1S_LP_EN here.
3478         * Doing so could cause underruns.
3479         */
3480
3481        return changed;
3482}
3483
3484/*
3485 * The spec says we shouldn't write when we don't need, because every write
3486 * causes WMs to be re-evaluated, expending some power.
3487 */
3488static void ilk_write_wm_values(struct drm_i915_private *dev_priv,
3489                                struct ilk_wm_values *results)
3490{
3491        struct ilk_wm_values *previous = &dev_priv->wm.hw;
3492        unsigned int dirty;
3493        uint32_t val;
3494
3495        dirty = ilk_compute_wm_dirty(dev_priv, previous, results);
3496        if (!dirty)
3497                return;
3498
3499        _ilk_disable_lp_wm(dev_priv, dirty);
3500
3501        if (dirty & WM_DIRTY_PIPE(PIPE_A))
3502                I915_WRITE(WM0_PIPEA_ILK, results->wm_pipe[0]);
3503        if (dirty & WM_DIRTY_PIPE(PIPE_B))
3504                I915_WRITE(WM0_PIPEB_ILK, results->wm_pipe[1]);
3505        if (dirty & WM_DIRTY_PIPE(PIPE_C))
3506                I915_WRITE(WM0_PIPEC_IVB, results->wm_pipe[2]);
3507
3508        if (dirty & WM_DIRTY_LINETIME(PIPE_A))
3509                I915_WRITE(PIPE_WM_LINETIME(PIPE_A), results->wm_linetime[0]);
3510        if (dirty & WM_DIRTY_LINETIME(PIPE_B))
3511                I915_WRITE(PIPE_WM_LINETIME(PIPE_B), results->wm_linetime[1]);
3512        if (dirty & WM_DIRTY_LINETIME(PIPE_C))
3513                I915_WRITE(PIPE_WM_LINETIME(PIPE_C), results->wm_linetime[2]);
3514
3515        if (dirty & WM_DIRTY_DDB) {
3516                if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
3517                        val = I915_READ(WM_MISC);
3518                        if (results->partitioning == INTEL_DDB_PART_1_2)
3519                                val &= ~WM_MISC_DATA_PARTITION_5_6;
3520                        else
3521                                val |= WM_MISC_DATA_PARTITION_5_6;
3522                        I915_WRITE(WM_MISC, val);
3523                } else {
3524                        val = I915_READ(DISP_ARB_CTL2);
3525                        if (results->partitioning == INTEL_DDB_PART_1_2)
3526                                val &= ~DISP_DATA_PARTITION_5_6;
3527                        else
3528                                val |= DISP_DATA_PARTITION_5_6;
3529                        I915_WRITE(DISP_ARB_CTL2, val);
3530                }
3531        }
3532
3533        if (dirty & WM_DIRTY_FBC) {
3534                val = I915_READ(DISP_ARB_CTL);
3535                if (results->enable_fbc_wm)
3536                        val &= ~DISP_FBC_WM_DIS;
3537                else
3538                        val |= DISP_FBC_WM_DIS;
3539                I915_WRITE(DISP_ARB_CTL, val);
3540        }
3541
3542        if (dirty & WM_DIRTY_LP(1) &&
3543            previous->wm_lp_spr[0] != results->wm_lp_spr[0])
3544                I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]);
3545
3546        if (INTEL_GEN(dev_priv) >= 7) {
3547                if (dirty & WM_DIRTY_LP(2) && previous->wm_lp_spr[1] != results->wm_lp_spr[1])
3548                        I915_WRITE(WM2S_LP_IVB, results->wm_lp_spr[1]);
3549                if (dirty & WM_DIRTY_LP(3) && previous->wm_lp_spr[2] != results->wm_lp_spr[2])
3550                        I915_WRITE(WM3S_LP_IVB, results->wm_lp_spr[2]);
3551        }
3552
3553        if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] != results->wm_lp[0])
3554                I915_WRITE(WM1_LP_ILK, results->wm_lp[0]);
3555        if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] != results->wm_lp[1])
3556                I915_WRITE(WM2_LP_ILK, results->wm_lp[1]);
3557        if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] != results->wm_lp[2])
3558                I915_WRITE(WM3_LP_ILK, results->wm_lp[2]);
3559
3560        dev_priv->wm.hw = *results;
3561}
3562
3563bool ilk_disable_lp_wm(struct drm_device *dev)
3564{
3565        struct drm_i915_private *dev_priv = to_i915(dev);
3566
3567        return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL);
3568}
3569
3570/*
3571 * FIXME: We still don't have the proper code to detect if we need to apply the WA,
3572 * so assume we'll always need it in order to avoid underruns.
3573 */
3574static bool skl_needs_memory_bw_wa(struct intel_atomic_state *state)
3575{
3576        struct drm_i915_private *dev_priv = to_i915(state->base.dev);
3577
3578        if (IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv))
3579                return true;
3580
3581        return false;
3582}
3583
3584static bool
3585intel_has_sagv(struct drm_i915_private *dev_priv)
3586{
3587        if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv) ||
3588            IS_CANNONLAKE(dev_priv))
3589                return true;
3590
3591        if (IS_SKYLAKE(dev_priv) &&
3592            dev_priv->sagv_status != I915_SAGV_NOT_CONTROLLED)
3593                return true;
3594
3595        return false;
3596}
3597
3598/*
3599 * SAGV dynamically adjusts the system agent voltage and clock frequencies
3600 * depending on power and performance requirements. The display engine access
3601 * to system memory is blocked during the adjustment time. Because of the
3602 * blocking time, having this enabled can cause full system hangs and/or pipe
3603 * underruns if we don't meet all of the following requirements:
3604 *
3605 *  - <= 1 pipe enabled
3606 *  - All planes can enable watermarks for latencies >= SAGV engine block time
3607 *  - We're not using an interlaced display configuration
3608 */
3609int
3610intel_enable_sagv(struct drm_i915_private *dev_priv)
3611{
3612        int ret;
3613
3614        if (!intel_has_sagv(dev_priv))
3615                return 0;
3616
3617        if (dev_priv->sagv_status == I915_SAGV_ENABLED)
3618                return 0;
3619
3620        DRM_DEBUG_KMS("Enabling the SAGV\n");
3621        mutex_lock(&dev_priv->pcu_lock);
3622
3623        ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL,
3624                                      GEN9_SAGV_ENABLE);
3625
3626        /* We don't need to wait for the SAGV when enabling */
3627        mutex_unlock(&dev_priv->pcu_lock);
3628
3629        /*
3630         * Some skl systems, pre-release machines in particular,
3631         * don't actually have an SAGV.
3632         */
3633        if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) {
3634                DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");
3635                dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
3636                return 0;
3637        } else if (ret < 0) {
3638                DRM_ERROR("Failed to enable the SAGV\n");
3639                return ret;
3640        }
3641
3642        dev_priv->sagv_status = I915_SAGV_ENABLED;
3643        return 0;
3644}
3645
3646int
3647intel_disable_sagv(struct drm_i915_private *dev_priv)
3648{
3649        int ret;
3650
3651        if (!intel_has_sagv(dev_priv))
3652                return 0;
3653
3654        if (dev_priv->sagv_status == I915_SAGV_DISABLED)
3655                return 0;
3656
3657        DRM_DEBUG_KMS("Disabling the SAGV\n");
3658        mutex_lock(&dev_priv->pcu_lock);
3659
3660        /* bspec says to keep retrying for at least 1 ms */
3661        ret = skl_pcode_request(dev_priv, GEN9_PCODE_SAGV_CONTROL,
3662                                GEN9_SAGV_DISABLE,
3663                                GEN9_SAGV_IS_DISABLED, GEN9_SAGV_IS_DISABLED,
3664                                1);
3665        mutex_unlock(&dev_priv->pcu_lock);
3666
3667        /*
3668         * Some skl systems, pre-release machines in particular,
3669         * don't actually have an SAGV.
3670         */
3671        if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) {
3672                DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");
3673                dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
3674                return 0;
3675        } else if (ret < 0) {
3676                DRM_ERROR("Failed to disable the SAGV (%d)\n", ret);
3677                return ret;
3678        }
3679
3680        dev_priv->sagv_status = I915_SAGV_DISABLED;
3681        return 0;
3682}
3683
3684bool intel_can_enable_sagv(struct drm_atomic_state *state)
3685{
3686        struct drm_device *dev = state->dev;
3687        struct drm_i915_private *dev_priv = to_i915(dev);
3688        struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
3689        struct intel_crtc *crtc;
3690        struct intel_plane *plane;
3691        struct intel_crtc_state *cstate;
3692        enum pipe pipe;
3693        int level, latency;
3694        int sagv_block_time_us;
3695
3696        if (!intel_has_sagv(dev_priv))
3697                return false;
3698
3699        if (IS_GEN9(dev_priv))
3700                sagv_block_time_us = 30;
3701        else if (IS_GEN10(dev_priv))
3702                sagv_block_time_us = 20;
3703        else
3704                sagv_block_time_us = 10;
3705
3706        /*
3707         * SKL+ workaround: bspec recommends we disable the SAGV when we have
3708         * more than one pipe enabled
3709         *
3710         * If there are no active CRTCs, no additional checks need be performed
3711         */
3712        if (hweight32(intel_state->active_crtcs) == 0)
3713                return true;
3714        else if (hweight32(intel_state->active_crtcs) > 1)
3715                return false;
3716
3717        /* Since we're now guaranteed to only have one active CRTC... */
3718        pipe = ffs(intel_state->active_crtcs) - 1;
3719        crtc = intel_get_crtc_for_pipe(dev_priv, pipe);
3720        cstate = to_intel_crtc_state(crtc->base.state);
3721
3722        if (crtc->base.state->adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE)
3723                return false;
3724
3725        for_each_intel_plane_on_crtc(dev, crtc, plane) {
3726                struct skl_plane_wm *wm =
3727                        &cstate->wm.skl.optimal.planes[plane->id];
3728
3729                /* Skip this plane if it's not enabled */
3730                if (!wm->wm[0].plane_en)
3731                        continue;
3732
3733                /* Find the highest enabled wm level for this plane */
3734                for (level = ilk_wm_max_level(dev_priv);
3735                     !wm->wm[level].plane_en; --level)
3736                     { }
3737
3738                latency = dev_priv->wm.skl_latency[level];
3739
3740                if (skl_needs_memory_bw_wa(intel_state) &&
3741                    plane->base.state->fb->modifier ==
3742                    I915_FORMAT_MOD_X_TILED)
3743                        latency += 15;
3744
3745                /*
3746                 * If any plane on this pipe fails to enable a wm level that
3747                 * covers a memory latency of at least sagv_block_time_us,
3748                 * we can't enable the SAGV.
3749                 */
3750                if (latency < sagv_block_time_us)
3751                        return false;
3752        }
3753
3754        return true;
3755}
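
/*
 * Example check (latency values assumed): on gen9 the SAGV block time
 * is 30 us, so a plane whose highest enabled level sits at 34 us
 * passes, while one topping out at 20 us fails unless the X-tiled
 * bandwidth WA bumps its effective latency by 15 us to 35 us.
 */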
3756
3757static void
3758skl_ddb_get_pipe_allocation_limits(struct drm_device *dev,
3759                                   const struct intel_crtc_state *cstate,
3760                                   struct skl_ddb_entry *alloc, /* out */
3761                                   int *num_active /* out */)
3762{
3763        struct drm_atomic_state *state = cstate->base.state;
3764        struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
3765        struct drm_i915_private *dev_priv = to_i915(dev);
3766        struct drm_crtc *for_crtc = cstate->base.crtc;
3767        unsigned int pipe_size, ddb_size;
3768        int nth_active_pipe;
3769
3770        if (WARN_ON(!state) || !cstate->base.active) {
3771                alloc->start = 0;
3772                alloc->end = 0;
3773                *num_active = hweight32(dev_priv->active_crtcs);
3774                return;
3775        }
3776
3777        if (intel_state->active_pipe_changes)
3778                *num_active = hweight32(intel_state->active_crtcs);
3779        else
3780                *num_active = hweight32(dev_priv->active_crtcs);
3781
3782        ddb_size = INTEL_INFO(dev_priv)->ddb_size;
3783        WARN_ON(ddb_size == 0);
3784
3785        if (INTEL_GEN(dev_priv) < 11)
3786                ddb_size -= 4; /* 4 blocks for bypass path allocation */
3787
3788        /*
3789         * If the state doesn't change the active CRTCs, then there's
3790         * no need to recalculate; the existing pipe allocation limits
3791         * should remain unchanged.  Note that we're safe from racing
3792         * commits since any racing commit that changes the active CRTC
3793         * list would need to grab _all_ crtc locks, including the one
3794         * we currently hold.
3795         */
3796        if (!intel_state->active_pipe_changes) {
3797                /*
3798                 * alloc may be cleared by clear_intel_crtc_state,
3799                 * copy from old state to be sure
3800                 */
3801                *alloc = to_intel_crtc_state(for_crtc->state)->wm.skl.ddb;
3802                return;
3803        }
3804
3805        nth_active_pipe = hweight32(intel_state->active_crtcs &
3806                                    (drm_crtc_mask(for_crtc) - 1));
3807        pipe_size = ddb_size / hweight32(intel_state->active_crtcs);
3808        alloc->start = nth_active_pipe * ddb_size / *num_active;
3809        alloc->end = alloc->start + pipe_size;
3810}
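
/*
 * Allocation sketch (DDB size assumed): with an 896-block DDB, 4
 * blocks go to the bypass path on pre-gen11, leaving 892. With two
 * active pipes each gets pipe_size = 892 / 2 = 446 blocks, so the
 * second pipe (nth_active_pipe == 1) spans [446, 892).
 */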
3811
3812static unsigned int skl_cursor_allocation(int num_active)
3813{
3814        if (num_active == 1)
3815                return 32;
3816
3817        return 8;
3818}
3819
3820static void skl_ddb_entry_init_from_hw(struct skl_ddb_entry *entry, u32 reg)
3821{
3822        entry->start = reg & 0x3ff;
3823        entry->end = (reg >> 16) & 0x3ff;
3824        if (entry->end)
3825                entry->end += 1;
3826}
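
/*
 * Example decode (register value assumed): val = 0x00bf0060 yields
 * start = 0x60 (96) and end = 0xbf + 1 (192), i.e. DDB blocks
 * [96, 192); an all-zero register leaves the entry disabled with
 * start == end == 0.
 */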
3827
3828void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
3829                          struct skl_ddb_allocation *ddb /* out */)
3830{
3831        struct intel_crtc *crtc;
3832
3833        memset(ddb, 0, sizeof(*ddb));
3834
3835        for_each_intel_crtc(&dev_priv->drm, crtc) {
3836                enum intel_display_power_domain power_domain;
3837                enum plane_id plane_id;
3838                enum pipe pipe = crtc->pipe;
3839
3840                power_domain = POWER_DOMAIN_PIPE(pipe);
3841                if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
3842                        continue;
3843
3844                for_each_plane_id_on_crtc(crtc, plane_id) {
3845                        u32 val;
3846
3847                        if (plane_id != PLANE_CURSOR)
3848                                val = I915_READ(PLANE_BUF_CFG(pipe, plane_id));
3849                        else
3850                                val = I915_READ(CUR_BUF_CFG(pipe));
3851
3852                        skl_ddb_entry_init_from_hw(&ddb->plane[pipe][plane_id], val);
3853                }
3854
3855                intel_display_power_put(dev_priv, power_domain);
3856        }
3857}
3858
3859/*
3860 * Determines the downscale amount of a plane for the purposes of watermark calculations.
3861 * The bspec defines downscale amount as:
3862 *
3863 * """
3864 * Horizontal down scale amount = maximum[1, Horizontal source size /
3865 *                                           Horizontal destination size]
3866 * Vertical down scale amount = maximum[1, Vertical source size /
3867 *                                         Vertical destination size]
3868 * Total down scale amount = Horizontal down scale amount *
3869 *                           Vertical down scale amount
3870 * """
3871 *
3872 * Return value is provided in 16.16 fixed point form to retain fractional part.
3873 * Caller should take care of dividing & rounding off the value.
3874 */
3875static uint_fixed_16_16_t
3876skl_plane_downscale_amount(const struct intel_crtc_state *cstate,
3877                           const struct intel_plane_state *pstate)
3878{
3879        struct intel_plane *plane = to_intel_plane(pstate->base.plane);
3880        uint32_t src_w, src_h, dst_w, dst_h;
3881        uint_fixed_16_16_t fp_w_ratio, fp_h_ratio;
3882        uint_fixed_16_16_t downscale_h, downscale_w;
3883
3884        if (WARN_ON(!intel_wm_plane_visible(cstate, pstate)))
3885                return u32_to_fixed16(0);
3886
3887        /* n.b., src is 16.16 fixed point, dst is whole integer */
3888        if (plane->id == PLANE_CURSOR) {
3889                /*
3890                 * Cursors only support 0/180 degree rotation,
3891                 * hence no need to account for rotation here.
3892                 */
3893                src_w = pstate->base.src_w >> 16;
3894                src_h = pstate->base.src_h >> 16;
3895                dst_w = pstate->base.crtc_w;
3896                dst_h = pstate->base.crtc_h;
3897        } else {
3898                /*
3899                 * Src coordinates are already rotated by 270 degrees for
3900                 * the 90/270 degree plane rotation cases (to match the
3901                 * GTT mapping), hence no need to account for rotation here.
3902                 */
3903                src_w = drm_rect_width(&pstate->base.src) >> 16;
3904                src_h = drm_rect_height(&pstate->base.src) >> 16;
3905                dst_w = drm_rect_width(&pstate->base.dst);
3906                dst_h = drm_rect_height(&pstate->base.dst);
3907        }
3908
3909        fp_w_ratio = div_fixed16(src_w, dst_w);
3910        fp_h_ratio = div_fixed16(src_h, dst_h);
3911        downscale_w = max_fixed16(fp_w_ratio, u32_to_fixed16(1));
3912        downscale_h = max_fixed16(fp_h_ratio, u32_to_fixed16(1));
3913
3914        return mul_fixed16(downscale_w, downscale_h);
3915}
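
/*
 * Worked example (plane sizes assumed): a 3840x2160 source scanned out
 * at 1920x1080 gives fp_w_ratio = fp_h_ratio = 2.0, so the result is
 * 4.0 in 16.16 fixed point (0x00040000); unscaled or upscaled planes
 * clamp both ratios to 1.0 and return 1.0.
 */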
3916
3917static uint_fixed_16_16_t
3918skl_pipe_downscale_amount(const struct intel_crtc_state *crtc_state)
3919{
3920        uint_fixed_16_16_t pipe_downscale = u32_to_fixed16(1);
3921
3922        if (!crtc_state->base.enable)
3923                return pipe_downscale;
3924
3925        if (crtc_state->pch_pfit.enabled) {
3926                uint32_t src_w, src_h, dst_w, dst_h;
3927                uint32_t pfit_size = crtc_state->pch_pfit.size;
3928                uint_fixed_16_16_t fp_w_ratio, fp_h_ratio;
3929                uint_fixed_16_16_t downscale_h, downscale_w;
3930
3931                src_w = crtc_state->pipe_src_w;
3932                src_h = crtc_state->pipe_src_h;
3933                dst_w = pfit_size >> 16;
3934                dst_h = pfit_size & 0xffff;
3935
3936                if (!dst_w || !dst_h)
3937                        return pipe_downscale;
3938
3939                fp_w_ratio = div_fixed16(src_w, dst_w);
3940                fp_h_ratio = div_fixed16(src_h, dst_h);
3941                downscale_w = max_fixed16(fp_w_ratio, u32_to_fixed16(1));
3942                downscale_h = max_fixed16(fp_h_ratio, u32_to_fixed16(1));
3943
3944                pipe_downscale = mul_fixed16(downscale_w, downscale_h);
3945        }
3946
3947        return pipe_downscale;
3948}
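
/*
 * Example (pfit_size assumed): pch_pfit.size packs the destination
 * width in the high 16 bits and height in the low 16, so 0x07800438
 * decodes to 1920x1080; a 2560x1440 pipe source then yields a pipe
 * downscale of (2560/1920) * (1440/1080) ~= 1.78 in 16.16 fixed point.
 */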
3949
3950int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc,
3951                                  struct intel_crtc_state *cstate)
3952{
3953        struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev);
3954        struct drm_crtc_state *crtc_state = &cstate->base;
3955        struct drm_atomic_state *state = crtc_state->state;
3956        struct drm_plane *plane;
3957        const struct drm_plane_state *pstate;
3958        struct intel_plane_state *intel_pstate;
3959        int crtc_clock, dotclk;
3960        uint32_t pipe_max_pixel_rate;
3961        uint_fixed_16_16_t pipe_downscale;
3962        uint_fixed_16_16_t max_downscale = u32_to_fixed16(1);
3963
3964        if (!cstate->base.enable)
3965                return 0;
3966
3967        drm_atomic_crtc_state_for_each_plane_state(plane, pstate, crtc_state) {
3968                uint_fixed_16_16_t plane_downscale;
3969                uint_fixed_16_16_t fp_9_div_8 = div_fixed16(9, 8);
3970                int bpp;
3971
3972                if (!intel_wm_plane_visible(cstate,
3973                                            to_intel_plane_state(pstate)))
3974                        continue;
3975
3976                if (WARN_ON(!pstate->fb))
3977                        return -EINVAL;
3978
3979                intel_pstate = to_intel_plane_state(pstate);
3980                plane_downscale = skl_plane_downscale_amount(cstate,
3981                                                             intel_pstate);
3982                bpp = pstate->fb->format->cpp[0] * 8;
3983                if (bpp == 64)
3984                        plane_downscale = mul_fixed16(plane_downscale,
3985                                                      fp_9_div_8);
3986
3987                max_downscale = max_fixed16(plane_downscale, max_downscale);
3988        }
3989        pipe_downscale = skl_pipe_downscale_amount(cstate);
3990
3991        pipe_downscale = mul_fixed16(pipe_downscale, max_downscale);
3992
3993        crtc_clock = crtc_state->adjusted_mode.crtc_clock;
3994        dotclk = to_intel_atomic_state(state)->cdclk.logical.cdclk;
3995
3996        if (IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) >= 10)
3997                dotclk *= 2;
3998
3999        pipe_max_pixel_rate = div_round_up_u32_fixed16(dotclk, pipe_downscale);
4000
4001        if (pipe_max_pixel_rate < crtc_clock) {
4002                DRM_DEBUG_KMS("Max supported pixel clock with scaling exceeded\n");
4003                return -EINVAL;
4004        }
4005
4006        return 0;
4007}
4008
4009static unsigned int
4010skl_plane_relative_data_rate(const struct intel_crtc_state *cstate,
4011                             const struct drm_plane_state *pstate,
4012                             int y)
4013{
4014        struct intel_plane *plane = to_intel_plane(pstate->plane);
4015        struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate);
4016        uint32_t data_rate;
4017        uint32_t width = 0, height = 0;
4018        struct drm_framebuffer *fb;
4019        u32 format;
4020        uint_fixed_16_16_t down_scale_amount;
4021
4022        if (!intel_pstate->base.visible)
4023                return 0;
4024
4025        fb = pstate->fb;
4026        format = fb->format->format;
4027
4028        if (plane->id == PLANE_CURSOR)
4029                return 0;
4030        if (y && format != DRM_FORMAT_NV12)
4031                return 0;
4032
4033        /*
4034         * Src coordinates are already rotated by 270 degrees for
4035         * the 90/270 degree plane rotation cases (to match the
4036         * GTT mapping), hence no need to account for rotation here.
4037         */
4038        width = drm_rect_width(&intel_pstate->base.src) >> 16;
4039        height = drm_rect_height(&intel_pstate->base.src) >> 16;
4040
4041        /* for planar format */
4042        if (format == DRM_FORMAT_NV12) {
4043                if (y)  /* y-plane data rate */
4044                        data_rate = width * height *
4045                                fb->format->cpp[0];
4046                else    /* uv-plane data rate */
4047                        data_rate = (width / 2) * (height / 2) *
4048                                fb->format->cpp[1];
4049        } else {
4050                /* for packed formats */
4051                data_rate = width * height * fb->format->cpp[0];
4052        }
4053
4054        down_scale_amount = skl_plane_downscale_amount(cstate, intel_pstate);
4055
4056        return mul_round_up_u32_fixed16(data_rate, down_scale_amount);
4057}
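
/*
 * Example rate (framebuffer assumed): a full-screen 1920x1080
 * XRGB8888 plane with no downscaling contributes 1920 * 1080 * 4 =
 * 8294400; an NV12 plane of the same size is counted in two passes,
 * 1920 * 1080 * 1 for the y-plane and 960 * 540 * 2 for the uv-plane.
 */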
4058
4059/*
4060 * We don't overflow 32 bits. Worst case is 3 planes enabled, each fetching
4061 * an 8192x4096@32bpp framebuffer:
4062 *   3 * 4096 * 8192  * 4 < 2^32
4063 */
4064static unsigned int
4065skl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate,
4066                                 unsigned *plane_data_rate,
4067                                 unsigned *plane_y_data_rate)
4068{
4069        struct drm_crtc_state *cstate = &intel_cstate->base;
4070        struct drm_atomic_state *state = cstate->state;
4071        struct drm_plane *plane;
4072        const struct drm_plane_state *pstate;
4073        unsigned int total_data_rate = 0;
4074
4075        if (WARN_ON(!state))
4076                return 0;
4077
4078        /* Calculate and cache data rate for each plane */
4079        drm_atomic_crtc_state_for_each_plane_state(plane, pstate, cstate) {
4080                enum plane_id plane_id = to_intel_plane(plane)->id;
4081                unsigned int rate;
4082
4083                /* packed/uv */
4084                rate = skl_plane_relative_data_rate(intel_cstate,
4085                                                    pstate, 0);
4086                plane_data_rate[plane_id] = rate;
4087
4088                total_data_rate += rate;
4089
4090                /* y-plane */
4091                rate = skl_plane_relative_data_rate(intel_cstate,
4092                                                    pstate, 1);
4093                plane_y_data_rate[plane_id] = rate;
4094
4095                total_data_rate += rate;
4096        }
4097
4098        return total_data_rate;
4099}
4100
4101static uint16_t
4102skl_ddb_min_alloc(const struct drm_plane_state *pstate,
4103                  const int y)
4104{
4105        struct drm_framebuffer *fb = pstate->fb;
4106        struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate);
4107        uint32_t src_w, src_h;
4108        uint32_t min_scanlines = 8;
4109        uint8_t plane_bpp;
4110
4111        if (WARN_ON(!fb))
4112                return 0;
4113
4114        /* Packed formats have no y-plane, so return 0 */
4115        if (y && fb->format->format != DRM_FORMAT_NV12)
4116                return 0;
4117
4118        /* For non-Y-tiled formats, return 8 blocks */
4119        if (fb->modifier != I915_FORMAT_MOD_Y_TILED &&
4120            fb->modifier != I915_FORMAT_MOD_Yf_TILED &&
4121            fb->modifier != I915_FORMAT_MOD_Y_TILED_CCS &&
4122            fb->modifier != I915_FORMAT_MOD_Yf_TILED_CCS)
4123                return 8;
4124
4125        /*
4126         * Src coordinates are already rotated by 270 degrees for
4127         * the 90/270 degree plane rotation cases (to match the
4128         * GTT mapping), hence no need to account for rotation here.
4129         */
4130        src_w = drm_rect_width(&intel_pstate->base.src) >> 16;
4131        src_h = drm_rect_height(&intel_pstate->base.src) >> 16;
4132
4133        /* For the NV12 uv-plane, halve width/height and use cpp[1] */
4134        if (fb->format->format == DRM_FORMAT_NV12 && !y) {
4135                src_w /= 2;
4136                src_h /= 2;
4137                plane_bpp = fb->format->cpp[1];
4138        } else {
4139                plane_bpp = fb->format->cpp[0];
4140        }
4143
4144        if (drm_rotation_90_or_270(pstate->rotation)) {
4145                switch (plane_bpp) {
4146                case 1:
4147                        min_scanlines = 32;
4148                        break;
4149                case 2:
4150                        min_scanlines = 16;
4151                        break;
4152                case 4:
4153                        min_scanlines = 8;
4154                        break;
4155                case 8:
4156                        min_scanlines = 4;
4157                        break;
4158                default:
4159                        WARN(1, "Unsupported pixel depth %u for rotation",
4160                             plane_bpp);
4161                        min_scanlines = 32;
4162                }
4163        }
4164
4165        return DIV_ROUND_UP((4 * src_w * plane_bpp), 512) * min_scanlines/4 + 3;
4166}
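    /*
     * Worked example for the formula above, with assumed (not BSpec-quoted)
     * values: a Y-tiled 3840-wide plane at 32bpp without 90/270 rotation
     * keeps the default min_scanlines of 8, so the result is
     * DIV_ROUND_UP(4 * 3840 * 4, 512) * 8 / 4 + 3 = 120 * 2 + 3 = 243 blocks.
     */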
4167
4168static void
4169skl_ddb_calc_min(const struct intel_crtc_state *cstate, int num_active,
4170                 uint16_t *minimum, uint16_t *y_minimum)
4171{
4172        const struct drm_plane_state *pstate;
4173        struct drm_plane *plane;
4174
4175        drm_atomic_crtc_state_for_each_plane_state(plane, pstate, &cstate->base) {
4176                enum plane_id plane_id = to_intel_plane(plane)->id;
4177
4178                if (plane_id == PLANE_CURSOR)
4179                        continue;
4180
4181                if (!pstate->visible)
4182                        continue;
4183
4184                minimum[plane_id] = skl_ddb_min_alloc(pstate, 0);
4185                y_minimum[plane_id] = skl_ddb_min_alloc(pstate, 1);
4186        }
4187
4188        minimum[PLANE_CURSOR] = skl_cursor_allocation(num_active);
4189}
4190
4191static int
4192skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
4193                      struct skl_ddb_allocation *ddb /* out */)
4194{
4195        struct drm_atomic_state *state = cstate->base.state;
4196        struct drm_crtc *crtc = cstate->base.crtc;
4197        struct drm_device *dev = crtc->dev;
4198        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
4199        enum pipe pipe = intel_crtc->pipe;
4200        struct skl_ddb_entry *alloc = &cstate->wm.skl.ddb;
4201        uint16_t alloc_size, start;
4202        uint16_t minimum[I915_MAX_PLANES] = {};
4203        uint16_t y_minimum[I915_MAX_PLANES] = {};
4204        unsigned int total_data_rate;
4205        enum plane_id plane_id;
4206        int num_active;
4207        unsigned plane_data_rate[I915_MAX_PLANES] = {};
4208        unsigned plane_y_data_rate[I915_MAX_PLANES] = {};
4209        uint16_t total_min_blocks = 0;
4210
4211        /* Clear the partitioning for disabled planes. */
4212        memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe]));
4213        memset(ddb->y_plane[pipe], 0, sizeof(ddb->y_plane[pipe]));
4214
4215        if (WARN_ON(!state))
4216                return 0;
4217
4218        if (!cstate->base.active) {
4219                alloc->start = alloc->end = 0;
4220                return 0;
4221        }
4222
4223        skl_ddb_get_pipe_allocation_limits(dev, cstate, alloc, &num_active);
4224        alloc_size = skl_ddb_entry_size(alloc);
4225        if (alloc_size == 0)
4226                return 0;
4227
4228        skl_ddb_calc_min(cstate, num_active, minimum, y_minimum);
4229
4230        /*
4231         * 1. Allocate the minimum required blocks for each active plane,
4232         * and allocate the cursor; it doesn't require extra allocation
4233         * proportional to the data rate.
4234         */
4235
4236        for_each_plane_id_on_crtc(intel_crtc, plane_id) {
4237                total_min_blocks += minimum[plane_id];
4238                total_min_blocks += y_minimum[plane_id];
4239        }
4240
4241        if (total_min_blocks > alloc_size) {
4242                DRM_DEBUG_KMS("Requested display configuration exceeds system DDB limitations\n");
4243                DRM_DEBUG_KMS("minimum required %d/%d\n", total_min_blocks,
4244                              alloc_size);
4245                return -EINVAL;
4246        }
4247
4248        alloc_size -= total_min_blocks;
4249        ddb->plane[pipe][PLANE_CURSOR].start = alloc->end - minimum[PLANE_CURSOR];
4250        ddb->plane[pipe][PLANE_CURSOR].end = alloc->end;
4251
4252        /*
4253         * 2. Distribute the remaining space in proportion to the amount of
4254         * data each plane needs to fetch from memory.
4255         *
4256         * FIXME: we may not allocate every single block here.
4257         */
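            /*
             * Illustrative arithmetic (assumed numbers): with 400 blocks left
             * after the minimums, a plane fetching 1/4 of the total data rate
             * gets its minimum plus 400 * 1/4 = 100 extra blocks.
             */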
4258        total_data_rate = skl_get_total_relative_data_rate(cstate,
4259                                                           plane_data_rate,
4260                                                           plane_y_data_rate);
4261        if (total_data_rate == 0)
4262                return 0;
4263
4264        start = alloc->start;
4265        for_each_plane_id_on_crtc(intel_crtc, plane_id) {
4266                unsigned int data_rate, y_data_rate;
4267                uint16_t plane_blocks, y_plane_blocks = 0;
4268
4269                if (plane_id == PLANE_CURSOR)
4270                        continue;
4271
4272                data_rate = plane_data_rate[plane_id];
4273
4274                /*
4275                 * Allocation for packed formats, or the uv-plane part of a planar
4276                 * format: promote the expression to 64 bits to avoid overflow; the
4277                 * result fits the available space since data_rate / total_data_rate < 1.
4278                 */
4279                plane_blocks = minimum[plane_id];
4280                plane_blocks += div_u64((uint64_t)alloc_size * data_rate,
4281                                        total_data_rate);
4282
4283                /* Leave disabled planes at (0,0) */
4284                if (data_rate) {
4285                        ddb->plane[pipe][plane_id].start = start;
4286                        ddb->plane[pipe][plane_id].end = start + plane_blocks;
4287                }
4288
4289                start += plane_blocks;
4290
4291                /*
4292                 * Allocation for the y-plane part of a planar format:
4293                 */
4294                y_data_rate = plane_y_data_rate[plane_id];
4295
4296                y_plane_blocks = y_minimum[plane_id];
4297                y_plane_blocks += div_u64((uint64_t)alloc_size * y_data_rate,
4298                                        total_data_rate);
4299
4300                if (y_data_rate) {
4301                        ddb->y_plane[pipe][plane_id].start = start;
4302                        ddb->y_plane[pipe][plane_id].end = start + y_plane_blocks;
4303                }
4304
4305                start += y_plane_blocks;
4306        }
4307
4308        return 0;
4309}
4310
4311/*
4312 * The max latency should be 257 (the maximum the punit can encode is 255,
4313 * plus 2us for the read latency), and cpp should always be <= 8, which
4314 * allows a pixel_rate of up to ~2 GHz. That seems sufficient, since max
4315 * 2xcdclk is 1350 MHz and the pixel rate should never exceed that.
4316 */
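    /*
     * Bounding the 32-bit intermediate below, assuming pixel_rate is in kHz
     * (consistent with the 1000 in the divisor):
     * 257 * 2,000,000 * 8 = 4,112,000,000 < 2^32 = 4,294,967,296.
     */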
4317static uint_fixed_16_16_t
4318skl_wm_method1(const struct drm_i915_private *dev_priv, uint32_t pixel_rate,
4319               uint8_t cpp, uint32_t latency, uint32_t dbuf_block_size)
4320{
4321        uint32_t wm_intermediate_val;
4322        uint_fixed_16_16_t ret;
4323
4324        if (latency == 0)
4325                return FP_16_16_MAX;
4326
4327        wm_intermediate_val = latency * pixel_rate * cpp;
4328        ret = div_fixed16(wm_intermediate_val, 1000 * dbuf_block_size);
4329
4330        if (INTEL_GEN(dev_priv) >= 10)
4331                ret = add_fixed16_u32(ret, 1);
4332
4333        return ret;
4334}
4335
4336static uint_fixed_16_16_t skl_wm_method2(uint32_t pixel_rate,
4337                        uint32_t pipe_htotal,
4338                        uint32_t latency,
4339                        uint_fixed_16_16_t plane_blocks_per_line)
4340{
4341        uint32_t wm_intermediate_val;
4342        uint_fixed_16_16_t ret;
4343
4344        if (latency == 0)
4345                return FP_16_16_MAX;
4346
4347        wm_intermediate_val = latency * pixel_rate;
4348        wm_intermediate_val = DIV_ROUND_UP(wm_intermediate_val,
4349                                           pipe_htotal * 1000);
4350        ret = mul_u32_fixed16(wm_intermediate_val, plane_blocks_per_line);
4351        return ret;
4352}
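    /*
     * skl_wm_method1() converts the latency directly into blocks fetched at
     * the plane's pixel rate; skl_wm_method2() first rounds the latency up
     * to whole scanlines against htotal and then converts those into blocks
     * via plane_blocks_per_line. skl_compute_plane_wm() chooses between the
     * two based on tiling, latency and the available DDB space.
     */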
4353
4354static uint_fixed_16_16_t
4355intel_get_linetime_us(struct intel_crtc_state *cstate)
4356{
4357        uint32_t pixel_rate;
4358        uint32_t crtc_htotal;
4359        uint_fixed_16_16_t linetime_us;
4360
4361        if (!cstate->base.active)
4362                return u32_to_fixed16(0);
4363
4364        pixel_rate = cstate->pixel_rate;
4365
4366        if (WARN_ON(pixel_rate == 0))
4367                return u32_to_fixed16(0);
4368
4369        crtc_htotal = cstate->base.adjusted_mode.crtc_htotal;
4370        linetime_us = div_fixed16(crtc_htotal * 1000, pixel_rate);
4371
4372        return linetime_us;
4373}
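    /*
     * Example with an assumed mode: crtc_htotal = 2200 at pixel_rate =
     * 148,500 kHz (1080p60) gives 2200 * 1000 / 148500 ~= 14.8 us per line.
     */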
4374
4375static uint32_t
4376skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cstate,
4377                              const struct intel_plane_state *pstate)
4378{
4379        uint64_t adjusted_pixel_rate;
4380        uint_fixed_16_16_t downscale_amount;
4381
4382        /* Shouldn't reach here on disabled planes... */
4383        if (WARN_ON(!intel_wm_plane_visible(cstate, pstate)))
4384                return 0;
4385
4386        /*
4387         * Adjusted plane pixel rate is just the pipe's adjusted pixel rate
4388         * with additional adjustments for plane-specific scaling.
4389         */
4390        adjusted_pixel_rate = cstate->pixel_rate;
4391        downscale_amount = skl_plane_downscale_amount(cstate, pstate);
4392
4393        return mul_round_up_u32_fixed16(adjusted_pixel_rate,
4394                                            downscale_amount);
4395}
4396
4397static int
4398skl_compute_plane_wm_params(const struct drm_i915_private *dev_priv,
4399                            struct intel_crtc_state *cstate,
4400                            const struct intel_plane_state *intel_pstate,
4401                            struct skl_wm_params *wp)
4402{
4403        struct intel_plane *plane = to_intel_plane(intel_pstate->base.plane);
4404        const struct drm_plane_state *pstate = &intel_pstate->base;
4405        const struct drm_framebuffer *fb = pstate->fb;
4406        uint32_t interm_pbpl;
4407        struct intel_atomic_state *state =
4408                to_intel_atomic_state(cstate->base.state);
4409        bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);
4410
4411        if (!intel_wm_plane_visible(cstate, intel_pstate))
4412                return 0;
4413
4414        wp->y_tiled = fb->modifier == I915_FORMAT_MOD_Y_TILED ||
4415                      fb->modifier == I915_FORMAT_MOD_Yf_TILED ||
4416                      fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
4417                      fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
4418        wp->x_tiled = fb->modifier == I915_FORMAT_MOD_X_TILED;
4419        wp->rc_surface = fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
4420                         fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
4421
4422        if (plane->id == PLANE_CURSOR) {
4423                wp->width = intel_pstate->base.crtc_w;
4424        } else {
4425                /*
4426                 * Src coordinates are already rotated by 270 degrees for
4427                 * the 90/270 degree plane rotation cases (to match the
4428                 * GTT mapping), hence no need to account for rotation here.
4429                 */
4430                wp->width = drm_rect_width(&intel_pstate->base.src) >> 16;
4431        }
4432
4433        wp->cpp = (fb->format->format == DRM_FORMAT_NV12) ? fb->format->cpp[1] :
4434                                                            fb->format->cpp[0];
4435        wp->plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate,
4436                                                             intel_pstate);
4437
4438        if (INTEL_GEN(dev_priv) >= 11 &&
4439            fb->modifier == I915_FORMAT_MOD_Yf_TILED && wp->cpp == 8)
4440                wp->dbuf_block_size = 256;
4441        else
4442                wp->dbuf_block_size = 512;
4443
4444        if (drm_rotation_90_or_270(pstate->rotation)) {
4446                switch (wp->cpp) {
4447                case 1:
4448                        wp->y_min_scanlines = 16;
4449                        break;
4450                case 2:
4451                        wp->y_min_scanlines = 8;
4452                        break;
4453                case 4:
4454                        wp->y_min_scanlines = 4;
4455                        break;
4456                default:
4457                        MISSING_CASE(wp->cpp);
4458                        return -EINVAL;
4459                }
4460        } else {
4461                wp->y_min_scanlines = 4;
4462        }
4463
4464        if (apply_memory_bw_wa)
4465                wp->y_min_scanlines *= 2;
4466
4467        wp->plane_bytes_per_line = wp->width * wp->cpp;
4468        if (wp->y_tiled) {
4469                interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line *
4470                                           wp->y_min_scanlines,
4471                                           wp->dbuf_block_size);
4472
4473                if (INTEL_GEN(dev_priv) >= 10)
4474                        interm_pbpl++;
4475
4476                wp->plane_blocks_per_line = div_fixed16(interm_pbpl,
4477                                                        wp->y_min_scanlines);
4478        } else if (wp->x_tiled && IS_GEN9(dev_priv)) {
4479                interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line,
4480                                           wp->dbuf_block_size);
4481                wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
4482        } else {
4483                interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line,
4484                                           wp->dbuf_block_size) + 1;
4485                wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
4486        }
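            /*
             * E.g. (illustrative numbers): a 3840-wide linear plane at 32bpp
             * has plane_bytes_per_line = 15360, giving
             * DIV_ROUND_UP(15360, 512) + 1 = 31 blocks per line here.
             */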
4487
4488        wp->y_tile_minimum = mul_u32_fixed16(wp->y_min_scanlines,
4489                                             wp->plane_blocks_per_line);
4490        wp->linetime_us = fixed16_to_u32_round_up(
4491                                        intel_get_linetime_us(cstate));
4492
4493        return 0;
4494}
4495
4496static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
4497                                struct intel_crtc_state *cstate,
4498                                const struct intel_plane_state *intel_pstate,
4499                                uint16_t ddb_allocation,
4500                                int level,
4501                                const struct skl_wm_params *wp,
4502                                uint16_t *out_blocks, /* out */
4503                                uint8_t *out_lines, /* out */
4504                                bool *enabled /* out */)
4505{
4506        const struct drm_plane_state *pstate = &intel_pstate->base;
4507        uint32_t latency = dev_priv->wm.skl_latency[level];
4508        uint_fixed_16_16_t method1, method2;
4509        uint_fixed_16_16_t selected_result;
4510        uint32_t res_blocks, res_lines;
4511        struct intel_atomic_state *state =
4512                to_intel_atomic_state(cstate->base.state);
4513        bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);
4514        uint32_t min_disp_buf_needed;
4515
4516        if (latency == 0 ||
4517            !intel_wm_plane_visible(cstate, intel_pstate)) {
4518                *enabled = false;
4519                return 0;
4520        }
4521
4522        /* Display WA #1141: kbl,cfl,cnl */
4523        if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv) ||
4524            IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_B0)) &&
4525            dev_priv->ipc_enabled)
4526                latency += 4;
4527
4528        if (apply_memory_bw_wa && wp->x_tiled)
4529                latency += 15;
4530
4531        method1 = skl_wm_method1(dev_priv, wp->plane_pixel_rate,
4532                                 wp->cpp, latency, wp->dbuf_block_size);
4533        method2 = skl_wm_method2(wp->plane_pixel_rate,
4534                                 cstate->base.adjusted_mode.crtc_htotal,
4535                                 latency,
4536                                 wp->plane_blocks_per_line);
4537
4538        if (wp->y_tiled) {
4539                selected_result = max_fixed16(method2, wp->y_tile_minimum);
4540        } else {
4541                if ((wp->cpp * cstate->base.adjusted_mode.crtc_htotal /
4542                     wp->dbuf_block_size < 1) &&
4543                     (wp->plane_bytes_per_line / wp->dbuf_block_size < 1))
4544                        selected_result = method2;
4545                else if (ddb_allocation >=
4546                         fixed16_to_u32_round_up(wp->plane_blocks_per_line))
4547                        selected_result = min_fixed16(method1, method2);
4548                else if (latency >= wp->linetime_us)
4549                        selected_result = min_fixed16(method1, method2);
4550                else
4551                        selected_result = method1;
4552        }
4553
4554        res_blocks = fixed16_to_u32_round_up(selected_result) + 1;
4555        res_lines = div_round_up_fixed16(selected_result,
4556                                         wp->plane_blocks_per_line);
4557
4558        /* Display WA #1125: skl,bxt,kbl,glk */
4559        if (level == 0 && wp->rc_surface)
4560                res_blocks += fixed16_to_u32_round_up(wp->y_tile_minimum);
4561
4562        /* Display WA #1126: skl,bxt,kbl,glk */
4563        if (level >= 1 && level <= 7) {
4564                if (wp->y_tiled) {
4565                        res_blocks += fixed16_to_u32_round_up(
4566                                                        wp->y_tile_minimum);
4567                        res_lines += wp->y_min_scanlines;
4568                } else {
4569                        res_blocks++;
4570                }
4571        }
4572
4573        if (INTEL_GEN(dev_priv) >= 11) {
4574                if (wp->y_tiled) {
4575                        uint32_t extra_lines;
4576                        uint_fixed_16_16_t fp_min_disp_buf_needed;
4577
4578                        if (res_lines % wp->y_min_scanlines == 0)
4579                                extra_lines = wp->y_min_scanlines;
4580                        else
4581                                extra_lines = wp->y_min_scanlines * 2 -
4582                                              res_lines % wp->y_min_scanlines;
4583
4584                        fp_min_disp_buf_needed = mul_u32_fixed16(res_lines +
4585                                                extra_lines,
4586                                                wp->plane_blocks_per_line);
4587                        min_disp_buf_needed = fixed16_to_u32_round_up(
4588                                                fp_min_disp_buf_needed);
4589                } else {
4590                        min_disp_buf_needed = DIV_ROUND_UP(res_blocks * 11, 10);
4591                }
4592        } else {
4593                min_disp_buf_needed = res_blocks;
4594        }
4595
4596        if ((level > 0 && res_lines > 31) ||
4597            res_blocks >= ddb_allocation ||
4598            min_disp_buf_needed >= ddb_allocation) {
4599                *enabled = false;
4600
4601                /*
4602                 * If there are no valid level 0 watermarks, then we can't
4603                 * support this display configuration.
4604                 */
4605                if (level) {
4606                        return 0;
4607                } else {
4608                        struct drm_plane *plane = pstate->plane;
4609
4610                        DRM_DEBUG_KMS("Requested display configuration exceeds system watermark limitations\n");
4611                        DRM_DEBUG_KMS("[PLANE:%d:%s] blocks required = %u/%u, lines required = %u/31\n",
4612                                      plane->base.id, plane->name,
4613                                      res_blocks, ddb_allocation, res_lines);
4614                        return -EINVAL;
4615                }
4616        }
4617
4618        /* The number of lines is ignored for the level 0 watermark. */
4619        *out_lines = level ? res_lines : 0;
4620        *out_blocks = res_blocks;
4621        *enabled = true;
4622
4623        return 0;
4624}
4625
4626static int
4627skl_compute_wm_levels(const struct drm_i915_private *dev_priv,
4628                      struct skl_ddb_allocation *ddb,
4629                      struct intel_crtc_state *cstate,
4630                      const struct intel_plane_state *intel_pstate,
4631                      const struct skl_wm_params *wm_params,
4632                      struct skl_plane_wm *wm)
4633{
4634        struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
4635        struct drm_plane *plane = intel_pstate->base.plane;
4636        struct intel_plane *intel_plane = to_intel_plane(plane);
4637        uint16_t ddb_blocks;
4638        enum pipe pipe = intel_crtc->pipe;
4639        int level, max_level = ilk_wm_max_level(dev_priv);
4640        int ret;
4641
4642        if (WARN_ON(!intel_pstate->base.fb))
4643                return -EINVAL;
4644
4645        ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][intel_plane->id]);
4646
4647        for (level = 0; level <= max_level; level++) {
4648                struct skl_wm_level *result = &wm->wm[level];
4649
4650                ret = skl_compute_plane_wm(dev_priv,
4651                                           cstate,
4652                                           intel_pstate,
4653                                           ddb_blocks,
4654                                           level,
4655                                           wm_params,
4656                                           &result->plane_res_b,
4657                                           &result->plane_res_l,
4658                                           &result->plane_en);
4659                if (ret)
4660                        return ret;
4661        }
4662
4663        return 0;
4664}
4665
4666static uint32_t
4667skl_compute_linetime_wm(struct intel_crtc_state *cstate)
4668{
4669        struct drm_atomic_state *state = cstate->base.state;
4670        struct drm_i915_private *dev_priv = to_i915(state->dev);
4671        uint_fixed_16_16_t linetime_us;
4672        uint32_t linetime_wm;
4673
4674        linetime_us = intel_get_linetime_us(cstate);
4675
4676        if (is_fixed16_zero(linetime_us))
4677                return 0;
4678
4679        linetime_wm = fixed16_to_u32_round_up(mul_u32_fixed16(8, linetime_us));
4680
4681        /* Display WA #1135: bxt:ALL GLK:ALL */
4682        if ((IS_BROXTON(dev_priv) || IS_GEMINILAKE(dev_priv)) &&
4683            dev_priv->ipc_enabled)
4684                linetime_wm /= 2;
4685
4686        return linetime_wm;
4687}
4688
4689static void skl_compute_transition_wm(struct intel_crtc_state *cstate,
4690                                      struct skl_wm_params *wp,
4691                                      struct skl_wm_level *wm_l0,
4692                                      uint16_t ddb_allocation,
4693                                      struct skl_wm_level *trans_wm /* out */)
4694{
4695        struct drm_device *dev = cstate->base.crtc->dev;
4696        const struct drm_i915_private *dev_priv = to_i915(dev);
4697        uint16_t trans_min, trans_y_tile_min;
4698        const uint16_t trans_amount = 10; /* This is a configurable amount */
4699        uint16_t trans_offset_b, res_blocks;
4700
4701        if (!cstate->base.active)
4702                goto exit;
4703
4704        /* Transition WMs are not recommended by the HW team for GEN9 */
4705        if (INTEL_GEN(dev_priv) <= 9)
4706                goto exit;
4707
4708        /* Transition WMs don't make any sense if IPC is disabled */
4709        if (!dev_priv->ipc_enabled)
4710                goto exit;
4711
4712        trans_min = 0;
4713        if (INTEL_GEN(dev_priv) >= 10)
4714                trans_min = 4;
4715
4716        trans_offset_b = trans_min + trans_amount;
4717
4718        if (wp->y_tiled) {
4719                trans_y_tile_min = (uint16_t) mul_round_up_u32_fixed16(2,
4720                                                        wp->y_tile_minimum);
4721                res_blocks = max(wm_l0->plane_res_b, trans_y_tile_min) +
4722                                trans_offset_b;
4723        } else {
4724                res_blocks = wm_l0->plane_res_b + trans_offset_b;
4725
4726                /* WA BUG:1938466: add one block for non-Y-tiled planes */
4727                if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_A0))
4728                        res_blocks += 1;
4729
4730        }
4731
4732        res_blocks += 1;
4733
4734        if (res_blocks < ddb_allocation) {
4735                trans_wm->plane_res_b = res_blocks;
4736                trans_wm->plane_en = true;
4737                return;
4738        }
4739
4740exit:
4741        trans_wm->plane_en = false;
4742}
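    /*
     * In short: the transition watermark is the level 0 result (for Y-tiled,
     * at least 2 * y_tile_minimum) plus trans_offset_b plus one block, and it
     * is only enabled if that still fits within the plane's DDB allocation.
     */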
4743
4744static int skl_build_pipe_wm(struct intel_crtc_state *cstate,
4745                             struct skl_ddb_allocation *ddb,
4746                             struct skl_pipe_wm *pipe_wm)
4747{
4748        struct drm_device *dev = cstate->base.crtc->dev;
4749        struct drm_crtc_state *crtc_state = &cstate->base;
4750        const struct drm_i915_private *dev_priv = to_i915(dev);
4751        struct drm_plane *plane;
4752        const struct drm_plane_state *pstate;
4753        struct skl_plane_wm *wm;
4754        int ret;
4755
4756        /*
4757         * We'll only calculate watermarks for planes that are actually
4758         * enabled, so make sure all other planes are set as disabled.
4759         */
4760        memset(pipe_wm->planes, 0, sizeof(pipe_wm->planes));
4761
4762        drm_atomic_crtc_state_for_each_plane_state(plane, pstate, crtc_state) {
4763                const struct intel_plane_state *intel_pstate =
4764                                                to_intel_plane_state(pstate);
4765                enum plane_id plane_id = to_intel_plane(plane)->id;
4766                struct skl_wm_params wm_params;
4767                enum pipe pipe = to_intel_crtc(cstate->base.crtc)->pipe;
4768                uint16_t ddb_blocks;
4769
4770                wm = &pipe_wm->planes[plane_id];
4771                ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][plane_id]);
4772                memset(&wm_params, 0, sizeof(struct skl_wm_params));
4773
4774                ret = skl_compute_plane_wm_params(dev_priv, cstate,
4775                                                  intel_pstate, &wm_params);
4776                if (ret)
4777                        return ret;
4778
4779                ret = skl_compute_wm_levels(dev_priv, ddb, cstate,
4780                                            intel_pstate, &wm_params, wm);
4781                if (ret)
4782                        return ret;
4783                skl_compute_transition_wm(cstate, &wm_params, &wm->wm[0],
4784                                          ddb_blocks, &wm->trans_wm);
4785        }
4786        pipe_wm->linetime = skl_compute_linetime_wm(cstate);
4787
4788        return 0;
4789}
4790
4791static void skl_ddb_entry_write(struct drm_i915_private *dev_priv,
4792                                i915_reg_t reg,
4793                                const struct skl_ddb_entry *entry)
4794{
4795        if (entry->end)
4796                I915_WRITE(reg, (entry->end - 1) << 16 | entry->start);
4797        else
4798                I915_WRITE(reg, 0);
4799}
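    /*
     * The hardware takes an inclusive (start, end) pair, end - 1 in the high
     * word and start in the low word, while the software skl_ddb_entry keeps
     * end exclusive; writing 0 disables the entry entirely.
     */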
4800
4801static void skl_write_wm_level(struct drm_i915_private *dev_priv,
4802                               i915_reg_t reg,
4803                               const struct skl_wm_level *level)
4804{
4805        uint32_t val = 0;
4806
4807        if (level->plane_en) {
4808                val |= PLANE_WM_EN;
4809                val |= level->plane_res_b;
4810                val |= level->plane_res_l << PLANE_WM_LINES_SHIFT;
4811        }
4812
4813        I915_WRITE(reg, val);
4814}
4815
4816static void skl_write_plane_wm(struct intel_crtc *intel_crtc,
4817                               const struct skl_plane_wm *wm,
4818                               const struct skl_ddb_allocation *ddb,
4819                               enum plane_id plane_id)
4820{
4821        struct drm_crtc *crtc = &intel_crtc->base;
4822        struct drm_device *dev = crtc->dev;
4823        struct drm_i915_private *dev_priv = to_i915(dev);
4824        int level, max_level = ilk_wm_max_level(dev_priv);
4825        enum pipe pipe = intel_crtc->pipe;
4826
4827        for (level = 0; level <= max_level; level++) {
4828                skl_write_wm_level(dev_priv, PLANE_WM(pipe, plane_id, level),
4829                                   &wm->wm[level]);
4830        }
4831        skl_write_wm_level(dev_priv, PLANE_WM_TRANS(pipe, plane_id),
4832                           &wm->trans_wm);
4833
4834        skl_ddb_entry_write(dev_priv, PLANE_BUF_CFG(pipe, plane_id),
4835                            &ddb->plane[pipe][plane_id]);
4836        if (INTEL_GEN(dev_priv) < 11)
4837                skl_ddb_entry_write(dev_priv,
4838                                    PLANE_NV12_BUF_CFG(pipe, plane_id),
4839                                    &ddb->y_plane[pipe][plane_id]);
4840}
4841
4842static void skl_write_cursor_wm(struct intel_crtc *intel_crtc,
4843                                const struct skl_plane_wm *wm,
4844                                const struct skl_ddb_allocation *ddb)
4845{
4846        struct drm_crtc *crtc = &intel_crtc->base;
4847        struct drm_device *dev = crtc->dev;
4848        struct drm_i915_private *dev_priv = to_i915(dev);
4849        int level, max_level = ilk_wm_max_level(dev_priv);
4850        enum pipe pipe = intel_crtc->pipe;
4851
4852        for (level = 0; level <= max_level; level++) {
4853                skl_write_wm_level(dev_priv, CUR_WM(pipe, level),
4854                                   &wm->wm[level]);
4855        }
4856        skl_write_wm_level(dev_priv, CUR_WM_TRANS(pipe), &wm->trans_wm);
4857
4858        skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe),
4859                            &ddb->plane[pipe][PLANE_CURSOR]);
4860}
4861
4862bool skl_wm_level_equals(const struct skl_wm_level *l1,
4863                         const struct skl_wm_level *l2)
4864{
4865        if (l1->plane_en != l2->plane_en)
4866                return false;
4867
4868        /* If the level is disabled in both, the rest shouldn't matter */
4869        if (!l1->plane_en)
4870                return true;
4871
4872        return (l1->plane_res_l == l2->plane_res_l &&
4873                l1->plane_res_b == l2->plane_res_b);
4874}
4875
4876static inline bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a,
4877                                           const struct skl_ddb_entry *b)
4878{
4879        return a->start < b->end && b->start < a->end;
4880}
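    /* DDB entries are half-open [start, end), hence the strict compares. */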
4881
4882bool skl_ddb_allocation_overlaps(struct drm_i915_private *dev_priv,
4883                                 const struct skl_ddb_entry **entries,
4884                                 const struct skl_ddb_entry *ddb,
4885                                 int ignore)
4886{
4887        enum pipe pipe;
4888
4889        for_each_pipe(dev_priv, pipe) {
4890                if (pipe != ignore && entries[pipe] &&
4891                    skl_ddb_entries_overlap(ddb, entries[pipe]))
4892                        return true;
4893        }
4894
4895        return false;
4896}
4897
4898static int skl_update_pipe_wm(struct drm_crtc_state *cstate,
4899                              const struct skl_pipe_wm *old_pipe_wm,
4900                              struct skl_pipe_wm *pipe_wm, /* out */
4901                              struct skl_ddb_allocation *ddb, /* out */
4902                              bool *changed /* out */)
4903{
4904        struct intel_crtc_state *intel_cstate = to_intel_crtc_state(cstate);
4905        int ret;
4906
4907        ret = skl_build_pipe_wm(intel_cstate, ddb, pipe_wm);
4908        if (ret)
4909                return ret;
4910
4911        if (!memcmp(old_pipe_wm, pipe_wm, sizeof(*pipe_wm)))
4912                *changed = false;
4913        else
4914                *changed = true;
4915
4916        return 0;
4917}
4918
4919static uint32_t
4920pipes_modified(struct drm_atomic_state *state)
4921{
4922        struct drm_crtc *crtc;
4923        struct drm_crtc_state *cstate;
4924        uint32_t i, ret = 0;
4925
4926        for_each_new_crtc_in_state(state, crtc, cstate, i)
4927                ret |= drm_crtc_mask(crtc);
4928
4929        return ret;
4930}
4931
4932static int
4933skl_ddb_add_affected_planes(struct intel_crtc_state *cstate)
4934{
4935        struct drm_atomic_state *state = cstate->base.state;
4936        struct drm_device *dev = state->dev;
4937        struct drm_crtc *crtc = cstate->base.crtc;
4938        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
4939        struct drm_i915_private *dev_priv = to_i915(dev);
4940        struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
4941        struct skl_ddb_allocation *new_ddb = &intel_state->wm_results.ddb;
4942        struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb;
4943        struct drm_plane_state *plane_state;
4944        struct drm_plane *plane;
4945        enum pipe pipe = intel_crtc->pipe;
4946
4947        WARN_ON(!drm_atomic_get_existing_crtc_state(state, crtc));
4948
4949        drm_for_each_plane_mask(plane, dev, cstate->base.plane_mask) {
4950                enum plane_id plane_id = to_intel_plane(plane)->id;
4951
4952                if (skl_ddb_entry_equal(&cur_ddb->plane[pipe][plane_id],
4953                                        &new_ddb->plane[pipe][plane_id]) &&
4954                    skl_ddb_entry_equal(&cur_ddb->y_plane[pipe][plane_id],
4955                                        &new_ddb->y_plane[pipe][plane_id]))
4956                        continue;
4957
4958                plane_state = drm_atomic_get_plane_state(state, plane);
4959                if (IS_ERR(plane_state))
4960                        return PTR_ERR(plane_state);
4961        }
4962
4963        return 0;
4964}
4965
4966static int
4967skl_compute_ddb(struct drm_atomic_state *state)
4968{
4969        struct drm_device *dev = state->dev;
4970        struct drm_i915_private *dev_priv = to_i915(dev);
4971        struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
4972        struct intel_crtc *intel_crtc;
4973        struct skl_ddb_allocation *ddb = &intel_state->wm_results.ddb;
4974        uint32_t realloc_pipes = pipes_modified(state);
4975        int ret;
4976
4977        /*
4978         * If this is our first atomic update following hardware readout,
4979         * we can't trust the DDB that the BIOS programmed for us.  Let's
4980         * pretend that all pipes switched active status so that we'll
4981         * ensure a full DDB recompute.
4982         */
4983        if (dev_priv->wm.distrust_bios_wm) {
4984                ret = drm_modeset_lock(&dev->mode_config.connection_mutex,
4985                                       state->acquire_ctx);
4986                if (ret)
4987                        return ret;
4988
4989                intel_state->active_pipe_changes = ~0;
4990
4991                /*
4992                 * We usually only initialize intel_state->active_crtcs if
4993                 * we're doing a modeset; make sure this field is always
4994                 * initialized during the sanitization process that happens
4995                 * on the first commit too.
4996                 */
4997                if (!intel_state->modeset)
4998                        intel_state->active_crtcs = dev_priv->active_crtcs;
4999        }
5000
5001        /*
5002         * If the modeset changes which CRTC's are active, we need to
5003         * recompute the DDB allocation for *all* active pipes, even
5004         * those that weren't otherwise being modified in any way by this
5005         * atomic commit.  Due to the shrinking of the per-pipe allocations
5006         * when new active CRTC's are added, it's possible for a pipe that
5007         * we were already using and aren't changing at all here to suddenly
5008         * become invalid if its DDB needs exceed its new allocation.
5009         *
5010         * Note that if we wind up doing a full DDB recompute, we can't let
5011         * any other display updates race with this transaction, so we need
5012         * to grab the lock on *all* CRTC's.
5013         */
5014        if (intel_state->active_pipe_changes) {
5015                realloc_pipes = ~0;
5016                intel_state->wm_results.dirty_pipes = ~0;
5017        }
5018
5019        /*
5020         * We're not recomputing for the pipes not included in the commit, so
5021         * make sure we start with the current state.
5022         */
5023        memcpy(ddb, &dev_priv->wm.skl_hw.ddb, sizeof(*ddb));
5024
5025        for_each_intel_crtc_mask(dev, intel_crtc, realloc_pipes) {
5026                struct intel_crtc_state *cstate;
5027
5028                cstate = intel_atomic_get_crtc_state(state, intel_crtc);
5029                if (IS_ERR(cstate))
5030                        return PTR_ERR(cstate);
5031
5032                ret = skl_allocate_pipe_ddb(cstate, ddb);
5033                if (ret)
5034                        return ret;
5035
5036                ret = skl_ddb_add_affected_planes(cstate);
5037                if (ret)
5038                        return ret;
5039        }
5040
5041        return 0;
5042}
5043
5044static void
5045skl_copy_wm_for_pipe(struct skl_wm_values *dst,
5046                     struct skl_wm_values *src,
5047                     enum pipe pipe)
5048{
5049        memcpy(dst->ddb.y_plane[pipe], src->ddb.y_plane[pipe],
5050               sizeof(dst->ddb.y_plane[pipe]));
5051        memcpy(dst->ddb.plane[pipe], src->ddb.plane[pipe],
5052               sizeof(dst->ddb.plane[pipe]));
5053}
5054
5055static void
5056skl_print_wm_changes(const struct drm_atomic_state *state)
5057{
5058        const struct drm_device *dev = state->dev;
5059        const struct drm_i915_private *dev_priv = to_i915(dev);
5060        const struct intel_atomic_state *intel_state =
5061                to_intel_atomic_state(state);
5062        const struct drm_crtc *crtc;
5063        const struct drm_crtc_state *cstate;
5064        const struct intel_plane *intel_plane;
5065        const struct skl_ddb_allocation *old_ddb = &dev_priv->wm.skl_hw.ddb;
5066        const struct skl_ddb_allocation *new_ddb = &intel_state->wm_results.ddb;
5067        int i;
5068
5069        for_each_new_crtc_in_state(state, crtc, cstate, i) {
5070                const struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
5071                enum pipe pipe = intel_crtc->pipe;
5072
5073                for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
5074                        enum plane_id plane_id = intel_plane->id;
5075                        const struct skl_ddb_entry *old, *new;
5076
5077                        old = &old_ddb->plane[pipe][plane_id];
5078                        new = &new_ddb->plane[pipe][plane_id];
5079
5080                        if (skl_ddb_entry_equal(old, new))
5081                                continue;
5082
5083                        DRM_DEBUG_ATOMIC("[PLANE:%d:%s] ddb (%d - %d) -> (%d - %d)\n",
5084                                         intel_plane->base.base.id,
5085                                         intel_plane->base.name,
5086                                         old->start, old->end,
5087                                         new->start, new->end);
5088                }
5089        }
5090}
5091
5092static int
5093skl_compute_wm(struct drm_atomic_state *state)
5094{
5095        struct drm_crtc *crtc;
5096        struct drm_crtc_state *cstate;
5097        struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
5098        struct skl_wm_values *results = &intel_state->wm_results;
5099        struct drm_device *dev = state->dev;
5100        struct skl_pipe_wm *pipe_wm;
5101        bool changed = false;
5102        int ret, i;
5103
5104        /*
5105         * When we distrust the BIOS wm we always need to recompute to set the
5106         * expected DDB allocations for each CRTC.
5107         */
5108        if (to_i915(dev)->wm.distrust_bios_wm)
5109                changed = true;
5110
5111        /*
5112         * If this transaction isn't actually touching any CRTC's, don't
5113         * bother with watermark calculation.  Note that if we pass this
5114         * test, we're guaranteed to hold at least one CRTC state mutex,
5115         * which means we can safely use values like dev_priv->active_crtcs
5116         * since any racing commits that want to update them would need to
5117         * hold _all_ CRTC state mutexes.
5118         */
5119        for_each_new_crtc_in_state(state, crtc, cstate, i)
5120                changed = true;
5121
5122        if (!changed)
5123                return 0;
5124
5125        /* Clear all dirty flags */
5126        results->dirty_pipes = 0;
5127
5128        ret = skl_compute_ddb(state);
5129        if (ret)
5130                return ret;
5131
5132        /*
5133         * Calculate WM's for all pipes that are part of this transaction.
5134         * Note that the DDB allocation above may have added more CRTC's that
5135         * weren't otherwise being modified (and set bits in dirty_pipes) if
5136         * pipe allocations had to change.
5137         *
5138         * FIXME:  Now that we're doing this in the atomic check phase, we
5139         * should allow skl_update_pipe_wm() to return failure in cases where
5140         * no suitable watermark values can be found.
5141         */
5142        for_each_new_crtc_in_state(state, crtc, cstate, i) {
5143                struct intel_crtc_state *intel_cstate =
5144                        to_intel_crtc_state(cstate);
5145                const struct skl_pipe_wm *old_pipe_wm =
5146                        &to_intel_crtc_state(crtc->state)->wm.skl.optimal;
5147
5148                pipe_wm = &intel_cstate->wm.skl.optimal;
5149                ret = skl_update_pipe_wm(cstate, old_pipe_wm, pipe_wm,
5150                                         &results->ddb, &changed);
5151                if (ret)
5152                        return ret;
5153
5154                if (changed)
5155                        results->dirty_pipes |= drm_crtc_mask(crtc);
5156
5157                if ((results->dirty_pipes & drm_crtc_mask(crtc)) == 0)
5158                        /* This pipe's WMs did not change */
5159                        continue;
5160
5161                intel_cstate->update_wm_pre = true;
5162        }
5163
5164        skl_print_wm_changes(state);
5165
5166        return 0;
5167}
5168
5169static void skl_atomic_update_crtc_wm(struct intel_atomic_state *state,
5170                                      struct intel_crtc_state *cstate)
5171{
5172        struct intel_crtc *crtc = to_intel_crtc(cstate->base.crtc);
5173        struct drm_i915_private *dev_priv = to_i915(state->base.dev);
5174        struct skl_pipe_wm *pipe_wm = &cstate->wm.skl.optimal;
5175        const struct skl_ddb_allocation *ddb = &state->wm_results.ddb;
5176        enum pipe pipe = crtc->pipe;
5177        enum plane_id plane_id;
5178
5179        if (!(state->wm_results.dirty_pipes & drm_crtc_mask(&crtc->base)))
5180                return;
5181
5182        I915_WRITE(PIPE_WM_LINETIME(pipe), pipe_wm->linetime);
5183
5184        for_each_plane_id_on_crtc(crtc, plane_id) {
5185                if (plane_id != PLANE_CURSOR)
5186                        skl_write_plane_wm(crtc, &pipe_wm->planes[plane_id],
5187                                           ddb, plane_id);
5188                else
5189                        skl_write_cursor_wm(crtc, &pipe_wm->planes[plane_id],
5190                                            ddb);
5191        }
5192}
5193
5194static void skl_initial_wm(struct intel_atomic_state *state,
5195                           struct intel_crtc_state *cstate)
5196{
5197        struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
5198        struct drm_device *dev = intel_crtc->base.dev;
5199        struct drm_i915_private *dev_priv = to_i915(dev);
5200        struct skl_wm_values *results = &state->wm_results;
5201        struct skl_wm_values *hw_vals = &dev_priv->wm.skl_hw;
5202        enum pipe pipe = intel_crtc->pipe;
5203
5204        if ((results->dirty_pipes & drm_crtc_mask(&intel_crtc->base)) == 0)
5205                return;
5206
5207        mutex_lock(&dev_priv->wm.wm_mutex);
5208
5209        if (cstate->base.active_changed)
5210                skl_atomic_update_crtc_wm(state, cstate);
5211
5212        skl_copy_wm_for_pipe(hw_vals, results, pipe);
5213
5214        mutex_unlock(&dev_priv->wm.wm_mutex);
5215}
5216
5217static void ilk_compute_wm_config(struct drm_device *dev,
5218                                  struct intel_wm_config *config)
5219{
5220        struct intel_crtc *crtc;
5221
5222        /* Compute the currently _active_ config */
5223        for_each_intel_crtc(dev, crtc) {
5224                const struct intel_pipe_wm *wm = &crtc->wm.active.ilk;
5225
5226                if (!wm->pipe_enabled)
5227                        continue;
5228
5229                config->sprites_enabled |= wm->sprites_enabled;
5230                config->sprites_scaled |= wm->sprites_scaled;
5231                config->num_pipes_active++;
5232        }
5233}
5234
5235static void ilk_program_watermarks(struct drm_i915_private *dev_priv)
5236{
5237        struct drm_device *dev = &dev_priv->drm;
5238        struct intel_pipe_wm lp_wm_1_2 = {}, lp_wm_5_6 = {}, *best_lp_wm;
5239        struct ilk_wm_maximums max;
5240        struct intel_wm_config config = {};
5241        struct ilk_wm_values results = {};
5242        enum intel_ddb_partitioning partitioning;
5243
5244        ilk_compute_wm_config(dev, &config);
5245
5246        ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_1_2, &max);
5247        ilk_wm_merge(dev, &config, &max, &lp_wm_1_2);
5248
5249        /* 5/6 split only in single pipe config on IVB+ */
5250        if (INTEL_GEN(dev_priv) >= 7 &&
5251            config.num_pipes_active == 1 && config.sprites_enabled) {
5252                ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_5_6, &max);
5253                ilk_wm_merge(dev, &config, &max, &lp_wm_5_6);
5254
5255                best_lp_wm = ilk_find_best_result(dev, &lp_wm_1_2, &lp_wm_5_6);
5256        } else {
5257                best_lp_wm = &lp_wm_1_2;
5258        }
5259
5260        partitioning = (best_lp_wm == &lp_wm_1_2) ?
5261                       INTEL_DDB_PART_1_2 : INTEL_DDB_PART_5_6;
5262
5263        ilk_compute_wm_results(dev, best_lp_wm, partitioning, &results);
5264
5265        ilk_write_wm_values(dev_priv, &results);
5266}
5267
5268static void ilk_initial_watermarks(struct intel_atomic_state *state,
5269                                   struct intel_crtc_state *cstate)
5270{
5271        struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
5272        struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
5273
5274        mutex_lock(&dev_priv->wm.wm_mutex);
5275        intel_crtc->wm.active.ilk = cstate->wm.ilk.intermediate;
5276        ilk_program_watermarks(dev_priv);
5277        mutex_unlock(&dev_priv->wm.wm_mutex);
5278}
5279
5280static void ilk_optimize_watermarks(struct intel_atomic_state *state,
5281                                    struct intel_crtc_state *cstate)
5282{
5283        struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
5284        struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
5285
5286        mutex_lock(&dev_priv->wm.wm_mutex);
5287        if (cstate->wm.need_postvbl_update) {
5288                intel_crtc->wm.active.ilk = cstate->wm.ilk.optimal;
5289                ilk_program_watermarks(dev_priv);
5290        }
5291        mutex_unlock(&dev_priv->wm.wm_mutex);
5292}
5293
5294static inline void skl_wm_level_from_reg_val(uint32_t val,
5295                                             struct skl_wm_level *level)
5296{
5297        level->plane_en = val & PLANE_WM_EN;
5298        level->plane_res_b = val & PLANE_WM_BLOCKS_MASK;
5299        level->plane_res_l = (val >> PLANE_WM_LINES_SHIFT) &
5300                PLANE_WM_LINES_MASK;
5301}
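    /*
     * The inverse of skl_write_wm_level(): unpack the enable bit, the blocks
     * field and the lines field from a PLANE_WM-style register value.
     */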
5302
5303void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc,
5304                              struct skl_pipe_wm *out)
5305{
5306        struct drm_i915_private *dev_priv = to_i915(crtc->dev);
5307        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
5308        enum pipe pipe = intel_crtc->pipe;
5309        int level, max_level;
5310        enum plane_id plane_id;
5311        uint32_t val;
5312
5313        max_level = ilk_wm_max_level(dev_priv);
5314
5315        for_each_plane_id_on_crtc(intel_crtc, plane_id) {
5316                struct skl_plane_wm *wm = &out->planes[plane_id];
5317
5318                for (level = 0; level <= max_level; level++) {
5319                        if (plane_id != PLANE_CURSOR)
5320                                val = I915_READ(PLANE_WM(pipe, plane_id, level));
5321                        else
5322                                val = I915_READ(CUR_WM(pipe, level));
5323
5324                        skl_wm_level_from_reg_val(val, &wm->wm[level]);
5325                }
5326
5327                if (plane_id != PLANE_CURSOR)
5328                        val = I915_READ(PLANE_WM_TRANS(pipe, plane_id));
5329                else
5330                        val = I915_READ(CUR_WM_TRANS(pipe));
5331
5332                skl_wm_level_from_reg_val(val, &wm->trans_wm);
5333        }
5334
5335        if (!intel_crtc->active)
5336                return;
5337
5338        out->linetime = I915_READ(PIPE_WM_LINETIME(pipe));
5339}
5340
5341void skl_wm_get_hw_state(struct drm_device *dev)
5342{
5343        struct drm_i915_private *dev_priv = to_i915(dev);
5344        struct skl_wm_values *hw = &dev_priv->wm.skl_hw;
5345        struct skl_ddb_allocation *ddb = &dev_priv->wm.skl_hw.ddb;
5346        struct drm_crtc *crtc;
5347        struct intel_crtc *intel_crtc;
5348        struct intel_crtc_state *cstate;
5349
5350        skl_ddb_get_hw_state(dev_priv, ddb);
5351        list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
5352                intel_crtc = to_intel_crtc(crtc);
5353                cstate = to_intel_crtc_state(crtc->state);
5354
5355                skl_pipe_wm_get_hw_state(crtc, &cstate->wm.skl.optimal);
5356
5357                if (intel_crtc->active)
5358                        hw->dirty_pipes |= drm_crtc_mask(crtc);
5359        }
5360
5361        if (dev_priv->active_crtcs) {
5362                /* Fully recompute DDB on first atomic commit */
5363                dev_priv->wm.distrust_bios_wm = true;
5364        } else {
5365                /* Easy/common case; just sanitize DDB now if everything is off */
5366                memset(ddb, 0, sizeof(*ddb));
5367        }
5368}
5369
5370static void ilk_pipe_wm_get_hw_state(struct drm_crtc *crtc)
5371{
5372        struct drm_device *dev = crtc->dev;
5373        struct drm_i915_private *dev_priv = to_i915(dev);
5374        struct ilk_wm_values *hw = &dev_priv->wm.hw;
5375        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
5376        struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state);
5377        struct intel_pipe_wm *active = &cstate->wm.ilk.optimal;
5378        enum pipe pipe = intel_crtc->pipe;
5379        static const i915_reg_t wm0_pipe_reg[] = {
5380                [PIPE_A] = WM0_PIPEA_ILK,
5381                [PIPE_B] = WM0_PIPEB_ILK,
5382                [PIPE_C] = WM0_PIPEC_IVB,
5383        };
5384
5385        hw->wm_pipe[pipe] = I915_READ(wm0_pipe_reg[pipe]);
5386        if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
5387                hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe));
5388
5389        memset(active, 0, sizeof(*active));
5390
5391        active->pipe_enabled = intel_crtc->active;
5392
5393        if (active->pipe_enabled) {
5394                u32 tmp = hw->wm_pipe[pipe];
5395
5396                /*
5397                 * For active pipes the LP0 watermark is marked as
5398                 * enabled, and LP1+ watermarks as disabled, since
5399                 * we can't really reverse compute them in case
5400                 * multiple pipes are active.
5401                 */
5402                active->wm[0].enable = true;
5403                active->wm[0].pri_val = (tmp & WM0_PIPE_PLANE_MASK) >> WM0_PIPE_PLANE_SHIFT;
5404                active->wm[0].spr_val = (tmp & WM0_PIPE_SPRITE_MASK) >> WM0_PIPE_SPRITE_SHIFT;
5405                active->wm[0].cur_val = tmp & WM0_PIPE_CURSOR_MASK;
5406                active->linetime = hw->wm_linetime[pipe];
5407        } else {
5408                int level, max_level = ilk_wm_max_level(dev_priv);
5409
5410                /*
5411                 * For inactive pipes, all watermark levels
5412                 * should be marked as enabled but zeroed,
5413                 * which is what we'd compute them to.
5414                 */
5415                for (level = 0; level <= max_level; level++)
5416                        active->wm[level].enable = true;
5417        }
5418
5419        intel_crtc->wm.active.ilk = *active;
5420}
5421
5422#define _FW_WM(value, plane) \
5423        (((value) & DSPFW_ ## plane ## _MASK) >> DSPFW_ ## plane ## _SHIFT)
5424#define _FW_WM_VLV(value, plane) \
5425        (((value) & DSPFW_ ## plane ## _MASK_VLV) >> DSPFW_ ## plane ## _SHIFT)
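    /*
     * Both helpers extract one watermark field for the given plane from a
     * DSPFW register value; the _VLV variant only differs in the mask used.
     */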
5426
5427static void g4x_read_wm_values(struct drm_i915_private *dev_priv,
5428                               struct g4x_wm_values *wm)
5429{
5430        uint32_t tmp;
5431
5432        tmp = I915_READ(DSPFW1);
5433        wm->sr.plane = _FW_WM(tmp, SR);
5434        wm->pipe[PIPE_B].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORB);
5435        wm->pipe[PIPE_B].plane[PLANE_PRIMARY] = _FW_WM(tmp, PLANEB);
5436        wm->pipe[PIPE_A].plane[PLANE_PRIMARY] = _FW_WM(tmp, PLANEA);
5437
5438        tmp = I915_READ(DSPFW2);
5439        wm->fbc_en = tmp & DSPFW_FBC_SR_EN;
5440        wm->sr.fbc = _FW_WM(tmp, FBC_SR);
5441        wm->hpll.fbc = _FW_WM(tmp, FBC_HPLL_SR);
5442        wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM(tmp, SPRITEB);
5443        wm->pipe[PIPE_A].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORA);
5444        wm->pipe[PIPE_A].plane[PLANE_SPRITE0] = _FW_WM(tmp, SPRITEA);
5445
5446        tmp = I915_READ(DSPFW3);
5447        wm->hpll_en = tmp & DSPFW_HPLL_SR_EN;
5448        wm->sr.cursor = _FW_WM(tmp, CURSOR_SR);
5449        wm->hpll.cursor = _FW_WM(tmp, HPLL_CURSOR);
5450        wm->hpll.plane = _FW_WM(tmp, HPLL_SR);
5451}
5452
5453static void vlv_read_wm_values(struct drm_i915_private *dev_priv,
5454                               struct vlv_wm_values *wm)
5455{
5456        enum pipe pipe;
5457        uint32_t tmp;
5458
5459        for_each_pipe(dev_priv, pipe) {
5460                tmp = I915_READ(VLV_DDL(pipe));
5461
5462                wm->ddl[pipe].plane[PLANE_PRIMARY] =
5463                        (tmp >> DDL_PLANE_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
5464                wm->ddl[pipe].plane[PLANE_CURSOR] =
5465                        (tmp >> DDL_CURSOR_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
5466                wm->ddl[pipe].plane[PLANE_SPRITE0] =
5467                        (tmp >> DDL_SPRITE_SHIFT(0)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
5468                wm->ddl[pipe].plane[PLANE_SPRITE1] =
5469                        (tmp >> DDL_SPRITE_SHIFT(1)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
5470        }
5471
5472        tmp = I915_READ(DSPFW1);
5473        wm->sr.plane = _FW_WM(tmp, SR);
5474        wm->pipe[PIPE_B].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORB);
5475        wm->pipe[PIPE_B].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEB);
5476        wm->pipe[PIPE_A].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEA);
5477
5478        tmp = I915_READ(DSPFW2);
5479        wm->pipe[PIPE_A].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITEB);
5480        wm->pipe[PIPE_A].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORA);
5481        wm->pipe[PIPE_A].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEA);
5482
5483        tmp = I915_READ(DSPFW3);
5484        wm->sr.cursor = _FW_WM(tmp, CURSOR_SR);
5485
5486        if (IS_CHERRYVIEW(dev_priv)) {
5487                tmp = I915_READ(DSPFW7_CHV);
5488                wm->pipe[PIPE_B].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITED);
5489                wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEC);
5490
5491                tmp = I915_READ(DSPFW8_CHV);
5492                wm->pipe[PIPE_C].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITEF);
5493                wm->pipe[PIPE_C].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEE);
5494
5495                tmp = I915_READ(DSPFW9_CHV);
5496                wm->pipe[PIPE_C].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEC);
5497                wm->pipe[PIPE_C].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORC);
5498
5499                tmp = I915_READ(DSPHOWM);
5500                wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9;
5501                wm->pipe[PIPE_C].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEF_HI) << 8;
5502                wm->pipe[PIPE_C].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEE_HI) << 8;
5503                wm->pipe[PIPE_C].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEC_HI) << 8;
5504                wm->pipe[PIPE_B].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITED_HI) << 8;
5505                wm->pipe[PIPE_B].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEC_HI) << 8;
5506                wm->pipe[PIPE_B].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEB_HI) << 8;
5507                wm->pipe[PIPE_A].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEB_HI) << 8;
5508                wm->pipe[PIPE_A].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEA_HI) << 8;
5509                wm->pipe[PIPE_A].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEA_HI) << 8;
5510        } else {
5511                tmp = I915_READ(DSPFW7);
5512                wm->pipe[PIPE_B].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITED);
5513                wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEC);
5514
5515                tmp = I915_READ(DSPHOWM);
5516                wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9;
5517                wm->pipe[PIPE_B].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITED_HI) << 8;
5518                wm->pipe[PIPE_B].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEC_HI) << 8;
5519                wm->pipe[PIPE_B].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEB_HI) << 8;
5520                wm->pipe[PIPE_A].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEB_HI) << 8;
5521                wm->pipe[PIPE_A].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEA_HI) << 8;
5522                wm->pipe[PIPE_A].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEA_HI) << 8;
5523        }
5524}
5525
5526#undef _FW_WM
5527#undef _FW_WM_VLV
5528
5529void g4x_wm_get_hw_state(struct drm_device *dev)
5530{
5531        struct drm_i915_private *dev_priv = to_i915(dev);
5532        struct g4x_wm_values *wm = &dev_priv->wm.g4x;
5533        struct intel_crtc *crtc;
5534
5535        g4x_read_wm_values(dev_priv, wm);
5536
5537        wm->cxsr = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
5538
5539        for_each_intel_crtc(dev, crtc) {
5540                struct intel_crtc_state *crtc_state =
5541                        to_intel_crtc_state(crtc->base.state);
5542                struct g4x_wm_state *active = &crtc->wm.active.g4x;
5543                struct g4x_pipe_wm *raw;
5544                enum pipe pipe = crtc->pipe;
5545                enum plane_id plane_id;
5546                int level, max_level;
5547
5548                active->cxsr = wm->cxsr;
5549                active->hpll_en = wm->hpll_en;
5550                active->fbc_en = wm->fbc_en;
5551
5552                active->sr = wm->sr;
5553                active->hpll = wm->hpll;
5554
5555                for_each_plane_id_on_crtc(crtc, plane_id) {
5556                        active->wm.plane[plane_id] =
5557                                wm->pipe[pipe].plane[plane_id];
5558                }
5559
5560                if (wm->cxsr && wm->hpll_en)
5561                        max_level = G4X_WM_LEVEL_HPLL;
5562                else if (wm->cxsr)
5563                        max_level = G4X_WM_LEVEL_SR;
5564                else
5565                        max_level = G4X_WM_LEVEL_NORMAL;
5566
5567                level = G4X_WM_LEVEL_NORMAL;
5568                raw = &crtc_state->wm.g4x.raw[level];
5569                for_each_plane_id_on_crtc(crtc, plane_id)
5570                        raw->plane[plane_id] = active->wm.plane[plane_id];
5571
5572                if (++level > max_level)
5573                        goto out;
5574
5575                raw = &crtc_state->wm.g4x.raw[level];
5576                raw->plane[PLANE_PRIMARY] = active->sr.plane;
5577                raw->plane[PLANE_CURSOR] = active->sr.cursor;
5578                raw->plane[PLANE_SPRITE0] = 0;
5579                raw->fbc = active->sr.fbc;
5580
5581                if (++level > max_level)
5582                        goto out;
5583
5584                raw = &crtc_state->wm.g4x.raw[level];
5585                raw->plane[PLANE_PRIMARY] = active->hpll.plane;
5586                raw->plane[PLANE_CURSOR] = active->hpll.cursor;
5587                raw->plane[PLANE_SPRITE0] = 0;
5588                raw->fbc = active->hpll.fbc;
5589
5590        out:
5591                for_each_plane_id_on_crtc(crtc, plane_id)
5592                        g4x_raw_plane_wm_set(crtc_state, level,
5593                                             plane_id, USHRT_MAX);
5594                g4x_raw_fbc_wm_set(crtc_state, level, USHRT_MAX);
5595
5596                crtc_state->wm.g4x.optimal = *active;
5597                crtc_state->wm.g4x.intermediate = *active;
5598
5599                DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite=%d\n",
5600                              pipe_name(pipe),
5601                              wm->pipe[pipe].plane[PLANE_PRIMARY],
5602                              wm->pipe[pipe].plane[PLANE_CURSOR],
5603                              wm->pipe[pipe].plane[PLANE_SPRITE0]);
5604        }
5605
5606        DRM_DEBUG_KMS("Initial SR watermarks: plane=%d, cursor=%d fbc=%d\n",
5607                      wm->sr.plane, wm->sr.cursor, wm->sr.fbc);
5608        DRM_DEBUG_KMS("Initial HPLL watermarks: plane=%d, SR cursor=%d fbc=%d\n",
5609                      wm->hpll.plane, wm->hpll.cursor, wm->hpll.fbc);
5610        DRM_DEBUG_KMS("Initial SR=%s HPLL=%s FBC=%s\n",
5611                      yesno(wm->cxsr), yesno(wm->hpll_en), yesno(wm->fbc_en));
5612}
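/*
 * Editor's note: as implemented elsewhere in this file,
 * g4x_raw_plane_wm_set() and g4x_raw_fbc_wm_set() write the given value
 * into the requested level and every level above it, so the USHRT_MAX
 * calls at "out:" poison the first level that could not be read back;
 * those levels stay unusable until the watermarks are properly
 * recomputed.
 */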
5613
5614void g4x_wm_sanitize(struct drm_i915_private *dev_priv)
5615{
5616        struct intel_plane *plane;
5617        struct intel_crtc *crtc;
5618
5619        mutex_lock(&dev_priv->wm.wm_mutex);
5620
5621        for_each_intel_plane(&dev_priv->drm, plane) {
5622                struct intel_crtc *crtc =
5623                        intel_get_crtc_for_pipe(dev_priv, plane->pipe);
5624                struct intel_crtc_state *crtc_state =
5625                        to_intel_crtc_state(crtc->base.state);
5626                struct intel_plane_state *plane_state =
5627                        to_intel_plane_state(plane->base.state);
5628                struct g4x_wm_state *wm_state = &crtc_state->wm.g4x.optimal;
5629                enum plane_id plane_id = plane->id;
5630                int level;
5631
5632                if (plane_state->base.visible)
5633                        continue;
5634
5635                for (level = 0; level < 3; level++) {
5636                        struct g4x_pipe_wm *raw =
5637                                &crtc_state->wm.g4x.raw[level];
5638
5639                        raw->plane[plane_id] = 0;
5640                        wm_state->wm.plane[plane_id] = 0;
5641                }
5642
5643                if (plane_id == PLANE_PRIMARY) {
5644                        for (level = 0; level < 3; level++) {
5645                                struct g4x_pipe_wm *raw =
5646                                        &crtc_state->wm.g4x.raw[level];
5647                                raw->fbc = 0;
5648                        }
5649
5650                        wm_state->sr.fbc = 0;
5651                        wm_state->hpll.fbc = 0;
5652                        wm_state->fbc_en = false;
5653                }
5654        }
5655
5656        for_each_intel_crtc(&dev_priv->drm, crtc) {
5657                struct intel_crtc_state *crtc_state =
5658                        to_intel_crtc_state(crtc->base.state);
5659
5660                crtc_state->wm.g4x.intermediate =
5661                        crtc_state->wm.g4x.optimal;
5662                crtc->wm.active.g4x = crtc_state->wm.g4x.optimal;
5663        }
5664
5665        g4x_program_watermarks(dev_priv);
5666
5667        mutex_unlock(&dev_priv->wm.wm_mutex);
5668}
5669
5670void vlv_wm_get_hw_state(struct drm_device *dev)
5671{
5672        struct drm_i915_private *dev_priv = to_i915(dev);
5673        struct vlv_wm_values *wm = &dev_priv->wm.vlv;
5674        struct intel_crtc *crtc;
5675        u32 val;
5676
5677        vlv_read_wm_values(dev_priv, wm);
5678
5679        wm->cxsr = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
5680        wm->level = VLV_WM_LEVEL_PM2;
5681
5682        if (IS_CHERRYVIEW(dev_priv)) {
5683                mutex_lock(&dev_priv->pcu_lock);
5684
5685                val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
5686                if (val & DSP_MAXFIFO_PM5_ENABLE)
5687                        wm->level = VLV_WM_LEVEL_PM5;
5688
5689                /*
5690                 * If DDR DVFS is disabled in the BIOS, Punit
5691                 * will never ack the request. So if that happens
5692                 * assume we don't have to enable/disable DDR DVFS
5693                 * dynamically. To test that just set the REQ_ACK
5694                 * bit to poke the Punit, but don't change the
5695                 * HIGH/LOW bits so that we don't actually change
5696                 * the current state.
5697                 */
5698                val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
5699                val |= FORCE_DDR_FREQ_REQ_ACK;
5700                vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);
5701
5702                if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
5703                              FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) {
5704                        DRM_DEBUG_KMS("Punit not acking DDR DVFS request, "
5705                                      "assuming DDR DVFS is disabled\n");
5706                        dev_priv->wm.max_level = VLV_WM_LEVEL_PM5;
5707                } else {
5708                        val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
5709                        if ((val & FORCE_DDR_HIGH_FREQ) == 0)
5710                                wm->level = VLV_WM_LEVEL_DDR_DVFS;
5711                }
5712
5713                mutex_unlock(&dev_priv->pcu_lock);
5714        }
5715
5716        for_each_intel_crtc(dev, crtc) {
5717                struct intel_crtc_state *crtc_state =
5718                        to_intel_crtc_state(crtc->base.state);
5719                struct vlv_wm_state *active = &crtc->wm.active.vlv;
5720                const struct vlv_fifo_state *fifo_state =
5721                        &crtc_state->wm.vlv.fifo_state;
5722                enum pipe pipe = crtc->pipe;
5723                enum plane_id plane_id;
5724                int level;
5725
5726                vlv_get_fifo_size(crtc_state);
5727
5728                active->num_levels = wm->level + 1;
5729                active->cxsr = wm->cxsr;
5730
5731                for (level = 0; level < active->num_levels; level++) {
5732                        struct g4x_pipe_wm *raw =
5733                                &crtc_state->wm.vlv.raw[level];
5734
5735                        active->sr[level].plane = wm->sr.plane;
5736                        active->sr[level].cursor = wm->sr.cursor;
5737
5738                        for_each_plane_id_on_crtc(crtc, plane_id) {
5739                                active->wm[level].plane[plane_id] =
5740                                        wm->pipe[pipe].plane[plane_id];
5741
5742                                raw->plane[plane_id] =
5743                                        vlv_invert_wm_value(active->wm[level].plane[plane_id],
5744                                                            fifo_state->plane[plane_id]);
5745                        }
5746                }
5747
5748                for_each_plane_id_on_crtc(crtc, plane_id)
5749                        vlv_raw_plane_wm_set(crtc_state, level,
5750                                             plane_id, USHRT_MAX);
5751                vlv_invalidate_wms(crtc, active, level);
5752
5753                crtc_state->wm.vlv.optimal = *active;
5754                crtc_state->wm.vlv.intermediate = *active;
5755
5756                DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite0=%d, sprite1=%d\n",
5757                              pipe_name(pipe),
5758                              wm->pipe[pipe].plane[PLANE_PRIMARY],
5759                              wm->pipe[pipe].plane[PLANE_CURSOR],
5760                              wm->pipe[pipe].plane[PLANE_SPRITE0],
5761                              wm->pipe[pipe].plane[PLANE_SPRITE1]);
5762        }
5763
5764        DRM_DEBUG_KMS("Initial watermarks: SR plane=%d, SR cursor=%d level=%d cxsr=%d\n",
5765                      wm->sr.plane, wm->sr.cursor, wm->level, wm->cxsr);
5766}
5767
5768void vlv_wm_sanitize(struct drm_i915_private *dev_priv)
5769{
5770        struct intel_plane *plane;
5771        struct intel_crtc *crtc;
5772
5773        mutex_lock(&dev_priv->wm.wm_mutex);
5774
5775        for_each_intel_plane(&dev_priv->drm, plane) {
5776                struct intel_crtc *crtc =
5777                        intel_get_crtc_for_pipe(dev_priv, plane->pipe);
5778                struct intel_crtc_state *crtc_state =
5779                        to_intel_crtc_state(crtc->base.state);
5780                struct intel_plane_state *plane_state =
5781                        to_intel_plane_state(plane->base.state);
5782                struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal;
5783                const struct vlv_fifo_state *fifo_state =
5784                        &crtc_state->wm.vlv.fifo_state;
5785                enum plane_id plane_id = plane->id;
5786                int level;
5787
5788                if (plane_state->base.visible)
5789                        continue;
5790
5791                for (level = 0; level < wm_state->num_levels; level++) {
5792                        struct g4x_pipe_wm *raw =
5793                                &crtc_state->wm.vlv.raw[level];
5794
5795                        raw->plane[plane_id] = 0;
5796
5797                        wm_state->wm[level].plane[plane_id] =
5798                                vlv_invert_wm_value(raw->plane[plane_id],
5799                                                    fifo_state->plane[plane_id]);
5800                }
5801        }
5802
5803        for_each_intel_crtc(&dev_priv->drm, crtc) {
5804                struct intel_crtc_state *crtc_state =
5805                        to_intel_crtc_state(crtc->base.state);
5806
5807                crtc_state->wm.vlv.intermediate =
5808                        crtc_state->wm.vlv.optimal;
5809                crtc->wm.active.vlv = crtc_state->wm.vlv.optimal;
5810        }
5811
5812        vlv_program_watermarks(dev_priv);
5813
5814        mutex_unlock(&dev_priv->wm.wm_mutex);
5815}
5816
5817/*
5818 * FIXME should probably kill this and improve
5819 * the real watermark readout/sanitation instead
5820 */
5821static void ilk_init_lp_watermarks(struct drm_i915_private *dev_priv)
5822{
5823        I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN);
5824        I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN);
5825        I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN);
5826
5827        /*
5828         * Don't touch WM1S_LP_EN here.
5829         * Doing so could cause underruns.
5830         */
5831}
5832
5833void ilk_wm_get_hw_state(struct drm_device *dev)
5834{
5835        struct drm_i915_private *dev_priv = to_i915(dev);
5836        struct ilk_wm_values *hw = &dev_priv->wm.hw;
5837        struct drm_crtc *crtc;
5838
5839        ilk_init_lp_watermarks(dev_priv);
5840
5841        for_each_crtc(dev, crtc)
5842                ilk_pipe_wm_get_hw_state(crtc);
5843
5844        hw->wm_lp[0] = I915_READ(WM1_LP_ILK);
5845        hw->wm_lp[1] = I915_READ(WM2_LP_ILK);
5846        hw->wm_lp[2] = I915_READ(WM3_LP_ILK);
5847
5848        hw->wm_lp_spr[0] = I915_READ(WM1S_LP_ILK);
5849        if (INTEL_GEN(dev_priv) >= 7) {
5850                hw->wm_lp_spr[1] = I915_READ(WM2S_LP_IVB);
5851                hw->wm_lp_spr[2] = I915_READ(WM3S_LP_IVB);
5852        }
5853
5854        if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
5855                hw->partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ?
5856                        INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
5857        else if (IS_IVYBRIDGE(dev_priv))
5858                hw->partitioning = (I915_READ(DISP_ARB_CTL2) & DISP_DATA_PARTITION_5_6) ?
5859                        INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
5860
5861        hw->enable_fbc_wm =
5862                !(I915_READ(DISP_ARB_CTL) & DISP_FBC_WM_DIS);
5863}
5864
5865/**
5866 * intel_update_watermarks - update FIFO watermark values based on current modes
5867 * @crtc: the #intel_crtc on which to compute the WM
5868 *
5869 * Calculate watermark values for the various WM regs based on current mode
5870 * and plane configuration.
5871 *
5872 * There are several cases to deal with here:
5873 *   - normal (i.e. non-self-refresh)
5874 *   - self-refresh (SR) mode
5875 *   - lines are large relative to FIFO size (buffer can hold up to 2)
5876 *   - lines are small relative to FIFO size (buffer can hold more than 2
5877 *     lines), so need to account for TLB latency
5878 *
5879 *   The normal calculation is:
5880 *     watermark = dotclock * bytes per pixel * latency
5881 *   where latency is platform & configuration dependent (we assume pessimal
5882 *   values here).
5883 *
5884 *   The SR calculation is:
5885 *     watermark = (trunc(latency/line time)+1) * surface width *
5886 *       bytes per pixel
5887 *   where
5888 *     line time = htotal / dotclock
5889 *     surface width = hdisplay for normal plane and 64 for cursor
5890 *   and latency is assumed to be high, as above.
5891 *
5892 * The final value programmed to the register should always be rounded up,
5893 * and include an extra 2 entries to account for clock crossings.
5894 *
5895 * We don't use the sprite, so we can ignore that.  And on Crestline we have
5896 * to set the non-SR watermarks to 8.
5897 */
5898void intel_update_watermarks(struct intel_crtc *crtc)
5899{
5900        struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
5901
5902        if (dev_priv->display.update_wm)
5903                dev_priv->display.update_wm(crtc);
5904}
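/*
 * Worked example of the SR formula above (editor's sketch, numbers
 * hypothetical): for a 1920x1080 mode with htotal = 2200 and a 148.5 MHz
 * dotclock, line time = 2200 / 148500000 s ~= 14.8 us. With an assumed
 * 30 us latency and a 4 bytes-per-pixel primary plane:
 *
 *   watermark = (trunc(30 / 14.8) + 1) * 1920 * 4
 *             = 3 * 7680 = 23040 bytes
 *
 * before the final round-up and the extra 2 entries for clock crossings.
 */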
5905
5906void intel_enable_ipc(struct drm_i915_private *dev_priv)
5907{
5908        u32 val;
5909
5910        /* Display WA #0477 WaDisableIPC: skl */
5911        if (IS_SKYLAKE(dev_priv)) {
5912                dev_priv->ipc_enabled = false;
5913                return;
5914        }
5915
5916        val = I915_READ(DISP_ARB_CTL2);
5917
5918        if (dev_priv->ipc_enabled)
5919                val |= DISP_IPC_ENABLE;
5920        else
5921                val &= ~DISP_IPC_ENABLE;
5922
5923        I915_WRITE(DISP_ARB_CTL2, val);
5924}
5925
5926void intel_init_ipc(struct drm_i915_private *dev_priv)
5927{
5928        dev_priv->ipc_enabled = false;
5929        if (!HAS_IPC(dev_priv))
5930                return;
5931
5932        dev_priv->ipc_enabled = true;
5933        intel_enable_ipc(dev_priv);
5934}
5935
5936/*
5937 * Lock protecting IPS related data structures
5938 */
5939DEFINE_SPINLOCK(mchdev_lock);
5940
5941/* Global for IPS driver to get at the current i915 device. Protected by
5942 * mchdev_lock. */
5943static struct drm_i915_private *i915_mch_dev;
5944
5945bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val)
5946{
5947        u16 rgvswctl;
5948
5949        lockdep_assert_held(&mchdev_lock);
5950
5951        rgvswctl = I915_READ16(MEMSWCTL);
5952        if (rgvswctl & MEMCTL_CMD_STS) {
5953                DRM_DEBUG("gpu busy, RCS change rejected\n");
5954                return false; /* still busy with another command */
5955        }
5956
5957        rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
5958                (val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM;
5959        I915_WRITE16(MEMSWCTL, rgvswctl);
5960        POSTING_READ16(MEMSWCTL);
5961
5962        rgvswctl |= MEMCTL_CMD_STS;
5963        I915_WRITE16(MEMSWCTL, rgvswctl);
5964
5965        return true;
5966}
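/*
 * Editor's note: the second MEMSWCTL write sets MEMCTL_CMD_STS to kick
 * off the frequency-change command; hardware clears the bit again when
 * the command completes, which is what the busy check above and the
 * wait_for_atomic() in ironlake_enable_drps() poll for.
 */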
5967
5968static void ironlake_enable_drps(struct drm_i915_private *dev_priv)
5969{
5970        u32 rgvmodectl;
5971        u8 fmax, fmin, fstart, vstart;
5972
5973        spin_lock_irq(&mchdev_lock);
5974
5975        rgvmodectl = I915_READ(MEMMODECTL);
5976
5977        /* Enable temp reporting */
5978        I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN);
5979        I915_WRITE16(TSC1, I915_READ(TSC1) | TSE);
5980
5981        /* 100ms RC evaluation intervals */
5982        I915_WRITE(RCUPEI, 100000);
5983        I915_WRITE(RCDNEI, 100000);
5984
5985        /* Set max/min thresholds to 90ms and 80ms respectively */
5986        I915_WRITE(RCBMAXAVG, 90000);
5987        I915_WRITE(RCBMINAVG, 80000);
5988
5989        I915_WRITE(MEMIHYST, 1);
5990
5991        /* Set up min, max, and cur for interrupt handling */
5992        fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
5993        fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
5994        fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
5995                MEMMODE_FSTART_SHIFT;
5996
5997        vstart = (I915_READ(PXVFREQ(fstart)) & PXVFREQ_PX_MASK) >>
5998                PXVFREQ_PX_SHIFT;
5999
6000        dev_priv->ips.fmax = fmax; /* IPS callback will increase this */
6001        dev_priv->ips.fstart = fstart;
6002
6003        dev_priv->ips.max_delay = fstart;
6004        dev_priv->ips.min_delay = fmin;
6005        dev_priv->ips.cur_delay = fstart;
6006
6007        DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
6008                         fmax, fmin, fstart);
6009
6010        I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
6011
6012        /*
6013         * Interrupts will be enabled in ironlake_irq_postinstall
6014         */
6015
6016        I915_WRITE(VIDSTART, vstart);
6017        POSTING_READ(VIDSTART);
6018
6019        rgvmodectl |= MEMMODE_SWMODE_EN;
6020        I915_WRITE(MEMMODECTL, rgvmodectl);
6021
6022        if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10))
6023                DRM_ERROR("stuck trying to change perf mode\n");
6024        mdelay(1);
6025
6026        ironlake_set_drps(dev_priv, fstart);
6027
6028        dev_priv->ips.last_count1 = I915_READ(DMIEC) +
6029                I915_READ(DDREC) + I915_READ(CSIEC);
6030        dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies);
6031        dev_priv->ips.last_count2 = I915_READ(GFXEC);
6032        dev_priv->ips.last_time2 = ktime_get_raw_ns();
6033
6034        spin_unlock_irq(&mchdev_lock);
6035}
6036
6037static void ironlake_disable_drps(struct drm_i915_private *dev_priv)
6038{
6039        u16 rgvswctl;
6040
6041        spin_lock_irq(&mchdev_lock);
6042
6043        rgvswctl = I915_READ16(MEMSWCTL);
6044
6045        /* Ack interrupts, disable EFC interrupt */
6046        I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN);
6047        I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG);
6048        I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT);
6049        I915_WRITE(DEIIR, DE_PCU_EVENT);
6050        I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT);
6051
6052        /* Go back to the starting frequency */
6053        ironlake_set_drps(dev_priv, dev_priv->ips.fstart);
6054        mdelay(1);
6055        rgvswctl |= MEMCTL_CMD_STS;
6056        I915_WRITE(MEMSWCTL, rgvswctl);
6057        mdelay(1);
6058
6059        spin_unlock_irq(&mchdev_lock);
6060}
6061
6062/* There's a funny hw issue where the hw returns all 0 when reading from
6063 * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value
6064 * ourselves, instead of doing a rmw cycle (which might result in us clearing
6065 * all limits and leaving the gpu stuck at whatever frequency it is currently at).
6066 */
6067static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val)
6068{
6069        struct intel_rps *rps = &dev_priv->gt_pm.rps;
6070        u32 limits;
6071
6072        /* Only set the down limit when we've reached the lowest level to avoid
6073         * getting more interrupts, otherwise leave this clear. This prevents a
6074         * race in the hw when coming out of rc6: There's a tiny window where
6075         * the hw runs at the minimal clock before selecting the desired
6076         * frequency, if the down threshold expires in that window we will not
6077         * receive a down interrupt. */
6078        if (INTEL_GEN(dev_priv) >= 9) {
6079                limits = (rps->max_freq_softlimit) << 23;
6080                if (val <= rps->min_freq_softlimit)
6081                        limits |= (rps->min_freq_softlimit) << 14;
6082        } else {
6083                limits = rps->max_freq_softlimit << 24;
6084                if (val <= rps->min_freq_softlimit)
6085                        limits |= rps->min_freq_softlimit << 16;
6086        }
6087
6088        return limits;
6089}
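/*
 * Layout sketch (editor's note, softlimit values hypothetical): on gen9,
 * with max_freq_softlimit = 0x38 and min_freq_softlimit = 0x0e, a request
 * at or below the minimum yields
 *
 *   limits = (0x38 << 23) | (0x0e << 14) = 0x1c038000
 *
 * while pre-gen9 parts pack the same two fields at bits 24 and 16.
 */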
6090
6091static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
6092{
6093        struct intel_rps *rps = &dev_priv->gt_pm.rps;
6094        int new_power;
6095        u32 threshold_up = 0, threshold_down = 0; /* in % */
6096        u32 ei_up = 0, ei_down = 0;
6097
6098        new_power = rps->power;
6099        switch (rps->power) {
6100        case LOW_POWER:
6101                if (val > rps->efficient_freq + 1 &&
6102                    val > rps->cur_freq)
6103                        new_power = BETWEEN;
6104                break;
6105
6106        case BETWEEN:
6107                if (val <= rps->efficient_freq &&
6108                    val < rps->cur_freq)
6109                        new_power = LOW_POWER;
6110                else if (val >= rps->rp0_freq &&
6111                         val > rps->cur_freq)
6112                        new_power = HIGH_POWER;
6113                break;
6114
6115        case HIGH_POWER:
6116                if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
6117                    val < rps->cur_freq)
6118                        new_power = BETWEEN;
6119                break;
6120        }
6121        /* Max/min bins are special */
6122        if (val <= rps->min_freq_softlimit)
6123                new_power = LOW_POWER;
6124        if (val >= rps->max_freq_softlimit)
6125                new_power = HIGH_POWER;
6126        if (new_power == rps->power)
6127                return;
6128
6129        /* Note the units here are not exactly 1us, but 1280ns. */
6130        switch (new_power) {
6131        case LOW_POWER:
6132                /* Upclock if more than 95% busy over 16ms */
6133                ei_up = 16000;
6134                threshold_up = 95;
6135
6136                /* Downclock if less than 85% busy over 32ms */
6137                ei_down = 32000;
6138                threshold_down = 85;
6139                break;
6140
6141        case BETWEEN:
6142                /* Upclock if more than 90% busy over 13ms */
6143                ei_up = 13000;
6144                threshold_up = 90;
6145
6146                /* Downclock if less than 75% busy over 32ms */
6147                ei_down = 32000;
6148                threshold_down = 75;
6149                break;
6150
6151        case HIGH_POWER:
6152                /* Upclock if more than 85% busy over 10ms */
6153                ei_up = 10000;
6154                threshold_up = 85;
6155
6156                /* Downclock if less than 60% busy over 32ms */
6157                ei_down = 32000;
6158                threshold_down = 60;
6159                break;
6160        }
6161
6162        /* Once byt can survive dynamic sw freq adjustments
6163         * without system hangs, this restriction can be lifted.
6164         */
6165        if (IS_VALLEYVIEW(dev_priv))
6166                goto skip_hw_write;
6167
6168        I915_WRITE(GEN6_RP_UP_EI,
6169                   GT_INTERVAL_FROM_US(dev_priv, ei_up));
6170        I915_WRITE(GEN6_RP_UP_THRESHOLD,
6171                   GT_INTERVAL_FROM_US(dev_priv,
6172                                       ei_up * threshold_up / 100));
6173
6174        I915_WRITE(GEN6_RP_DOWN_EI,
6175                   GT_INTERVAL_FROM_US(dev_priv, ei_down));
6176        I915_WRITE(GEN6_RP_DOWN_THRESHOLD,
6177                   GT_INTERVAL_FROM_US(dev_priv,
6178                                       ei_down * threshold_down / 100));
6179
6180        I915_WRITE(GEN6_RP_CONTROL,
6181                   GEN6_RP_MEDIA_TURBO |
6182                   GEN6_RP_MEDIA_HW_NORMAL_MODE |
6183                   GEN6_RP_MEDIA_IS_GFX |
6184                   GEN6_RP_ENABLE |
6185                   GEN6_RP_UP_BUSY_AVG |
6186                   GEN6_RP_DOWN_IDLE_AVG);
6187
6188skip_hw_write:
6189        rps->power = new_power;
6190        rps->up_threshold = threshold_up;
6191        rps->down_threshold = threshold_down;
6192        rps->last_adj = 0;
6193}
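/*
 * Unit conversion sketch (editor's note): the ei_up/ei_down values are
 * microseconds and GT_INTERVAL_FROM_US() rescales them to the hardware
 * tick. With the 1280 ns tick noted above, the LOW_POWER up interval of
 * 16000 us becomes 16000000 / 1280 = 12500 ticks, and the matching 95%
 * up threshold is 16000 * 95 / 100 = 15200 us worth of busy time.
 */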
6194
6195static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
6196{
6197        struct intel_rps *rps = &dev_priv->gt_pm.rps;
6198        u32 mask = 0;
6199
6200        /* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */
6201        if (val > rps->min_freq_softlimit)
6202                mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
6203        if (val < rps->max_freq_softlimit)
6204                mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
6205
6206        mask &= dev_priv->pm_rps_events;
6207
6208        return gen6_sanitize_rps_pm_mask(dev_priv, ~mask);
6209}
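/*
 * Behaviour sketch (editor's note): mask collects the events we want
 * enabled and is then inverted for GEN6_PMINTRMSK (a set bit masks).
 * At val == max_freq_softlimit the UP bits are never added, so further
 * up interrupts stay masked; strictly between the softlimits both
 * directions are unmasked, subject to dev_priv->pm_rps_events.
 */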
6210
6211/* gen6_set_rps is called to update the frequency request, but should also be
6212 * called when the range (min_delay and max_delay) is modified so that we can
6213 * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */
6214static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val)
6215{
6216        struct intel_rps *rps = &dev_priv->gt_pm.rps;
6217
6218        /* min/max delay may still have been modified so be sure to
6219         * write the limits value.
6220         */
6221        if (val != rps->cur_freq) {
6222                gen6_set_rps_thresholds(dev_priv, val);
6223
6224                if (INTEL_GEN(dev_priv) >= 9)
6225                        I915_WRITE(GEN6_RPNSWREQ,
6226                                   GEN9_FREQUENCY(val));
6227                else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
6228                        I915_WRITE(GEN6_RPNSWREQ,
6229                                   HSW_FREQUENCY(val));
6230                else
6231                        I915_WRITE(GEN6_RPNSWREQ,
6232                                   GEN6_FREQUENCY(val) |
6233                                   GEN6_OFFSET(0) |
6234                                   GEN6_AGGRESSIVE_TURBO);
6235        }
6236
6237        /* Make sure we continue to get interrupts
6238         * until we hit the minimum or maximum frequencies.
6239         */
6240        I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val));
6241        I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
6242
6243        rps->cur_freq = val;
6244        trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
6245
6246        return 0;
6247}
6248
6249static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val)
6250{
6251        int err;
6252
6253        if (WARN_ONCE(IS_CHERRYVIEW(dev_priv) && (val & 1),
6254                      "Odd GPU freq value\n"))
6255                val &= ~1;
6256
6257        I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
6258
6259        if (val != dev_priv->gt_pm.rps.cur_freq) {
6260                err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
6261                if (err)
6262                        return err;
6263
6264                gen6_set_rps_thresholds(dev_priv, val);
6265        }
6266
6267        dev_priv->gt_pm.rps.cur_freq = val;
6268        trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
6269
6270        return 0;
6271}
6272
6273/* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down.
6274 *
6275 * If Gfx is idle, then:
6276 * 1. Forcewake the media well.
6277 * 2. Request the idle frequency.
6278 * 3. Release forcewake of the media well.
6279 */
6280static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
6281{
6282        struct intel_rps *rps = &dev_priv->gt_pm.rps;
6283        u32 val = rps->idle_freq;
6284        int err;
6285
6286        if (rps->cur_freq <= val)
6287                return;
6288
6289        /* The punit delays the write of the frequency and voltage until it
6290         * determines the GPU is awake. During normal usage we don't want to
6291         * waste power changing the frequency if the GPU is sleeping (rc6).
6292         * However, the GPU and driver are now idle and we do not want to delay
6293         * switching to minimum voltage (reducing power whilst idle) as we do
6294         * not expect to be woken in the near future and so must flush the
6295         * change by waking the device.
6296         *
6297         * We choose to take the media powerwell (either would do to trick the
6298         * punit into committing the voltage change) as that takes a lot less
6299         * power than the render powerwell.
6300         */
6301        intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA);
6302        err = valleyview_set_rps(dev_priv, val);
6303        intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA);
6304
6305        if (err)
6306                DRM_ERROR("Failed to set RPS for idle\n");
6307}
6308
6309void gen6_rps_busy(struct drm_i915_private *dev_priv)
6310{
6311        struct intel_rps *rps = &dev_priv->gt_pm.rps;
6312
6313        mutex_lock(&dev_priv->pcu_lock);
6314        if (rps->enabled) {
6315                u8 freq;
6316
6317                if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
6318                        gen6_rps_reset_ei(dev_priv);
6319                I915_WRITE(GEN6_PMINTRMSK,
6320                           gen6_rps_pm_mask(dev_priv, rps->cur_freq));
6321
6322                gen6_enable_rps_interrupts(dev_priv);
6323
6324                /* Use the user's desired frequency as a guide, but for better
6325                 * performance, jump directly to RPe as our starting frequency.
6326                 */
6327                freq = max(rps->cur_freq,
6328                           rps->efficient_freq);
6329
6330                if (intel_set_rps(dev_priv,
6331                                  clamp(freq,
6332                                        rps->min_freq_softlimit,
6333                                        rps->max_freq_softlimit)))
6334                        DRM_DEBUG_DRIVER("Failed to set busy frequency\n");
6335        }
6336        mutex_unlock(&dev_priv->pcu_lock);
6337}
6338
6339void gen6_rps_idle(struct drm_i915_private *dev_priv)
6340{
6341        struct intel_rps *rps = &dev_priv->gt_pm.rps;
6342
6343        /* Flush our bottom-half so that it does not race with us
6344         * setting the idle frequency and so that it is bounded by
6345         * our rpm wakeref. And then disable the interrupts to stop any
6346         * further RPS reclocking whilst we are asleep.
6347         */
6348        gen6_disable_rps_interrupts(dev_priv);
6349
6350        mutex_lock(&dev_priv->pcu_lock);
6351        if (rps->enabled) {
6352                if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
6353                        vlv_set_rps_idle(dev_priv);
6354                else
6355                        gen6_set_rps(dev_priv, rps->idle_freq);
6356                rps->last_adj = 0;
6357                I915_WRITE(GEN6_PMINTRMSK,
6358                           gen6_sanitize_rps_pm_mask(dev_priv, ~0));
6359        }
6360        mutex_unlock(&dev_priv->pcu_lock);
6361}
6362
6363void gen6_rps_boost(struct i915_request *rq,
6364                    struct intel_rps_client *rps_client)
6365{
6366        struct intel_rps *rps = &rq->i915->gt_pm.rps;
6367        unsigned long flags;
6368        bool boost;
6369
6370        /* This is intentionally racy! We peek at the state here, then
6371         * validate inside the RPS worker.
6372         */
6373        if (!rps->enabled)
6374                return;
6375
6376        if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
6377                return;
6378
6379        /* Serializes with i915_request_retire() */
6380        boost = false;
6381        spin_lock_irqsave(&rq->lock, flags);
6382        if (!rq->waitboost && !dma_fence_is_signaled_locked(&rq->fence)) {
6383                boost = !atomic_fetch_inc(&rps->num_waiters);
6384                rq->waitboost = true;
6385        }
6386        spin_unlock_irqrestore(&rq->lock, flags);
6387        if (!boost)
6388                return;
6389
6390        if (READ_ONCE(rps->cur_freq) < rps->boost_freq)
6391                schedule_work(&rps->work);
6392
6393        atomic_inc(rps_client ? &rps_client->boosts : &rps->boosts);
6394}
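/*
 * Editor's note: atomic_fetch_inc() returns the pre-increment count, so
 * boost is true only for the first waiter; subsequent waiters merely
 * bump num_waiters and never schedule the RPS worker themselves.
 */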
6395
6396int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
6397{
6398        struct intel_rps *rps = &dev_priv->gt_pm.rps;
6399        int err;
6400
6401        lockdep_assert_held(&dev_priv->pcu_lock);
6402        GEM_BUG_ON(val > rps->max_freq);
6403        GEM_BUG_ON(val < rps->min_freq);
6404
6405        if (!rps->enabled) {
6406                rps->cur_freq = val;
6407                return 0;
6408        }
6409
6410        if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
6411                err = valleyview_set_rps(dev_priv, val);
6412        else
6413                err = gen6_set_rps(dev_priv, val);
6414
6415        return err;
6416}
6417
6418static void gen9_disable_rc6(struct drm_i915_private *dev_priv)
6419{
6420        I915_WRITE(GEN6_RC_CONTROL, 0);
6421        I915_WRITE(GEN9_PG_ENABLE, 0);
6422}
6423
6424static void gen9_disable_rps(struct drm_i915_private *dev_priv)
6425{
6426        I915_WRITE(GEN6_RP_CONTROL, 0);
6427}
6428
6429static void gen6_disable_rc6(struct drm_i915_private *dev_priv)
6430{
6431        I915_WRITE(GEN6_RC_CONTROL, 0);
6432}
6433
6434static void gen6_disable_rps(struct drm_i915_private *dev_priv)
6435{
6436        I915_WRITE(GEN6_RPNSWREQ, 1 << 31);
6437        I915_WRITE(GEN6_RP_CONTROL, 0);
6438}
6439
6440static void cherryview_disable_rc6(struct drm_i915_private *dev_priv)
6441{
6442        I915_WRITE(GEN6_RC_CONTROL, 0);
6443}
6444
6445static void cherryview_disable_rps(struct drm_i915_private *dev_priv)
6446{
6447        I915_WRITE(GEN6_RP_CONTROL, 0);
6448}
6449
6450static void valleyview_disable_rc6(struct drm_i915_private *dev_priv)
6451{
6452        /* We take forcewake before disabling RC6;
6453         * this is what the BIOS expects when going into suspend. */
6454        intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
6455
6456        I915_WRITE(GEN6_RC_CONTROL, 0);
6457
6458        intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
6459}
6460
6461static void valleyview_disable_rps(struct drm_i915_private *dev_priv)
6462{
6463        I915_WRITE(GEN6_RP_CONTROL, 0);
6464}
6465
6466static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv)
6467{
6468        bool enable_rc6 = true;
6469        unsigned long rc6_ctx_base;
6470        u32 rc_ctl;
6471        int rc_sw_target;
6472
6473        rc_ctl = I915_READ(GEN6_RC_CONTROL);
6474        rc_sw_target = (I915_READ(GEN6_RC_STATE) & RC_SW_TARGET_STATE_MASK) >>
6475                       RC_SW_TARGET_STATE_SHIFT;
6476        DRM_DEBUG_DRIVER("BIOS enabled RC states: "
6477                         "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n",
6478                         onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE),
6479                         onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE),
6480                         rc_sw_target);
6481
6482        if (!(I915_READ(RC6_LOCATION) & RC6_CTX_IN_DRAM)) {
6483                DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n");
6484                enable_rc6 = false;
6485        }
6486
6487        /*
6488         * The exact context size is not known for BXT, so assume a page size
6489         * for this check.
6490         */
6491        rc6_ctx_base = I915_READ(RC6_CTX_BASE) & RC6_CTX_BASE_MASK;
6492        if (!((rc6_ctx_base >= dev_priv->dsm_reserved.start) &&
6493              (rc6_ctx_base + PAGE_SIZE < dev_priv->dsm_reserved.end))) {
6494                DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n");
6495                enable_rc6 = false;
6496        }
6497
6498        if (!(((I915_READ(PWRCTX_MAXCNT_RCSUNIT) & IDLE_TIME_MASK) > 1) &&
6499              ((I915_READ(PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1) &&
6500              ((I915_READ(PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1) &&
6501              ((I915_READ(PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1))) {
6502                DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n");
6503                enable_rc6 = false;
6504        }
6505
6506        if (!I915_READ(GEN8_PUSHBUS_CONTROL) ||
6507            !I915_READ(GEN8_PUSHBUS_ENABLE) ||
6508            !I915_READ(GEN8_PUSHBUS_SHIFT)) {
6509                DRM_DEBUG_DRIVER("Pushbus not setup properly.\n");
6510                enable_rc6 = false;
6511        }
6512
6513        if (!I915_READ(GEN6_GFXPAUSE)) {
6514                DRM_DEBUG_DRIVER("GFX pause not setup properly.\n");
6515                enable_rc6 = false;
6516        }
6517
6518        if (!I915_READ(GEN8_MISC_CTRL0)) {
6519                DRM_DEBUG_DRIVER("GPM control not setup properly.\n");
6520                enable_rc6 = false;
6521        }
6522
6523        return enable_rc6;
6524}
6525
6526static bool sanitize_rc6(struct drm_i915_private *i915)
6527{
6528        struct intel_device_info *info = mkwrite_device_info(i915);
6529
6530        /* Powersaving is controlled by the host when inside a VM */
6531        if (intel_vgpu_active(i915))
6532                info->has_rc6 = 0;
6533
6534        if (info->has_rc6 &&
6535            IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(i915)) {
6536                DRM_INFO("RC6 disabled by BIOS\n");
6537                info->has_rc6 = 0;
6538        }
6539
6540        /*
6541         * We assume that we do not have any deep rc6 levels if we don't
6542         * have the previous rc6 level supported, i.e. we use HAS_RC6()
6543         * as the initial coarse check for rc6 in general, moving on to
6544         * progressively finer/deeper levels.
6545         */
6546        if (!info->has_rc6 && info->has_rc6p)
6547                info->has_rc6p = 0;
6548
6549        return info->has_rc6;
6550}
6551
6552static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv)
6553{
6554        struct intel_rps *rps = &dev_priv->gt_pm.rps;
6555
6556        /* All of these values are in units of 50MHz */
6557
6558        /* static values from HW: RP0 > RP1 > RPn (min_freq) */
6559        if (IS_GEN9_LP(dev_priv)) {
6560                u32 rp_state_cap = I915_READ(BXT_RP_STATE_CAP);
6561                rps->rp0_freq = (rp_state_cap >> 16) & 0xff;
6562                rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
6563                rps->min_freq = (rp_state_cap >>  0) & 0xff;
6564        } else {
6565                u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
6566                rps->rp0_freq = (rp_state_cap >>  0) & 0xff;
6567                rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
6568                rps->min_freq = (rp_state_cap >> 16) & 0xff;
6569        }
6570        /* hw_max = RP0 until we check for overclocking */
6571        rps->max_freq = rps->rp0_freq;
6572
6573        rps->efficient_freq = rps->rp1_freq;
6574        if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) ||
6575            IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
6576                u32 ddcc_status = 0;
6577
6578                if (sandybridge_pcode_read(dev_priv,
6579                                           HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
6580                                           &ddcc_status) == 0)
6581                        rps->efficient_freq =
6582                                clamp_t(u8,
6583                                        ((ddcc_status >> 8) & 0xff),
6584                                        rps->min_freq,
6585                                        rps->max_freq);
6586        }
6587
6588        if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
6589                /* Store the frequency values in 16.66 MHz units, which is
6590                 * the natural hardware unit for SKL
6591                 */
6592                rps->rp0_freq *= GEN9_FREQ_SCALER;
6593                rps->rp1_freq *= GEN9_FREQ_SCALER;
6594                rps->min_freq *= GEN9_FREQ_SCALER;
6595                rps->max_freq *= GEN9_FREQ_SCALER;
6596                rps->efficient_freq *= GEN9_FREQ_SCALER;
6597        }
6598}
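/*
 * Units sketch (editor's note, raw value hypothetical): with
 * GEN9_FREQ_SCALER == 3, a 50 MHz-unit RP0 of 22 (i.e. 22 * 50 =
 * 1100 MHz) is stored as 66, and 66 * 16.66 MHz ~= 1100 MHz, so the
 * rest of the code can work in the finer SKL hardware unit directly.
 */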
6599
6600static void reset_rps(struct drm_i915_private *dev_priv,
6601                      int (*set)(struct drm_i915_private *, u8))
6602{
6603        struct intel_rps *rps = &dev_priv->gt_pm.rps;
6604        u8 freq = rps->cur_freq;
6605
6606        /* force a reset */
6607        rps->power = -1;
6608        rps->cur_freq = -1;
6609
6610        if (set(dev_priv, freq))
6611                DRM_ERROR("Failed to reset RPS to initial values\n");
6612}
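/*
 * Editor's note: poisoning power and cur_freq with -1 guarantees the
 * set() callback sees both a power-state and a frequency change, so
 * gen6_set_rps_thresholds() and the RPNSWREQ write cannot short-circuit
 * on an already-current value.
 */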
6613
6614/* See the Gen9_GT_PM_Programming_Guide doc for the below */
6615static void gen9_enable_rps(struct drm_i915_private *dev_priv)
6616{
6617        intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
6618
6619        /* Program defaults and thresholds for RPS */
6620        if (IS_GEN9(dev_priv))
6621                I915_WRITE(GEN6_RC_VIDEO_FREQ,
6622                        GEN9_FREQUENCY(dev_priv->gt_pm.rps.rp1_freq));
6623
6624        /* 1 second timeout */
6625        I915_WRITE(GEN6_RP_DOWN_TIMEOUT,
6626                GT_INTERVAL_FROM_US(dev_priv, 1000000));
6627
6628        I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 0xa);
6629
6630        /* Leaning on the below call to gen6_set_rps to program/setup the
6631         * Up/Down EI & threshold registers, as well as the RP_CONTROL,
6632         * RP_INTERRUPT_LIMITS & RPNSWREQ registers */
6633        reset_rps(dev_priv, gen6_set_rps);
6634
6635        intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
6636}
6637
6638static void gen9_enable_rc6(struct drm_i915_private *dev_priv)
6639{
6640        struct intel_engine_cs *engine;
6641        enum intel_engine_id id;
6642        u32 rc6_mode;
6643
6644        /* 1a: Software RC state - RC0 */
6645        I915_WRITE(GEN6_RC_STATE, 0);
6646
6647        /* 1b: Get forcewake during program sequence. Although the driver
6648         * hasn't enabled a state yet where we need forcewake, BIOS may have. */
6649        intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
6650
6651        /* 2a: Disable RC states. */
6652        I915_WRITE(GEN6_RC_CONTROL, 0);
6653
6654        /* 2b: Program RC6 thresholds. */
6655        if (INTEL_GEN(dev_priv) >= 10) {
6656                I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
6657                I915_WRITE(GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
6658        } else if (IS_SKYLAKE(dev_priv)) {
6659                /*
6660                 * WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only
6661                 * when CPG is enabled
6662                 */
6663                I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
6664        } else {
6665                I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
6666        }
6667
6668        I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 125000 * 1280ns */
6669        I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
6670        for_each_engine(engine, dev_priv, id)
6671                I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
6672
6673        if (HAS_GUC(dev_priv))
6674                I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA);
6675
6676        I915_WRITE(GEN6_RC_SLEEP, 0);
6677
6678        /*
6679         * 2c: Program Coarse Power Gating Policies.
6680         *
6681         * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
6682         * use instead is a more conservative estimate for the maximum time
6683         * it takes us to service a CS interrupt and submit a new ELSP - that
6684         * is the time which the GPU is idle waiting for the CPU to select the
6685         * next request to execute. If the idle hysteresis is less than that
6686         * interrupt service latency, the hardware will automatically gate
6687         * the power well and we will then incur the wake up cost on top of
6688         * the service latency. A similar guide from intel_pstate is that we
6689         * do not want the enable hysteresis to be less than the wakeup latency.
6690         *
6691         * igt/gem_exec_nop/sequential provides a rough estimate for the
6692         * service latency, and puts it around 10us for Broadwell (and other
6693         * big core) and around 40us for Broxton (and other low power cores).
6694         * [Note that for legacy ringbuffer submission, this is less than 1us!]
6695         * However, the wakeup latency on Broxton is closer to 100us. To be
6696         * conservative, we have to factor in a context switch on top (due
6697         * to ksoftirqd).
6698         */
6699        I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
6700        I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);
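        /*
         * Editor's arithmetic: 250 ticks * 1280 ns = 320 us of idle
         * hysteresis, which stays above the ~100 us Broxton wakeup latency
         * plus the context switch overhead budgeted for in the comment above.
         */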
6701
6702        /* 3a: Enable RC6 */
6703        I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
6704
6705        /* WaRsUseTimeoutMode:cnl (pre-prod) */
6706        if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_C0))
6707                rc6_mode = GEN7_RC_CTL_TO_MODE;
6708        else
6709                rc6_mode = GEN6_RC_CTL_EI_MODE(1);
6710
6711        I915_WRITE(GEN6_RC_CONTROL,
6712                   GEN6_RC_CTL_HW_ENABLE |
6713                   GEN6_RC_CTL_RC6_ENABLE |
6714                   rc6_mode);
6715
6716        /*
6717         * 3b: Enable Coarse Power Gating only when RC6 is enabled.
6718         * WaRsDisableCoarsePowerGating:skl,cnl - Render/Media PG need to be disabled with RC6.
6719         */
6720        if (NEEDS_WaRsDisableCoarsePowerGating(dev_priv))
6721                I915_WRITE(GEN9_PG_ENABLE, 0);
6722        else
6723                I915_WRITE(GEN9_PG_ENABLE,
6724                           GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE);
6725
6726        intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
6727}
6728
6729static void gen8_enable_rc6(struct drm_i915_private *dev_priv)
6730{
6731        struct intel_engine_cs *engine;
6732        enum intel_engine_id id;
6733
6734        /* 1a: Software RC state - RC0 */
6735        I915_WRITE(GEN6_RC_STATE, 0);
6736
6737        /* 1b: Get forcewake during program sequence. Although the driver
6738         * hasn't enabled a state yet where we need forcewake, BIOS may have. */
6739        intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
6740
6741        /* 2a: Disable RC states. */
6742        I915_WRITE(GEN6_RC_CONTROL, 0);
6743
6744        /* 2b: Program RC6 thresholds. */
6745        I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
6746        I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 125000 * 1280ns */
6747        I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
6748        for_each_engine(engine, dev_priv, id)
6749                I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
6750        I915_WRITE(GEN6_RC_SLEEP, 0);
6751        I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
6752
6753        /* 3: Enable RC6 */
6754
6755        I915_WRITE(GEN6_RC_CONTROL,
6756                   GEN6_RC_CTL_HW_ENABLE |
6757                   GEN7_RC_CTL_TO_MODE |
6758                   GEN6_RC_CTL_RC6_ENABLE);
6759
6760        intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
6761}
6762
6763static void gen8_enable_rps(struct drm_i915_private *dev_priv)
6764{
6765        struct intel_rps *rps = &dev_priv->gt_pm.rps;
6766
6767        intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
6768
6769        /* 1 Program defaults and thresholds for RPS*/
6770        I915_WRITE(GEN6_RPNSWREQ,
6771                   HSW_FREQUENCY(rps->rp1_freq));
6772        I915_WRITE(GEN6_RC_VIDEO_FREQ,
6773                   HSW_FREQUENCY(rps->rp1_freq));
6774        /* NB: Docs say 1s, and 1000000 - which aren't equivalent */
6775        I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */
6776
6777        /* Docs recommend 900MHz, and 300 MHz respectively */
6778        I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
6779                   rps->max_freq_softlimit << 24 |
6780                   rps->min_freq_softlimit << 16);
6781
6782        I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */
6783        I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70% */
6784        I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */
6785        I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */
6786
6787        I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
6788
6789        /* 2: Enable RPS */
6790        I915_WRITE(GEN6_RP_CONTROL,
6791                   GEN6_RP_MEDIA_TURBO |
6792                   GEN6_RP_MEDIA_HW_NORMAL_MODE |
6793                   GEN6_RP_MEDIA_IS_GFX |
6794                   GEN6_RP_ENABLE |
6795                   GEN6_RP_UP_BUSY_AVG |
6796                   GEN6_RP_DOWN_IDLE_AVG);
6797
6798        reset_rps(dev_priv, gen6_set_rps);
6799
6800        intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
6801}
6802
6803static void gen6_enable_rc6(struct drm_i915_private *dev_priv)
6804{
6805        struct intel_engine_cs *engine;
6806        enum intel_engine_id id;
6807        u32 rc6vids, rc6_mask;
6808        u32 gtfifodbg;
6809        int ret;
6810
6811        I915_WRITE(GEN6_RC_STATE, 0);
6812
6813        /* Clear the DBG now so we don't get confused by earlier errors */
6814        gtfifodbg = I915_READ(GTFIFODBG);
6815        if (gtfifodbg) {
6816                DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg);
6817                I915_WRITE(GTFIFODBG, gtfifodbg);
6818        }
6819
6820        intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
6821
6822        /* disable the counters and set deterministic thresholds */
6823        I915_WRITE(GEN6_RC_CONTROL, 0);
6824
6825        I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
6826        I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
6827        I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
6828        I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
6829        I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
6830
6831        for_each_engine(engine, dev_priv, id)
6832                I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
6833
6834        I915_WRITE(GEN6_RC_SLEEP, 0);
6835        I915_WRITE(GEN6_RC1e_THRESHOLD, 1000);
6836        if (IS_IVYBRIDGE(dev_priv))
6837                I915_WRITE(GEN6_RC6_THRESHOLD, 125000);
6838        else
6839                I915_WRITE(GEN6_RC6_THRESHOLD, 50000);
6840        I915_WRITE(GEN6_RC6p_THRESHOLD, 150000);
6841        I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */
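        /*
         * Thresholds count in 1.28 us ticks (cf. the gen8 path): 50000
         * ~= 64 ms (125000 ~= 160 ms on IVB) to promote to RC6, and
         * 150000 ~= 192 ms for RC6p.
         */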
6842
6843        /* We don't use those on Haswell */
6844        rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
6845        if (HAS_RC6p(dev_priv))
6846                rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
6847        if (HAS_RC6pp(dev_priv))
6848                rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
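        /*
         * GEN6_RC_CTL_EI_MODE(1): promotion is driven by the evaluation
         * interval / busyness counters programmed above, rather than the
         * plain timeout mode used on gen7+.
         */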
6849        I915_WRITE(GEN6_RC_CONTROL,
6850                   rc6_mask |
6851                   GEN6_RC_CTL_EI_MODE(1) |
6852                   GEN6_RC_CTL_HW_ENABLE);
6853
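        /*
         * The low byte of RC6VIDS holds the minimum RC6 voltage (decoded
         * to mV by GEN6_DECODE_RC6_VID()); BIOSes that left it below
         * 450 mV get corrected below.
         */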
6854        rc6vids = 0;
6855        ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
6856        if (IS_GEN6(dev_priv) && ret) {
6857                DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
6858        } else if (IS_GEN6(dev_priv) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
6859                DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
6860                          GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
6861                rc6vids &= 0xffff00;
6862                rc6vids |= GEN6_ENCODE_RC6_VID(450);
6863                ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
6864                if (ret)
6865                        DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
6866        }
6867
6868        intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
6869}
6870
6871static void gen6_enable_rps(struct drm_i915_private *dev_priv)
6872{
6873        /* Here begins a magic sequence of register writes to enable
6874         * auto-downclocking.
6875         *
6876         * Perhaps there might be some value in exposing these to
6877         * userspace...
6878         */
6879        intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
6880
6881        /* Power down if completely idle for over 50ms */
6882        I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000);
6883        I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
6884
6885        reset_rps(dev_priv, gen6_set_rps);
6886
6887        intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
6888}
6889
6890static void gen6_update_ring_freq(struct drm_i915_private *dev_priv)
6891{
6892        struct intel_rps *rps = &dev_priv->gt_pm.rps;
6893        int min_freq = 15;
6894        unsigned int gpu_freq;
6895        unsigned int max_ia_freq, min_ring_freq;
6896        unsigned int max_gpu_freq, min_gpu_freq;
6897        int scaling_factor = 180;
6898        struct cpufreq_policy *policy;
6899
6900        WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
6901
6902        policy = cpufreq_cpu_get(0);
6903        if (policy) {
6904                max_ia_freq = policy->cpuinfo.max_freq;
6905                cpufreq_cpu_put(policy);
6906        } else {
6907                /*
6908                 * Default to the measured freq if none is found; the PCU
6909                 * will ensure we don't go over.
6910                 */
6911                max_ia_freq = tsc_khz;
6912        }
6913
6914        /* Convert from kHz to MHz */
6915        max_ia_freq /= 1000;
6916
6917        min_ring_freq = I915_READ(DCLK) & 0xf;
6918        /* convert DDR frequency from units of 266.6MHz to bandwidth */
6919        min_ring_freq = mult_frac(min_ring_freq, 8, 3);
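        /*
         * mult_frac(x, 8, 3) rescales the 266.6 MHz units to 100 MHz
         * units (266.6 MHz = 8/3 * 100 MHz) for use as a ring ratio
         * floor below.
         */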
6920
6921        if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
6922                /* Convert GT frequency to 50 MHz units */
6923                min_gpu_freq = rps->min_freq / GEN9_FREQ_SCALER;
6924                max_gpu_freq = rps->max_freq / GEN9_FREQ_SCALER;
6925        } else {
6926                min_gpu_freq = rps->min_freq;
6927                max_gpu_freq = rps->max_freq;
6928        }
6929
6930        /*
6931         * For each potential GPU frequency, load a ring frequency we'd like
6932         * to use for memory access.  We do this by specifying the IA frequency
6933         * the PCU should use as a reference to determine the ring frequency.
6934         */
6935        for (gpu_freq = max_gpu_freq; gpu_freq >= min_gpu_freq; gpu_freq--) {
6936                int diff = max_gpu_freq - gpu_freq;
6937                unsigned int ia_freq = 0, ring_freq = 0;
6938
6939                if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
6940                        /*
6941                         * ring_freq = 2 * GT. ring_freq is in 100MHz units
6942                         * No floor required for ring frequency on SKL.
6943                         */
6944                        ring_freq = gpu_freq;
6945                } else if (INTEL_GEN(dev_priv) >= 8) {
6946                        /* max(2 * GT, DDR). NB: GT is 50MHz units */
6947                        ring_freq = max(min_ring_freq, gpu_freq);
6948                } else if (IS_HASWELL(dev_priv)) {
6949                        ring_freq = mult_frac(gpu_freq, 5, 4);
6950                        ring_freq = max(min_ring_freq, ring_freq);
6951                        /* leave ia_freq as the default, chosen by cpufreq */
6952                } else {
6953                        /* On older processors, there is no separate ring
6954                         * clock domain, so in order to boost the bandwidth
6955                         * of the ring, we need to upclock the CPU (ia_freq).
6956                         *
6957                         * For GPU frequencies less than 750MHz,
6958                         * just use the lowest ring freq.
6959                         */
6960                        if (gpu_freq < min_freq)
6961                                ia_freq = 800;
6962                        else
6963                                ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
6964                        ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
6965                }
6966
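                /*
                 * Each write loads one row of the PCU's min-freq table:
                 * the IA ratio (100 MHz units after the rounding above)
                 * and the ring ratio, with the GPU frequency bin the row
                 * applies to in the low byte.
                 */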
6967                sandybridge_pcode_write(dev_priv,
6968                                        GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
6969                                        ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT |
6970                                        ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT |
6971                                        gpu_freq);
6972        }
6973}
6974
6975static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv)
6976{
6977        u32 val, rp0;
6978
6979        val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
6980
6981        switch (INTEL_INFO(dev_priv)->sseu.eu_total) {
6982        case 8:
6983                /* (2 * 4) config */
6984                rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT);
6985                break;
6986        case 12:
6987                /* (2 * 6) config */
6988                rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT);
6989                break;
6990        case 16:
6991                /* (2 * 8) config */
6992        default:
6993                /* Setting (2 * 8) Min RP0 for any other combination */
6994                rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT);
6995                break;
6996        }
6997
6998        rp0 = (rp0 & FB_GFX_FREQ_FUSE_MASK);
6999
7000        return rp0;
7001}
7002
7003static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv)
7004{
7005        u32 val, rpe;
7006
7007        val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG);
7008        rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
7009
7010        return rpe;
7011}
7012
7013static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv)
7014{
7015        u32 val, rp1;
7016
7017        val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
7018        rp1 = (val & FB_GFX_FREQ_FUSE_MASK);
7019
7020        return rp1;
7021}
7022
7023static u32 cherryview_rps_min_freq(struct drm_i915_private *dev_priv)
7024{
7025        u32 val, rpn;
7026
7027        val = vlv_punit_read(dev_priv, FB_GFX_FMIN_AT_VMIN_FUSE);
7028        rpn = ((val >> FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT) &
7029                       FB_GFX_FREQ_FUSE_MASK);
7030
7031        return rpn;
7032}
7033
7034static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv)
7035{
7036        u32 val, rp1;
7037
7038        val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
7039
7040        rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
7041
7042        return rp1;
7043}
7044
7045static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv)
7046{
7047        u32 val, rp0;
7048
7049        val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
7050
7051        rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
7052        /* Clamp to max */
7053        rp0 = min_t(u32, rp0, 0xea);
7054
7055        return rp0;
7056}
7057
7058static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv)
7059{
7060        u32 val, rpe;
7061
7062        val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
7063        rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
7064        val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
7065        rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
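        /*
         * The RPe value is fused across two registers: a 5-bit low field
         * in FMAX_FUSE_LO with the upper bits in FMAX_FUSE_HI, hence the
         * << 5 when merging.
         */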
7066
7067        return rpe;
7068}
7069
7070static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv)
7071{
7072        u32 val;
7073
7074        val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff;
7075        /*
7076         * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value
7077         * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on
7078         * a BYT-M B0 the above register contains 0xbf. Moreover when setting
7079         * a frequency Punit will not allow values below 0xc0. Clamp it to 0xc0
7080         * to make sure it matches what Punit accepts.
7081         */
7082        return max_t(u32, val, 0xc0);
7083}
7084
7085/* Check that the pctx buffer wasn't moved under us. */
7086static void valleyview_check_pctx(struct drm_i915_private *dev_priv)
7087{
7088        unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
7089
7090        WARN_ON(pctx_addr != dev_priv->dsm.start +
7091                             dev_priv->vlv_pctx->stolen->start);
7092}
7093
7095/* Check that the pcbr address is not empty. */
7096static void cherryview_check_pctx(struct drm_i915_private *dev_priv)
7097{
7098        unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
7099
7100        WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0);
7101}
7102
7103static void cherryview_setup_pctx(struct drm_i915_private *dev_priv)
7104{
7105        resource_size_t pctx_paddr, paddr;
7106        resource_size_t pctx_size = 32*1024;
7107        u32 pcbr;
7108
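        /*
         * If the BIOS left PCBR unset, carve the 32 KiB power context out
         * of the top of stolen memory (DSM) ourselves, 4 KiB aligned, as
         * the BIOS would normally have done.
         */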
7109        pcbr = I915_READ(VLV_PCBR);
7110        if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
7111                DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
7112                paddr = dev_priv->dsm.end + 1 - pctx_size;
7113                GEM_BUG_ON(paddr > U32_MAX);
7114
7115                pctx_paddr = (paddr & (~4095));
7116                I915_WRITE(VLV_PCBR, pctx_paddr);
7117        }
7118
7119        DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
7120}
7121
7122static void valleyview_setup_pctx(struct drm_i915_private *dev_priv)
7123{
7124        struct drm_i915_gem_object *pctx;
7125        resource_size_t pctx_paddr;
7126        resource_size_t pctx_size = 24*1024;
7127        u32 pcbr;
7128
7129        pcbr = I915_READ(VLV_PCBR);
7130        if (pcbr) {
7131                /* BIOS set it up already, grab the pre-alloc'd space */
7132                resource_size_t pcbr_offset;
7133
7134                pcbr_offset = (pcbr & (~4095)) - dev_priv->dsm.start;
7135                pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv,
7136                                                                      pcbr_offset,
7137                                                                      I915_GTT_OFFSET_NONE,
7138                                                                      pctx_size);
7139                goto out;
7140        }
7141
7142        DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
7143
7144        /*
7145         * From the Gunit register HAS:
7146         * The Gfx driver is expected to program this register and ensure
7147         * proper allocation within Gfx stolen memory.  For example, this
7148         * register should be programmed such that the PCBR range does not
7149         * overlap with other ranges, such as the frame buffer, protected
7150         * memory, or any other relevant ranges.
7151         */
7152        pctx = i915_gem_object_create_stolen(dev_priv, pctx_size);
7153        if (!pctx) {
7154                DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
7155                goto out;
7156        }
7157
7158        GEM_BUG_ON(range_overflows_t(u64,
7159                                     dev_priv->dsm.start,
7160                                     pctx->stolen->start,
7161                                     U32_MAX));
7162        pctx_paddr = dev_priv->dsm.start + pctx->stolen->start;
7163        I915_WRITE(VLV_PCBR, pctx_paddr);
7164
7165out:
7166        DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
7167        dev_priv->vlv_pctx = pctx;
7168}
7169
7170static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv)
7171{
7172        if (WARN_ON(!dev_priv->vlv_pctx))
7173                return;
7174
7175        i915_gem_object_put(dev_priv->vlv_pctx);
7176        dev_priv->vlv_pctx = NULL;
7177}
7178
7179static void vlv_init_gpll_ref_freq(struct drm_i915_private *dev_priv)
7180{
7181        dev_priv->gt_pm.rps.gpll_ref_freq =
7182                vlv_get_cck_clock(dev_priv, "GPLL ref",
7183                                  CCK_GPLL_CLOCK_CONTROL,
7184                                  dev_priv->czclk_freq);
7185
7186        DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n",
7187                         dev_priv->gt_pm.rps.gpll_ref_freq);
7188}
7189
7190static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv)
7191{
7192        struct intel_rps *rps = &dev_priv->gt_pm.rps;
7193        u32 val;
7194
7195        valleyview_setup_pctx(dev_priv);
7196
7197        vlv_init_gpll_ref_freq(dev_priv);
7198
7199        val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
7200        switch ((val >> 6) & 3) {
7201        case 0:
7202        case 1:
7203                dev_priv->mem_freq = 800;
7204                break;
7205        case 2:
7206                dev_priv->mem_freq = 1066;
7207                break;
7208        case 3:
7209                dev_priv->mem_freq = 1333;
7210                break;
7211        }
7212        DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
7213
7214        rps->max_freq = valleyview_rps_max_freq(dev_priv);
7215        rps->rp0_freq = rps->max_freq;
7216        DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
7217                         intel_gpu_freq(dev_priv, rps->max_freq),
7218                         rps->max_freq);
7219
7220        rps->efficient_freq = valleyview_rps_rpe_freq(dev_priv);
7221        DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
7222                         intel_gpu_freq(dev_priv, rps->efficient_freq),
7223                         rps->efficient_freq);
7224
7225        rps->rp1_freq = valleyview_rps_guar_freq(dev_priv);
7226        DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
7227                         intel_gpu_freq(dev_priv, rps->rp1_freq),
7228                         rps->rp1_freq);
7229
7230        rps->min_freq = valleyview_rps_min_freq(dev_priv);
7231        DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
7232                         intel_gpu_freq(dev_priv, rps->min_freq),
7233                         rps->min_freq);
7234}
7235
7236static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv)
7237{
7238        struct intel_rps *rps = &dev_priv->gt_pm.rps;
7239        u32 val;
7240
7241        cherryview_setup_pctx(dev_priv);
7242
7243        vlv_init_gpll_ref_freq(dev_priv);
7244
7245        mutex_lock(&dev_priv->sb_lock);
7246        val = vlv_cck_read(dev_priv, CCK_FUSE_REG);
7247        mutex_unlock(&dev_priv->sb_lock);
7248
7249        switch ((val >> 2) & 0x7) {
7250        case 3:
7251                dev_priv->mem_freq = 2000;
7252                break;
7253        default:
7254                dev_priv->mem_freq = 1600;
7255                break;
7256        }
7257        DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
7258
7259        rps->max_freq = cherryview_rps_max_freq(dev_priv);
7260        rps->rp0_freq = rps->max_freq;
7261        DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
7262                         intel_gpu_freq(dev_priv, rps->max_freq),
7263                         rps->max_freq);
7264
7265        rps->efficient_freq = cherryview_rps_rpe_freq(dev_priv);
7266        DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
7267                         intel_gpu_freq(dev_priv, rps->efficient_freq),
7268                         rps->efficient_freq);
7269
7270        rps->rp1_freq = cherryview_rps_guar_freq(dev_priv);
7271        DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
7272                         intel_gpu_freq(dev_priv, rps->rp1_freq),
7273                         rps->rp1_freq);
7274
7275        rps->min_freq = cherryview_rps_min_freq(dev_priv);
7276        DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
7277                         intel_gpu_freq(dev_priv, rps->min_freq),
7278                         rps->min_freq);
7279
7280        WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq |
7281                   rps->min_freq) & 1,
7282                  "Odd GPU freq values\n");
7283}
7284
7285static void valleyview_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
7286{
7287        valleyview_cleanup_pctx(dev_priv);
7288}
7289
7290static void cherryview_enable_rc6(struct drm_i915_private *dev_priv)
7291{
7292        struct intel_engine_cs *engine;
7293        enum intel_engine_id id;
7294        u32 gtfifodbg, rc6_mode, pcbr;
7295
7296        gtfifodbg = I915_READ(GTFIFODBG) & ~(GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV |
7297                                             GT_FIFO_FREE_ENTRIES_CHV);
7298        if (gtfifodbg) {
7299                DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
7300                                 gtfifodbg);
7301                I915_WRITE(GTFIFODBG, gtfifodbg);
7302        }
7303
7304        cherryview_check_pctx(dev_priv);
7305
7306        /* 1a & 1b: Get forcewake during program sequence. Although the driver
7307         * hasn't enabled a state yet where we need forcewake, BIOS may have. */
7308        intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7309
7310        /*  Disable RC states. */
7311        I915_WRITE(GEN6_RC_CONTROL, 0);
7312
7313        /* 2a: Program RC6 thresholds. */
7314        I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
7315        I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 125000 * 1280ns */
7316        I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
7317
7318        for_each_engine(engine, dev_priv, id)
7319                I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
7320        I915_WRITE(GEN6_RC_SLEEP, 0);
7321
7322        /* TO threshold set to 500 us (0x186 * 1.28 us) */
7323        I915_WRITE(GEN6_RC6_THRESHOLD, 0x186);
7324
7325        /* Allows RC6 residency counter to work */
7326        I915_WRITE(VLV_COUNTER_CONTROL,
7327                   _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
7328                                      VLV_MEDIA_RC6_COUNT_EN |
7329                                      VLV_RENDER_RC6_COUNT_EN));
7330
7331        /* For now we assume BIOS is allocating and populating the PCBR */
7332        pcbr = I915_READ(VLV_PCBR);
7333
7334        /* 3: Enable RC6 */
7335        rc6_mode = 0;
7336        if (pcbr >> VLV_PCBR_ADDR_SHIFT)
7337                rc6_mode = GEN7_RC_CTL_TO_MODE;
7338        I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
7339
7340        intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7341}
7342
7343static void cherryview_enable_rps(struct drm_i915_private *dev_priv)
7344{
7345        u32 val;
7346
7347        intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7348
7349        /* 1: Program defaults and thresholds for RPS */
7350        I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
7351        I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
7352        I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
7353        I915_WRITE(GEN6_RP_UP_EI, 66000);
7354        I915_WRITE(GEN6_RP_DOWN_EI, 350000);
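        /*
         * Same 1.28 us ticks as the gen8 path: 59400 ~= 76 ms busyness
         * per 84.48 ms up EI (~90%), 245000 ~= 313.6 ms per 448 ms down
         * EI (70%).
         */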
7355
7356        I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
7357
7358        /* 2: Enable RPS */
7359        I915_WRITE(GEN6_RP_CONTROL,
7360                   GEN6_RP_MEDIA_HW_NORMAL_MODE |
7361                   GEN6_RP_MEDIA_IS_GFX |
7362                   GEN6_RP_ENABLE |
7363                   GEN6_RP_UP_BUSY_AVG |
7364                   GEN6_RP_DOWN_IDLE_AVG);
7365
7366        /* Setting Fixed Bias */
7367        val = VLV_OVERRIDE_EN |
7368                  VLV_SOC_TDP_EN |
7369                  CHV_BIAS_CPU_50_SOC_50;
7370        vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
7371
7372        val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
7373
7374        /* RPS code assumes GPLL is used */
7375        WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
7376
7377        DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
7378        DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
7379
7380        reset_rps(dev_priv, valleyview_set_rps);
7381
7382        intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7383}
7384
7385static void valleyview_enable_rc6(struct drm_i915_private *dev_priv)
7386{
7387        struct intel_engine_cs *engine;
7388        enum intel_engine_id id;
7389        u32 gtfifodbg;
7390
7391        valleyview_check_pctx(dev_priv);
7392
7393        gtfifodbg = I915_READ(GTFIFODBG);
7394        if (gtfifodbg) {
7395                DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
7396                                 gtfifodbg);
7397                I915_WRITE(GTFIFODBG, gtfifodbg);
7398        }
7399
7400        intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7401
7402        /*  Disable RC states. */
7403        I915_WRITE(GEN6_RC_CONTROL, 0);
7404
7405        I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
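        /* 0x00280000 == 40 << 16, i.e. the same wake rate limit as the CHV path. */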
7406        I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
7407        I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
7408
7409        for_each_engine(engine, dev_priv, id)
7410                I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
7411
7412        I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
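        /*
         * 0x557 ticks * 1.28 us ~= 1.75 ms promotion timeout, by analogy
         * with the CHV comment above.
         */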
7413
7414        /* Allows RC6 residency counter to work */
7415        I915_WRITE(VLV_COUNTER_CONTROL,
7416                   _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
7417                                      VLV_MEDIA_RC0_COUNT_EN |
7418                                      VLV_RENDER_RC0_COUNT_EN |
7419                                      VLV_MEDIA_RC6_COUNT_EN |
7420                                      VLV_RENDER_RC6_COUNT_EN));
7421
7422        I915_WRITE(GEN6_RC_CONTROL,
7423                   GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL);
7424
7425        intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7426}
7427
7428static void valleyview_enable_rps(struct drm_i915_private *dev_priv)
7429{
7430        u32 val;
7431
7432        intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7433
7434        I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
7435        I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
7436        I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
7437        I915_WRITE(GEN6_RP_UP_EI, 66000);
7438        I915_WRITE(GEN6_RP_DOWN_EI, 350000);
7439
7440        I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
7441
7442        I915_WRITE(GEN6_RP_CONTROL,
7443                   GEN6_RP_MEDIA_TURBO |
7444                   GEN6_RP_MEDIA_HW_NORMAL_MODE |
7445                   GEN6_RP_MEDIA_IS_GFX |
7446                   GEN6_RP_ENABLE |
7447                   GEN6_RP_UP_BUSY_AVG |
7448                   GEN6_RP_DOWN_IDLE_CONT);
7449
7450        /* Setting Fixed Bias */
7451        val = VLV_OVERRIDE_EN |
7452                  VLV_SOC_TDP_EN |
7453                  VLV_BIAS_CPU_125_SOC_875;
7454        vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
7455
7456        val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
7457
7458        /* RPS code assumes GPLL is used */
7459        WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
7460
7461        DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
7462        DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
7463
7464        reset_rps(dev_priv, valleyview_set_rps);
7465
7466        intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7467}
7468
7469static unsigned long intel_pxfreq(u32 vidfreq)
7470{
7471        unsigned long freq;
7472        int div = (vidfreq & 0x3f0000) >> 16;
7473        int post = (vidfreq & 0x3000) >> 12;
7474        int pre = (vidfreq & 0x7);
7475
7476        if (!pre)
7477                return 0;
7478
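        /*
         * The fuse encodes the P-state frequency as a divider off a
         * 133.333 MHz reference: freq (kHz) = div * 133333 / (2^post * pre).
         */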
7479        freq = ((div * 133333) / ((1<<post) * pre));
7480
7481        return freq;
7482}
7483
7484static const struct cparams {
7485        u16 i;
7486        u16 t;
7487        u16 m;
7488        u16 c;
7489} cparams[] = {
7490        { 1, 1333, 301, 28664 },
7491        { 1, 1066, 294, 24460 },
7492        { 1, 800, 294, 25192 },
7493        { 0, 1333, 276, 27605 },
7494        { 0, 1066, 276, 27605 },
7495        { 0, 800, 231, 23784 },
7496};
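/*
 * The rows above are empirically derived (m, c) slope/intercept pairs,
 * selected by the memory configuration values (c_m, r_t) captured at init;
 * __i915_chipset_val() below applies them as
 * power = (m * count_rate + c) / 10.
 */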
7497
7498static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
7499{
7500        u64 total_count, diff, ret;
7501        u32 count1, count2, count3, m = 0, c = 0;
7502        unsigned long now = jiffies_to_msecs(jiffies), diff1;
7503        int i;
7504
7505        lockdep_assert_held(&mchdev_lock);
7506
7507        diff1 = now - dev_priv->ips.last_time1;
7508
7509        /* Prevent division-by-zero if we are asking too fast.
7510         * Also, we don't get interesting results if we are polling
7511         * faster than once in 10ms, so just return the saved value
7512         * in such cases.
7513         */
7514        if (diff1 <= 10)
7515                return dev_priv->ips.chipset_power;
7516
7517        count1 = I915_READ(DMIEC);
7518        count2 = I915_READ(DDREC);
7519        count3 = I915_READ(CSIEC);
7520
7521        total_count = count1 + count2 + count3;
7522
7523        /* FIXME: handle per-counter overflow */
7524        if (total_count < dev_priv->ips.last_count1) {
7525                diff = ~0UL - dev_priv->ips.last_count1;
7526                diff += total_count;
7527        } else {
7528                diff = total_count - dev_priv->ips.last_count1;
7529        }
7530
7531        for (i = 0; i < ARRAY_SIZE(cparams); i++) {
7532                if (cparams[i].i == dev_priv->ips.c_m &&
7533                    cparams[i].t == dev_priv->ips.r_t) {
7534                        m = cparams[i].m;
7535                        c = cparams[i].c;
7536                        break;
7537                }
7538        }
7539
7540        diff = div_u64(diff, diff1);
7541        ret = ((m * diff) + c);
7542        ret = div_u64(ret, 10);
7543
7544        dev_priv->ips.last_count1 = total_count;
7545        dev_priv->ips.last_time1 = now;
7546
7547        dev_priv->ips.chipset_power = ret;
7548
7549        return ret;
7550}
7551
7552unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
7553{
7554        unsigned long val;
7555
7556        if (!IS_GEN5(dev_priv))
7557                return 0;
7558
7559        spin_lock_irq(&mchdev_lock);
7560
7561        val = __i915_chipset_val(dev_priv);
7562
7563        spin_unlock_irq(&mchdev_lock);
7564
7565        return val;
7566}
7567
7568unsigned long i915_mch_val(struct drm_i915_private *dev_priv)
7569{
7570        unsigned long m, x, b;
7571        u32 tsfs;
7572
7573        tsfs = I915_READ(TSFS);
7574
7575        m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT);
7576        x = I915_READ8(TR1);
7577
7578        b = tsfs & TSFS_INTR_MASK;
7579
7580        return ((m * x) / 127) - b;
7581}
7582
7583static int _pxvid_to_vd(u8 pxvid)
7584{
7585        if (pxvid == 0)
7586                return 0;
7587
7588        if (pxvid >= 8 && pxvid < 31)
7589                pxvid = 31;
7590
7591        return (pxvid + 2) * 125;
7592}
7593
7594static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid)
7595{
7596        const int vd = _pxvid_to_vd(pxvid);
7597        const int vm = vd - 1125;
7598
7599        if (INTEL_INFO(dev_priv)->is_mobile)
7600                return vm > 0 ? vm : 0;
7601
7602        return vd;
7603}
7604
7605static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
7606{
7607        u64 now, diff, diffms;
7608        u32 count;
7609
7610        lockdep_assert_held(&mchdev_lock);
7611
7612        now = ktime_get_raw_ns();
7613        diffms = now - dev_priv->ips.last_time2;
7614        do_div(diffms, NSEC_PER_MSEC);
7615
7616        /* Don't divide by 0 */
7617        if (!diffms)
7618                return;
7619
7620        count = I915_READ(GFXEC);
7621
7622        if (count < dev_priv->ips.last_count2) {
7623                diff = ~0UL - dev_priv->ips.last_count2;
7624                diff += count;
7625        } else {
7626                diff = count - dev_priv->ips.last_count2;
7627        }
7628
7629        dev_priv->ips.last_count2 = count;
7630        dev_priv->ips.last_time2 = now;
7631
7632        /* More magic constants... */
7633        diff = diff * 1181;
7634        diff = div_u64(diff, diffms * 10);
7635        dev_priv->ips.gfx_power = diff;
7636}
7637
7638void i915_update_gfx_val(struct drm_i915_private *dev_priv)
7639{
7640        if (!IS_GEN5(dev_priv))
7641                return;
7642
7643        spin_lock_irq(&mchdev_lock);
7644
7645        __i915_update_gfx_val(dev_priv);
7646
7647        spin_unlock_irq(&mchdev_lock);
7648}
7649
7650static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
7651{
7652        unsigned long t, corr, state1, corr2, state2;
7653        u32 pxvid, ext_v;
7654
7655        lockdep_assert_held(&mchdev_lock);
7656
7657        pxvid = I915_READ(PXVFREQ(dev_priv->gt_pm.rps.cur_freq));
7658        pxvid = (pxvid >> 24) & 0x7f;
7659        ext_v = pvid_to_extvid(dev_priv, pxvid);
7660
7661        state1 = ext_v;
7662
7663        t = i915_mch_val(dev_priv);
7664
7665        /* Revel in the empirically derived constants */
7666
7667        /* Correction factor in 1/100000 units */
7668        if (t > 80)
7669                corr = ((t * 2349) + 135940);
7670        else if (t >= 50)
7671                corr = ((t * 964) + 29317);
7672        else /* < 50 */
7673                corr = ((t * 301) + 1004);
7674
7675        corr = corr * ((150142 * state1) / 10000 - 78642);
7676        corr /= 100000;
7677        corr2 = (corr * dev_priv->ips.corr);
7678
7679        state2 = (corr2 * state1) / 10000;
7680        state2 /= 100; /* convert to mW */
7681
7682        __i915_update_gfx_val(dev_priv);
7683
7684        return dev_priv->ips.gfx_power + state2;
7685}
7686
7687unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
7688{
7689        unsigned long val;
7690
7691        if (!IS_GEN5(dev_priv))
7692                return 0;
7693
7694        spin_lock_irq(&mchdev_lock);
7695
7696        val = __i915_gfx_val(dev_priv);
7697
7698        spin_unlock_irq(&mchdev_lock);
7699
7700        return val;
7701}
7702
7703/**
7704 * i915_read_mch_val - return value for IPS use
7705 *
7706 * Calculate and return a value for the IPS driver to use when deciding whether
7707 * we have thermal and power headroom to increase CPU or GPU power budget.
7708 */
7709unsigned long i915_read_mch_val(void)
7710{
7711        struct drm_i915_private *dev_priv;
7712        unsigned long chipset_val, graphics_val, ret = 0;
7713
7714        spin_lock_irq(&mchdev_lock);
7715        if (!i915_mch_dev)
7716                goto out_unlock;
7717        dev_priv = i915_mch_dev;
7718
7719        chipset_val = __i915_chipset_val(dev_priv);
7720        graphics_val = __i915_gfx_val(dev_priv);
7721
7722        ret = chipset_val + graphics_val;
7723
7724out_unlock:
7725        spin_unlock_irq(&mchdev_lock);
7726
7727        return ret;
7728}
7729EXPORT_SYMBOL_GPL(i915_read_mch_val);
7730
7731/**
7732 * i915_gpu_raise - raise GPU frequency limit
7733 *
7734 * Raise the limit; IPS indicates we have thermal headroom.
7735 */
7736bool i915_gpu_raise(void)
7737{
7738        struct drm_i915_private *dev_priv;
7739        bool ret = true;
7740
7741        spin_lock_irq(&mchdev_lock);
7742        if (!i915_mch_dev) {
7743                ret = false;
7744                goto out_unlock;
7745        }
7746        dev_priv = i915_mch_dev;
7747
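        /*
         * ILK delay values run opposite to frequency: fmax is the
         * highest-frequency (numerically lowest) state, so raising the
         * limit means decrementing max_delay toward fmax.
         */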
7748        if (dev_priv->ips.max_delay > dev_priv->ips.fmax)
7749                dev_priv->ips.max_delay--;
7750
7751out_unlock:
7752        spin_unlock_irq(&mchdev_lock);
7753
7754        return ret;
7755}
7756EXPORT_SYMBOL_GPL(i915_gpu_raise);
7757
7758/**
7759 * i915_gpu_lower - lower GPU frequency limit
7760 *
7761 * IPS indicates we're close to a thermal limit, so throttle back the GPU
7762 * frequency maximum.
7763 */
7764bool i915_gpu_lower(void)
7765{
7766        struct drm_i915_private *dev_priv;
7767        bool ret = true;
7768
7769        spin_lock_irq(&mchdev_lock);
7770        if (!i915_mch_dev) {
7771                ret = false;
7772                goto out_unlock;
7773        }
7774        dev_priv = i915_mch_dev;
7775
7776        if (dev_priv->ips.max_delay < dev_priv->ips.min_delay)
7777                dev_priv->ips.max_delay++;
7778
7779out_unlock:
7780        spin_unlock_irq(&mchdev_lock);
7781
7782        return ret;
7783}
7784EXPORT_SYMBOL_GPL(i915_gpu_lower);
7785
7786/**
7787 * i915_gpu_busy - indicate GPU busyness to IPS
7788 *
7789 * Tell the IPS driver whether or not the GPU is busy.
7790 */
7791bool i915_gpu_busy(void)
7792{
7793        bool ret = false;
7794
7795        spin_lock_irq(&mchdev_lock);
7796        if (i915_mch_dev)
7797                ret = i915_mch_dev->gt.awake;
7798        spin_unlock_irq(&mchdev_lock);
7799
7800        return ret;
7801}
7802EXPORT_SYMBOL_GPL(i915_gpu_busy);
7803
7804/**
7805 * i915_gpu_turbo_disable - disable graphics turbo
7806 *
7807 * Disable graphics turbo by resetting the max frequency and setting the
7808 * current frequency to the default.
7809 */
7810bool i915_gpu_turbo_disable(void)
7811{
7812        struct drm_i915_private *dev_priv;
7813        bool ret = true;
7814
7815        spin_lock_irq(&mchdev_lock);
7816        if (!i915_mch_dev) {
7817                ret = false;
7818                goto out_unlock;
7819        }
7820        dev_priv = i915_mch_dev;
7821
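        /*
         * fstart is the default state recorded when DRPS was enabled;
         * clamping max_delay to it and forcing the current state back
         * there effectively disables turbo.
         */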
7822        dev_priv->ips.max_delay = dev_priv->ips.fstart;
7823
7824        if (!ironlake_set_drps(dev_priv, dev_priv->ips.fstart))
7825                ret = false;
7826
7827out_unlock:
7828        spin_unlock_irq(&mchdev_lock);
7829
7830        return ret;
7831}
7832EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
7833
7834/**
7835 * ips_ping_for_i915_load - tell intel_ips that the i915 driver is now
7836 * loaded, if IPS got loaded first.
7837 *
7838 * This awkward dance is so that neither module has to depend on the
7839 * other in order for IPS to do the appropriate communication of
7840 * GPU turbo limits to i915.
7841 */
7842static void
7843ips_ping_for_i915_load(void)
7844{
7845        void (*link)(void);
7846
7847        link = symbol_get(ips_link_to_i915_driver);
7848        if (link) {
7849                link();
7850                symbol_put(ips_link_to_i915_driver);
7851        }
7852}
7853
7854void intel_gpu_ips_init(struct drm_i915_private *dev_priv)
7855{
7856        /* We only register the i915 ips part with intel-ips once everything is
7857         * set up, to avoid intel-ips sneaking in and reading bogus values. */
7858        spin_lock_irq(&mchdev_lock);
7859        i915_mch_dev = dev_priv;
7860        spin_unlock_irq(&mchdev_lock);
7861
7862        ips_ping_for_i915_load();
7863}
7864
7865void intel_gpu_ips_teardown(void)
7866{
7867        spin_lock_irq(&mchdev_lock);
7868        i915_mch_dev = NULL;
7869        spin_unlock_irq(&mchdev_lock);
7870}
7871
7872static void intel_init_emon(struct drm_i915_private *dev_priv)
7873{
7874        u32 lcfuse;
7875        u8 pxw[16];
7876        int i;
7877
7878        /* Disable the energy counters while we program them */
7879        I915_WRITE(ECR, 0);
7880        POSTING_READ(ECR);
7881
7882        /* Program energy weights for various events */
7883        I915_WRITE(SDEW, 0x15040d00);
7884        I915_WRITE(CSIEW0, 0x007f0000);
7885        I915_WRITE(CSIEW1, 0x1e220004);
7886        I915_WRITE(CSIEW2, 0x04000004);
7887
7888        for (i = 0; i < 5; i++)
7889                I915_WRITE(PEW(i), 0);
7890        for (i = 0; i < 3; i++)
7891                I915_WRITE(DEW(i), 0);
7892
7893        /* Program P-state weights to account for frequency power adjustment */
7894        for (i = 0; i < 16; i++) {
7895                u32 pxvidfreq = I915_READ(PXVFREQ(i));
7896                unsigned long freq = intel_pxfreq(pxvidfreq);
7897                unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >>
7898                        PXVFREQ_PX_SHIFT;
7899                unsigned long val;
7900
7901                val = vid * vid;
7902                val *= (freq / 1000);
7903                val *= 255;
7904                val /= (127*127*900);
7905                if (val > 0xff)
7906                        DRM_ERROR("bad pxval: %ld\n", val);
7907                pxw[i] = val;
7908        }
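        /*
         * i.e. weight = vid^2 * freq(MHz) * 255 / (127 * 127 * 900),
         * which scales to a full 0xff at the maximum VID (127) and a
         * 900 MHz reference; anything above 0xff indicates a bogus
         * fuse/frequency combination.
         */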
7909        /* Render standby states get 0 weight */
7910        pxw[14] = 0;
7911        pxw[15] = 0;
7912
7913        for (i = 0; i < 4; i++) {
7914                u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) |
7915                        (pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]);
7916                I915_WRITE(PXW(i), val);
7917        }
7918
7919        /* Adjust magic regs to magic values (more experimental results) */
7920        I915_WRITE(OGW0, 0);
7921        I915_WRITE(OGW1, 0);
7922        I915_WRITE(EG0, 0x00007f00);
7923        I915_WRITE(EG1, 0x0000000e);
7924        I915_WRITE(EG2, 0x000e0000);
7925        I915_WRITE(EG3, 0x68000300);
7926        I915_WRITE(EG4, 0x42000000);
7927        I915_WRITE(EG5, 0x00140031);
7928        I915_WRITE(EG6, 0);
7929        I915_WRITE(EG7, 0);
7930
7931        for (i = 0; i < 8; i++)
7932                I915_WRITE(PXWL(i), 0);
7933
7934        /* Enable PMON + select events */
7935        I915_WRITE(ECR, 0x80000019);
7936
7937        lcfuse = I915_READ(LCFUSE02);
7938
7939        dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
7940}
7941
7942void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
7943{
7944        struct intel_rps *rps = &dev_priv->gt_pm.rps;
7945
7946        /*
7947         * RPM depends on RC6 to save/restore the GT HW context, so make RC6 a
7948         * requirement.
7949         */
7950        if (!sanitize_rc6(dev_priv)) {
7951                DRM_INFO("RC6 disabled, disabling runtime PM support\n");
7952                intel_runtime_pm_get(dev_priv);
7953        }
7954
7955        mutex_lock(&dev_priv->pcu_lock);
7956
7957        /* Initialize RPS limits (for userspace) */
7958        if (IS_CHERRYVIEW(dev_priv))
7959                cherryview_init_gt_powersave(dev_priv);
7960        else if (IS_VALLEYVIEW(dev_priv))
7961                valleyview_init_gt_powersave(dev_priv);
7962        else if (INTEL_GEN(dev_priv) >= 6)
7963                gen6_init_rps_frequencies(dev_priv);
7964
7965        /* Derive initial user preferences/limits from the hardware limits */
7966        rps->idle_freq = rps->min_freq;
7967        rps->cur_freq = rps->idle_freq;
7968
7969        rps->max_freq_softlimit = rps->max_freq;
7970        rps->min_freq_softlimit = rps->min_freq;
7971
7972        if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
7973                rps->min_freq_softlimit =
7974                        max_t(int,
7975                              rps->efficient_freq,
7976                              intel_freq_opcode(dev_priv, 450));
7977
7978        /* After setting max-softlimit, find the overclock max freq */
7979        if (IS_GEN6(dev_priv) ||
7980            IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) {
7981                u32 params = 0;
7982
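                /*
                 * Per the pcode read below, bit 31 of the OC params flags
                 * overclocking support and the low byte is the maximum
                 * overclocked ratio in 50 MHz units (hence the * 50 in
                 * the debug message).
                 */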
7983                sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &params);
7984                if (params & BIT(31)) { /* OC supported */
7985                        DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
7986                                         (rps->max_freq & 0xff) * 50,
7987                                         (params & 0xff) * 50);
7988                        rps->max_freq = params & 0xff;
7989                }
7990        }
7991
7992        /* Finally allow us to boost to max by default */
7993        rps->boost_freq = rps->max_freq;
7994
7995        mutex_unlock(&dev_priv->pcu_lock);
7996}
7997
7998void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
7999{
8000        if (IS_VALLEYVIEW(dev_priv))
8001                valleyview_cleanup_gt_powersave(dev_priv);
8002
8003        if (!HAS_RC6(dev_priv))
8004                intel_runtime_pm_put(dev_priv);
8005}
8006
8007/**
8008 * intel_suspend_gt_powersave - suspend PM work and helper threads
8009 * @dev_priv: i915 device
8010 *
8011 * We don't want to disable RC6 or other features here; we just want
8012 * to make sure any work we've queued has finished and won't bother
8013 * us while we're suspended.
8014 */
8015void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv)
8016{
8017        if (INTEL_GEN(dev_priv) < 6)
8018                return;
8019
8020        /* gen6_rps_idle() will be called later to disable interrupts */
8021}
8022
8023void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv)
8024{
8025        dev_priv->gt_pm.rps.enabled = true; /* force RPS disabling */
8026        dev_priv->gt_pm.rc6.enabled = true; /* force RC6 disabling */
8027        intel_disable_gt_powersave(dev_priv);
8028
8029        if (INTEL_GEN(dev_priv) < 11)
8030                gen6_reset_rps_interrupts(dev_priv);
8031        else
8032                WARN_ON_ONCE(1);
8033}
8034
8035static inline void intel_disable_llc_pstate(struct drm_i915_private *i915)
8036{
8037        lockdep_assert_held(&i915->pcu_lock);
8038
8039        if (!i915->gt_pm.llc_pstate.enabled)
8040                return;
8041
8042        /* Currently there is no HW configuration to be done to disable. */
8043
8044        i915->gt_pm.llc_pstate.enabled = false;
8045}
8046
8047static void intel_disable_rc6(struct drm_i915_private *dev_priv)
8048{
8049        lockdep_assert_held(&dev_priv->pcu_lock);
8050
8051        if (!dev_priv->gt_pm.rc6.enabled)
8052                return;
8053
8054        if (INTEL_GEN(dev_priv) >= 9)
8055                gen9_disable_rc6(dev_priv);
8056        else if (IS_CHERRYVIEW(dev_priv))
8057                cherryview_disable_rc6(dev_priv);
8058        else if (IS_VALLEYVIEW(dev_priv))
8059                valleyview_disable_rc6(dev_priv);
8060        else if (INTEL_GEN(dev_priv) >= 6)
8061                gen6_disable_rc6(dev_priv);
8062
8063        dev_priv->gt_pm.rc6.enabled = false;
8064}
8065
8066static void intel_disable_rps(struct drm_i915_private *dev_priv)
8067{
8068        lockdep_assert_held(&dev_priv->pcu_lock);
8069
8070        if (!dev_priv->gt_pm.rps.enabled)
8071                return;
8072
8073        if (INTEL_GEN(dev_priv) >= 9)
8074                gen9_disable_rps(dev_priv);
8075        else if (IS_CHERRYVIEW(dev_priv))
8076                cherryview_disable_rps(dev_priv);
8077        else if (IS_VALLEYVIEW(dev_priv))
8078                valleyview_disable_rps(dev_priv);
8079        else if (INTEL_GEN(dev_priv) >= 6)
8080                gen6_disable_rps(dev_priv);
8081        else if (IS_IRONLAKE_M(dev_priv))
8082                ironlake_disable_drps(dev_priv);
8083
8084        dev_priv->gt_pm.rps.enabled = false;
8085}
8086
8087void intel_disable_gt_powersave(struct drm_i915_private *dev_priv)
8088{
8089        mutex_lock(&dev_priv->pcu_lock);
8090
8091        intel_disable_rc6(dev_priv);
8092        intel_disable_rps(dev_priv);
8093        if (HAS_LLC(dev_priv))
8094                intel_disable_llc_pstate(dev_priv);
8095
8096        mutex_unlock(&dev_priv->pcu_lock);
8097}
8098
8099static inline void intel_enable_llc_pstate(struct drm_i915_private *i915)
8100{
8101        lockdep_assert_held(&i915->pcu_lock);
8102
8103        if (i915->gt_pm.llc_pstate.enabled)
8104                return;
8105
8106        gen6_update_ring_freq(i915);
8107
8108        i915->gt_pm.llc_pstate.enabled = true;
8109}
8110
8111static void intel_enable_rc6(struct drm_i915_private *dev_priv)
8112{
8113        lockdep_assert_held(&dev_priv->pcu_lock);
8114
8115        if (dev_priv->gt_pm.rc6.enabled)
8116                return;
8117
8118        if (IS_CHERRYVIEW(dev_priv))
8119                cherryview_enable_rc6(dev_priv);
8120        else if (IS_VALLEYVIEW(dev_priv))
8121                valleyview_enable_rc6(dev_priv);
8122        else if (INTEL_GEN(dev_priv) >= 9)
8123                gen9_enable_rc6(dev_priv);
8124        else if (IS_BROADWELL(dev_priv))
8125                gen8_enable_rc6(dev_priv);
8126        else if (INTEL_GEN(dev_priv) >= 6)
8127                gen6_enable_rc6(dev_priv);
8128
8129        dev_priv->gt_pm.rc6.enabled = true;
8130}
8131
8132static void intel_enable_rps(struct drm_i915_private *dev_priv)
8133{
8134        struct intel_rps *rps = &dev_priv->gt_pm.rps;
8135
8136        lockdep_assert_held(&dev_priv->pcu_lock);
8137
8138        if (rps->enabled)
8139                return;
8140
8141        if (IS_CHERRYVIEW(dev_priv)) {
8142                cherryview_enable_rps(dev_priv);
8143        } else if (IS_VALLEYVIEW(dev_priv)) {
8144                valleyview_enable_rps(dev_priv);
8145        } else if (WARN_ON_ONCE(INTEL_GEN(dev_priv) >= 11)) {
8146                /* TODO */
8147        } else if (INTEL_GEN(dev_priv) >= 9) {
8148                gen9_enable_rps(dev_priv);
8149        } else if (IS_BROADWELL(dev_priv)) {
8150                gen8_enable_rps(dev_priv);
8151        } else if (INTEL_GEN(dev_priv) >= 6) {
8152                gen6_enable_rps(dev_priv);
8153        } else if (IS_IRONLAKE_M(dev_priv)) {
8154                ironlake_enable_drps(dev_priv);
8155                intel_init_emon(dev_priv);
8156        }
8157
8158        WARN_ON(rps->max_freq < rps->min_freq);
8159        WARN_ON(rps->idle_freq > rps->max_freq);
8160
8161        WARN_ON(rps->efficient_freq < rps->min_freq);
8162        WARN_ON(rps->efficient_freq > rps->max_freq);
8163
8164        rps->enabled = true;
8165}
8166
8167void intel_enable_gt_powersave(struct drm_i915_private *dev_priv)
8168{
8169        /* Powersaving is controlled by the host when inside a VM */
8170        if (intel_vgpu_active(dev_priv))
8171                return;
8172
8173        mutex_lock(&dev_priv->pcu_lock);
8174
8175        if (HAS_RC6(dev_priv))
8176                intel_enable_rc6(dev_priv);
8177        intel_enable_rps(dev_priv);
8178        if (HAS_LLC(dev_priv))
8179                intel_enable_llc_pstate(dev_priv);
8180
8181        mutex_unlock(&dev_priv->pcu_lock);
8182}
8183
8184static void ibx_init_clock_gating(struct drm_i915_private *dev_priv)
8185{
8186        /*
8187         * On Ibex Peak and Cougar Point, we need to disable clock
8188         * gating for the panel power sequencer or it will fail to
8189         * start up when no ports are active.
8190         */
8191        I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
8192}
8193
8194static void g4x_disable_trickle_feed(struct drm_i915_private *dev_priv)
8195{
8196        enum pipe pipe;
8197
8198        for_each_pipe(dev_priv, pipe) {
8199                I915_WRITE(DSPCNTR(pipe),
8200                           I915_READ(DSPCNTR(pipe)) |
8201                           DISPPLANE_TRICKLE_FEED_DISABLE);
8202
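                /*
                 * Writing DSPSURF back to itself arms the plane's
                 * double-buffered register update so that the DSPCNTR
                 * change above actually latches.
                 */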
8203                I915_WRITE(DSPSURF(pipe), I915_READ(DSPSURF(pipe)));
8204                POSTING_READ(DSPSURF(pipe));
8205        }
8206}
8207
8208static void ilk_init_clock_gating(struct drm_i915_private *dev_priv)
8209{
8210        uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
8211
8212        /*
8213         * Required for FBC
8214         * WaFbcDisableDpfcClockGating:ilk
8215         */
8216        dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE |
8217                   ILK_DPFCUNIT_CLOCK_GATE_DISABLE |
8218                   ILK_DPFDUNIT_CLOCK_GATE_ENABLE;
8219
8220        I915_WRITE(PCH_3DCGDIS0,
8221                   MARIUNIT_CLOCK_GATE_DISABLE |
8222                   SVSMUNIT_CLOCK_GATE_DISABLE);
8223        I915_WRITE(PCH_3DCGDIS1,
8224                   VFMUNIT_CLOCK_GATE_DISABLE);
8225
8226        /*
8227         * According to the spec the following bits should be set in
8228         * order to enable memory self-refresh
8229         * The bit 22/21 of 0x42004
8230         * The bit 5 of 0x42020
8231         * The bit 15 of 0x45000
8232         */
8233        I915_WRITE(ILK_DISPLAY_CHICKEN2,
8234                   (I915_READ(ILK_DISPLAY_CHICKEN2) |
8235                    ILK_DPARB_GATE | ILK_VSDPFD_FULL));
8236        dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE;
8237        I915_WRITE(DISP_ARB_CTL,
8238                   (I915_READ(DISP_ARB_CTL) |
8239                    DISP_FBC_WM_DIS));
8240
8241        /*
8242         * According to the hardware documentation, the following bits
8243         * should be set unconditionally in order to enable FBC.
8244         * The bit 22 of 0x42000
8245         * The bit 22 of 0x42004
8246         * The bit 7,8,9 of 0x42020.
8247         */
8248        if (IS_IRONLAKE_M(dev_priv)) {
8249                /* WaFbcAsynchFlipDisableFbcQueue:ilk */
8250                I915_WRITE(ILK_DISPLAY_CHICKEN1,
8251                           I915_READ(ILK_DISPLAY_CHICKEN1) |
8252                           ILK_FBCQ_DIS);
8253                I915_WRITE(ILK_DISPLAY_CHICKEN2,
8254                           I915_READ(ILK_DISPLAY_CHICKEN2) |
8255                           ILK_DPARB_GATE);
8256        }
8257
8258        I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
8259
8260        I915_WRITE(ILK_DISPLAY_CHICKEN2,
8261                   I915_READ(ILK_DISPLAY_CHICKEN2) |
8262                   ILK_ELPIN_409_SELECT);
8263        I915_WRITE(_3D_CHICKEN2,
8264                   _3D_CHICKEN2_WM_READ_PIPELINED << 16 |
8265                   _3D_CHICKEN2_WM_READ_PIPELINED);
8266
8267        /* WaDisableRenderCachePipelinedFlush:ilk */
8268        I915_WRITE(CACHE_MODE_0,
8269                   _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
8270
8271        /* WaDisable_RenderCache_OperationalFlush:ilk */
8272        I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
8273
8274        g4x_disable_trickle_feed(dev_priv);
8275
8276        ibx_init_clock_gating(dev_priv);
8277}
8278
8279static void cpt_init_clock_gating(struct drm_i915_private *dev_priv)
8280{
8281        int pipe;
8282        uint32_t val;
8283
8284        /*
8285         * On Ibex Peak and Cougar Point, we need to disable clock
8286         * gating for the panel power sequencer or it will fail to
8287         * start up when no ports are active.
8288         */
8289        I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE |
8290                   PCH_DPLUNIT_CLOCK_GATE_DISABLE |
8291                   PCH_CPUNIT_CLOCK_GATE_DISABLE);
8292        I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) |
8293                   DPLS_EDP_PPS_FIX_DIS);
8294        /* The below fixes a weird display corruption (a few pixels shifted
8295         * downward) seen only on the LVDS of some HP laptops with IVY.
8296         */
8297        for_each_pipe(dev_priv, pipe) {
8298                val = I915_READ(TRANS_CHICKEN2(pipe));
8299                val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
8300                val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
8301                if (dev_priv->vbt.fdi_rx_polarity_inverted)
8302                        val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
8303                val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK;
8304                val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER;
8305                val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH;
8306                I915_WRITE(TRANS_CHICKEN2(pipe), val);
8307        }
8308        /* WADP0ClockGatingDisable */
8309        for_each_pipe(dev_priv, pipe) {
8310                I915_WRITE(TRANS_CHICKEN1(pipe),
8311                           TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
8312        }
8313}
8314
8315static void gen6_check_mch_setup(struct drm_i915_private *dev_priv)
8316{
8317        uint32_t tmp;
8318
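        /*
         * MCH_SSKPD is the scratchpad through which the BIOS hands us the
         * memory latency values used for watermark programming, so a wrong
         * WM0 field here means the watermarks get computed from bogus
         * latencies.
         */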
8319        tmp = I915_READ(MCH_SSKPD);
8320        if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL)
8321                DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x. This can cause underruns.\n",
8322                              tmp);
8323}
8324
8325static void gen6_init_clock_gating(struct drm_i915_private *dev_priv)
8326{
8327        uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
8328
8329        I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
8330
8331        I915_WRITE(ILK_DISPLAY_CHICKEN2,
8332                   I915_READ(ILK_DISPLAY_CHICKEN2) |
8333                   ILK_ELPIN_409_SELECT);
8334
8335        /* WaDisableHiZPlanesWhenMSAAEnabled:snb */
8336        I915_WRITE(_3D_CHICKEN,
8337                   _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB));
8338
8339        /* WaDisable_RenderCache_OperationalFlush:snb */
8340        I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
8341
8342        /*
8343         * BSpec recommends 8x4 when MSAA is used,
8344         * however in practice 16x4 seems fastest.
8345         *
8346         * Note that PS/WM thread counts depend on the WIZ hashing
8347         * disable bit, which we don't touch here, but it's good
8348         * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
8349         */
8350        I915_WRITE(GEN6_GT_MODE,
8351                   _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
8352
8353        I915_WRITE(CACHE_MODE_0,
8354                   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
8355
8356        I915_WRITE(GEN6_UCGCTL1,
8357                   I915_READ(GEN6_UCGCTL1) |
8358                   GEN6_BLBUNIT_CLOCK_GATE_DISABLE |
8359                   GEN6_CSUNIT_CLOCK_GATE_DISABLE);
8360
8361        /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
8362         * gating disable must be set.  Failure to set it results in
8363         * flickering pixels due to Z write ordering failures after
8364         * some amount of runtime in the Mesa "fire" demo, and Unigine
8365         * Sanctuary and Tropics, and apparently anything else with
8366         * alpha test or pixel discard.
8367         *
8368         * According to the spec, bit 11 (RCCUNIT) must also be set,
8369         * but we didn't debug actual testcases to find it out.
8370         *
8371         * WaDisableRCCUnitClockGating:snb
8372         * WaDisableRCPBUnitClockGating:snb
8373         */
8374        I915_WRITE(GEN6_UCGCTL2,
8375                   GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
8376                   GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
8377
8378        /* WaStripsFansDisableFastClipPerformanceFix:snb */
8379        I915_WRITE(_3D_CHICKEN3,
8380                   _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL));
8381
8382        /*
8383         * Bspec says:
8384         * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and
8385         * 3DSTATE_SF number of SF output attributes is more than 16."
8386         */
8387        I915_WRITE(_3D_CHICKEN3,
8388                   _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH));
8389
8390        /*
8391         * According to the spec the following bits should be
8392         * set in order to enable memory self-refresh and fbc:
8393         * The bit21 and bit22 of 0x42000
8394         * The bit21 and bit22 of 0x42004
8395         * The bit5 and bit7 of 0x42020
8396         * The bit14 of 0x70180
8397         * The bit14 of 0x71180
8398         *
8399         * WaFbcAsynchFlipDisableFbcQueue:snb
8400         */
8401        I915_WRITE(ILK_DISPLAY_CHICKEN1,
8402                   I915_READ(ILK_DISPLAY_CHICKEN1) |
8403                   ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS);
8404        I915_WRITE(ILK_DISPLAY_CHICKEN2,
8405                   I915_READ(ILK_DISPLAY_CHICKEN2) |
8406                   ILK_DPARB_GATE | ILK_VSDPFD_FULL);
8407        I915_WRITE(ILK_DSPCLK_GATE_D,
8408                   I915_READ(ILK_DSPCLK_GATE_D) |
8409                   ILK_DPARBUNIT_CLOCK_GATE_ENABLE  |
8410                   ILK_DPFDUNIT_CLOCK_GATE_ENABLE);
8411
8412        g4x_disable_trickle_feed(dev_priv);
8413
8414        cpt_init_clock_gating(dev_priv);
8415
8416        gen6_check_mch_setup(dev_priv);
8417}
8418
8419static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
8420{
8421        uint32_t reg = I915_READ(GEN7_FF_THREAD_MODE);
8422
8423        /*
8424         * WaVSThreadDispatchOverride:ivb,vlv
8425         *
8426         * This actually overrides the dispatch
8427         * mode for all thread types.
8428         */
8429        reg &= ~GEN7_FF_SCHED_MASK;
8430        reg |= GEN7_FF_TS_SCHED_HW;
8431        reg |= GEN7_FF_VS_SCHED_HW;
8432        reg |= GEN7_FF_DS_SCHED_HW;
8433
8434        I915_WRITE(GEN7_FF_THREAD_MODE, reg);
8435}
8436
8437static void lpt_init_clock_gating(struct drm_i915_private *dev_priv)
8438{
8439        /*
8440         * TODO: this bit should only be enabled when really needed, then
8441         * disabled when not needed anymore in order to save power.
8442         */
8443        if (HAS_PCH_LPT_LP(dev_priv))
8444                I915_WRITE(SOUTH_DSPCLK_GATE_D,
8445                           I915_READ(SOUTH_DSPCLK_GATE_D) |
8446                           PCH_LP_PARTITION_LEVEL_DISABLE);
8447
8448        /* WADPOClockGatingDisable:hsw */
8449        I915_WRITE(TRANS_CHICKEN1(PIPE_A),
8450                   I915_READ(TRANS_CHICKEN1(PIPE_A)) |
8451                   TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
8452}
8453
8454static void lpt_suspend_hw(struct drm_i915_private *dev_priv)
8455{
8456        if (HAS_PCH_LPT_LP(dev_priv)) {
8457                uint32_t val = I915_READ(SOUTH_DSPCLK_GATE_D);
8458
8459                val &= ~PCH_LP_PARTITION_LEVEL_DISABLE;
8460                I915_WRITE(SOUTH_DSPCLK_GATE_D, val);
8461        }
8462}
8463
8464static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv,
8465                                   int general_prio_credits,
8466                                   int high_prio_credits)
8467{
8468        u32 misccpctl;
8469        u32 val;
8470
8471        /* WaTempDisableDOPClkGating:bdw */
8472        misccpctl = I915_READ(GEN7_MISCCPCTL);
8473        I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
8474
8475        val = I915_READ(GEN8_L3SQCREG1);
8476        val &= ~L3_PRIO_CREDITS_MASK;
8477        val |= L3_GENERAL_PRIO_CREDITS(general_prio_credits);
8478        val |= L3_HIGH_PRIO_CREDITS(high_prio_credits);
8479        I915_WRITE(GEN8_L3SQCREG1, val);
8480
8481        /*
8482         * Wait at least 100 clocks before re-enabling clock gating.
8483         * See the definition of L3SQCREG1 in BSpec.
8484         */
8485        POSTING_READ(GEN8_L3SQCREG1);
8486        udelay(1);
8487        I915_WRITE(GEN7_MISCCPCTL, misccpctl);
8488}
8489
8490static void cnp_init_clock_gating(struct drm_i915_private *dev_priv)
8491{
8492        if (!HAS_PCH_CNP(dev_priv))
8493                return;
8494
8495        /* Display WA #1181 WaSouthDisplayDisablePWMCGEGating: cnp */
8496        I915_WRITE(SOUTH_DSPCLK_GATE_D, I915_READ(SOUTH_DSPCLK_GATE_D) |
8497                   CNP_PWM_CGE_GATING_DISABLE);
8498}
8499
8500static void cnl_init_clock_gating(struct drm_i915_private *dev_priv)
8501{
8502        u32 val;
8503        cnp_init_clock_gating(dev_priv);
8504
8505        /* This is not a Wa. Enable it for better image quality. */
8506        I915_WRITE(_3D_CHICKEN3,
8507                   _MASKED_BIT_ENABLE(_3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE));
8508
8509        /* WaEnableChickenDCPR:cnl */
8510        I915_WRITE(GEN8_CHICKEN_DCPR_1,
8511                   I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM);
8512
8513        /* WaFbcWakeMemOn:cnl */
8514        I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
8515                   DISP_FBC_MEMORY_WAKE);
8516
8517        val = I915_READ(SLICE_UNIT_LEVEL_CLKGATE);
8518        /* ReadHitWriteOnlyDisable:cnl */
8519        val |= RCCUNIT_CLKGATE_DIS;
8520        /* WaSarbUnitClockGatingDisable:cnl (pre-prod) */
8521        if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_B0))
8522                val |= SARBUNIT_CLKGATE_DIS;
8523        I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE, val);
8524
8525        /* Wa_2201832410:cnl */
8526        val = I915_READ(SUBSLICE_UNIT_LEVEL_CLKGATE);
8527        val |= GWUNIT_CLKGATE_DIS;
8528        I915_WRITE(SUBSLICE_UNIT_LEVEL_CLKGATE, val);
8529
8530        /* WaDisableVFclkgate:cnl */
8531        /* WaVFUnitClockGatingDisable:cnl */
8532        val = I915_READ(UNSLICE_UNIT_LEVEL_CLKGATE);
8533        val |= VFUNIT_CLKGATE_DIS;
8534        I915_WRITE(UNSLICE_UNIT_LEVEL_CLKGATE, val);
8535}
8536
8537static void cfl_init_clock_gating(struct drm_i915_private *dev_priv)
8538{
8539        cnp_init_clock_gating(dev_priv);
8540        gen9_init_clock_gating(dev_priv);
8541
8542        /* WaFbcNukeOnHostModify:cfl */
8543        I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
8544                   ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
8545}
8546
8547static void kbl_init_clock_gating(struct drm_i915_private *dev_priv)
8548{
8549        gen9_init_clock_gating(dev_priv);
8550
8551        /* WaDisableSDEUnitClockGating:kbl */
8552        if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
8553                I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
8554                           GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
8555
8556        /* WaDisableGamClockGating:kbl */
8557        if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
8558                I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
8559                           GEN6_GAMUNIT_CLOCK_GATE_DISABLE);
8560
8561        /* WaFbcNukeOnHostModify:kbl */
8562        I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
8563                   ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
8564}
8565
8566static void skl_init_clock_gating(struct drm_i915_private *dev_priv)
8567{
8568        gen9_init_clock_gating(dev_priv);
8569
8570        /* WAC6entrylatency:skl */
8571        I915_WRITE(FBC_LLC_READ_CTRL, I915_READ(FBC_LLC_READ_CTRL) |
8572                   FBC_LLC_FULLY_OPEN);
8573
8574        /* WaFbcNukeOnHostModify:skl */
8575        I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
8576                   ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
8577}
8578
8579static void bdw_init_clock_gating(struct drm_i915_private *dev_priv)
8580{
8581        /* The GTT cache must be disabled if the system is using 2M pages. */
8582        bool can_use_gtt_cache = !HAS_PAGE_SIZES(dev_priv,
8583                                                 I915_GTT_PAGE_SIZE_2M);
8584        enum pipe pipe;
8585
8586        /* WaSwitchSolVfFArbitrationPriority:bdw */
8587        I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
8588
8589        /* WaPsrDPAMaskVBlankInSRD:bdw */
8590        I915_WRITE(CHICKEN_PAR1_1,
8591                   I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD);
8592
8593        /* WaPsrDPRSUnmaskVBlankInSRD:bdw */
8594        for_each_pipe(dev_priv, pipe) {
8595                I915_WRITE(CHICKEN_PIPESL_1(pipe),
8596                           I915_READ(CHICKEN_PIPESL_1(pipe)) |
8597                           BDW_DPRS_MASK_VBLANK_SRD);
8598        }
8599
8600        /* WaVSRefCountFullforceMissDisable:bdw */
8601        /* WaDSRefCountFullforceMissDisable:bdw */
8602        I915_WRITE(GEN7_FF_THREAD_MODE,
8603                   I915_READ(GEN7_FF_THREAD_MODE) &
8604                   ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
8605
8606        I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
8607                   _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
8608
8609        /* WaDisableSDEUnitClockGating:bdw */
8610        I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
8611                   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
8612
8613        /* WaProgramL3SqcReg1Default:bdw */
8614        gen8_set_l3sqc_credits(dev_priv, 30, 2);
8615
8616        /* WaGttCachingOffByDefault:bdw */
8617        I915_WRITE(HSW_GTT_CACHE_EN, can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
8618
8619        /* WaKVMNotificationOnConfigChange:bdw */
8620        I915_WRITE(CHICKEN_PAR2_1, I915_READ(CHICKEN_PAR2_1)
8621                   | KVM_CONFIG_CHANGE_NOTIFICATION_SELECT);
8622
8623        lpt_init_clock_gating(dev_priv);
8624
8625        /* WaDisableDopClockGating:bdw
8626         *
8627         * Also see the CHICKEN2 write in bdw_init_workarounds() to disable DOP
8628         * clock gating.
8629         */
8630        I915_WRITE(GEN6_UCGCTL1,
8631                   I915_READ(GEN6_UCGCTL1) | GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE);
8632}
8633
8634static void hsw_init_clock_gating(struct drm_i915_private *dev_priv)
8635{
8636        /* L3 caching of data atomics doesn't work -- disable it. */
8637        I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
8638        I915_WRITE(HSW_ROW_CHICKEN3,
8639                   _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE));
8640
8641        /* This is required by WaCatErrorRejectionIssue:hsw */
8642        I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
8643                        I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
8644                        GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
8645
8646        /* WaVSRefCountFullforceMissDisable:hsw */
8647        I915_WRITE(GEN7_FF_THREAD_MODE,
8648                   I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME);
8649
8650        /* WaDisable_RenderCache_OperationalFlush:hsw */
8651        I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
8652
8653        /* enable HiZ Raw Stall Optimization */
8654        I915_WRITE(CACHE_MODE_0_GEN7,
8655                   _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
8656
8657        /* WaDisable4x2SubspanOptimization:hsw */
8658        I915_WRITE(CACHE_MODE_1,
8659                   _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
8660
8661        /*
8662         * BSpec recommends 8x4 when MSAA is used,
8663         * however in practice 16x4 seems fastest.
8664         *
8665         * Note that PS/WM thread counts depend on the WIZ hashing
8666         * disable bit, which we don't touch here, but it's good
8667         * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
8668         */
8669        I915_WRITE(GEN7_GT_MODE,
8670                   _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
8671
8672        /* WaSampleCChickenBitEnable:hsw */
8673        I915_WRITE(HALF_SLICE_CHICKEN3,
8674                   _MASKED_BIT_ENABLE(HSW_SAMPLE_C_PERFORMANCE));
8675
8676        /* WaSwitchSolVfFArbitrationPriority:hsw */
8677        I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
8678
8679        lpt_init_clock_gating(dev_priv);
8680}
8681
8682static void ivb_init_clock_gating(struct drm_i915_private *dev_priv)
8683{
8684        uint32_t snpcr;
8685
8686        I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);
8687
8688        /* WaDisableEarlyCull:ivb */
8689        I915_WRITE(_3D_CHICKEN3,
8690                   _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
8691
8692        /* WaDisableBackToBackFlipFix:ivb */
8693        I915_WRITE(IVB_CHICKEN3,
8694                   CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
8695                   CHICKEN3_DGMG_DONE_FIX_DISABLE);
8696
8697        /* WaDisablePSDDualDispatchEnable:ivb */
8698        if (IS_IVB_GT1(dev_priv))
8699                I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
8700                           _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
8701
8702        /* WaDisable_RenderCache_OperationalFlush:ivb */
8703        I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
8704
8705        /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
8706        I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
8707                   GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
8708
8709        /* WaApplyL3ControlAndL3ChickenMode:ivb */
8710        I915_WRITE(GEN7_L3CNTLREG1,
8711                        GEN7_WA_FOR_GEN7_L3_CONTROL);
8712        I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
8713                   GEN7_WA_L3_CHICKEN_MODE);
8714        if (IS_IVB_GT1(dev_priv))
8715                I915_WRITE(GEN7_ROW_CHICKEN2,
8716                           _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
8717        else {
8718                /* must write both registers */
8719                I915_WRITE(GEN7_ROW_CHICKEN2,
8720                           _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
8721                I915_WRITE(GEN7_ROW_CHICKEN2_GT2,
8722                           _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
8723        }
8724
8725        /* WaForceL3Serialization:ivb */
8726        I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
8727                   ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
8728
8729        /*
8730         * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
8731         * This implements the WaDisableRCZUnitClockGating:ivb workaround.
8732         */
8733        I915_WRITE(GEN6_UCGCTL2,
8734                   GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
8735
8736        /* This is required by WaCatErrorRejectionIssue:ivb */
8737        I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
8738                        I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
8739                        GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
8740
8741        g4x_disable_trickle_feed(dev_priv);
8742
8743        gen7_setup_fixed_func_scheduler(dev_priv);
8744
8745        if (0) { /* causes HiZ corruption on ivb:gt1 */
8746                /* enable HiZ Raw Stall Optimization */
8747                I915_WRITE(CACHE_MODE_0_GEN7,
8748                           _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
8749        }
8750
8751        /* WaDisable4x2SubspanOptimization:ivb */
8752        I915_WRITE(CACHE_MODE_1,
8753                   _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
8754
8755        /*
8756         * BSpec recommends 8x4 when MSAA is used,
8757         * however in practice 16x4 seems fastest.
8758         *
8759         * Note that PS/WM thread counts depend on the WIZ hashing
8760         * disable bit, which we don't touch here, but it's good
8761         * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
8762         */
8763        I915_WRITE(GEN7_GT_MODE,
8764                   _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
8765
8766        snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
8767        snpcr &= ~GEN6_MBC_SNPCR_MASK;
8768        snpcr |= GEN6_MBC_SNPCR_MED;
8769        I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
8770
8771        if (!HAS_PCH_NOP(dev_priv))
8772                cpt_init_clock_gating(dev_priv);
8773
8774        gen6_check_mch_setup(dev_priv);
8775}
8776
8777static void vlv_init_clock_gating(struct drm_i915_private *dev_priv)
8778{
8779        /* WaDisableEarlyCull:vlv */
8780        I915_WRITE(_3D_CHICKEN3,
8781                   _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
8782
8783        /* WaDisableBackToBackFlipFix:vlv */
8784        I915_WRITE(IVB_CHICKEN3,
8785                   CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
8786                   CHICKEN3_DGMG_DONE_FIX_DISABLE);
8787
8788        /* WaPsdDispatchEnable:vlv */
8789        /* WaDisablePSDDualDispatchEnable:vlv */
8790        I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
8791                   _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP |
8792                                      GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
8793
8794        /* WaDisable_RenderCache_OperationalFlush:vlv */
8795        I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
8796
8797        /* WaForceL3Serialization:vlv */
8798        I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
8799                   ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
8800
8801        /* WaDisableDopClockGating:vlv */
8802        I915_WRITE(GEN7_ROW_CHICKEN2,
8803                   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
8804
8805        /* This is required by WaCatErrorRejectionIssue:vlv */
8806        I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
8807                   I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
8808                   GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
8809
8810        gen7_setup_fixed_func_scheduler(dev_priv);
8811
8812        /*
8813         * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
8814         * This implements the WaDisableRCZUnitClockGating:vlv workaround.
8815         */
8816        I915_WRITE(GEN6_UCGCTL2,
8817                   GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
8818
8819        /* WaDisableL3Bank2xClockGate:vlv
8820         * Disabling L3 clock gating - MMIO 940c[25] = 1.
8821         * Set bit 25 to disable L3_BANK_2x_CLK_GATING. */
8822        I915_WRITE(GEN7_UCGCTL4,
8823                   I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE);
8824
8825        /*
8826         * BSpec says this must be set, even though
8827         * WaDisable4x2SubspanOptimization isn't listed for VLV.
8828         */
8829        I915_WRITE(CACHE_MODE_1,
8830                   _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
8831
8832        /*
8833         * BSpec recommends 8x4 when MSAA is used,
8834         * however in practice 16x4 seems fastest.
8835         *
8836         * Note that PS/WM thread counts depend on the WIZ hashing
8837         * disable bit, which we don't touch here, but it's good
8838         * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
8839         */
8840        I915_WRITE(GEN7_GT_MODE,
8841                   _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
8842
8843        /*
8844         * WaIncreaseL3CreditsForVLVB0:vlv
8845         * This is the hardware default actually.
8846         */
8847        I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
8848
8849        /*
8850         * WaDisableVLVClockGating_VBIIssue:vlv
8851         * Disable clock gating on the GCFG unit to prevent a delay
8852         * in the reporting of vblank events.
8853         */
8854        I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS);
8855}
8856
8857static void chv_init_clock_gating(struct drm_i915_private *dev_priv)
8858{
8859        /* WaVSRefCountFullforceMissDisable:chv */
8860        /* WaDSRefCountFullforceMissDisable:chv */
8861        I915_WRITE(GEN7_FF_THREAD_MODE,
8862                   I915_READ(GEN7_FF_THREAD_MODE) &
8863                   ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
8864
8865        /* WaDisableSemaphoreAndSyncFlipWait:chv */
8866        I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
8867                   _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
8868
8869        /* WaDisableCSUnitClockGating:chv */
8870        I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
8871                   GEN6_CSUNIT_CLOCK_GATE_DISABLE);
8872
8873        /* WaDisableSDEUnitClockGating:chv */
8874        I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
8875                   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
8876
8877        /*
8878         * WaProgramL3SqcReg1Default:chv
8879         * See gfxspecs/Related Documents/Performance Guide/
8880         * LSQC Setting Recommendations.
8881         */
8882        gen8_set_l3sqc_credits(dev_priv, 38, 2);
8883
8884        /*
8885         * GTT cache may not work with big pages, so if those
8886         * are ever enabled GTT cache may need to be disabled.
8887         */
8888        I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);
8889}
8890
8891static void g4x_init_clock_gating(struct drm_i915_private *dev_priv)
8892{
8893        uint32_t dspclk_gate;
8894
8895        I915_WRITE(RENCLK_GATE_D1, 0);
8896        I915_WRITE(RENCLK_GATE_D2, VF_UNIT_CLOCK_GATE_DISABLE |
8897                   GS_UNIT_CLOCK_GATE_DISABLE |
8898                   CL_UNIT_CLOCK_GATE_DISABLE);
8899        I915_WRITE(RAMCLK_GATE_D, 0);
8900        dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE |
8901                OVRUNIT_CLOCK_GATE_DISABLE |
8902                OVCUNIT_CLOCK_GATE_DISABLE;
8903        if (IS_GM45(dev_priv))
8904                dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE;
8905        I915_WRITE(DSPCLK_GATE_D, dspclk_gate);
8906
8907        /* WaDisableRenderCachePipelinedFlush */
8908        I915_WRITE(CACHE_MODE_0,
8909                   _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
8910
8911        /* WaDisable_RenderCache_OperationalFlush:g4x */
8912        I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
8913
8914        g4x_disable_trickle_feed(dev_priv);
8915}
8916
8917static void i965gm_init_clock_gating(struct drm_i915_private *dev_priv)
8918{
8919        I915_WRITE(RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE);
8920        I915_WRITE(RENCLK_GATE_D2, 0);
8921        I915_WRITE(DSPCLK_GATE_D, 0);
8922        I915_WRITE(RAMCLK_GATE_D, 0);
8923        I915_WRITE16(DEUC, 0);
8924        I915_WRITE(MI_ARB_STATE,
8925                   _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
8926
8927        /* WaDisable_RenderCache_OperationalFlush:gen4 */
8928        I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
8929}
8930
8931static void i965g_init_clock_gating(struct drm_i915_private *dev_priv)
8932{
8933        I915_WRITE(RENCLK_GATE_D1, I965_RCZ_CLOCK_GATE_DISABLE |
8934                   I965_RCC_CLOCK_GATE_DISABLE |
8935                   I965_RCPB_CLOCK_GATE_DISABLE |
8936                   I965_ISC_CLOCK_GATE_DISABLE |
8937                   I965_FBC_CLOCK_GATE_DISABLE);
8938        I915_WRITE(RENCLK_GATE_D2, 0);
8939        I915_WRITE(MI_ARB_STATE,
8940                   _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
8941
8942        /* WaDisable_RenderCache_OperationalFlush:gen4 */
8943        I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
8944}
8945
8946static void gen3_init_clock_gating(struct drm_i915_private *dev_priv)
8947{
8948        u32 dstate = I915_READ(D_STATE);
8949
8950        dstate |= DSTATE_PLL_D3_OFF | DSTATE_GFX_CLOCK_GATING |
8951                DSTATE_DOT_CLOCK_GATING;
8952        I915_WRITE(D_STATE, dstate);
8953
8954        if (IS_PINEVIEW(dev_priv))
8955                I915_WRITE(ECOSKPD, _MASKED_BIT_ENABLE(ECO_GATING_CX_ONLY));
8956
8957        /* IIR "flip pending" means done if this bit is set */
8958        I915_WRITE(ECOSKPD, _MASKED_BIT_DISABLE(ECO_FLIP_DONE));
8959
8960        /* interrupts should cause a wake up from C3 */
8961        I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_AGPBUSY_INT_EN));
8962
8963        /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
8964        I915_WRITE(MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
8965
8966        I915_WRITE(MI_ARB_STATE,
8967                   _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
8968}
8969
8970static void i85x_init_clock_gating(struct drm_i915_private *dev_priv)
8971{
8972        I915_WRITE(RENCLK_GATE_D1, SV_CLOCK_GATE_DISABLE);
8973
8974        /* interrupts should cause a wake up from C3 */
8975        I915_WRITE(MI_STATE, _MASKED_BIT_ENABLE(MI_AGPBUSY_INT_EN) |
8976                   _MASKED_BIT_DISABLE(MI_AGPBUSY_830_MODE));
8977
8978        I915_WRITE(MEM_MODE,
8979                   _MASKED_BIT_ENABLE(MEM_DISPLAY_TRICKLE_FEED_DISABLE));
8980}
8981
8982static void i830_init_clock_gating(struct drm_i915_private *dev_priv)
8983{
8984        I915_WRITE(MEM_MODE,
8985                   _MASKED_BIT_ENABLE(MEM_DISPLAY_A_TRICKLE_FEED_DISABLE) |
8986                   _MASKED_BIT_ENABLE(MEM_DISPLAY_B_TRICKLE_FEED_DISABLE));
8987}
8988
8989void intel_init_clock_gating(struct drm_i915_private *dev_priv)
8990{
8991        dev_priv->display.init_clock_gating(dev_priv);
8992}
8993
8994void intel_suspend_hw(struct drm_i915_private *dev_priv)
8995{
8996        if (HAS_PCH_LPT(dev_priv))
8997                lpt_suspend_hw(dev_priv);
8998}
8999
9000static void nop_init_clock_gating(struct drm_i915_private *dev_priv)
9001{
9002        DRM_DEBUG_KMS("No clock gating settings or workarounds applied.\n");
9003}
9004
9005/**
9006 * intel_init_clock_gating_hooks - setup the clock gating hooks
9007 * @dev_priv: device private
9008 *
9009 * Set up the hooks that configure which clocks of a given platform can be
9010 * gated and also apply various GT and display specific workarounds for these
9011 * platforms. Note that some GT specific workarounds are applied separately
9012 * when GPU contexts or batchbuffers start their execution.
9013 */
9014void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv)
9015{
9016        if (IS_CANNONLAKE(dev_priv))
9017                dev_priv->display.init_clock_gating = cnl_init_clock_gating;
9018        else if (IS_COFFEELAKE(dev_priv))
9019                dev_priv->display.init_clock_gating = cfl_init_clock_gating;
9020        else if (IS_SKYLAKE(dev_priv))
9021                dev_priv->display.init_clock_gating = skl_init_clock_gating;
9022        else if (IS_KABYLAKE(dev_priv))
9023                dev_priv->display.init_clock_gating = kbl_init_clock_gating;
9024        else if (IS_BROXTON(dev_priv))
9025                dev_priv->display.init_clock_gating = bxt_init_clock_gating;
9026        else if (IS_GEMINILAKE(dev_priv))
9027                dev_priv->display.init_clock_gating = glk_init_clock_gating;
9028        else if (IS_BROADWELL(dev_priv))
9029                dev_priv->display.init_clock_gating = bdw_init_clock_gating;
9030        else if (IS_CHERRYVIEW(dev_priv))
9031                dev_priv->display.init_clock_gating = chv_init_clock_gating;
9032        else if (IS_HASWELL(dev_priv))
9033                dev_priv->display.init_clock_gating = hsw_init_clock_gating;
9034        else if (IS_IVYBRIDGE(dev_priv))
9035                dev_priv->display.init_clock_gating = ivb_init_clock_gating;
9036        else if (IS_VALLEYVIEW(dev_priv))
9037                dev_priv->display.init_clock_gating = vlv_init_clock_gating;
9038        else if (IS_GEN6(dev_priv))
9039                dev_priv->display.init_clock_gating = gen6_init_clock_gating;
9040        else if (IS_GEN5(dev_priv))
9041                dev_priv->display.init_clock_gating = ilk_init_clock_gating;
9042        else if (IS_G4X(dev_priv))
9043                dev_priv->display.init_clock_gating = g4x_init_clock_gating;
9044        else if (IS_I965GM(dev_priv))
9045                dev_priv->display.init_clock_gating = i965gm_init_clock_gating;
9046        else if (IS_I965G(dev_priv))
9047                dev_priv->display.init_clock_gating = i965g_init_clock_gating;
9048        else if (IS_GEN3(dev_priv))
9049                dev_priv->display.init_clock_gating = gen3_init_clock_gating;
9050        else if (IS_I85X(dev_priv) || IS_I865G(dev_priv))
9051                dev_priv->display.init_clock_gating = i85x_init_clock_gating;
9052        else if (IS_GEN2(dev_priv))
9053                dev_priv->display.init_clock_gating = i830_init_clock_gating;
9054        else {
9055                MISSING_CASE(INTEL_DEVID(dev_priv));
9056                dev_priv->display.init_clock_gating = nop_init_clock_gating;
9057        }
9058}
9059
9060/* Set up chip specific power management-related functions */
9061void intel_init_pm(struct drm_i915_private *dev_priv)
9062{
9063        intel_fbc_init(dev_priv);
9064
9065        /* For cxsr */
9066        if (IS_PINEVIEW(dev_priv))
9067                i915_pineview_get_mem_freq(dev_priv);
9068        else if (IS_GEN5(dev_priv))
9069                i915_ironlake_get_mem_freq(dev_priv);
9070
9071        /* For FIFO watermark updates */
9072        if (INTEL_GEN(dev_priv) >= 9) {
9073                skl_setup_wm_latency(dev_priv);
9074                dev_priv->display.initial_watermarks = skl_initial_wm;
9075                dev_priv->display.atomic_update_watermarks = skl_atomic_update_crtc_wm;
9076                dev_priv->display.compute_global_watermarks = skl_compute_wm;
9077        } else if (HAS_PCH_SPLIT(dev_priv)) {
9078                ilk_setup_wm_latency(dev_priv);
9079
9080                if ((IS_GEN5(dev_priv) && dev_priv->wm.pri_latency[1] &&
9081                     dev_priv->wm.spr_latency[1] && dev_priv->wm.cur_latency[1]) ||
9082                    (!IS_GEN5(dev_priv) && dev_priv->wm.pri_latency[0] &&
9083                     dev_priv->wm.spr_latency[0] && dev_priv->wm.cur_latency[0])) {
9084                        dev_priv->display.compute_pipe_wm = ilk_compute_pipe_wm;
9085                        dev_priv->display.compute_intermediate_wm =
9086                                ilk_compute_intermediate_wm;
9087                        dev_priv->display.initial_watermarks =
9088                                ilk_initial_watermarks;
9089                        dev_priv->display.optimize_watermarks =
9090                                ilk_optimize_watermarks;
9091                } else {
9092                        DRM_DEBUG_KMS("Failed to read display plane latency. "
9093                                      "Disabling CxSR\n");
9094                }
9095        } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
9096                vlv_setup_wm_latency(dev_priv);
9097                dev_priv->display.compute_pipe_wm = vlv_compute_pipe_wm;
9098                dev_priv->display.compute_intermediate_wm = vlv_compute_intermediate_wm;
9099                dev_priv->display.initial_watermarks = vlv_initial_watermarks;
9100                dev_priv->display.optimize_watermarks = vlv_optimize_watermarks;
9101                dev_priv->display.atomic_update_watermarks = vlv_atomic_update_fifo;
9102        } else if (IS_G4X(dev_priv)) {
9103                g4x_setup_wm_latency(dev_priv);
9104                dev_priv->display.compute_pipe_wm = g4x_compute_pipe_wm;
9105                dev_priv->display.compute_intermediate_wm = g4x_compute_intermediate_wm;
9106                dev_priv->display.initial_watermarks = g4x_initial_watermarks;
9107                dev_priv->display.optimize_watermarks = g4x_optimize_watermarks;
9108        } else if (IS_PINEVIEW(dev_priv)) {
9109                if (!intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv),
9110                                            dev_priv->is_ddr3,
9111                                            dev_priv->fsb_freq,
9112                                            dev_priv->mem_freq)) {
9113                        DRM_INFO("failed to find known CxSR latency "
9114                                 "(found ddr%s fsb freq %d, mem freq %d), "
9115                                 "disabling CxSR\n",
9116                                 (dev_priv->is_ddr3 == 1) ? "3" : "2",
9117                                 dev_priv->fsb_freq, dev_priv->mem_freq);
9118                        /* Disable CxSR and never update its watermark again */
9119                        intel_set_memory_cxsr(dev_priv, false);
9120                        dev_priv->display.update_wm = NULL;
9121                } else
9122                        dev_priv->display.update_wm = pineview_update_wm;
9123        } else if (IS_GEN4(dev_priv)) {
9124                dev_priv->display.update_wm = i965_update_wm;
9125        } else if (IS_GEN3(dev_priv)) {
9126                dev_priv->display.update_wm = i9xx_update_wm;
9127                dev_priv->display.get_fifo_size = i9xx_get_fifo_size;
9128        } else if (IS_GEN2(dev_priv)) {
9129                if (INTEL_INFO(dev_priv)->num_pipes == 1) {
9130                        dev_priv->display.update_wm = i845_update_wm;
9131                        dev_priv->display.get_fifo_size = i845_get_fifo_size;
9132                } else {
9133                        dev_priv->display.update_wm = i9xx_update_wm;
9134                        dev_priv->display.get_fifo_size = i830_get_fifo_size;
9135                }
9136        } else {
9137                DRM_ERROR("unexpected fall-through in intel_init_pm\n");
9138        }
9139}
9140
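/*
 * Decode the error bits pcode leaves behind in GEN6_PCODE_MAILBOX after a
 * request: zero means the last transaction succeeded, everything else is
 * mapped onto a matching errno below (gen7_check_mailbox_status() is the
 * gen7+ flavour with its slightly different error encoding).
 */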
9141static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv)
9142{
9143        uint32_t flags =
9144                I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK;
9145
9146        switch (flags) {
9147        case GEN6_PCODE_SUCCESS:
9148                return 0;
9149        case GEN6_PCODE_UNIMPLEMENTED_CMD:
9150                return -ENODEV;
9151        case GEN6_PCODE_ILLEGAL_CMD:
9152                return -ENXIO;
9153        case GEN6_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
9154        case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
9155                return -EOVERFLOW;
9156        case GEN6_PCODE_TIMEOUT:
9157                return -ETIMEDOUT;
9158        default:
9159                MISSING_CASE(flags);
9160                return 0;
9161        }
9162}
9163
9164static inline int gen7_check_mailbox_status(struct drm_i915_private *dev_priv)
9165{
9166        uint32_t flags =
9167                I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK;
9168
9169        switch (flags) {
9170        case GEN6_PCODE_SUCCESS:
9171                return 0;
9172        case GEN6_PCODE_ILLEGAL_CMD:
9173                return -ENXIO;
9174        case GEN7_PCODE_TIMEOUT:
9175                return -ETIMEDOUT;
9176        case GEN7_PCODE_ILLEGAL_DATA:
9177                return -EINVAL;
9178        case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
9179                return -EOVERFLOW;
9180        default:
9181                MISSING_CASE(flags);
9182                return 0;
9183        }
9184}
9185
9186int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val)
9187{
9188        int status;
9189
9190        WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
9191
9192        /* GEN6_PCODE_* are outside of the forcewake domain, so we can
9193         * use the fw I915_READ variants to reduce the amount of work
9194         * required when reading/writing.
9195         */
9196
9197        if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
9198                DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps\n",
9199                                 mbox, __builtin_return_address(0));
9200                return -EAGAIN;
9201        }
9202
9203        I915_WRITE_FW(GEN6_PCODE_DATA, *val);
9204        I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
9205        I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
9206
9207        if (__intel_wait_for_register_fw(dev_priv,
9208                                         GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
9209                                         500, 0, NULL)) {
9210                DRM_ERROR("timeout waiting for pcode read (from mbox %x) to finish for %ps\n",
9211                          mbox, __builtin_return_address(0));
9212                return -ETIMEDOUT;
9213        }
9214
9215        *val = I915_READ_FW(GEN6_PCODE_DATA);
9216        I915_WRITE_FW(GEN6_PCODE_DATA, 0);
9217
9218        if (INTEL_GEN(dev_priv) > 6)
9219                status = gen7_check_mailbox_status(dev_priv);
9220        else
9221                status = gen6_check_mailbox_status(dev_priv);
9222
9223        if (status) {
9224                DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps: %d\n",
9225                                 mbox, __builtin_return_address(0), status);
9226                return status;
9227        }
9228
9229        return 0;
9230}
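
/*
 * Illustrative usage sketch, not part of the original file: a minimal
 * caller of sandybridge_pcode_read(). GEN6_PCODE_READ_RC6VIDS is used here
 * only as an example mailbox command, and the example_* name is made up.
 * Note that the helper writes *val into GEN6_PCODE_DATA before issuing the
 * request, so the value must be initialised, and pcu_lock must be held.
 */
static void __maybe_unused example_read_rc6vids(struct drm_i915_private *dev_priv)
{
        u32 rc6vids = 0; /* input as well as output, so start from 0 */
        int ret;

        mutex_lock(&dev_priv->pcu_lock);
        ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS,
                                     &rc6vids);
        mutex_unlock(&dev_priv->pcu_lock);

        if (ret)
                DRM_DEBUG_DRIVER("Couldn't read RC6 VIDs: %d\n", ret);
}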
9231
9232int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv,
9233                                    u32 mbox, u32 val,
9234                                    int fast_timeout_us, int slow_timeout_ms)
9235{
9236        int status;
9237
9238        WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
9239
9240        /* GEN6_PCODE_* are outside of the forcewake domain, so we can
9241         * use the fw I915_READ variants to reduce the amount of work
9242         * required when reading/writing.
9243         */
9244
9245        if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
9246                DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps\n",
9247                                 val, mbox, __builtin_return_address(0));
9248                return -EAGAIN;
9249        }
9250
9251        I915_WRITE_FW(GEN6_PCODE_DATA, val);
9252        I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
9253        I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
9254
9255        if (__intel_wait_for_register_fw(dev_priv,
9256                                         GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
9257                                         fast_timeout_us, slow_timeout_ms,
9258                                         NULL)) {
9259                DRM_ERROR("timeout waiting for pcode write of 0x%08x to mbox %x to finish for %ps\n",
9260                          val, mbox, __builtin_return_address(0));
9261                return -ETIMEDOUT;
9262        }
9263
9264        I915_WRITE_FW(GEN6_PCODE_DATA, 0);
9265
9266        if (INTEL_GEN(dev_priv) > 6)
9267                status = gen7_check_mailbox_status(dev_priv);
9268        else
9269                status = gen6_check_mailbox_status(dev_priv);
9270
9271        if (status) {
9272                DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps: %d\n",
9273                                 val, mbox, __builtin_return_address(0), status);
9274                return status;
9275        }
9276
9277        return 0;
9278}
9279
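/*
 * Returns true when polling should stop: either the mailbox access itself
 * failed (*status is non-zero) or pcode acknowledged the request, i.e. the
 * reply dword matches @reply after masking with @reply_mask.
 */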
9280static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox,
9281                                  u32 request, u32 reply_mask, u32 reply,
9282                                  u32 *status)
9283{
9284        u32 val = request;
9285
9286        *status = sandybridge_pcode_read(dev_priv, mbox, &val);
9287
9288        return *status || ((val & reply_mask) == reply);
9289}
9290
9291/**
9292 * skl_pcode_request - send PCODE request until acknowledgment
9293 * @dev_priv: device private
9294 * @mbox: PCODE mailbox ID the request is targeted for
9295 * @request: request ID
9296 * @reply_mask: mask used to check for request acknowledgment
9297 * @reply: value used to check for request acknowledgment
9298 * @timeout_base_ms: timeout for polling with preemption enabled
9299 *
9300 * Keep resending the @request to @mbox until PCODE acknowledges it, PCODE
9301 * reports an error or an overall timeout of @timeout_base_ms+50 ms expires.
9302 * The request is acknowledged once the PCODE reply dword equals @reply after
9303 * applying @reply_mask. Polling is first attempted with preemption enabled
9304 * for @timeout_base_ms and if this times out for another 50 ms with
9305 * for @timeout_base_ms; if that times out, polling is retried for another
9306 * 50 ms with preemption disabled.
9307 * Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some
9308 * other error as reported by PCODE.
9309 */
9310int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request,
9311                      u32 reply_mask, u32 reply, int timeout_base_ms)
9312{
9313        u32 status;
9314        int ret;
9315
9316        WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
9317
9318#define COND skl_pcode_try_request(dev_priv, mbox, request, reply_mask, reply, \
9319                                   &status)
9320
9321        /*
9322         * Prime the PCODE by doing a request first. Normally it guarantees
9323         * that a subsequent request, at most @timeout_base_ms later, succeeds.
9324         * _wait_for() doesn't guarantee when its passed condition is evaluated
9325         * first, so send the first request explicitly.
9326         */
9327        if (COND) {
9328                ret = 0;
9329                goto out;
9330        }
9331        ret = _wait_for(COND, timeout_base_ms * 1000, 10, 10);
9332        if (!ret)
9333                goto out;
9334
9335        /*
9336         * The above can time out if the number of requests was low (2 in the
9337         * worst case) _and_ PCODE was busy for some reason even after a
9338         * (queued) request and @timeout_base_ms delay. As a workaround retry
9339         * the poll with preemption disabled to maximize the number of
9340         * requests. Increase the timeout from @timeout_base_ms to 50ms to
9341         * account for interrupts that could reduce the number of these
9342         * requests, and for any quirks of the PCODE firmware that delay
9343         * the request completion.
9344         */
9345        DRM_DEBUG_KMS("PCODE timeout, retrying with preemption disabled\n");
9346        WARN_ON_ONCE(timeout_base_ms > 3);
9347        preempt_disable();
9348        ret = wait_for_atomic(COND, 50);
9349        preempt_enable();
9350
9351out:
9352        return ret ? ret : status;
9353#undef COND
9354}
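
/*
 * Illustrative usage sketch, not part of the original file: driving
 * skl_pcode_request() the way the cdclk code does for its "prepare for
 * change" handshake. The SKL_PCODE_CDCLK_CONTROL / SKL_CDCLK_* mailbox
 * definitions and the 3 ms base timeout are assumed to match the real
 * caller; the example_* name is made up.
 */
static void __maybe_unused example_cdclk_handshake(struct drm_i915_private *dev_priv)
{
        int ret;

        mutex_lock(&dev_priv->pcu_lock);
        ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL,
                                SKL_CDCLK_PREPARE_FOR_CHANGE,
                                SKL_CDCLK_READY_FOR_CHANGE,
                                SKL_CDCLK_READY_FOR_CHANGE, 3);
        mutex_unlock(&dev_priv->pcu_lock);

        if (ret)
                DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n", ret);
}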
9355
9356static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val)
9357{
9358        struct intel_rps *rps = &dev_priv->gt_pm.rps;
9359
9360        /*
9361         * N = val - 0xb7
9362         * Slow = Fast = GPLL ref * N
9363         */
9364        return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000);
9365}
9366
9367static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val)
9368{
9369        struct intel_rps *rps = &dev_priv->gt_pm.rps;
9370
9371        return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7;
9372}
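
/*
 * Worked example with made-up numbers: assuming a gpll_ref_freq of 5400
 * (kHz), opcode 0xc8 gives N = 0xc8 - 0xb7 = 17, i.e.
 * 5400 * 17 / 1000 ~= 92 MHz; converting back, 1000 * 92 / 5400 rounds to
 * 17 and 17 + 0xb7 = 0xc8 again, so byt_freq_opcode() is the inverse of
 * byt_gpu_freq() up to DIV_ROUND_CLOSEST() rounding.
 */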
9373
9374static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val)
9375{
9376        struct intel_rps *rps = &dev_priv->gt_pm.rps;
9377
9378        /*
9379         * N = val / 2
9380         * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2
9381         */
9382        return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000);
9383}
9384
9385static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val)
9386{
9387        struct intel_rps *rps = &dev_priv->gt_pm.rps;
9388
9389        /* CHV needs even values */
9390        return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2;
9391}
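
/*
 * Worked example with made-up numbers: assuming a gpll_ref_freq of 20000
 * (kHz), opcode 128 gives 20000 * 128 / (2 * 2 * 1000) = 640 MHz, and
 * chv_freq_opcode() maps 640 MHz back to round(2 * 1000 * 640 / 20000) * 2
 * = 64 * 2 = 128, the trailing "* 2" keeping the opcode even as CHV
 * requires.
 */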
9392
9393int intel_gpu_freq(struct drm_i915_private *dev_priv, int val)
9394{
9395        if (INTEL_GEN(dev_priv) >= 9)
9396                return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
9397                                         GEN9_FREQ_SCALER);
9398        else if (IS_CHERRYVIEW(dev_priv))
9399                return chv_gpu_freq(dev_priv, val);
9400        else if (IS_VALLEYVIEW(dev_priv))
9401                return byt_gpu_freq(dev_priv, val);
9402        else
9403                return val * GT_FREQUENCY_MULTIPLIER;
9404}
9405
9406int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
9407{
9408        if (INTEL_GEN(dev_priv) >= 9)
9409                return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
9410                                         GT_FREQUENCY_MULTIPLIER);
9411        else if (IS_CHERRYVIEW(dev_priv))
9412                return chv_freq_opcode(dev_priv, val);
9413        else if (IS_VALLEYVIEW(dev_priv))
9414                return byt_freq_opcode(dev_priv, val);
9415        else
9416                return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
9417}
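
/*
 * Worked example: with GT_FREQUENCY_MULTIPLIER of 50 and GEN9_FREQ_SCALER
 * of 3, gen9+ frequency units are 50/3 MHz, so a hardware value of 36 maps
 * to 36 * 50 / 3 = 600 MHz and intel_freq_opcode() maps 600 MHz back to
 * 600 * 3 / 50 = 36. On pre-gen9 platforms (other than VLV/CHV) the unit
 * is a flat 50 MHz.
 */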
9418
9419void intel_pm_setup(struct drm_i915_private *dev_priv)
9420{
9421        mutex_init(&dev_priv->pcu_lock);
9422
9423        atomic_set(&dev_priv->gt_pm.rps.num_waiters, 0);
9424
9425        dev_priv->runtime_pm.suspended = false;
9426        atomic_set(&dev_priv->runtime_pm.wakeref_count, 0);
9427}
9428
9429static u64 vlv_residency_raw(struct drm_i915_private *dev_priv,
9430                             const i915_reg_t reg)
9431{
9432        u32 lower, upper, tmp;
9433        int loop = 2;
9434
9435        /*
9436         * The registers accessed do not need forcewake. We borrow the
9437         * uncore lock to prevent concurrent access to the range selector.
9438         */
9439        lockdep_assert_held(&dev_priv->uncore.lock);
9440
9441        /*
9442         * vlv and chv residency counters are 40 bits in width.
9443         * With a control bit, we can choose between the upper or lower
9444         * 32 bit window into this counter.
9445         *
9446         * Although we always use the counter in high-range mode elsewhere,
9447         * userspace may attempt to read the value before rc6 is initialised,
9448         * before we have set the default VLV_COUNTER_CONTROL value. So always
9449         * set the high bit to be safe.
9450         */
9451        I915_WRITE_FW(VLV_COUNTER_CONTROL,
9452                      _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
9453        upper = I915_READ_FW(reg);
9454        do {
9455                tmp = upper;
9456
9457                I915_WRITE_FW(VLV_COUNTER_CONTROL,
9458                              _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH));
9459                lower = I915_READ_FW(reg);
9460
9461                I915_WRITE_FW(VLV_COUNTER_CONTROL,
9462                              _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
9463                upper = I915_READ_FW(reg);
9464        } while (upper != tmp && --loop);
9465
9466        /*
9467         * Everywhere else we always use VLV_COUNTER_CONTROL with the
9468         * VLV_COUNT_RANGE_HIGH bit set - so it is safe to leave it set
9469         * now.
9470         */
9471
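        /*
         * The high window of the counter holds bits [39:8] and the low
         * window bits [31:0]; the loop above guarantees the two reads are
         * consistent, so shifting the high half left by 8 and OR-ing in
         * the low half reconstructs the full 40 bit value (the overlapping
         * bits [31:8] agree by construction).
         */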
9472        return lower | (u64)upper << 8;
9473}
9474
9475u64 intel_rc6_residency_ns(struct drm_i915_private *dev_priv,
9476                           const i915_reg_t reg)
9477{
9478        u64 time_hw, prev_hw, overflow_hw;
9479        unsigned int fw_domains;
9480        unsigned long flags;
9481        unsigned int i;
9482        u32 mul, div;
9483
9484        if (!HAS_RC6(dev_priv))
9485                return 0;
9486
9487        /*
9488         * Store previous hw counter values for counter wrap-around handling.
9489         *
9490         * There are only four interesting registers and they live next to each
9491         * other so we can use the relative address, compared to the smallest
9492         * one as the index into driver storage.
9493         */
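        /*
         * E.g. the RC6, RC6p and RC6pp residency registers directly follow
         * GEN6_GT_GFX_RC6_LOCKED in the MMIO space, so the offset delta
         * divided by sizeof(u32) yields indices 0..3.
         */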
9494        i = (i915_mmio_reg_offset(reg) -
9495             i915_mmio_reg_offset(GEN6_GT_GFX_RC6_LOCKED)) / sizeof(u32);
9496        if (WARN_ON_ONCE(i >= ARRAY_SIZE(dev_priv->gt_pm.rc6.cur_residency)))
9497                return 0;
9498
9499        fw_domains = intel_uncore_forcewake_for_reg(dev_priv, reg, FW_REG_READ);
9500
9501        spin_lock_irqsave(&dev_priv->uncore.lock, flags);
9502        intel_uncore_forcewake_get__locked(dev_priv, fw_domains);
9503
9504        /* On VLV and CHV, residency time is in CZ units rather than 1.28us */
9505        if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
9506                mul = 1000000;
9507                div = dev_priv->czclk_freq;
9508                overflow_hw = BIT_ULL(40);
9509                time_hw = vlv_residency_raw(dev_priv, reg);
9510        } else {
9511                /* 833.33ns units on Gen9LP, 1.28us elsewhere. */
9512                if (IS_GEN9_LP(dev_priv)) {
9513                        mul = 10000;
9514                        div = 12;
9515                } else {
9516                        mul = 1280;
9517                        div = 1;
9518                }
9519
9520                overflow_hw = BIT_ULL(32);
9521                time_hw = I915_READ_FW(reg);
9522        }
9523
9524        /*
9525         * Counter wrap handling.
9526         *
9527         * This relies on a sufficient frequency of queries, otherwise an
9528         * intermediate counter wrap-around can still be missed.
9529         */
9530        prev_hw = dev_priv->gt_pm.rc6.prev_hw_residency[i];
9531        dev_priv->gt_pm.rc6.prev_hw_residency[i] = time_hw;
9532
9533        /* RC6 delta from last sample. */
9534        if (time_hw >= prev_hw)
9535                time_hw -= prev_hw;
9536        else
9537                time_hw += overflow_hw - prev_hw;
9538
9539        /* Add delta to RC6 extended raw driver copy. */
9540        time_hw += dev_priv->gt_pm.rc6.cur_residency[i];
9541        dev_priv->gt_pm.rc6.cur_residency[i] = time_hw;
9542
9543        intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
9544        spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
9545
9546        return mul_u64_u32_div(time_hw, mul, div);
9547}
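
/*
 * Worked example of the wrap handling above, with made-up numbers: for a
 * 32 bit counter, prev_hw = 0xfffffff0 and a fresh read of time_hw = 0x10
 * give a delta of 0x10 + 2^32 - 0xfffffff0 = 0x20 ticks. On gen9 LP a tick
 * is 10000/12 ns ~= 833.33 ns, elsewhere 1280/1 ns = 1.28 us; that scaling
 * is what the final mul_u64_u32_div() applies.
 */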
9548
9549u32 intel_get_cagf(struct drm_i915_private *dev_priv, u32 rpstat)
9550{
9551        u32 cagf;
9552
9553        if (INTEL_GEN(dev_priv) >= 9)
9554                cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
9555        else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
9556                cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT;
9557        else
9558                cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;
9559
9560        return cagf;
9561}
9562