linux/drivers/gpu/drm/i915/gt/intel_rps.c
   1// SPDX-License-Identifier: MIT
   2/*
   3 * Copyright © 2019 Intel Corporation
   4 */
   5
   6#include <drm/i915_drm.h>
   7
   8#include "i915_drv.h"
   9#include "intel_breadcrumbs.h"
  10#include "intel_gt.h"
  11#include "intel_gt_clock_utils.h"
  12#include "intel_gt_irq.h"
  13#include "intel_gt_pm_irq.h"
  14#include "intel_rps.h"
  15#include "intel_sideband.h"
  16#include "../../../platform/x86/intel_ips.h"
  17
  18#define BUSY_MAX_EI     20u /* ms */
  19
  20/*
  21 * Lock protecting IPS related data structures
  22 */
  23static DEFINE_SPINLOCK(mchdev_lock);
  24
  25static struct intel_gt *rps_to_gt(struct intel_rps *rps)
  26{
  27        return container_of(rps, struct intel_gt, rps);
  28}
  29
  30static struct drm_i915_private *rps_to_i915(struct intel_rps *rps)
  31{
  32        return rps_to_gt(rps)->i915;
  33}
  34
  35static struct intel_uncore *rps_to_uncore(struct intel_rps *rps)
  36{
  37        return rps_to_gt(rps)->uncore;
  38}
  39
  40static struct intel_guc_slpc *rps_to_slpc(struct intel_rps *rps)
  41{
  42        struct intel_gt *gt = rps_to_gt(rps);
  43
  44        return &gt->uc.guc.slpc;
  45}
  46
  47static bool rps_uses_slpc(struct intel_rps *rps)
  48{
  49        struct intel_gt *gt = rps_to_gt(rps);
  50
  51        return intel_uc_uses_guc_slpc(&gt->uc);
  52}
  53
  54static u32 rps_pm_sanitize_mask(struct intel_rps *rps, u32 mask)
  55{
  56        return mask & ~rps->pm_intrmsk_mbz;
  57}
  58
  59static void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
  60{
  61        intel_uncore_write_fw(uncore, reg, val);
  62}
  63
  64static void rps_timer(struct timer_list *t)
  65{
  66        struct intel_rps *rps = from_timer(rps, t, timer);
  67        struct intel_engine_cs *engine;
  68        ktime_t dt, last, timestamp;
  69        enum intel_engine_id id;
  70        s64 max_busy[3] = {};
  71
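             /*
              * Sample each engine's accumulated busy time and keep the three
              * largest per-engine deltas since the last tick, sorted
              * most-busy first, for the busyness estimate below.
              */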
  72        timestamp = 0;
  73        for_each_engine(engine, rps_to_gt(rps), id) {
  74                s64 busy;
  75                int i;
  76
  77                dt = intel_engine_get_busy_time(engine, &timestamp);
  78                last = engine->stats.rps;
  79                engine->stats.rps = dt;
  80
  81                busy = ktime_to_ns(ktime_sub(dt, last));
  82                for (i = 0; i < ARRAY_SIZE(max_busy); i++) {
  83                        if (busy > max_busy[i])
  84                                swap(busy, max_busy[i]);
  85                }
  86        }
  87        last = rps->pm_timestamp;
  88        rps->pm_timestamp = timestamp;
  89
  90        if (intel_rps_is_active(rps)) {
  91                s64 busy;
  92                int i;
  93
  94                dt = ktime_sub(timestamp, last);
  95
  96                /*
  97                 * Our goal is to evaluate each engine independently, so we run
  98                 * at the lowest clocks required to sustain the heaviest
  99                 * workload. However, a task may be split into sequential
 100                 * dependent operations across a set of engines, such that
 101                 * the independent contributions do not account for high load,
 102                 * but overall the task is GPU bound. For example, consider
 103                 * video decode on vcs followed by colour post-processing
 104                 * on vecs, followed by general post-processing on rcs.
  105                 * Since multiple engines being active does not necessarily
  106                 * imply a single continuous workload across all engines, we
  107                 * hedge our bets by only contributing a factor of the
  108                 * distributed load into our busyness calculation.
 109                 */
 110                busy = max_busy[0];
 111                for (i = 1; i < ARRAY_SIZE(max_busy); i++) {
 112                        if (!max_busy[i])
 113                                break;
 114
 115                        busy += div_u64(max_busy[i], 1 << i);
 116                }
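                     /*
                      * i.e. busy = max_busy[0] + max_busy[1]/2 + max_busy[2]/4,
                      * so secondary engines contribute with diminishing weight.
                      */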
 117                GT_TRACE(rps_to_gt(rps),
 118                         "busy:%lld [%d%%], max:[%lld, %lld, %lld], interval:%d\n",
 119                         busy, (int)div64_u64(100 * busy, dt),
 120                         max_busy[0], max_busy[1], max_busy[2],
 121                         rps->pm_interval);
 122
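                     /*
                      * Compare the busy percentage over this interval against
                      * the current power-mode thresholds; on a hit, flag a
                      * synthetic up/down threshold event for the worker and
                      * drop back to a 1ms evaluation interval.
                      */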
 123                if (100 * busy > rps->power.up_threshold * dt &&
 124                    rps->cur_freq < rps->max_freq_softlimit) {
 125                        rps->pm_iir |= GEN6_PM_RP_UP_THRESHOLD;
 126                        rps->pm_interval = 1;
 127                        schedule_work(&rps->work);
 128                } else if (100 * busy < rps->power.down_threshold * dt &&
 129                           rps->cur_freq > rps->min_freq_softlimit) {
 130                        rps->pm_iir |= GEN6_PM_RP_DOWN_THRESHOLD;
 131                        rps->pm_interval = 1;
 132                        schedule_work(&rps->work);
 133                } else {
 134                        rps->last_adj = 0;
 135                }
 136
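                     /*
                      * Re-arm the timer, doubling the evaluation interval (up
                      * to BUSY_MAX_EI) while the frequency remains steady.
                      */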
 137                mod_timer(&rps->timer,
 138                          jiffies + msecs_to_jiffies(rps->pm_interval));
 139                rps->pm_interval = min(rps->pm_interval * 2, BUSY_MAX_EI);
 140        }
 141}
 142
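     /*
      * Note: while the timer is stopped, pm_timestamp is stored as a delta
      * against the current time, so that on restart the parked period is
      * excluded from the next busyness interval (inferred from the
      * ktime_sub() pairing in rps_start_timer()/rps_stop_timer() below).
      */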
 143static void rps_start_timer(struct intel_rps *rps)
 144{
 145        rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp);
 146        rps->pm_interval = 1;
 147        mod_timer(&rps->timer, jiffies + 1);
 148}
 149
 150static void rps_stop_timer(struct intel_rps *rps)
 151{
 152        del_timer_sync(&rps->timer);
 153        rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp);
 154        cancel_work_sync(&rps->work);
 155}
 156
 157static u32 rps_pm_mask(struct intel_rps *rps, u8 val)
 158{
 159        u32 mask = 0;
 160
 161        /* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */
 162        if (val > rps->min_freq_softlimit)
 163                mask |= (GEN6_PM_RP_UP_EI_EXPIRED |
 164                         GEN6_PM_RP_DOWN_THRESHOLD |
 165                         GEN6_PM_RP_DOWN_TIMEOUT);
 166
 167        if (val < rps->max_freq_softlimit)
 168                mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
 169
 170        mask &= rps->pm_events;
 171
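             /*
              * GEN6_PMINTRMSK is a mask of *disabled* interrupts (a set bit
              * masks that event), hence we return the complement of the
              * events we want to receive; cf. rps_disable_interrupts(),
              * which writes the sanitized ~0u to mask everything.
              */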
 172        return rps_pm_sanitize_mask(rps, ~mask);
 173}
 174
 175static void rps_reset_ei(struct intel_rps *rps)
 176{
 177        memset(&rps->ei, 0, sizeof(rps->ei));
 178}
 179
 180static void rps_enable_interrupts(struct intel_rps *rps)
 181{
 182        struct intel_gt *gt = rps_to_gt(rps);
 183
 184        GEM_BUG_ON(rps_uses_slpc(rps));
 185
 186        GT_TRACE(gt, "interrupts:on rps->pm_events: %x, rps_pm_mask:%x\n",
 187                 rps->pm_events, rps_pm_mask(rps, rps->last_freq));
 188
 189        rps_reset_ei(rps);
 190
 191        spin_lock_irq(&gt->irq_lock);
 192        gen6_gt_pm_enable_irq(gt, rps->pm_events);
 193        spin_unlock_irq(&gt->irq_lock);
 194
 195        intel_uncore_write(gt->uncore,
 196                           GEN6_PMINTRMSK, rps_pm_mask(rps, rps->last_freq));
 197}
 198
 199static void gen6_rps_reset_interrupts(struct intel_rps *rps)
 200{
 201        gen6_gt_pm_reset_iir(rps_to_gt(rps), GEN6_PM_RPS_EVENTS);
 202}
 203
 204static void gen11_rps_reset_interrupts(struct intel_rps *rps)
 205{
 206        while (gen11_gt_reset_one_iir(rps_to_gt(rps), 0, GEN11_GTPM))
 207                ;
 208}
 209
 210static void rps_reset_interrupts(struct intel_rps *rps)
 211{
 212        struct intel_gt *gt = rps_to_gt(rps);
 213
 214        spin_lock_irq(&gt->irq_lock);
 215        if (GRAPHICS_VER(gt->i915) >= 11)
 216                gen11_rps_reset_interrupts(rps);
 217        else
 218                gen6_rps_reset_interrupts(rps);
 219
 220        rps->pm_iir = 0;
 221        spin_unlock_irq(&gt->irq_lock);
 222}
 223
 224static void rps_disable_interrupts(struct intel_rps *rps)
 225{
 226        struct intel_gt *gt = rps_to_gt(rps);
 227
 228        intel_uncore_write(gt->uncore,
 229                           GEN6_PMINTRMSK, rps_pm_sanitize_mask(rps, ~0u));
 230
 231        spin_lock_irq(&gt->irq_lock);
 232        gen6_gt_pm_disable_irq(gt, GEN6_PM_RPS_EVENTS);
 233        spin_unlock_irq(&gt->irq_lock);
 234
 235        intel_synchronize_irq(gt->i915);
 236
 237        /*
 238         * Now that we will not be generating any more work, flush any
 239         * outstanding tasks. As we are called on the RPS idle path,
 240         * we will reset the GPU to minimum frequencies, so the current
 241         * state of the worker can be discarded.
 242         */
 243        cancel_work_sync(&rps->work);
 244
 245        rps_reset_interrupts(rps);
 246        GT_TRACE(gt, "interrupts:off\n");
 247}
 248
 249static const struct cparams {
 250        u16 i;
 251        u16 t;
 252        u16 m;
 253        u16 c;
 254} cparams[] = {
 255        { 1, 1333, 301, 28664 },
 256        { 1, 1066, 294, 24460 },
 257        { 1, 800, 294, 25192 },
 258        { 0, 1333, 276, 27605 },
 259        { 0, 1066, 276, 27605 },
 260        { 0, 800, 231, 23784 },
 261};
 262
 263static void gen5_rps_init(struct intel_rps *rps)
 264{
 265        struct drm_i915_private *i915 = rps_to_i915(rps);
 266        struct intel_uncore *uncore = rps_to_uncore(rps);
 267        u8 fmax, fmin, fstart;
 268        u32 rgvmodectl;
 269        int c_m, i;
 270
 271        if (i915->fsb_freq <= 3200)
 272                c_m = 0;
 273        else if (i915->fsb_freq <= 4800)
 274                c_m = 1;
 275        else
 276                c_m = 2;
 277
 278        for (i = 0; i < ARRAY_SIZE(cparams); i++) {
 279                if (cparams[i].i == c_m && cparams[i].t == i915->mem_freq) {
 280                        rps->ips.m = cparams[i].m;
 281                        rps->ips.c = cparams[i].c;
 282                        break;
 283                }
 284        }
 285
 286        rgvmodectl = intel_uncore_read(uncore, MEMMODECTL);
 287
 288        /* Set up min, max, and cur for interrupt handling */
 289        fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
 290        fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
 291        fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
 292                MEMMODE_FSTART_SHIFT;
 293        drm_dbg(&i915->drm, "fmax: %d, fmin: %d, fstart: %d\n",
 294                fmax, fmin, fstart);
 295
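             /*
              * Note the deliberate swap: the driver keeps frequencies on a
              * scale where larger means faster, while the gen5 bin values run
              * the other way, so fmax becomes min_freq and fmin becomes
              * max_freq; gen5_invert_freq() maps back when programming the
              * hardware.
              */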
 296        rps->min_freq = fmax;
 297        rps->efficient_freq = fstart;
 298        rps->max_freq = fmin;
 299}
 300
 301static unsigned long
 302__ips_chipset_val(struct intel_ips *ips)
 303{
 304        struct intel_uncore *uncore =
 305                rps_to_uncore(container_of(ips, struct intel_rps, ips));
 306        unsigned long now = jiffies_to_msecs(jiffies), dt;
 307        unsigned long result;
 308        u64 total, delta;
 309
 310        lockdep_assert_held(&mchdev_lock);
 311
 312        /*
 313         * Prevent division-by-zero if we are asking too fast.
 314         * Also, we don't get interesting results if we are polling
 315         * faster than once in 10ms, so just return the saved value
 316         * in such cases.
 317         */
 318        dt = now - ips->last_time1;
 319        if (dt <= 10)
 320                return ips->chipset_power;
 321
 322        /* FIXME: handle per-counter overflow */
 323        total = intel_uncore_read(uncore, DMIEC);
 324        total += intel_uncore_read(uncore, DDREC);
 325        total += intel_uncore_read(uncore, CSIEC);
 326
 327        delta = total - ips->last_count1;
 328
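             /*
              * Linear power model over the energy counters:
              * chipset_power = (m * delta / dt + c) / 10, with m and c taken
              * from the cparams[] table for this FSB/memory configuration.
              */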
 329        result = div_u64(div_u64(ips->m * delta, dt) + ips->c, 10);
 330
 331        ips->last_count1 = total;
 332        ips->last_time1 = now;
 333
 334        ips->chipset_power = result;
 335
 336        return result;
 337}
 338
 339static unsigned long ips_mch_val(struct intel_uncore *uncore)
 340{
 341        unsigned int m, x, b;
 342        u32 tsfs;
 343
 344        tsfs = intel_uncore_read(uncore, TSFS);
 345        x = intel_uncore_read8(uncore, TR1);
 346
 347        b = tsfs & TSFS_INTR_MASK;
 348        m = (tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT;
 349
 350        return m * x / 127 - b;
 351}
 352
 353static int _pxvid_to_vd(u8 pxvid)
 354{
 355        if (pxvid == 0)
 356                return 0;
 357
 358        if (pxvid >= 8 && pxvid < 31)
 359                pxvid = 31;
 360
 361        return (pxvid + 2) * 125;
 362}
 363
 364static u32 pvid_to_extvid(struct drm_i915_private *i915, u8 pxvid)
 365{
 366        const int vd = _pxvid_to_vd(pxvid);
 367
 368        if (INTEL_INFO(i915)->is_mobile)
 369                return max(vd - 1125, 0);
 370
 371        return vd;
 372}
 373
 374static void __gen5_ips_update(struct intel_ips *ips)
 375{
 376        struct intel_uncore *uncore =
 377                rps_to_uncore(container_of(ips, struct intel_rps, ips));
 378        u64 now, delta, dt;
 379        u32 count;
 380
 381        lockdep_assert_held(&mchdev_lock);
 382
 383        now = ktime_get_raw_ns();
 384        dt = now - ips->last_time2;
 385        do_div(dt, NSEC_PER_MSEC);
 386
 387        /* Don't divide by 0 */
 388        if (dt <= 10)
 389                return;
 390
 391        count = intel_uncore_read(uncore, GFXEC);
 392        delta = count - ips->last_count2;
 393
 394        ips->last_count2 = count;
 395        ips->last_time2 = now;
 396
 397        /* More magic constants... */
 398        ips->gfx_power = div_u64(delta * 1181, dt * 10);
 399}
 400
 401static void gen5_rps_update(struct intel_rps *rps)
 402{
 403        spin_lock_irq(&mchdev_lock);
 404        __gen5_ips_update(&rps->ips);
 405        spin_unlock_irq(&mchdev_lock);
 406}
 407
 408static unsigned int gen5_invert_freq(struct intel_rps *rps,
 409                                     unsigned int val)
 410{
 411        /* Invert the frequency bin into an ips delay */
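             /* i.e. reflect val within [min_freq, max_freq]: min_freq + (max_freq - val) */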
 412        val = rps->max_freq - val;
 413        val = rps->min_freq + val;
 414
 415        return val;
 416}
 417
 418static int __gen5_rps_set(struct intel_rps *rps, u8 val)
 419{
 420        struct intel_uncore *uncore = rps_to_uncore(rps);
 421        u16 rgvswctl;
 422
 423        lockdep_assert_held(&mchdev_lock);
 424
 425        rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
 426        if (rgvswctl & MEMCTL_CMD_STS) {
 427                DRM_DEBUG("gpu busy, RCS change rejected\n");
 428                return -EBUSY; /* still busy with another command */
 429        }
 430
 431        /* Invert the frequency bin into an ips delay */
 432        val = gen5_invert_freq(rps, val);
 433
 434        rgvswctl =
 435                (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
 436                (val << MEMCTL_FREQ_SHIFT) |
 437                MEMCTL_SFCAVM;
 438        intel_uncore_write16(uncore, MEMSWCTL, rgvswctl);
 439        intel_uncore_posting_read16(uncore, MEMSWCTL);
 440
 441        rgvswctl |= MEMCTL_CMD_STS;
 442        intel_uncore_write16(uncore, MEMSWCTL, rgvswctl);
 443
 444        return 0;
 445}
 446
 447static int gen5_rps_set(struct intel_rps *rps, u8 val)
 448{
 449        int err;
 450
 451        spin_lock_irq(&mchdev_lock);
 452        err = __gen5_rps_set(rps, val);
 453        spin_unlock_irq(&mchdev_lock);
 454
 455        return err;
 456}
 457
 458static unsigned long intel_pxfreq(u32 vidfreq)
 459{
 460        int div = (vidfreq & 0x3f0000) >> 16;
 461        int post = (vidfreq & 0x3000) >> 12;
 462        int pre = (vidfreq & 0x7);
 463
 464        if (!pre)
 465                return 0;
 466
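             /*
              * freq = divider * 133333 / (pre << post); 133333 is presumably
              * the ~133 MHz reference clock expressed in kHz.
              */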
 467        return div * 133333 / (pre << post);
 468}
 469
 470static unsigned int init_emon(struct intel_uncore *uncore)
 471{
 472        u8 pxw[16];
 473        int i;
 474
 475        /* Disable to program */
 476        intel_uncore_write(uncore, ECR, 0);
 477        intel_uncore_posting_read(uncore, ECR);
 478
 479        /* Program energy weights for various events */
 480        intel_uncore_write(uncore, SDEW, 0x15040d00);
 481        intel_uncore_write(uncore, CSIEW0, 0x007f0000);
 482        intel_uncore_write(uncore, CSIEW1, 0x1e220004);
 483        intel_uncore_write(uncore, CSIEW2, 0x04000004);
 484
 485        for (i = 0; i < 5; i++)
 486                intel_uncore_write(uncore, PEW(i), 0);
 487        for (i = 0; i < 3; i++)
 488                intel_uncore_write(uncore, DEW(i), 0);
 489
 490        /* Program P-state weights to account for frequency power adjustment */
 491        for (i = 0; i < 16; i++) {
 492                u32 pxvidfreq = intel_uncore_read(uncore, PXVFREQ(i));
 493                unsigned int freq = intel_pxfreq(pxvidfreq);
 494                unsigned int vid =
 495                        (pxvidfreq & PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT;
 496                unsigned int val;
 497
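                     /*
                      * Scale the relative power estimate (vid^2 * freq) into
                      * an 8-bit weight; 127 is the maximum 7-bit VID code and
                      * 900 presumably a reference frequency in MHz.
                      */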
 498                val = vid * vid * freq / 1000 * 255;
 499                val /= 127 * 127 * 900;
 500
 501                pxw[i] = val;
 502        }
 503        /* Render standby states get 0 weight */
 504        pxw[14] = 0;
 505        pxw[15] = 0;
 506
 507        for (i = 0; i < 4; i++) {
 508                intel_uncore_write(uncore, PXW(i),
 509                                   pxw[i * 4 + 0] << 24 |
 510                                   pxw[i * 4 + 1] << 16 |
 511                                   pxw[i * 4 + 2] <<  8 |
 512                                   pxw[i * 4 + 3] <<  0);
 513        }
 514
 515        /* Adjust magic regs to magic values (more experimental results) */
 516        intel_uncore_write(uncore, OGW0, 0);
 517        intel_uncore_write(uncore, OGW1, 0);
 518        intel_uncore_write(uncore, EG0, 0x00007f00);
 519        intel_uncore_write(uncore, EG1, 0x0000000e);
 520        intel_uncore_write(uncore, EG2, 0x000e0000);
 521        intel_uncore_write(uncore, EG3, 0x68000300);
 522        intel_uncore_write(uncore, EG4, 0x42000000);
 523        intel_uncore_write(uncore, EG5, 0x00140031);
 524        intel_uncore_write(uncore, EG6, 0);
 525        intel_uncore_write(uncore, EG7, 0);
 526
 527        for (i = 0; i < 8; i++)
 528                intel_uncore_write(uncore, PXWL(i), 0);
 529
 530        /* Enable PMON + select events */
 531        intel_uncore_write(uncore, ECR, 0x80000019);
 532
 533        return intel_uncore_read(uncore, LCFUSE02) & LCFUSE_HIV_MASK;
 534}
 535
 536static bool gen5_rps_enable(struct intel_rps *rps)
 537{
 538        struct drm_i915_private *i915 = rps_to_i915(rps);
 539        struct intel_uncore *uncore = rps_to_uncore(rps);
 540        u8 fstart, vstart;
 541        u32 rgvmodectl;
 542
 543        spin_lock_irq(&mchdev_lock);
 544
 545        rgvmodectl = intel_uncore_read(uncore, MEMMODECTL);
 546
 547        /* Enable temp reporting */
 548        intel_uncore_write16(uncore, PMMISC,
 549                             intel_uncore_read16(uncore, PMMISC) | MCPPCE_EN);
 550        intel_uncore_write16(uncore, TSC1,
 551                             intel_uncore_read16(uncore, TSC1) | TSE);
 552
 553        /* 100ms RC evaluation intervals */
 554        intel_uncore_write(uncore, RCUPEI, 100000);
 555        intel_uncore_write(uncore, RCDNEI, 100000);
 556
 557        /* Set max/min thresholds to 90ms and 80ms respectively */
 558        intel_uncore_write(uncore, RCBMAXAVG, 90000);
 559        intel_uncore_write(uncore, RCBMINAVG, 80000);
 560
 561        intel_uncore_write(uncore, MEMIHYST, 1);
 562
 563        /* Set up min, max, and cur for interrupt handling */
 564        fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
 565                MEMMODE_FSTART_SHIFT;
 566
 567        vstart = (intel_uncore_read(uncore, PXVFREQ(fstart)) &
 568                  PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT;
 569
 570        intel_uncore_write(uncore,
 571                           MEMINTREN,
 572                           MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
 573
 574        intel_uncore_write(uncore, VIDSTART, vstart);
 575        intel_uncore_posting_read(uncore, VIDSTART);
 576
 577        rgvmodectl |= MEMMODE_SWMODE_EN;
 578        intel_uncore_write(uncore, MEMMODECTL, rgvmodectl);
 579
 580        if (wait_for_atomic((intel_uncore_read(uncore, MEMSWCTL) &
 581                             MEMCTL_CMD_STS) == 0, 10))
 582                drm_err(&uncore->i915->drm,
 583                        "stuck trying to change perf mode\n");
 584        mdelay(1);
 585
 586        __gen5_rps_set(rps, rps->cur_freq);
 587
 588        rps->ips.last_count1 = intel_uncore_read(uncore, DMIEC);
 589        rps->ips.last_count1 += intel_uncore_read(uncore, DDREC);
 590        rps->ips.last_count1 += intel_uncore_read(uncore, CSIEC);
 591        rps->ips.last_time1 = jiffies_to_msecs(jiffies);
 592
 593        rps->ips.last_count2 = intel_uncore_read(uncore, GFXEC);
 594        rps->ips.last_time2 = ktime_get_raw_ns();
 595
 596        spin_lock(&i915->irq_lock);
 597        ilk_enable_display_irq(i915, DE_PCU_EVENT);
 598        spin_unlock(&i915->irq_lock);
 599
 600        spin_unlock_irq(&mchdev_lock);
 601
 602        rps->ips.corr = init_emon(uncore);
 603
 604        return true;
 605}
 606
 607static void gen5_rps_disable(struct intel_rps *rps)
 608{
 609        struct drm_i915_private *i915 = rps_to_i915(rps);
 610        struct intel_uncore *uncore = rps_to_uncore(rps);
 611        u16 rgvswctl;
 612
 613        spin_lock_irq(&mchdev_lock);
 614
 615        spin_lock(&i915->irq_lock);
 616        ilk_disable_display_irq(i915, DE_PCU_EVENT);
 617        spin_unlock(&i915->irq_lock);
 618
 619        rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
 620
 621        /* Ack interrupts, disable EFC interrupt */
 622        intel_uncore_write(uncore, MEMINTREN,
 623                           intel_uncore_read(uncore, MEMINTREN) &
 624                           ~MEMINT_EVAL_CHG_EN);
 625        intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);
 626
 627        /* Go back to the starting frequency */
 628        __gen5_rps_set(rps, rps->idle_freq);
 629        mdelay(1);
 630        rgvswctl |= MEMCTL_CMD_STS;
 631        intel_uncore_write(uncore, MEMSWCTL, rgvswctl);
 632        mdelay(1);
 633
 634        spin_unlock_irq(&mchdev_lock);
 635}
 636
 637static u32 rps_limits(struct intel_rps *rps, u8 val)
 638{
 639        u32 limits;
 640
 641        /*
 642         * Only set the down limit when we've reached the lowest level to avoid
 643         * getting more interrupts, otherwise leave this clear. This prevents a
 644         * race in the hw when coming out of rc6: There's a tiny window where
 645         * the hw runs at the minimal clock before selecting the desired
 646         * frequency, if the down threshold expires in that window we will not
 647         * receive a down interrupt.
 648         */
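             /*
              * GEN6_RP_INTERRUPT_LIMITS packs the max softlimit at bits 23+
              * and the min at bits 14+ on gen9+, versus bits 24+ and 16+ on
              * earlier gens, hence the different shifts below.
              */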
 649        if (GRAPHICS_VER(rps_to_i915(rps)) >= 9) {
 650                limits = rps->max_freq_softlimit << 23;
 651                if (val <= rps->min_freq_softlimit)
 652                        limits |= rps->min_freq_softlimit << 14;
 653        } else {
 654                limits = rps->max_freq_softlimit << 24;
 655                if (val <= rps->min_freq_softlimit)
 656                        limits |= rps->min_freq_softlimit << 16;
 657        }
 658
 659        return limits;
 660}
 661
 662static void rps_set_power(struct intel_rps *rps, int new_power)
 663{
 664        struct intel_gt *gt = rps_to_gt(rps);
 665        struct intel_uncore *uncore = gt->uncore;
 666        u32 threshold_up = 0, threshold_down = 0; /* in % */
 667        u32 ei_up = 0, ei_down = 0;
 668
 669        lockdep_assert_held(&rps->power.mutex);
 670
 671        if (new_power == rps->power.mode)
 672                return;
 673
 674        threshold_up = 95;
 675        threshold_down = 85;
 676
 677        /* Note the units here are not exactly 1us, but 1280ns. */
 678        switch (new_power) {
 679        case LOW_POWER:
 680                ei_up = 16000;
 681                ei_down = 32000;
 682                break;
 683
 684        case BETWEEN:
 685                ei_up = 13000;
 686                ei_down = 32000;
 687                break;
 688
 689        case HIGH_POWER:
 690                ei_up = 10000;
 691                ei_down = 32000;
 692                break;
 693        }
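             /*
              * Worked example: in HIGH_POWER the up evaluation interval is
              * 10000us (10ms) at a 95% threshold, so GEN6_RP_UP_THRESHOLD
              * below is programmed to 95% of that window
              * (10000 * 95 * 10 ns = 9.5ms).
              */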
 694
  695        /* Once byt can survive dynamic sw freq adjustments without
  696         * hanging the system, this restriction can be lifted.
  697         */
 698        if (IS_VALLEYVIEW(gt->i915))
 699                goto skip_hw_write;
 700
 701        GT_TRACE(gt,
 702                 "changing power mode [%d], up %d%% @ %dus, down %d%% @ %dus\n",
 703                 new_power, threshold_up, ei_up, threshold_down, ei_down);
 704
 705        set(uncore, GEN6_RP_UP_EI,
 706            intel_gt_ns_to_pm_interval(gt, ei_up * 1000));
 707        set(uncore, GEN6_RP_UP_THRESHOLD,
 708            intel_gt_ns_to_pm_interval(gt, ei_up * threshold_up * 10));
 709
 710        set(uncore, GEN6_RP_DOWN_EI,
 711            intel_gt_ns_to_pm_interval(gt, ei_down * 1000));
 712        set(uncore, GEN6_RP_DOWN_THRESHOLD,
 713            intel_gt_ns_to_pm_interval(gt, ei_down * threshold_down * 10));
 714
 715        set(uncore, GEN6_RP_CONTROL,
 716            (GRAPHICS_VER(gt->i915) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) |
 717            GEN6_RP_MEDIA_HW_NORMAL_MODE |
 718            GEN6_RP_MEDIA_IS_GFX |
 719            GEN6_RP_ENABLE |
 720            GEN6_RP_UP_BUSY_AVG |
 721            GEN6_RP_DOWN_IDLE_AVG);
 722
 723skip_hw_write:
 724        rps->power.mode = new_power;
 725        rps->power.up_threshold = threshold_up;
 726        rps->power.down_threshold = threshold_down;
 727}
 728
 729static void gen6_rps_set_thresholds(struct intel_rps *rps, u8 val)
 730{
 731        int new_power;
 732
 733        new_power = rps->power.mode;
 734        switch (rps->power.mode) {
 735        case LOW_POWER:
 736                if (val > rps->efficient_freq + 1 &&
 737                    val > rps->cur_freq)
 738                        new_power = BETWEEN;
 739                break;
 740
 741        case BETWEEN:
 742                if (val <= rps->efficient_freq &&
 743                    val < rps->cur_freq)
 744                        new_power = LOW_POWER;
 745                else if (val >= rps->rp0_freq &&
 746                         val > rps->cur_freq)
 747                        new_power = HIGH_POWER;
 748                break;
 749
 750        case HIGH_POWER:
 751                if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
 752                    val < rps->cur_freq)
 753                        new_power = BETWEEN;
 754                break;
 755        }
 756        /* Max/min bins are special */
 757        if (val <= rps->min_freq_softlimit)
 758                new_power = LOW_POWER;
 759        if (val >= rps->max_freq_softlimit)
 760                new_power = HIGH_POWER;
 761
 762        mutex_lock(&rps->power.mutex);
 763        if (rps->power.interactive)
 764                new_power = HIGH_POWER;
 765        rps_set_power(rps, new_power);
 766        mutex_unlock(&rps->power.mutex);
 767}
 768
 769void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive)
 770{
 771        GT_TRACE(rps_to_gt(rps), "mark interactive: %s\n", yesno(interactive));
 772
 773        mutex_lock(&rps->power.mutex);
 774        if (interactive) {
 775                if (!rps->power.interactive++ && intel_rps_is_active(rps))
 776                        rps_set_power(rps, HIGH_POWER);
 777        } else {
 778                GEM_BUG_ON(!rps->power.interactive);
 779                rps->power.interactive--;
 780        }
 781        mutex_unlock(&rps->power.mutex);
 782}
 783
 784static int gen6_rps_set(struct intel_rps *rps, u8 val)
 785{
 786        struct intel_uncore *uncore = rps_to_uncore(rps);
 787        struct drm_i915_private *i915 = rps_to_i915(rps);
 788        u32 swreq;
 789
 790        GEM_BUG_ON(rps_uses_slpc(rps));
 791
 792        if (GRAPHICS_VER(i915) >= 9)
 793                swreq = GEN9_FREQUENCY(val);
 794        else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
 795                swreq = HSW_FREQUENCY(val);
 796        else
 797                swreq = (GEN6_FREQUENCY(val) |
 798                         GEN6_OFFSET(0) |
 799                         GEN6_AGGRESSIVE_TURBO);
 800        set(uncore, GEN6_RPNSWREQ, swreq);
 801
 802        GT_TRACE(rps_to_gt(rps), "set val:%x, freq:%d, swreq:%x\n",
 803                 val, intel_gpu_freq(rps, val), swreq);
 804
 805        return 0;
 806}
 807
 808static int vlv_rps_set(struct intel_rps *rps, u8 val)
 809{
 810        struct drm_i915_private *i915 = rps_to_i915(rps);
 811        int err;
 812
 813        vlv_punit_get(i915);
 814        err = vlv_punit_write(i915, PUNIT_REG_GPU_FREQ_REQ, val);
 815        vlv_punit_put(i915);
 816
 817        GT_TRACE(rps_to_gt(rps), "set val:%x, freq:%d\n",
 818                 val, intel_gpu_freq(rps, val));
 819
 820        return err;
 821}
 822
 823static int rps_set(struct intel_rps *rps, u8 val, bool update)
 824{
 825        struct drm_i915_private *i915 = rps_to_i915(rps);
 826        int err;
 827
 828        if (val == rps->last_freq)
 829                return 0;
 830
 831        if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
 832                err = vlv_rps_set(rps, val);
 833        else if (GRAPHICS_VER(i915) >= 6)
 834                err = gen6_rps_set(rps, val);
 835        else
 836                err = gen5_rps_set(rps, val);
 837        if (err)
 838                return err;
 839
 840        if (update && GRAPHICS_VER(i915) >= 6)
 841                gen6_rps_set_thresholds(rps, val);
 842        rps->last_freq = val;
 843
 844        return 0;
 845}
 846
 847void intel_rps_unpark(struct intel_rps *rps)
 848{
 849        if (!intel_rps_is_enabled(rps))
 850                return;
 851
 852        GT_TRACE(rps_to_gt(rps), "unpark:%x\n", rps->cur_freq);
 853
 854        /*
 855         * Use the user's desired frequency as a guide, but for better
 856         * performance, jump directly to RPe as our starting frequency.
 857         */
 858        mutex_lock(&rps->lock);
 859
 860        intel_rps_set_active(rps);
 861        intel_rps_set(rps,
 862                      clamp(rps->cur_freq,
 863                            rps->min_freq_softlimit,
 864                            rps->max_freq_softlimit));
 865
 866        mutex_unlock(&rps->lock);
 867
 868        rps->pm_iir = 0;
 869        if (intel_rps_has_interrupts(rps))
 870                rps_enable_interrupts(rps);
 871        if (intel_rps_uses_timer(rps))
 872                rps_start_timer(rps);
 873
 874        if (GRAPHICS_VER(rps_to_i915(rps)) == 5)
 875                gen5_rps_update(rps);
 876}
 877
 878void intel_rps_park(struct intel_rps *rps)
 879{
 880        int adj;
 881
 882        if (!intel_rps_is_enabled(rps))
 883                return;
 884
 885        if (!intel_rps_clear_active(rps))
 886                return;
 887
 888        if (intel_rps_uses_timer(rps))
 889                rps_stop_timer(rps);
 890        if (intel_rps_has_interrupts(rps))
 891                rps_disable_interrupts(rps);
 892
 893        if (rps->last_freq <= rps->idle_freq)
 894                return;
 895
 896        /*
 897         * The punit delays the write of the frequency and voltage until it
 898         * determines the GPU is awake. During normal usage we don't want to
 899         * waste power changing the frequency if the GPU is sleeping (rc6).
 900         * However, the GPU and driver is now idle and we do not want to delay
 901         * switching to minimum voltage (reducing power whilst idle) as we do
 902         * not expect to be woken in the near future and so must flush the
 903         * change by waking the device.
 904         *
 905         * We choose to take the media powerwell (either would do to trick the
 906         * punit into committing the voltage change) as that takes a lot less
 907         * power than the render powerwell.
 908         */
 909        intel_uncore_forcewake_get(rps_to_uncore(rps), FORCEWAKE_MEDIA);
 910        rps_set(rps, rps->idle_freq, false);
 911        intel_uncore_forcewake_put(rps_to_uncore(rps), FORCEWAKE_MEDIA);
 912
 913        /*
 914         * Since we will try and restart from the previously requested
 915         * frequency on unparking, treat this idle point as a downclock
 916         * interrupt and reduce the frequency for resume. If we park/unpark
 917         * more frequently than the rps worker can run, we will not respond
 918         * to any EI and never see a change in frequency.
 919         *
 920         * (Note we accommodate Cherryview's limitation of only using an
 921         * even bin by applying it to all.)
 922         */
 923        adj = rps->last_adj;
 924        if (adj < 0)
 925                adj *= 2;
 926        else /* CHV needs even encode values */
 927                adj = -2;
 928        rps->last_adj = adj;
 929        rps->cur_freq = max_t(int, rps->cur_freq + adj, rps->min_freq);
 930        if (rps->cur_freq < rps->efficient_freq) {
 931                rps->cur_freq = rps->efficient_freq;
 932                rps->last_adj = 0;
 933        }
 934
 935        GT_TRACE(rps_to_gt(rps), "park:%x\n", rps->cur_freq);
 936}
 937
 938void intel_rps_boost(struct i915_request *rq)
 939{
 940        if (i915_request_signaled(rq) || i915_request_has_waitboost(rq))
 941                return;
 942
 943        /* Serializes with i915_request_retire() */
 944        if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags)) {
 945                struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps;
 946
 947                if (atomic_fetch_inc(&rps->num_waiters))
 948                        return;
 949
 950                if (!intel_rps_is_active(rps))
 951                        return;
 952
 953                GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n",
 954                         rq->fence.context, rq->fence.seqno);
 955
 956                if (READ_ONCE(rps->cur_freq) < rps->boost_freq)
 957                        schedule_work(&rps->work);
 958
 959                WRITE_ONCE(rps->boosts, rps->boosts + 1); /* debug only */
 960        }
 961}
 962
 963int intel_rps_set(struct intel_rps *rps, u8 val)
 964{
 965        int err;
 966
 967        lockdep_assert_held(&rps->lock);
 968        GEM_BUG_ON(val > rps->max_freq);
 969        GEM_BUG_ON(val < rps->min_freq);
 970
 971        if (intel_rps_is_active(rps)) {
 972                err = rps_set(rps, val, true);
 973                if (err)
 974                        return err;
 975
 976                /*
 977                 * Make sure we continue to get interrupts
 978                 * until we hit the minimum or maximum frequencies.
 979                 */
 980                if (intel_rps_has_interrupts(rps)) {
 981                        struct intel_uncore *uncore = rps_to_uncore(rps);
 982
 983                        set(uncore,
 984                            GEN6_RP_INTERRUPT_LIMITS, rps_limits(rps, val));
 985
 986                        set(uncore, GEN6_PMINTRMSK, rps_pm_mask(rps, val));
 987                }
 988        }
 989
 990        rps->cur_freq = val;
 991        return 0;
 992}
 993
 994static void gen6_rps_init(struct intel_rps *rps)
 995{
 996        struct drm_i915_private *i915 = rps_to_i915(rps);
 997        struct intel_uncore *uncore = rps_to_uncore(rps);
 998
 999        /* All of these values are in units of 50MHz */
1000
1001        /* static values from HW: RP0 > RP1 > RPn (min_freq) */
1002        if (IS_GEN9_LP(i915)) {
1003                u32 rp_state_cap = intel_uncore_read(uncore, BXT_RP_STATE_CAP);
1004
1005                rps->rp0_freq = (rp_state_cap >> 16) & 0xff;
1006                rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
1007                rps->min_freq = (rp_state_cap >>  0) & 0xff;
1008        } else {
1009                u32 rp_state_cap = intel_uncore_read(uncore, GEN6_RP_STATE_CAP);
1010
1011                rps->rp0_freq = (rp_state_cap >>  0) & 0xff;
1012                rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
1013                rps->min_freq = (rp_state_cap >> 16) & 0xff;
1014        }
1015
1016        /* hw_max = RP0 until we check for overclocking */
1017        rps->max_freq = rps->rp0_freq;
1018
1019        rps->efficient_freq = rps->rp1_freq;
1020        if (IS_HASWELL(i915) || IS_BROADWELL(i915) ||
1021            IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) {
1022                u32 ddcc_status = 0;
1023
1024                if (sandybridge_pcode_read(i915,
1025                                           HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
1026                                           &ddcc_status, NULL) == 0)
1027                        rps->efficient_freq =
1028                                clamp_t(u8,
1029                                        (ddcc_status >> 8) & 0xff,
1030                                        rps->min_freq,
1031                                        rps->max_freq);
1032        }
1033
1034        if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) {
 1035                /* Store the frequency values in 16.66 MHz units, which is
1036                 * the natural hardware unit for SKL
1037                 */
1038                rps->rp0_freq *= GEN9_FREQ_SCALER;
1039                rps->rp1_freq *= GEN9_FREQ_SCALER;
1040                rps->min_freq *= GEN9_FREQ_SCALER;
1041                rps->max_freq *= GEN9_FREQ_SCALER;
1042                rps->efficient_freq *= GEN9_FREQ_SCALER;
1043        }
1044}
1045
1046static bool rps_reset(struct intel_rps *rps)
1047{
1048        struct drm_i915_private *i915 = rps_to_i915(rps);
1049
1050        /* force a reset */
1051        rps->power.mode = -1;
1052        rps->last_freq = -1;
1053
1054        if (rps_set(rps, rps->min_freq, true)) {
1055                drm_err(&i915->drm, "Failed to reset RPS to initial values\n");
1056                return false;
1057        }
1058
1059        rps->cur_freq = rps->min_freq;
1060        return true;
1061}
1062
1063/* See the Gen9_GT_PM_Programming_Guide doc for the below */
1064static bool gen9_rps_enable(struct intel_rps *rps)
1065{
1066        struct intel_gt *gt = rps_to_gt(rps);
1067        struct intel_uncore *uncore = gt->uncore;
1068
1069        /* Program defaults and thresholds for RPS */
1070        if (GRAPHICS_VER(gt->i915) == 9)
1071                intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ,
1072                                      GEN9_FREQUENCY(rps->rp1_freq));
1073
1074        intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 0xa);
1075
1076        rps->pm_events = GEN6_PM_RP_UP_THRESHOLD | GEN6_PM_RP_DOWN_THRESHOLD;
1077
1078        return rps_reset(rps);
1079}
1080
1081static bool gen8_rps_enable(struct intel_rps *rps)
1082{
1083        struct intel_uncore *uncore = rps_to_uncore(rps);
1084
1085        intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ,
1086                              HSW_FREQUENCY(rps->rp1_freq));
1087
1088        intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
1089
1090        rps->pm_events = GEN6_PM_RP_UP_THRESHOLD | GEN6_PM_RP_DOWN_THRESHOLD;
1091
1092        return rps_reset(rps);
1093}
1094
1095static bool gen6_rps_enable(struct intel_rps *rps)
1096{
1097        struct intel_uncore *uncore = rps_to_uncore(rps);
1098
1099        /* Power down if completely idle for over 50ms */
1100        intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 50000);
1101        intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
1102
1103        rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD |
1104                          GEN6_PM_RP_DOWN_THRESHOLD |
1105                          GEN6_PM_RP_DOWN_TIMEOUT);
1106
1107        return rps_reset(rps);
1108}
1109
1110static int chv_rps_max_freq(struct intel_rps *rps)
1111{
1112        struct drm_i915_private *i915 = rps_to_i915(rps);
1113        struct intel_gt *gt = rps_to_gt(rps);
1114        u32 val;
1115
1116        val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE);
1117
1118        switch (gt->info.sseu.eu_total) {
1119        case 8:
1120                /* (2 * 4) config */
1121                val >>= FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT;
1122                break;
1123        case 12:
1124                /* (2 * 6) config */
1125                val >>= FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT;
1126                break;
1127        case 16:
1128                /* (2 * 8) config */
1129        default:
1130                /* Setting (2 * 8) Min RP0 for any other combination */
1131                val >>= FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT;
1132                break;
1133        }
1134
1135        return val & FB_GFX_FREQ_FUSE_MASK;
1136}
1137
1138static int chv_rps_rpe_freq(struct intel_rps *rps)
1139{
1140        struct drm_i915_private *i915 = rps_to_i915(rps);
1141        u32 val;
1142
1143        val = vlv_punit_read(i915, PUNIT_GPU_DUTYCYCLE_REG);
1144        val >>= PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT;
1145
1146        return val & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
1147}
1148
1149static int chv_rps_guar_freq(struct intel_rps *rps)
1150{
1151        struct drm_i915_private *i915 = rps_to_i915(rps);
1152        u32 val;
1153
1154        val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE);
1155
1156        return val & FB_GFX_FREQ_FUSE_MASK;
1157}
1158
1159static u32 chv_rps_min_freq(struct intel_rps *rps)
1160{
1161        struct drm_i915_private *i915 = rps_to_i915(rps);
1162        u32 val;
1163
1164        val = vlv_punit_read(i915, FB_GFX_FMIN_AT_VMIN_FUSE);
1165        val >>= FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT;
1166
1167        return val & FB_GFX_FREQ_FUSE_MASK;
1168}
1169
1170static bool chv_rps_enable(struct intel_rps *rps)
1171{
1172        struct intel_uncore *uncore = rps_to_uncore(rps);
1173        struct drm_i915_private *i915 = rps_to_i915(rps);
1174        u32 val;
1175
1176        /* 1: Program defaults and thresholds for RPS*/
1177        intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000);
1178        intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400);
1179        intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000);
1180        intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000);
1181        intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000);
1182
1183        intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
1184
1185        /* 2: Enable RPS */
1186        intel_uncore_write_fw(uncore, GEN6_RP_CONTROL,
1187                              GEN6_RP_MEDIA_HW_NORMAL_MODE |
1188                              GEN6_RP_MEDIA_IS_GFX |
1189                              GEN6_RP_ENABLE |
1190                              GEN6_RP_UP_BUSY_AVG |
1191                              GEN6_RP_DOWN_IDLE_AVG);
1192
1193        rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD |
1194                          GEN6_PM_RP_DOWN_THRESHOLD |
1195                          GEN6_PM_RP_DOWN_TIMEOUT);
1196
1197        /* Setting Fixed Bias */
1198        vlv_punit_get(i915);
1199
1200        val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50;
1201        vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val);
1202
1203        val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
1204
1205        vlv_punit_put(i915);
1206
1207        /* RPS code assumes GPLL is used */
1208        drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0,
1209                      "GPLL not enabled\n");
1210
1211        drm_dbg(&i915->drm, "GPLL enabled? %s\n", yesno(val & GPLLENABLE));
1212        drm_dbg(&i915->drm, "GPU status: 0x%08x\n", val);
1213
1214        return rps_reset(rps);
1215}
1216
1217static int vlv_rps_guar_freq(struct intel_rps *rps)
1218{
1219        struct drm_i915_private *i915 = rps_to_i915(rps);
1220        u32 val, rp1;
1221
1222        val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE);
1223
1224        rp1 = val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK;
1225        rp1 >>= FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
1226
1227        return rp1;
1228}
1229
1230static int vlv_rps_max_freq(struct intel_rps *rps)
1231{
1232        struct drm_i915_private *i915 = rps_to_i915(rps);
1233        u32 val, rp0;
1234
1235        val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE);
1236
1237        rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
1238        /* Clamp to max */
1239        rp0 = min_t(u32, rp0, 0xea);
1240
1241        return rp0;
1242}
1243
1244static int vlv_rps_rpe_freq(struct intel_rps *rps)
1245{
1246        struct drm_i915_private *i915 = rps_to_i915(rps);
1247        u32 val, rpe;
1248
1249        val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
1250        rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
1251        val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
1252        rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
1253
1254        return rpe;
1255}
1256
1257static int vlv_rps_min_freq(struct intel_rps *rps)
1258{
1259        struct drm_i915_private *i915 = rps_to_i915(rps);
1260        u32 val;
1261
1262        val = vlv_punit_read(i915, PUNIT_REG_GPU_LFM) & 0xff;
1263        /*
1264         * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value
1265         * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on
1266         * a BYT-M B0 the above register contains 0xbf. Moreover when setting
1267         * a frequency Punit will not allow values below 0xc0. Clamp it 0xc0
1268         * to make sure it matches what Punit accepts.
1269         */
1270        return max_t(u32, val, 0xc0);
1271}
1272
1273static bool vlv_rps_enable(struct intel_rps *rps)
1274{
1275        struct intel_uncore *uncore = rps_to_uncore(rps);
1276        struct drm_i915_private *i915 = rps_to_i915(rps);
1277        u32 val;
1278
1279        intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000);
1280        intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400);
1281        intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000);
1282        intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000);
1283        intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000);
1284
1285        intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
1286
1287        intel_uncore_write_fw(uncore, GEN6_RP_CONTROL,
1288                              GEN6_RP_MEDIA_TURBO |
1289                              GEN6_RP_MEDIA_HW_NORMAL_MODE |
1290                              GEN6_RP_MEDIA_IS_GFX |
1291                              GEN6_RP_ENABLE |
1292                              GEN6_RP_UP_BUSY_AVG |
1293                              GEN6_RP_DOWN_IDLE_CONT);
1294
1295        /* WaGsvRC0ResidencyMethod:vlv */
1296        rps->pm_events = GEN6_PM_RP_UP_EI_EXPIRED;
1297
1298        vlv_punit_get(i915);
1299
1300        /* Setting Fixed Bias */
1301        val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875;
1302        vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val);
1303
1304        val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
1305
1306        vlv_punit_put(i915);
1307
1308        /* RPS code assumes GPLL is used */
1309        drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0,
1310                      "GPLL not enabled\n");
1311
1312        drm_dbg(&i915->drm, "GPLL enabled? %s\n", yesno(val & GPLLENABLE));
1313        drm_dbg(&i915->drm, "GPU status: 0x%08x\n", val);
1314
1315        return rps_reset(rps);
1316}
1317
1318static unsigned long __ips_gfx_val(struct intel_ips *ips)
1319{
1320        struct intel_rps *rps = container_of(ips, typeof(*rps), ips);
1321        struct intel_uncore *uncore = rps_to_uncore(rps);
1322        unsigned int t, state1, state2;
1323        u32 pxvid, ext_v;
1324        u64 corr, corr2;
1325
1326        lockdep_assert_held(&mchdev_lock);
1327
1328        pxvid = intel_uncore_read(uncore, PXVFREQ(rps->cur_freq));
1329        pxvid = (pxvid >> 24) & 0x7f;
1330        ext_v = pvid_to_extvid(rps_to_i915(rps), pxvid);
1331
1332        state1 = ext_v;
1333
1334        /* Revel in the empirically derived constants */
1335
1336        /* Correction factor in 1/100000 units */
1337        t = ips_mch_val(uncore);
1338        if (t > 80)
1339                corr = t * 2349 + 135940;
1340        else if (t >= 50)
1341                corr = t * 964 + 29317;
1342        else /* < 50 */
1343                corr = t * 301 + 1004;
1344
1345        corr = div_u64(corr * 150142 * state1, 10000) - 78642;
1346        corr2 = div_u64(corr, 100000) * ips->corr;
1347
1348        state2 = div_u64(corr2 * state1, 10000);
1349        state2 /= 100; /* convert to mW */
1350
1351        __gen5_ips_update(ips);
1352
1353        return ips->gfx_power + state2;
1354}
1355
1356static bool has_busy_stats(struct intel_rps *rps)
1357{
1358        struct intel_engine_cs *engine;
1359        enum intel_engine_id id;
1360
1361        for_each_engine(engine, rps_to_gt(rps), id) {
1362                if (!intel_engine_supports_stats(engine))
1363                        return false;
1364        }
1365
1366        return true;
1367}
1368
1369void intel_rps_enable(struct intel_rps *rps)
1370{
1371        struct drm_i915_private *i915 = rps_to_i915(rps);
1372        struct intel_uncore *uncore = rps_to_uncore(rps);
1373        bool enabled = false;
1374
1375        if (!HAS_RPS(i915))
1376                return;
1377
1378        if (rps_uses_slpc(rps))
1379                return;
1380
1381        intel_gt_check_clock_frequency(rps_to_gt(rps));
1382
1383        intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
1384        if (rps->max_freq <= rps->min_freq)
1385                /* leave disabled, no room for dynamic reclocking */;
1386        else if (IS_CHERRYVIEW(i915))
1387                enabled = chv_rps_enable(rps);
1388        else if (IS_VALLEYVIEW(i915))
1389                enabled = vlv_rps_enable(rps);
1390        else if (GRAPHICS_VER(i915) >= 9)
1391                enabled = gen9_rps_enable(rps);
1392        else if (GRAPHICS_VER(i915) >= 8)
1393                enabled = gen8_rps_enable(rps);
1394        else if (GRAPHICS_VER(i915) >= 6)
1395                enabled = gen6_rps_enable(rps);
1396        else if (IS_IRONLAKE_M(i915))
1397                enabled = gen5_rps_enable(rps);
1398        else
1399                MISSING_CASE(GRAPHICS_VER(i915));
1400        intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
1401        if (!enabled)
1402                return;
1403
1404        GT_TRACE(rps_to_gt(rps),
1405                 "min:%x, max:%x, freq:[%d, %d]\n",
1406                 rps->min_freq, rps->max_freq,
1407                 intel_gpu_freq(rps, rps->min_freq),
1408                 intel_gpu_freq(rps, rps->max_freq));
1409
1410        GEM_BUG_ON(rps->max_freq < rps->min_freq);
1411        GEM_BUG_ON(rps->idle_freq > rps->max_freq);
1412
1413        GEM_BUG_ON(rps->efficient_freq < rps->min_freq);
1414        GEM_BUG_ON(rps->efficient_freq > rps->max_freq);
1415
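             /*
              * Prefer the software timer (engine busy-stats sampling in
              * rps_timer()) when every engine can report busy time; otherwise
              * fall back to the hardware RPS up/down interrupts on gen6+.
              */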
1416        if (has_busy_stats(rps))
1417                intel_rps_set_timer(rps);
1418        else if (GRAPHICS_VER(i915) >= 6)
1419                intel_rps_set_interrupts(rps);
1420        else
1421                /* Ironlake currently uses intel_ips.ko */ {}
1422
1423        intel_rps_set_enabled(rps);
1424}
1425
1426static void gen6_rps_disable(struct intel_rps *rps)
1427{
1428        set(rps_to_uncore(rps), GEN6_RP_CONTROL, 0);
1429}
1430
1431void intel_rps_disable(struct intel_rps *rps)
1432{
1433        struct drm_i915_private *i915 = rps_to_i915(rps);
1434
1435        intel_rps_clear_enabled(rps);
1436        intel_rps_clear_interrupts(rps);
1437        intel_rps_clear_timer(rps);
1438
1439        if (GRAPHICS_VER(i915) >= 6)
1440                gen6_rps_disable(rps);
1441        else if (IS_IRONLAKE_M(i915))
1442                gen5_rps_disable(rps);
1443}
1444
1445static int byt_gpu_freq(struct intel_rps *rps, int val)
1446{
1447        /*
1448         * N = val - 0xb7
1449         * Slow = Fast = GPLL ref * N
1450         */
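             /* gpll_ref_freq is in kHz, so the division by 1000 yields MHz */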
1451        return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000);
1452}
1453
1454static int byt_freq_opcode(struct intel_rps *rps, int val)
1455{
1456        return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7;
1457}
1458
1459static int chv_gpu_freq(struct intel_rps *rps, int val)
1460{
1461        /*
1462         * N = val / 2
1463         * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2
1464         */
1465        return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000);
1466}
1467
1468static int chv_freq_opcode(struct intel_rps *rps, int val)
1469{
1470        /* CHV needs even values */
1471        return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2;
1472}
1473
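     /*
      * Convert between the hardware frequency encoding and MHz: one step is
      * GT_FREQUENCY_MULTIPLIER (50 MHz) on gen6-8 and 50/3 (~16.7 MHz) on
      * gen9+, while VLV/CHV derive the value from the GPLL reference clock.
      */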
1474int intel_gpu_freq(struct intel_rps *rps, int val)
1475{
1476        struct drm_i915_private *i915 = rps_to_i915(rps);
1477
1478        if (GRAPHICS_VER(i915) >= 9)
1479                return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
1480                                         GEN9_FREQ_SCALER);
1481        else if (IS_CHERRYVIEW(i915))
1482                return chv_gpu_freq(rps, val);
1483        else if (IS_VALLEYVIEW(i915))
1484                return byt_gpu_freq(rps, val);
1485        else if (GRAPHICS_VER(i915) >= 6)
1486                return val * GT_FREQUENCY_MULTIPLIER;
1487        else
1488                return val;
1489}
1490
1491int intel_freq_opcode(struct intel_rps *rps, int val)
1492{
1493        struct drm_i915_private *i915 = rps_to_i915(rps);
1494
1495        if (GRAPHICS_VER(i915) >= 9)
1496                return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
1497                                         GT_FREQUENCY_MULTIPLIER);
1498        else if (IS_CHERRYVIEW(i915))
1499                return chv_freq_opcode(rps, val);
1500        else if (IS_VALLEYVIEW(i915))
1501                return byt_freq_opcode(rps, val);
1502        else if (GRAPHICS_VER(i915) >= 6)
1503                return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
1504        else
1505                return val;
1506}
1507
1508static void vlv_init_gpll_ref_freq(struct intel_rps *rps)
1509{
1510        struct drm_i915_private *i915 = rps_to_i915(rps);
1511
1512        rps->gpll_ref_freq =
1513                vlv_get_cck_clock(i915, "GPLL ref",
1514                                  CCK_GPLL_CLOCK_CONTROL,
1515                                  i915->czclk_freq);
1516
1517        drm_dbg(&i915->drm, "GPLL reference freq: %d kHz\n",
1518                rps->gpll_ref_freq);
1519}
1520
1521static void vlv_rps_init(struct intel_rps *rps)
1522{
1523        struct drm_i915_private *i915 = rps_to_i915(rps);
1524        u32 val;
1525
1526        vlv_iosf_sb_get(i915,
1527                        BIT(VLV_IOSF_SB_PUNIT) |
1528                        BIT(VLV_IOSF_SB_NC) |
1529                        BIT(VLV_IOSF_SB_CCK));
1530
1531        vlv_init_gpll_ref_freq(rps);
1532
1533        val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
1534        switch ((val >> 6) & 3) {
1535        case 0:
1536        case 1:
1537                i915->mem_freq = 800;
1538                break;
1539        case 2:
1540                i915->mem_freq = 1066;
1541                break;
1542        case 3:
1543                i915->mem_freq = 1333;
1544                break;
1545        }
1546        drm_dbg(&i915->drm, "DDR speed: %d MHz\n", i915->mem_freq);
1547
1548        rps->max_freq = vlv_rps_max_freq(rps);
1549        rps->rp0_freq = rps->max_freq;
1550        drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n",
1551                intel_gpu_freq(rps, rps->max_freq), rps->max_freq);
1552
1553        rps->efficient_freq = vlv_rps_rpe_freq(rps);
1554        drm_dbg(&i915->drm, "RPe GPU freq: %d MHz (%u)\n",
1555                intel_gpu_freq(rps, rps->efficient_freq), rps->efficient_freq);
1556
1557        rps->rp1_freq = vlv_rps_guar_freq(rps);
1558        drm_dbg(&i915->drm, "RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
1559                intel_gpu_freq(rps, rps->rp1_freq), rps->rp1_freq);
1560
1561        rps->min_freq = vlv_rps_min_freq(rps);
1562        drm_dbg(&i915->drm, "min GPU freq: %d MHz (%u)\n",
1563                intel_gpu_freq(rps, rps->min_freq), rps->min_freq);
1564
1565        vlv_iosf_sb_put(i915,
1566                        BIT(VLV_IOSF_SB_PUNIT) |
1567                        BIT(VLV_IOSF_SB_NC) |
1568                        BIT(VLV_IOSF_SB_CCK));
1569}
1570
1571static void chv_rps_init(struct intel_rps *rps)
1572{
1573        struct drm_i915_private *i915 = rps_to_i915(rps);
1574        u32 val;
1575
1576        vlv_iosf_sb_get(i915,
1577                        BIT(VLV_IOSF_SB_PUNIT) |
1578                        BIT(VLV_IOSF_SB_NC) |
1579                        BIT(VLV_IOSF_SB_CCK));
1580
1581        vlv_init_gpll_ref_freq(rps);
1582
1583        val = vlv_cck_read(i915, CCK_FUSE_REG);
1584
1585        switch ((val >> 2) & 0x7) {
1586        case 3:
1587                i915->mem_freq = 2000;
1588                break;
1589        default:
1590                i915->mem_freq = 1600;
1591                break;
1592        }
1593        drm_dbg(&i915->drm, "DDR speed: %d MHz\n", i915->mem_freq);
1594
1595        rps->max_freq = chv_rps_max_freq(rps);
1596        rps->rp0_freq = rps->max_freq;
1597        drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n",
1598                intel_gpu_freq(rps, rps->max_freq), rps->max_freq);
1599
1600        rps->efficient_freq = chv_rps_rpe_freq(rps);
1601        drm_dbg(&i915->drm, "RPe GPU freq: %d MHz (%u)\n",
1602                intel_gpu_freq(rps, rps->efficient_freq), rps->efficient_freq);
1603
1604        rps->rp1_freq = chv_rps_guar_freq(rps);
1605        drm_dbg(&i915->drm, "RP1(Guar) GPU freq: %d MHz (%u)\n",
1606                intel_gpu_freq(rps, rps->rp1_freq), rps->rp1_freq);
1607
1608        rps->min_freq = chv_rps_min_freq(rps);
1609        drm_dbg(&i915->drm, "min GPU freq: %d MHz (%u)\n",
1610                intel_gpu_freq(rps, rps->min_freq), rps->min_freq);
1611
1612        vlv_iosf_sb_put(i915,
1613                        BIT(VLV_IOSF_SB_PUNIT) |
1614                        BIT(VLV_IOSF_SB_NC) |
1615                        BIT(VLV_IOSF_SB_CCK));
1616
1617        drm_WARN_ONCE(&i915->drm, (rps->max_freq | rps->efficient_freq |
1618                                   rps->rp1_freq | rps->min_freq) & 1,
1619                      "Odd GPU freq values\n");
1620}
1621
1622static void vlv_c0_read(struct intel_uncore *uncore, struct intel_rps_ei *ei)
1623{
1624        ei->ktime = ktime_get_raw();
1625        ei->render_c0 = intel_uncore_read(uncore, VLV_RENDER_C0_COUNT);
1626        ei->media_c0 = intel_uncore_read(uncore, VLV_MEDIA_C0_COUNT);
1627}
1628
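    /*
     * Software evaluation of the up/down threshold events for VLV/CHV: on
     * each RP_UP_EI_EXPIRED interrupt we sample the render and media C0
     * residency counters and compare the busier of the two against the
     * elapsed wall time. Busy for more than power.up_threshold percent of
     * the interval synthesises an UP_THRESHOLD event, below
     * power.down_threshold percent a DOWN_THRESHOLD event; rps_work() then
     * treats these exactly like the hardware-generated events.
     */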
1629static u32 vlv_wa_c0_ei(struct intel_rps *rps, u32 pm_iir)
1630{
1631        struct intel_uncore *uncore = rps_to_uncore(rps);
1632        const struct intel_rps_ei *prev = &rps->ei;
1633        struct intel_rps_ei now;
1634        u32 events = 0;
1635
1636        if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
1637                return 0;
1638
1639        vlv_c0_read(uncore, &now);
1640
1641        if (prev->ktime) {
1642                u64 time, c0;
1643                u32 render, media;
1644
1645                time = ktime_us_delta(now.ktime, prev->ktime);
1646
1647                time *= rps_to_i915(rps)->czclk_freq;
1648
1649                /* Workload can be split between render + media,
1650                 * e.g. SwapBuffers being blitted in X after being rendered in
1651                 * mesa. To account for this we need to combine both engines
1652                 * into our activity counter.
1653                 */
1654                render = now.render_c0 - prev->render_c0;
1655                media = now.media_c0 - prev->media_c0;
1656                c0 = max(render, media);
1657                c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */
1658
1659                if (c0 > time * rps->power.up_threshold)
1660                        events = GEN6_PM_RP_UP_THRESHOLD;
1661                else if (c0 < time * rps->power.down_threshold)
1662                        events = GEN6_PM_RP_DOWN_THRESHOLD;
1663        }
1664
1665        rps->ei = now;
1666        return events;
1667}
1668
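    /*
     * Deferred (process context) half of the RPS interrupt handling. The IRQ
     * handlers below mask the PM interrupts, accumulate the event bits in
     * rps->pm_iir and schedule this worker, which then picks a new frequency:
     * client boosts jump straight to boost_freq, UP_THRESHOLD events walk the
     * frequency up with a step that doubles on consecutive increases,
     * DOWN_TIMEOUT falls back towards the efficient frequency, and
     * DOWN_THRESHOLD walks down with a doubling step. The result is clamped
     * to the soft limits (or the hard maximum for a client boost) before
     * being applied.
     */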
1669static void rps_work(struct work_struct *work)
1670{
1671        struct intel_rps *rps = container_of(work, typeof(*rps), work);
1672        struct intel_gt *gt = rps_to_gt(rps);
1673        struct drm_i915_private *i915 = rps_to_i915(rps);
1674        bool client_boost = false;
1675        int new_freq, adj, min, max;
1676        u32 pm_iir = 0;
1677
1678        spin_lock_irq(&gt->irq_lock);
1679        pm_iir = fetch_and_zero(&rps->pm_iir) & rps->pm_events;
1680        client_boost = atomic_read(&rps->num_waiters);
1681        spin_unlock_irq(&gt->irq_lock);
1682
1683        /* Make sure we didn't queue anything we're not going to process. */
1684        if (!pm_iir && !client_boost)
1685                goto out;
1686
1687        mutex_lock(&rps->lock);
1688        if (!intel_rps_is_active(rps)) {
1689                mutex_unlock(&rps->lock);
1690                return;
1691        }
1692
1693        pm_iir |= vlv_wa_c0_ei(rps, pm_iir);
1694
1695        adj = rps->last_adj;
1696        new_freq = rps->cur_freq;
1697        min = rps->min_freq_softlimit;
1698        max = rps->max_freq_softlimit;
1699        if (client_boost)
1700                max = rps->max_freq;
1701
1702        GT_TRACE(gt,
1703                 "pm_iir:%x, client_boost:%s, last:%d, cur:%x, min:%x, max:%x\n",
1704                 pm_iir, yesno(client_boost),
1705                 adj, new_freq, min, max);
1706
1707        if (client_boost && new_freq < rps->boost_freq) {
1708                new_freq = rps->boost_freq;
1709                adj = 0;
1710        } else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
1711                if (adj > 0)
1712                        adj *= 2;
1713                else /* CHV needs even encoded frequency values */
1714                        adj = IS_CHERRYVIEW(gt->i915) ? 2 : 1;
1715
1716                if (new_freq >= rps->max_freq_softlimit)
1717                        adj = 0;
1718        } else if (client_boost) {
1719                adj = 0;
1720        } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
1721                if (rps->cur_freq > rps->efficient_freq)
1722                        new_freq = rps->efficient_freq;
1723                else if (rps->cur_freq > rps->min_freq_softlimit)
1724                        new_freq = rps->min_freq_softlimit;
1725                adj = 0;
1726        } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
1727                if (adj < 0)
1728                        adj *= 2;
1729                else /* CHV needs even encoded frequency values */
1730                        adj = IS_CHERRYVIEW(gt->i915) ? -2 : -1;
1731
1732                if (new_freq <= rps->min_freq_softlimit)
1733                        adj = 0;
1734        } else { /* unknown event */
1735                adj = 0;
1736        }
1737
1738        /*
1739         * sysfs frequency limits may have snuck in while
1740         * servicing the interrupt
1741         */
1742        new_freq += adj;
1743        new_freq = clamp_t(int, new_freq, min, max);
1744
1745        if (intel_rps_set(rps, new_freq)) {
1746                drm_dbg(&i915->drm, "Failed to set new GPU frequency\n");
1747                adj = 0;
1748        }
1749        rps->last_adj = adj;
1750
1751        mutex_unlock(&rps->lock);
1752
1753out:
1754        spin_lock_irq(&gt->irq_lock);
1755        gen6_gt_pm_unmask_irq(gt, rps->pm_events);
1756        spin_unlock_irq(&gt->irq_lock);
1757}
1758
1759void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
1760{
1761        struct intel_gt *gt = rps_to_gt(rps);
1762        const u32 events = rps->pm_events & pm_iir;
1763
1764        lockdep_assert_held(&gt->irq_lock);
1765
1766        if (unlikely(!events))
1767                return;
1768
1769        GT_TRACE(gt, "irq events:%x\n", events);
1770
1771        gen6_gt_pm_mask_irq(gt, events);
1772
1773        rps->pm_iir |= events;
1774        schedule_work(&rps->work);
1775}
1776
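    /*
     * On gen6-7 the PM interrupt register also carries the vebox interrupts,
     * so after queueing any RPS work we forward those bits to the VECS0
     * engine; gen8+ delivers engine interrupts through other registers and
     * returns early here.
     */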
1777void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
1778{
1779        struct intel_gt *gt = rps_to_gt(rps);
1780        u32 events;
1781
1782        events = pm_iir & rps->pm_events;
1783        if (events) {
1784                spin_lock(&gt->irq_lock);
1785
1786                GT_TRACE(gt, "irq events:%x\n", events);
1787
1788                gen6_gt_pm_mask_irq(gt, events);
1789                rps->pm_iir |= events;
1790
1791                schedule_work(&rps->work);
1792                spin_unlock(&gt->irq_lock);
1793        }
1794
1795        if (GRAPHICS_VER(gt->i915) >= 8)
1796                return;
1797
1798        if (pm_iir & PM_VEBOX_USER_INTERRUPT)
1799                intel_engine_cs_irq(gt->engine[VECS0], pm_iir >> 10);
1800
1801        if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT)
1802                DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir);
1803}
1804
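    /*
     * Ironlake frequency management: the hardware signals an evaluation
     * change (MEMINT_EVAL_CHG) when its busyness averages cross the
     * programmed limits. We compare the up/down busy averages against the
     * max/min averages and step cur_freq by one in the indicated direction,
     * within the soft limits, all under mchdev_lock so the intel_ips hooks
     * below see a consistent view.
     */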
1805void gen5_rps_irq_handler(struct intel_rps *rps)
1806{
1807        struct intel_uncore *uncore = rps_to_uncore(rps);
1808        u32 busy_up, busy_down, max_avg, min_avg;
1809        u8 new_freq;
1810
1811        spin_lock(&mchdev_lock);
1812
1813        intel_uncore_write16(uncore,
1814                             MEMINTRSTS,
1815                             intel_uncore_read(uncore, MEMINTRSTS));
1816
1817        intel_uncore_write16(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);
1818        busy_up = intel_uncore_read(uncore, RCPREVBSYTUPAVG);
1819        busy_down = intel_uncore_read(uncore, RCPREVBSYTDNAVG);
1820        max_avg = intel_uncore_read(uncore, RCBMAXAVG);
1821        min_avg = intel_uncore_read(uncore, RCBMINAVG);
1822
1823        /* Handle RCS change request from hw */
1824        new_freq = rps->cur_freq;
1825        if (busy_up > max_avg)
1826                new_freq++;
1827        else if (busy_down < min_avg)
1828                new_freq--;
1829        new_freq = clamp(new_freq,
1830                         rps->min_freq_softlimit,
1831                         rps->max_freq_softlimit);
1832
1833        if (new_freq != rps->cur_freq && !__gen5_rps_set(rps, new_freq))
1834                rps->cur_freq = new_freq;
1835
1836        spin_unlock(&mchdev_lock);
1837}
1838
1839void intel_rps_init_early(struct intel_rps *rps)
1840{
1841        mutex_init(&rps->lock);
1842        mutex_init(&rps->power.mutex);
1843
1844        INIT_WORK(&rps->work, rps_work);
1845        timer_setup(&rps->timer, rps_timer, 0);
1846
1847        atomic_set(&rps->num_waiters, 0);
1848}
1849
1850void intel_rps_init(struct intel_rps *rps)
1851{
1852        struct drm_i915_private *i915 = rps_to_i915(rps);
1853
1854        if (rps_uses_slpc(rps))
1855                return;
1856
1857        if (IS_CHERRYVIEW(i915))
1858                chv_rps_init(rps);
1859        else if (IS_VALLEYVIEW(i915))
1860                vlv_rps_init(rps);
1861        else if (GRAPHICS_VER(i915) >= 6)
1862                gen6_rps_init(rps);
1863        else if (IS_IRONLAKE_M(i915))
1864                gen5_rps_init(rps);
1865
1866        /* Derive initial user preferences/limits from the hardware limits */
1867        rps->max_freq_softlimit = rps->max_freq;
1868        rps->min_freq_softlimit = rps->min_freq;
1869
1870        /* After setting max-softlimit, find the overclock max freq */
1871        if (GRAPHICS_VER(i915) == 6 || IS_IVYBRIDGE(i915) || IS_HASWELL(i915)) {
1872                u32 params = 0;
1873
1874                sandybridge_pcode_read(i915, GEN6_READ_OC_PARAMS,
1875                                       &params, NULL);
1876                if (params & BIT(31)) { /* OC supported */
1877                        drm_dbg(&i915->drm,
1878                                "Overclocking supported, max: %dMHz, overclock: %dMHz\n",
1879                                (rps->max_freq & 0xff) * 50,
1880                                (params & 0xff) * 50);
1881                        rps->max_freq = params & 0xff;
1882                }
1883        }
1884
1885        /* Finally allow us to boost to max by default */
1886        rps->boost_freq = rps->max_freq;
1887        rps->idle_freq = rps->min_freq;
1888
1889        /* Start in the middle; from here we will autotune based on workload */
1890        rps->cur_freq = rps->efficient_freq;
1891
1892        rps->pm_intrmsk_mbz = 0;
1893
1894        /*
1895         * SNB,IVB,HSW can hard hang, and VLV,CHV may hard hang, on a looping
1896         * batchbuffer if GEN6_PM_UP_EI_EXPIRED is masked.
1897         *
1898         * TODO: verify if this can be reproduced on VLV,CHV.
1899         */
1900        if (GRAPHICS_VER(i915) <= 7)
1901                rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED;
1902
1903        if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) < 11)
1904                rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
1905
1906        /* GuC needs ARAT expired interrupt unmasked */
1907        if (intel_uc_uses_guc_submission(&rps_to_gt(rps)->uc))
1908                rps->pm_intrmsk_mbz |= ARAT_EXPIRED_INTRMSK;
1909}
1910
1911void intel_rps_sanitize(struct intel_rps *rps)
1912{
1913        if (rps_uses_slpc(rps))
1914                return;
1915
1916        if (GRAPHICS_VER(rps_to_i915(rps)) >= 6)
1917                rps_disable_interrupts(rps);
1918}
1919
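    /*
     * Extract the CAGF (current actual GPU frequency) field from a raw
     * RPSTAT/Punit status value; the field layout differs per platform, and
     * pre-gen6 reports a P-state that has to be mapped back through
     * gen5_invert_freq(). The result is still in the hardware encoding, use
     * intel_gpu_freq() to convert it to MHz.
     */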
1920u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
1921{
1922        struct drm_i915_private *i915 = rps_to_i915(rps);
1923        u32 cagf;
1924
1925        if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
1926                cagf = (rpstat >> 8) & 0xff;
1927        else if (GRAPHICS_VER(i915) >= 9)
1928                cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
1929        else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
1930                cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT;
1931        else if (GRAPHICS_VER(i915) >= 6)
1932                cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;
1933        else
1934                cagf = gen5_invert_freq(rps, (rpstat & MEMSTAT_PSTATE_MASK) >>
1935                                        MEMSTAT_PSTATE_SHIFT);
1936
1937        return cagf;
1938}
1939
1940static u32 read_cagf(struct intel_rps *rps)
1941{
1942        struct drm_i915_private *i915 = rps_to_i915(rps);
1943        struct intel_uncore *uncore = rps_to_uncore(rps);
1944        u32 freq;
1945
1946        if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
1947                vlv_punit_get(i915);
1948                freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
1949                vlv_punit_put(i915);
1950        } else if (GRAPHICS_VER(i915) >= 6) {
1951                freq = intel_uncore_read(uncore, GEN6_RPSTAT1);
1952        } else {
1953                freq = intel_uncore_read(uncore, MEMSTAT_ILK);
1954        }
1955
1956        return intel_rps_get_cagf(rps, freq);
1957}
1958
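    /*
     * Report the frequency the GPU is actually running at, in MHz. If the
     * device is runtime suspended we do not wake it up and simply report 0.
     */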
1959u32 intel_rps_read_actual_frequency(struct intel_rps *rps)
1960{
1961        struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm;
1962        intel_wakeref_t wakeref;
1963        u32 freq = 0;
1964
1965        with_intel_runtime_pm_if_in_use(rpm, wakeref)
1966                freq = intel_gpu_freq(rps, read_cagf(rps));
1967
1968        return freq;
1969}
1970
1971u32 intel_rps_read_punit_req(struct intel_rps *rps)
1972{
1973        struct intel_uncore *uncore = rps_to_uncore(rps);
1974        struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm;
1975        intel_wakeref_t wakeref;
1976        u32 freq = 0;
1977
1978        with_intel_runtime_pm_if_in_use(rpm, wakeref)
1979                freq = intel_uncore_read(uncore, GEN6_RPNSWREQ);
1980
1981        return freq;
1982}
1983
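    /*
     * Pull the software-requested (unslice) frequency ratio out of a raw
     * GEN6_RPNSWREQ value; this uses the gen9+ bit layout implied by
     * GEN9_SW_REQ_UNSLICE_RATIO_SHIFT.
     */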
1984static u32 intel_rps_get_req(u32 pureq)
1985{
1986        u32 req = pureq >> GEN9_SW_REQ_UNSLICE_RATIO_SHIFT;
1987
1988        return req;
1989}
1990
1991u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps)
1992{
1993        u32 freq = intel_rps_get_req(intel_rps_read_punit_req(rps));
1994
1995        return intel_gpu_freq(rps, freq);
1996}
1997
1998u32 intel_rps_get_requested_frequency(struct intel_rps *rps)
1999{
2000        if (rps_uses_slpc(rps))
2001                return intel_rps_read_punit_req_frequency(rps);
2002        else
2003                return intel_gpu_freq(rps, rps->cur_freq);
2004}
2005
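    /*
     * For the frequency getters below: when GuC SLPC owns frequency
     * management the values are taken from the SLPC state, otherwise the
     * legacy RPS encodings are converted to MHz with intel_gpu_freq().
     */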
2006u32 intel_rps_get_max_frequency(struct intel_rps *rps)
2007{
2008        struct intel_guc_slpc *slpc = rps_to_slpc(rps);
2009
2010        if (rps_uses_slpc(rps))
2011                return slpc->max_freq_softlimit;
2012        else
2013                return intel_gpu_freq(rps, rps->max_freq_softlimit);
2014}
2015
2016u32 intel_rps_get_rp0_frequency(struct intel_rps *rps)
2017{
2018        struct intel_guc_slpc *slpc = rps_to_slpc(rps);
2019
2020        if (rps_uses_slpc(rps))
2021                return slpc->rp0_freq;
2022        else
2023                return intel_gpu_freq(rps, rps->rp0_freq);
2024}
2025
2026u32 intel_rps_get_rp1_frequency(struct intel_rps *rps)
2027{
2028        struct intel_guc_slpc *slpc = rps_to_slpc(rps);
2029
2030        if (rps_uses_slpc(rps))
2031                return slpc->rp1_freq;
2032        else
2033                return intel_gpu_freq(rps, rps->rp1_freq);
2034}
2035
2036u32 intel_rps_get_rpn_frequency(struct intel_rps *rps)
2037{
2038        struct intel_guc_slpc *slpc = rps_to_slpc(rps);
2039
2040        if (rps_uses_slpc(rps))
2041                return slpc->min_freq;
2042        else
2043                return intel_gpu_freq(rps, rps->min_freq);
2044}
2045
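    /*
     * Backend for the userspace-visible max/min frequency controls (e.g. the
     * gt sysfs interface) on the legacy RPS path. The value arrives in MHz,
     * is converted to the hardware encoding, validated against the hardware
     * range and the opposing soft limit, and the current frequency is then
     * re-applied clamped to the new window so the interrupt limits and
     * PMINTRMSK are updated.
     */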
2046static int set_max_freq(struct intel_rps *rps, u32 val)
2047{
2048        struct drm_i915_private *i915 = rps_to_i915(rps);
2049        int ret = 0;
2050
2051        mutex_lock(&rps->lock);
2052
2053        val = intel_freq_opcode(rps, val);
2054        if (val < rps->min_freq ||
2055            val > rps->max_freq ||
2056            val < rps->min_freq_softlimit) {
2057                ret = -EINVAL;
2058                goto unlock;
2059        }
2060
2061        if (val > rps->rp0_freq)
2062                drm_dbg(&i915->drm, "User requested overclocking to %d\n",
2063                        intel_gpu_freq(rps, val));
2064
2065        rps->max_freq_softlimit = val;
2066
2067        val = clamp_t(int, rps->cur_freq,
2068                      rps->min_freq_softlimit,
2069                      rps->max_freq_softlimit);
2070
2071        /*
2072         * We still need *_set_rps to process the new max_delay and
2073         * update the interrupt limits and PMINTRMSK even though the
2074         * frequency request may be unchanged.
2075         */
2076        intel_rps_set(rps, val);
2077
2078unlock:
2079        mutex_unlock(&rps->lock);
2080
2081        return ret;
2082}
2083
2084int intel_rps_set_max_frequency(struct intel_rps *rps, u32 val)
2085{
2086        struct intel_guc_slpc *slpc = rps_to_slpc(rps);
2087
2088        if (rps_uses_slpc(rps))
2089                return intel_guc_slpc_set_max_freq(slpc, val);
2090        else
2091                return set_max_freq(rps, val);
2092}
2093
2094u32 intel_rps_get_min_frequency(struct intel_rps *rps)
2095{
2096        struct intel_guc_slpc *slpc = rps_to_slpc(rps);
2097
2098        if (rps_uses_slpc(rps))
2099                return slpc->min_freq_softlimit;
2100        else
2101                return intel_gpu_freq(rps, rps->min_freq_softlimit);
2102}
2103
2104static int set_min_freq(struct intel_rps *rps, u32 val)
2105{
2106        int ret = 0;
2107
2108        mutex_lock(&rps->lock);
2109
2110        val = intel_freq_opcode(rps, val);
2111        if (val < rps->min_freq ||
2112            val > rps->max_freq ||
2113            val > rps->max_freq_softlimit) {
2114                ret = -EINVAL;
2115                goto unlock;
2116        }
2117
2118        rps->min_freq_softlimit = val;
2119
2120        val = clamp_t(int, rps->cur_freq,
2121                      rps->min_freq_softlimit,
2122                      rps->max_freq_softlimit);
2123
2124        /*
2125         * We still need *_set_rps to process the new min_delay and
2126         * update the interrupt limits and PMINTRMSK even though the
2127         * frequency request may be unchanged.
2128         */
2129        intel_rps_set(rps, val);
2130
2131unlock:
2132        mutex_unlock(&rps->lock);
2133
2134        return ret;
2135}
2136
2137int intel_rps_set_min_frequency(struct intel_rps *rps, u32 val)
2138{
2139        struct intel_guc_slpc *slpc = rps_to_slpc(rps);
2140
2141        if (rps_uses_slpc(rps))
2142                return intel_guc_slpc_set_min_freq(slpc, val);
2143        else
2144                return set_min_freq(rps, val);
2145}
2146
2147/* External interface for intel_ips.ko */
2148
2149static struct drm_i915_private __rcu *ips_mchdev;
2150
2151/**
2152 * ips_ping_for_i915_load - tell the intel_ips driver that the i915
2153 * driver is now loaded, if IPS got loaded first.
2154 *
2155 * This awkward dance is so that neither module has to depend on the
2156 * other in order for IPS to do the appropriate communication of
2157 * GPU turbo limits to i915.
2158 */
2159static void
2160ips_ping_for_i915_load(void)
2161{
2162        void (*link)(void);
2163
2164        link = symbol_get(ips_link_to_i915_driver);
2165        if (link) {
2166                link();
2167                symbol_put(ips_link_to_i915_driver);
2168        }
2169}
2170
2171void intel_rps_driver_register(struct intel_rps *rps)
2172{
2173        struct intel_gt *gt = rps_to_gt(rps);
2174
2175        /*
2176         * We only register the i915 ips part with intel-ips once everything is
2177         * set up, to avoid intel-ips sneaking in and reading bogus values.
2178         */
2179        if (GRAPHICS_VER(gt->i915) == 5) {
2180                GEM_BUG_ON(ips_mchdev);
2181                rcu_assign_pointer(ips_mchdev, gt->i915);
2182                ips_ping_for_i915_load();
2183        }
2184}
2185
2186void intel_rps_driver_unregister(struct intel_rps *rps)
2187{
2188        if (rcu_access_pointer(ips_mchdev) == rps_to_i915(rps))
2189                rcu_assign_pointer(ips_mchdev, NULL);
2190}
2191
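    /*
     * Grab a reference to the i915 device currently registered for IPS, if
     * any: dereference the RCU-protected pointer and take a reference on the
     * underlying drm_device so it cannot go away while intel_ips uses it.
     * Callers must balance with drm_dev_put().
     */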
2192static struct drm_i915_private *mchdev_get(void)
2193{
2194        struct drm_i915_private *i915;
2195
2196        rcu_read_lock();
2197        i915 = rcu_dereference(ips_mchdev);
2198        if (i915 && !kref_get_unless_zero(&i915->drm.ref))
2199                i915 = NULL;
2200        rcu_read_unlock();
2201
2202        return i915;
2203}
2204
2205/**
2206 * i915_read_mch_val - return value for IPS use
2207 *
2208 * Calculate and return a value for the IPS driver to use when deciding whether
2209 * we have thermal and power headroom to increase CPU or GPU power budget.
2210 */
2211unsigned long i915_read_mch_val(void)
2212{
2213        struct drm_i915_private *i915;
2214        unsigned long chipset_val = 0;
2215        unsigned long graphics_val = 0;
2216        intel_wakeref_t wakeref;
2217
2218        i915 = mchdev_get();
2219        if (!i915)
2220                return 0;
2221
2222        with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
2223                struct intel_ips *ips = &i915->gt.rps.ips;
2224
2225                spin_lock_irq(&mchdev_lock);
2226                chipset_val = __ips_chipset_val(ips);
2227                graphics_val = __ips_gfx_val(ips);
2228                spin_unlock_irq(&mchdev_lock);
2229        }
2230
2231        drm_dev_put(&i915->drm);
2232        return chipset_val + graphics_val;
2233}
2234EXPORT_SYMBOL_GPL(i915_read_mch_val);
2235
2236/**
2237 * i915_gpu_raise - raise GPU frequency limit
2238 *
2239 * Raise the limit; IPS indicates we have thermal headroom.
2240 */
2241bool i915_gpu_raise(void)
2242{
2243        struct drm_i915_private *i915;
2244        struct intel_rps *rps;
2245
2246        i915 = mchdev_get();
2247        if (!i915)
2248                return false;
2249
2250        rps = &i915->gt.rps;
2251
2252        spin_lock_irq(&mchdev_lock);
2253        if (rps->max_freq_softlimit < rps->max_freq)
2254                rps->max_freq_softlimit++;
2255        spin_unlock_irq(&mchdev_lock);
2256
2257        drm_dev_put(&i915->drm);
2258        return true;
2259}
2260EXPORT_SYMBOL_GPL(i915_gpu_raise);
2261
2262/**
2263 * i915_gpu_lower - lower GPU frequency limit
2264 *
2265 * IPS indicates we're close to a thermal limit, so throttle back the GPU
2266 * frequency maximum.
2267 */
2268bool i915_gpu_lower(void)
2269{
2270        struct drm_i915_private *i915;
2271        struct intel_rps *rps;
2272
2273        i915 = mchdev_get();
2274        if (!i915)
2275                return false;
2276
2277        rps = &i915->gt.rps;
2278
2279        spin_lock_irq(&mchdev_lock);
2280        if (rps->max_freq_softlimit > rps->min_freq)
2281                rps->max_freq_softlimit--;
2282        spin_unlock_irq(&mchdev_lock);
2283
2284        drm_dev_put(&i915->drm);
2285        return true;
2286}
2287EXPORT_SYMBOL_GPL(i915_gpu_lower);
2288
2289/**
2290 * i915_gpu_busy - indicate GPU busyness to IPS
2291 *
2292 * Tell the IPS driver whether or not the GPU is busy.
2293 */
2294bool i915_gpu_busy(void)
2295{
2296        struct drm_i915_private *i915;
2297        bool ret;
2298
2299        i915 = mchdev_get();
2300        if (!i915)
2301                return false;
2302
2303        ret = i915->gt.awake;
2304
2305        drm_dev_put(&i915->drm);
2306        return ret;
2307}
2308EXPORT_SYMBOL_GPL(i915_gpu_busy);
2309
2310/**
2311 * i915_gpu_turbo_disable - disable graphics turbo
2312 *
2313 * Disable graphics turbo by clamping the maximum frequency down to the
2314 * minimum and dropping the current frequency to that minimum.
2315 */
2316bool i915_gpu_turbo_disable(void)
2317{
2318        struct drm_i915_private *i915;
2319        struct intel_rps *rps;
2320        bool ret;
2321
2322        i915 = mchdev_get();
2323        if (!i915)
2324                return false;
2325
2326        rps = &i915->gt.rps;
2327
2328        spin_lock_irq(&mchdev_lock);
2329        rps->max_freq_softlimit = rps->min_freq;
2330        ret = !__gen5_rps_set(&i915->gt.rps, rps->min_freq);
2331        spin_unlock_irq(&mchdev_lock);
2332
2333        drm_dev_put(&i915->drm);
2334        return ret;
2335}
2336EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
2337
2338#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
2339#include "selftest_rps.c"
2340#include "selftest_slpc.c"
2341#endif
2342