linux/drivers/gpu/drm/i915/gt/intel_rps.c
   1// SPDX-License-Identifier: MIT
   2/*
   3 * Copyright © 2019 Intel Corporation
   4 */
   5
   6#include <drm/i915_drm.h>
   7
   8#include "i915_drv.h"
   9#include "intel_breadcrumbs.h"
  10#include "intel_gt.h"
  11#include "intel_gt_clock_utils.h"
  12#include "intel_gt_irq.h"
  13#include "intel_gt_pm_irq.h"
  14#include "intel_rps.h"
  15#include "intel_sideband.h"
  16#include "../../../platform/x86/intel_ips.h"
  17
  18#define BUSY_MAX_EI     20u /* ms */
  19
  20/*
  21 * Lock protecting IPS related data structures
  22 */
  23static DEFINE_SPINLOCK(mchdev_lock);
  24
  25static struct intel_gt *rps_to_gt(struct intel_rps *rps)
  26{
  27        return container_of(rps, struct intel_gt, rps);
  28}
  29
  30static struct drm_i915_private *rps_to_i915(struct intel_rps *rps)
  31{
  32        return rps_to_gt(rps)->i915;
  33}
  34
  35static struct intel_uncore *rps_to_uncore(struct intel_rps *rps)
  36{
  37        return rps_to_gt(rps)->uncore;
  38}
  39
  40static u32 rps_pm_sanitize_mask(struct intel_rps *rps, u32 mask)
  41{
  42        return mask & ~rps->pm_intrmsk_mbz;
  43}
  44
  45static void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
  46{
  47        intel_uncore_write_fw(uncore, reg, val);
  48}
  49
  50static void rps_timer(struct timer_list *t)
  51{
  52        struct intel_rps *rps = from_timer(rps, t, timer);
  53        struct intel_engine_cs *engine;
  54        ktime_t dt, last, timestamp;
  55        enum intel_engine_id id;
  56        s64 max_busy[3] = {};
  57
  58        timestamp = 0;
  59        for_each_engine(engine, rps_to_gt(rps), id) {
  60                s64 busy;
  61                int i;
  62
  63                dt = intel_engine_get_busy_time(engine, &timestamp);
  64                last = engine->stats.rps;
  65                engine->stats.rps = dt;
  66
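                     /*
                      * Track the three busiest engines this interval: the
                      * swap() pass keeps max_busy[] sorted in descending
                      * order, pushing displaced values down the array.
                      */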
  67                busy = ktime_to_ns(ktime_sub(dt, last));
  68                for (i = 0; i < ARRAY_SIZE(max_busy); i++) {
  69                        if (busy > max_busy[i])
  70                                swap(busy, max_busy[i]);
  71                }
  72        }
  73        last = rps->pm_timestamp;
  74        rps->pm_timestamp = timestamp;
  75
  76        if (intel_rps_is_active(rps)) {
  77                s64 busy;
  78                int i;
  79
  80                dt = ktime_sub(timestamp, last);
  81
  82                /*
  83                 * Our goal is to evaluate each engine independently, so we run
  84                 * at the lowest clocks required to sustain the heaviest
  85                 * workload. However, a task may be split into sequential
  86                 * dependent operations across a set of engines, such that
  87                 * the independent contributions do not account for high load,
  88                 * but overall the task is GPU bound. For example, consider
  89                 * video decode on vcs followed by colour post-processing
  90                 * on vecs, followed by general post-processing on rcs.
   91                 * Since multiple engines being active does not necessarily
   92                 * imply a single continuous workload across all engines, we
   93                 * hedge our bets by only contributing a factor of the
   94                 * distributed load into our busyness calculation.
  95                 */
  96                busy = max_busy[0];
  97                for (i = 1; i < ARRAY_SIZE(max_busy); i++) {
  98                        if (!max_busy[i])
  99                                break;
 100
 101                        busy += div_u64(max_busy[i], 1 << i);
 102                }
 103                GT_TRACE(rps_to_gt(rps),
 104                         "busy:%lld [%d%%], max:[%lld, %lld, %lld], interval:%d\n",
 105                         busy, (int)div64_u64(100 * busy, dt),
 106                         max_busy[0], max_busy[1], max_busy[2],
 107                         rps->pm_interval);
 108
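                     /*
                      * busy and dt are both in ns, so 100 * busy vs
                      * threshold * dt compares %-utilisation against the
                      * up/down thresholds; on crossing, fake the matching
                      * RPS interrupt and let the worker adjust the frequency.
                      */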
 109                if (100 * busy > rps->power.up_threshold * dt &&
 110                    rps->cur_freq < rps->max_freq_softlimit) {
 111                        rps->pm_iir |= GEN6_PM_RP_UP_THRESHOLD;
 112                        rps->pm_interval = 1;
 113                        schedule_work(&rps->work);
 114                } else if (100 * busy < rps->power.down_threshold * dt &&
 115                           rps->cur_freq > rps->min_freq_softlimit) {
 116                        rps->pm_iir |= GEN6_PM_RP_DOWN_THRESHOLD;
 117                        rps->pm_interval = 1;
 118                        schedule_work(&rps->work);
 119                } else {
 120                        rps->last_adj = 0;
 121                }
 122
 123                mod_timer(&rps->timer,
 124                          jiffies + msecs_to_jiffies(rps->pm_interval));
 125                rps->pm_interval = min(rps->pm_interval * 2, BUSY_MAX_EI);
 126        }
 127}
 128
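     /*
      * While the timer is running, pm_timestamp is the absolute time of the
      * last sample; while stopped it holds an elapsed-time offset. The
      * ktime_sub() in start/stop converts between the two so that time spent
      * parked is excluded from the next evaluation interval.
      */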
 129static void rps_start_timer(struct intel_rps *rps)
 130{
 131        rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp);
 132        rps->pm_interval = 1;
 133        mod_timer(&rps->timer, jiffies + 1);
 134}
 135
 136static void rps_stop_timer(struct intel_rps *rps)
 137{
 138        del_timer_sync(&rps->timer);
 139        rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp);
 140        cancel_work_sync(&rps->work);
 141}
 142
 143static u32 rps_pm_mask(struct intel_rps *rps, u8 val)
 144{
 145        u32 mask = 0;
 146
 147        /* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */
 148        if (val > rps->min_freq_softlimit)
 149                mask |= (GEN6_PM_RP_UP_EI_EXPIRED |
 150                         GEN6_PM_RP_DOWN_THRESHOLD |
 151                         GEN6_PM_RP_DOWN_TIMEOUT);
 152
 153        if (val < rps->max_freq_softlimit)
 154                mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
 155
 156        mask &= rps->pm_events;
 157
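             /* GEN6_PMINTRMSK is a disable mask, so invert the enabled events */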
 158        return rps_pm_sanitize_mask(rps, ~mask);
 159}
 160
 161static void rps_reset_ei(struct intel_rps *rps)
 162{
 163        memset(&rps->ei, 0, sizeof(rps->ei));
 164}
 165
 166static void rps_enable_interrupts(struct intel_rps *rps)
 167{
 168        struct intel_gt *gt = rps_to_gt(rps);
 169
 170        GT_TRACE(gt, "interrupts:on rps->pm_events: %x, rps_pm_mask:%x\n",
 171                 rps->pm_events, rps_pm_mask(rps, rps->last_freq));
 172
 173        rps_reset_ei(rps);
 174
 175        spin_lock_irq(&gt->irq_lock);
 176        gen6_gt_pm_enable_irq(gt, rps->pm_events);
 177        spin_unlock_irq(&gt->irq_lock);
 178
 179        intel_uncore_write(gt->uncore,
 180                           GEN6_PMINTRMSK, rps_pm_mask(rps, rps->last_freq));
 181}
 182
 183static void gen6_rps_reset_interrupts(struct intel_rps *rps)
 184{
 185        gen6_gt_pm_reset_iir(rps_to_gt(rps), GEN6_PM_RPS_EVENTS);
 186}
 187
 188static void gen11_rps_reset_interrupts(struct intel_rps *rps)
 189{
 190        while (gen11_gt_reset_one_iir(rps_to_gt(rps), 0, GEN11_GTPM))
 191                ;
 192}
 193
 194static void rps_reset_interrupts(struct intel_rps *rps)
 195{
 196        struct intel_gt *gt = rps_to_gt(rps);
 197
 198        spin_lock_irq(&gt->irq_lock);
 199        if (INTEL_GEN(gt->i915) >= 11)
 200                gen11_rps_reset_interrupts(rps);
 201        else
 202                gen6_rps_reset_interrupts(rps);
 203
 204        rps->pm_iir = 0;
 205        spin_unlock_irq(&gt->irq_lock);
 206}
 207
 208static void rps_disable_interrupts(struct intel_rps *rps)
 209{
 210        struct intel_gt *gt = rps_to_gt(rps);
 211
 212        intel_uncore_write(gt->uncore,
 213                           GEN6_PMINTRMSK, rps_pm_sanitize_mask(rps, ~0u));
 214
 215        spin_lock_irq(&gt->irq_lock);
 216        gen6_gt_pm_disable_irq(gt, GEN6_PM_RPS_EVENTS);
 217        spin_unlock_irq(&gt->irq_lock);
 218
 219        intel_synchronize_irq(gt->i915);
 220
 221        /*
 222         * Now that we will not be generating any more work, flush any
 223         * outstanding tasks. As we are called on the RPS idle path,
 224         * we will reset the GPU to minimum frequencies, so the current
 225         * state of the worker can be discarded.
 226         */
 227        cancel_work_sync(&rps->work);
 228
 229        rps_reset_interrupts(rps);
 230        GT_TRACE(gt, "interrupts:off\n");
 231}
 232
 233static const struct cparams {
 234        u16 i;
 235        u16 t;
 236        u16 m;
 237        u16 c;
 238} cparams[] = {
 239        { 1, 1333, 301, 28664 },
 240        { 1, 1066, 294, 24460 },
 241        { 1, 800, 294, 25192 },
 242        { 0, 1333, 276, 27605 },
 243        { 0, 1066, 276, 27605 },
 244        { 0, 800, 231, 23784 },
 245};
 246
 247static void gen5_rps_init(struct intel_rps *rps)
 248{
 249        struct drm_i915_private *i915 = rps_to_i915(rps);
 250        struct intel_uncore *uncore = rps_to_uncore(rps);
 251        u8 fmax, fmin, fstart;
 252        u32 rgvmodectl;
 253        int c_m, i;
 254
 255        if (i915->fsb_freq <= 3200)
 256                c_m = 0;
 257        else if (i915->fsb_freq <= 4800)
 258                c_m = 1;
 259        else
 260                c_m = 2;
 261
 262        for (i = 0; i < ARRAY_SIZE(cparams); i++) {
 263                if (cparams[i].i == c_m && cparams[i].t == i915->mem_freq) {
 264                        rps->ips.m = cparams[i].m;
 265                        rps->ips.c = cparams[i].c;
 266                        break;
 267                }
 268        }
 269
 270        rgvmodectl = intel_uncore_read(uncore, MEMMODECTL);
 271
 272        /* Set up min, max, and cur for interrupt handling */
 273        fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
 274        fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
 275        fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
 276                MEMMODE_FSTART_SHIFT;
 277        drm_dbg(&i915->drm, "fmax: %d, fmin: %d, fstart: %d\n",
 278                fmax, fmin, fstart);
 279
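             /*
              * Note the inversion: in this encoding a larger value means a
              * lower clock (see gen5_invert_freq()), so fmax is stored as
              * the minimum frequency and fmin as the maximum.
              */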
 280        rps->min_freq = fmax;
 281        rps->efficient_freq = fstart;
 282        rps->max_freq = fmin;
 283}
 284
 285static unsigned long
 286__ips_chipset_val(struct intel_ips *ips)
 287{
 288        struct intel_uncore *uncore =
 289                rps_to_uncore(container_of(ips, struct intel_rps, ips));
 290        unsigned long now = jiffies_to_msecs(jiffies), dt;
 291        unsigned long result;
 292        u64 total, delta;
 293
 294        lockdep_assert_held(&mchdev_lock);
 295
 296        /*
 297         * Prevent division-by-zero if we are asking too fast.
 298         * Also, we don't get interesting results if we are polling
 299         * faster than once in 10ms, so just return the saved value
 300         * in such cases.
 301         */
 302        dt = now - ips->last_time1;
 303        if (dt <= 10)
 304                return ips->chipset_power;
 305
 306        /* FIXME: handle per-counter overflow */
 307        total = intel_uncore_read(uncore, DMIEC);
 308        total += intel_uncore_read(uncore, DDREC);
 309        total += intel_uncore_read(uncore, CSIEC);
 310
 311        delta = total - ips->last_count1;
 312
 313        result = div_u64(div_u64(ips->m * delta, dt) + ips->c, 10);
 314
 315        ips->last_count1 = total;
 316        ips->last_time1 = now;
 317
 318        ips->chipset_power = result;
 319
 320        return result;
 321}
 322
 323static unsigned long ips_mch_val(struct intel_uncore *uncore)
 324{
 325        unsigned int m, x, b;
 326        u32 tsfs;
 327
 328        tsfs = intel_uncore_read(uncore, TSFS);
 329        x = intel_uncore_read8(uncore, TR1);
 330
 331        b = tsfs & TSFS_INTR_MASK;
 332        m = (tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT;
 333
 334        return m * x / 127 - b;
 335}
 336
 337static int _pxvid_to_vd(u8 pxvid)
 338{
 339        if (pxvid == 0)
 340                return 0;
 341
 342        if (pxvid >= 8 && pxvid < 31)
 343                pxvid = 31;
 344
 345        return (pxvid + 2) * 125;
 346}
 347
 348static u32 pvid_to_extvid(struct drm_i915_private *i915, u8 pxvid)
 349{
 350        const int vd = _pxvid_to_vd(pxvid);
 351
 352        if (INTEL_INFO(i915)->is_mobile)
 353                return max(vd - 1125, 0);
 354
 355        return vd;
 356}
 357
 358static void __gen5_ips_update(struct intel_ips *ips)
 359{
 360        struct intel_uncore *uncore =
 361                rps_to_uncore(container_of(ips, struct intel_rps, ips));
 362        u64 now, delta, dt;
 363        u32 count;
 364
 365        lockdep_assert_held(&mchdev_lock);
 366
 367        now = ktime_get_raw_ns();
 368        dt = now - ips->last_time2;
 369        do_div(dt, NSEC_PER_MSEC);
 370
 371        /* Don't divide by 0 */
 372        if (dt <= 10)
 373                return;
 374
 375        count = intel_uncore_read(uncore, GFXEC);
 376        delta = count - ips->last_count2;
 377
 378        ips->last_count2 = count;
 379        ips->last_time2 = now;
 380
 381        /* More magic constants... */
 382        ips->gfx_power = div_u64(delta * 1181, dt * 10);
 383}
 384
 385static void gen5_rps_update(struct intel_rps *rps)
 386{
 387        spin_lock_irq(&mchdev_lock);
 388        __gen5_ips_update(&rps->ips);
 389        spin_unlock_irq(&mchdev_lock);
 390}
 391
 392static unsigned int gen5_invert_freq(struct intel_rps *rps,
 393                                     unsigned int val)
 394{
 395        /* Invert the frequency bin into an ips delay */
 396        val = rps->max_freq - val;
 397        val = rps->min_freq + val;
 398
 399        return val;
 400}
 401
 402static int __gen5_rps_set(struct intel_rps *rps, u8 val)
 403{
 404        struct intel_uncore *uncore = rps_to_uncore(rps);
 405        u16 rgvswctl;
 406
 407        lockdep_assert_held(&mchdev_lock);
 408
 409        rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
 410        if (rgvswctl & MEMCTL_CMD_STS) {
 411                DRM_DEBUG("gpu busy, RCS change rejected\n");
 412                return -EBUSY; /* still busy with another command */
 413        }
 414
 415        /* Invert the frequency bin into an ips delay */
 416        val = gen5_invert_freq(rps, val);
 417
 418        rgvswctl =
 419                (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
 420                (val << MEMCTL_FREQ_SHIFT) |
 421                MEMCTL_SFCAVM;
 422        intel_uncore_write16(uncore, MEMSWCTL, rgvswctl);
 423        intel_uncore_posting_read16(uncore, MEMSWCTL);
 424
 425        rgvswctl |= MEMCTL_CMD_STS;
 426        intel_uncore_write16(uncore, MEMSWCTL, rgvswctl);
 427
 428        return 0;
 429}
 430
 431static int gen5_rps_set(struct intel_rps *rps, u8 val)
 432{
 433        int err;
 434
 435        spin_lock_irq(&mchdev_lock);
 436        err = __gen5_rps_set(rps, val);
 437        spin_unlock_irq(&mchdev_lock);
 438
 439        return err;
 440}
 441
 442static unsigned long intel_pxfreq(u32 vidfreq)
 443{
 444        int div = (vidfreq & 0x3f0000) >> 16;
 445        int post = (vidfreq & 0x3000) >> 12;
 446        int pre = (vidfreq & 0x7);
 447
 448        if (!pre)
 449                return 0;
 450
 451        return div * 133333 / (pre << post);
 452}
 453
 454static unsigned int init_emon(struct intel_uncore *uncore)
 455{
 456        u8 pxw[16];
 457        int i;
 458
 459        /* Disable to program */
 460        intel_uncore_write(uncore, ECR, 0);
 461        intel_uncore_posting_read(uncore, ECR);
 462
 463        /* Program energy weights for various events */
 464        intel_uncore_write(uncore, SDEW, 0x15040d00);
 465        intel_uncore_write(uncore, CSIEW0, 0x007f0000);
 466        intel_uncore_write(uncore, CSIEW1, 0x1e220004);
 467        intel_uncore_write(uncore, CSIEW2, 0x04000004);
 468
 469        for (i = 0; i < 5; i++)
 470                intel_uncore_write(uncore, PEW(i), 0);
 471        for (i = 0; i < 3; i++)
 472                intel_uncore_write(uncore, DEW(i), 0);
 473
 474        /* Program P-state weights to account for frequency power adjustment */
 475        for (i = 0; i < 16; i++) {
 476                u32 pxvidfreq = intel_uncore_read(uncore, PXVFREQ(i));
 477                unsigned int freq = intel_pxfreq(pxvidfreq);
 478                unsigned int vid =
 479                        (pxvidfreq & PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT;
 480                unsigned int val;
 481
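                     /*
                      * Weight each P-state roughly by V^2 * f (dynamic power),
                      * scaled into the 0-255 range packed into the PXW regs.
                      */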
 482                val = vid * vid * freq / 1000 * 255;
 483                val /= 127 * 127 * 900;
 484
 485                pxw[i] = val;
 486        }
 487        /* Render standby states get 0 weight */
 488        pxw[14] = 0;
 489        pxw[15] = 0;
 490
 491        for (i = 0; i < 4; i++) {
 492                intel_uncore_write(uncore, PXW(i),
 493                                   pxw[i * 4 + 0] << 24 |
 494                                   pxw[i * 4 + 1] << 16 |
 495                                   pxw[i * 4 + 2] <<  8 |
 496                                   pxw[i * 4 + 3] <<  0);
 497        }
 498
 499        /* Adjust magic regs to magic values (more experimental results) */
 500        intel_uncore_write(uncore, OGW0, 0);
 501        intel_uncore_write(uncore, OGW1, 0);
 502        intel_uncore_write(uncore, EG0, 0x00007f00);
 503        intel_uncore_write(uncore, EG1, 0x0000000e);
 504        intel_uncore_write(uncore, EG2, 0x000e0000);
 505        intel_uncore_write(uncore, EG3, 0x68000300);
 506        intel_uncore_write(uncore, EG4, 0x42000000);
 507        intel_uncore_write(uncore, EG5, 0x00140031);
 508        intel_uncore_write(uncore, EG6, 0);
 509        intel_uncore_write(uncore, EG7, 0);
 510
 511        for (i = 0; i < 8; i++)
 512                intel_uncore_write(uncore, PXWL(i), 0);
 513
 514        /* Enable PMON + select events */
 515        intel_uncore_write(uncore, ECR, 0x80000019);
 516
 517        return intel_uncore_read(uncore, LCFUSE02) & LCFUSE_HIV_MASK;
 518}
 519
 520static bool gen5_rps_enable(struct intel_rps *rps)
 521{
 522        struct drm_i915_private *i915 = rps_to_i915(rps);
 523        struct intel_uncore *uncore = rps_to_uncore(rps);
 524        u8 fstart, vstart;
 525        u32 rgvmodectl;
 526
 527        spin_lock_irq(&mchdev_lock);
 528
 529        rgvmodectl = intel_uncore_read(uncore, MEMMODECTL);
 530
 531        /* Enable temp reporting */
 532        intel_uncore_write16(uncore, PMMISC,
 533                             intel_uncore_read16(uncore, PMMISC) | MCPPCE_EN);
 534        intel_uncore_write16(uncore, TSC1,
 535                             intel_uncore_read16(uncore, TSC1) | TSE);
 536
 537        /* 100ms RC evaluation intervals */
 538        intel_uncore_write(uncore, RCUPEI, 100000);
 539        intel_uncore_write(uncore, RCDNEI, 100000);
 540
 541        /* Set max/min thresholds to 90ms and 80ms respectively */
 542        intel_uncore_write(uncore, RCBMAXAVG, 90000);
 543        intel_uncore_write(uncore, RCBMINAVG, 80000);
 544
 545        intel_uncore_write(uncore, MEMIHYST, 1);
 546
 547        /* Set up min, max, and cur for interrupt handling */
 548        fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
 549                MEMMODE_FSTART_SHIFT;
 550
 551        vstart = (intel_uncore_read(uncore, PXVFREQ(fstart)) &
 552                  PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT;
 553
 554        intel_uncore_write(uncore,
 555                           MEMINTREN,
 556                           MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
 557
 558        intel_uncore_write(uncore, VIDSTART, vstart);
 559        intel_uncore_posting_read(uncore, VIDSTART);
 560
 561        rgvmodectl |= MEMMODE_SWMODE_EN;
 562        intel_uncore_write(uncore, MEMMODECTL, rgvmodectl);
 563
 564        if (wait_for_atomic((intel_uncore_read(uncore, MEMSWCTL) &
 565                             MEMCTL_CMD_STS) == 0, 10))
 566                drm_err(&uncore->i915->drm,
 567                        "stuck trying to change perf mode\n");
 568        mdelay(1);
 569
 570        __gen5_rps_set(rps, rps->cur_freq);
 571
 572        rps->ips.last_count1 = intel_uncore_read(uncore, DMIEC);
 573        rps->ips.last_count1 += intel_uncore_read(uncore, DDREC);
 574        rps->ips.last_count1 += intel_uncore_read(uncore, CSIEC);
 575        rps->ips.last_time1 = jiffies_to_msecs(jiffies);
 576
 577        rps->ips.last_count2 = intel_uncore_read(uncore, GFXEC);
 578        rps->ips.last_time2 = ktime_get_raw_ns();
 579
 580        spin_lock(&i915->irq_lock);
 581        ilk_enable_display_irq(i915, DE_PCU_EVENT);
 582        spin_unlock(&i915->irq_lock);
 583
 584        spin_unlock_irq(&mchdev_lock);
 585
 586        rps->ips.corr = init_emon(uncore);
 587
 588        return true;
 589}
 590
 591static void gen5_rps_disable(struct intel_rps *rps)
 592{
 593        struct drm_i915_private *i915 = rps_to_i915(rps);
 594        struct intel_uncore *uncore = rps_to_uncore(rps);
 595        u16 rgvswctl;
 596
 597        spin_lock_irq(&mchdev_lock);
 598
 599        spin_lock(&i915->irq_lock);
 600        ilk_disable_display_irq(i915, DE_PCU_EVENT);
 601        spin_unlock(&i915->irq_lock);
 602
 603        rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
 604
 605        /* Ack interrupts, disable EFC interrupt */
 606        intel_uncore_write(uncore, MEMINTREN,
 607                           intel_uncore_read(uncore, MEMINTREN) &
 608                           ~MEMINT_EVAL_CHG_EN);
 609        intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);
 610
 611        /* Go back to the starting frequency */
 612        __gen5_rps_set(rps, rps->idle_freq);
 613        mdelay(1);
 614        rgvswctl |= MEMCTL_CMD_STS;
 615        intel_uncore_write(uncore, MEMSWCTL, rgvswctl);
 616        mdelay(1);
 617
 618        spin_unlock_irq(&mchdev_lock);
 619}
 620
 621static u32 rps_limits(struct intel_rps *rps, u8 val)
 622{
 623        u32 limits;
 624
 625        /*
 626         * Only set the down limit when we've reached the lowest level to avoid
 627         * getting more interrupts, otherwise leave this clear. This prevents a
 628         * race in the hw when coming out of rc6: There's a tiny window where
 629         * the hw runs at the minimal clock before selecting the desired
 630         * frequency, if the down threshold expires in that window we will not
 631         * receive a down interrupt.
 632         */
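             /* Gen9+ moved the limit fields: max at bit 23, min at bit 14 (was 24/16) */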
 633        if (INTEL_GEN(rps_to_i915(rps)) >= 9) {
 634                limits = rps->max_freq_softlimit << 23;
 635                if (val <= rps->min_freq_softlimit)
 636                        limits |= rps->min_freq_softlimit << 14;
 637        } else {
 638                limits = rps->max_freq_softlimit << 24;
 639                if (val <= rps->min_freq_softlimit)
 640                        limits |= rps->min_freq_softlimit << 16;
 641        }
 642
 643        return limits;
 644}
 645
 646static void rps_set_power(struct intel_rps *rps, int new_power)
 647{
 648        struct intel_gt *gt = rps_to_gt(rps);
 649        struct intel_uncore *uncore = gt->uncore;
 650        u32 threshold_up = 0, threshold_down = 0; /* in % */
 651        u32 ei_up = 0, ei_down = 0;
 652
 653        lockdep_assert_held(&rps->power.mutex);
 654
 655        if (new_power == rps->power.mode)
 656                return;
 657
 658        threshold_up = 95;
 659        threshold_down = 85;
 660
 661        /* Note the units here are not exactly 1us, but 1280ns. */
 662        switch (new_power) {
 663        case LOW_POWER:
 664                ei_up = 16000;
 665                ei_down = 32000;
 666                break;
 667
 668        case BETWEEN:
 669                ei_up = 13000;
 670                ei_down = 32000;
 671                break;
 672
 673        case HIGH_POWER:
 674                ei_up = 10000;
 675                ei_down = 32000;
 676                break;
 677        }
 678
  679        /* When byt can survive dynamic sw freq adjustments without
  680         * hanging the system, this restriction can be lifted.
  681         */
 682        if (IS_VALLEYVIEW(gt->i915))
 683                goto skip_hw_write;
 684
 685        GT_TRACE(gt,
 686                 "changing power mode [%d], up %d%% @ %dus, down %d%% @ %dus\n",
 687                 new_power, threshold_up, ei_up, threshold_down, ei_down);
 688
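             /*
              * The thresholds share the EI time base: ei * threshold * 10
              * equals (ei * 1000ns) * threshold% / 100.
              */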
 689        set(uncore, GEN6_RP_UP_EI,
 690            intel_gt_ns_to_pm_interval(gt, ei_up * 1000));
 691        set(uncore, GEN6_RP_UP_THRESHOLD,
 692            intel_gt_ns_to_pm_interval(gt, ei_up * threshold_up * 10));
 693
 694        set(uncore, GEN6_RP_DOWN_EI,
 695            intel_gt_ns_to_pm_interval(gt, ei_down * 1000));
 696        set(uncore, GEN6_RP_DOWN_THRESHOLD,
 697            intel_gt_ns_to_pm_interval(gt, ei_down * threshold_down * 10));
 698
 699        set(uncore, GEN6_RP_CONTROL,
 700            (INTEL_GEN(gt->i915) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) |
 701            GEN6_RP_MEDIA_HW_NORMAL_MODE |
 702            GEN6_RP_MEDIA_IS_GFX |
 703            GEN6_RP_ENABLE |
 704            GEN6_RP_UP_BUSY_AVG |
 705            GEN6_RP_DOWN_IDLE_AVG);
 706
 707skip_hw_write:
 708        rps->power.mode = new_power;
 709        rps->power.up_threshold = threshold_up;
 710        rps->power.down_threshold = threshold_down;
 711}
 712
 713static void gen6_rps_set_thresholds(struct intel_rps *rps, u8 val)
 714{
 715        int new_power;
 716
 717        new_power = rps->power.mode;
 718        switch (rps->power.mode) {
 719        case LOW_POWER:
 720                if (val > rps->efficient_freq + 1 &&
 721                    val > rps->cur_freq)
 722                        new_power = BETWEEN;
 723                break;
 724
 725        case BETWEEN:
 726                if (val <= rps->efficient_freq &&
 727                    val < rps->cur_freq)
 728                        new_power = LOW_POWER;
 729                else if (val >= rps->rp0_freq &&
 730                         val > rps->cur_freq)
 731                        new_power = HIGH_POWER;
 732                break;
 733
 734        case HIGH_POWER:
 735                if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
 736                    val < rps->cur_freq)
 737                        new_power = BETWEEN;
 738                break;
 739        }
 740        /* Max/min bins are special */
 741        if (val <= rps->min_freq_softlimit)
 742                new_power = LOW_POWER;
 743        if (val >= rps->max_freq_softlimit)
 744                new_power = HIGH_POWER;
 745
 746        mutex_lock(&rps->power.mutex);
 747        if (rps->power.interactive)
 748                new_power = HIGH_POWER;
 749        rps_set_power(rps, new_power);
 750        mutex_unlock(&rps->power.mutex);
 751}
 752
 753void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive)
 754{
 755        GT_TRACE(rps_to_gt(rps), "mark interactive: %s\n", yesno(interactive));
 756
 757        mutex_lock(&rps->power.mutex);
 758        if (interactive) {
 759                if (!rps->power.interactive++ && intel_rps_is_active(rps))
 760                        rps_set_power(rps, HIGH_POWER);
 761        } else {
 762                GEM_BUG_ON(!rps->power.interactive);
 763                rps->power.interactive--;
 764        }
 765        mutex_unlock(&rps->power.mutex);
 766}
 767
 768static int gen6_rps_set(struct intel_rps *rps, u8 val)
 769{
 770        struct intel_uncore *uncore = rps_to_uncore(rps);
 771        struct drm_i915_private *i915 = rps_to_i915(rps);
 772        u32 swreq;
 773
 774        if (INTEL_GEN(i915) >= 9)
 775                swreq = GEN9_FREQUENCY(val);
 776        else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
 777                swreq = HSW_FREQUENCY(val);
 778        else
 779                swreq = (GEN6_FREQUENCY(val) |
 780                         GEN6_OFFSET(0) |
 781                         GEN6_AGGRESSIVE_TURBO);
 782        set(uncore, GEN6_RPNSWREQ, swreq);
 783
 784        GT_TRACE(rps_to_gt(rps), "set val:%x, freq:%d, swreq:%x\n",
 785                 val, intel_gpu_freq(rps, val), swreq);
 786
 787        return 0;
 788}
 789
 790static int vlv_rps_set(struct intel_rps *rps, u8 val)
 791{
 792        struct drm_i915_private *i915 = rps_to_i915(rps);
 793        int err;
 794
 795        vlv_punit_get(i915);
 796        err = vlv_punit_write(i915, PUNIT_REG_GPU_FREQ_REQ, val);
 797        vlv_punit_put(i915);
 798
 799        GT_TRACE(rps_to_gt(rps), "set val:%x, freq:%d\n",
 800                 val, intel_gpu_freq(rps, val));
 801
 802        return err;
 803}
 804
 805static int rps_set(struct intel_rps *rps, u8 val, bool update)
 806{
 807        struct drm_i915_private *i915 = rps_to_i915(rps);
 808        int err;
 809
 810        if (val == rps->last_freq)
 811                return 0;
 812
 813        if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
 814                err = vlv_rps_set(rps, val);
 815        else if (INTEL_GEN(i915) >= 6)
 816                err = gen6_rps_set(rps, val);
 817        else
 818                err = gen5_rps_set(rps, val);
 819        if (err)
 820                return err;
 821
 822        if (update && INTEL_GEN(i915) >= 6)
 823                gen6_rps_set_thresholds(rps, val);
 824        rps->last_freq = val;
 825
 826        return 0;
 827}
 828
 829void intel_rps_unpark(struct intel_rps *rps)
 830{
 831        if (!intel_rps_is_enabled(rps))
 832                return;
 833
 834        GT_TRACE(rps_to_gt(rps), "unpark:%x\n", rps->cur_freq);
 835
 836        /*
 837         * Use the user's desired frequency as a guide, but for better
 838         * performance, jump directly to RPe as our starting frequency.
 839         */
 840        mutex_lock(&rps->lock);
 841
 842        intel_rps_set_active(rps);
 843        intel_rps_set(rps,
 844                      clamp(rps->cur_freq,
 845                            rps->min_freq_softlimit,
 846                            rps->max_freq_softlimit));
 847
 848        mutex_unlock(&rps->lock);
 849
 850        rps->pm_iir = 0;
 851        if (intel_rps_has_interrupts(rps))
 852                rps_enable_interrupts(rps);
 853        if (intel_rps_uses_timer(rps))
 854                rps_start_timer(rps);
 855
 856        if (IS_GEN(rps_to_i915(rps), 5))
 857                gen5_rps_update(rps);
 858}
 859
 860void intel_rps_park(struct intel_rps *rps)
 861{
 862        int adj;
 863
 864        GEM_BUG_ON(atomic_read(&rps->num_waiters));
 865
 866        if (!intel_rps_clear_active(rps))
 867                return;
 868
 869        if (intel_rps_uses_timer(rps))
 870                rps_stop_timer(rps);
 871        if (intel_rps_has_interrupts(rps))
 872                rps_disable_interrupts(rps);
 873
 874        if (rps->last_freq <= rps->idle_freq)
 875                return;
 876
 877        /*
 878         * The punit delays the write of the frequency and voltage until it
 879         * determines the GPU is awake. During normal usage we don't want to
 880         * waste power changing the frequency if the GPU is sleeping (rc6).
  881         * However, the GPU and driver are now idle and we do not want to delay
 882         * switching to minimum voltage (reducing power whilst idle) as we do
 883         * not expect to be woken in the near future and so must flush the
 884         * change by waking the device.
 885         *
 886         * We choose to take the media powerwell (either would do to trick the
 887         * punit into committing the voltage change) as that takes a lot less
 888         * power than the render powerwell.
 889         */
 890        intel_uncore_forcewake_get(rps_to_uncore(rps), FORCEWAKE_MEDIA);
 891        rps_set(rps, rps->idle_freq, false);
 892        intel_uncore_forcewake_put(rps_to_uncore(rps), FORCEWAKE_MEDIA);
 893
 894        /*
 895         * Since we will try and restart from the previously requested
 896         * frequency on unparking, treat this idle point as a downclock
 897         * interrupt and reduce the frequency for resume. If we park/unpark
 898         * more frequently than the rps worker can run, we will not respond
 899         * to any EI and never see a change in frequency.
 900         *
 901         * (Note we accommodate Cherryview's limitation of only using an
 902         * even bin by applying it to all.)
 903         */
 904        adj = rps->last_adj;
 905        if (adj < 0)
 906                adj *= 2;
 907        else /* CHV needs even encode values */
 908                adj = -2;
 909        rps->last_adj = adj;
 910        rps->cur_freq = max_t(int, rps->cur_freq + adj, rps->min_freq);
 911        if (rps->cur_freq < rps->efficient_freq) {
 912                rps->cur_freq = rps->efficient_freq;
 913                rps->last_adj = 0;
 914        }
 915
 916        GT_TRACE(rps_to_gt(rps), "park:%x\n", rps->cur_freq);
 917}
 918
 919void intel_rps_boost(struct i915_request *rq)
 920{
 921        if (i915_request_signaled(rq) || i915_request_has_waitboost(rq))
 922                return;
 923
 924        /* Serializes with i915_request_retire() */
 925        if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags)) {
 926                struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps;
 927
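                     /* Only the first waiter needs to kick off the boost work */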
 928                if (atomic_fetch_inc(&rps->num_waiters))
 929                        return;
 930
 931                if (!intel_rps_is_active(rps))
 932                        return;
 933
 934                GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n",
 935                         rq->fence.context, rq->fence.seqno);
 936
 937                if (READ_ONCE(rps->cur_freq) < rps->boost_freq)
 938                        schedule_work(&rps->work);
 939
 940                WRITE_ONCE(rps->boosts, rps->boosts + 1); /* debug only */
 941        }
 942}
 943
 944int intel_rps_set(struct intel_rps *rps, u8 val)
 945{
 946        int err;
 947
 948        lockdep_assert_held(&rps->lock);
 949        GEM_BUG_ON(val > rps->max_freq);
 950        GEM_BUG_ON(val < rps->min_freq);
 951
 952        if (intel_rps_is_active(rps)) {
 953                err = rps_set(rps, val, true);
 954                if (err)
 955                        return err;
 956
 957                /*
 958                 * Make sure we continue to get interrupts
 959                 * until we hit the minimum or maximum frequencies.
 960                 */
 961                if (intel_rps_has_interrupts(rps)) {
 962                        struct intel_uncore *uncore = rps_to_uncore(rps);
 963
 964                        set(uncore,
 965                            GEN6_RP_INTERRUPT_LIMITS, rps_limits(rps, val));
 966
 967                        set(uncore, GEN6_PMINTRMSK, rps_pm_mask(rps, val));
 968                }
 969        }
 970
 971        rps->cur_freq = val;
 972        return 0;
 973}
 974
 975static void gen6_rps_init(struct intel_rps *rps)
 976{
 977        struct drm_i915_private *i915 = rps_to_i915(rps);
 978        struct intel_uncore *uncore = rps_to_uncore(rps);
 979
 980        /* All of these values are in units of 50MHz */
 981
 982        /* static values from HW: RP0 > RP1 > RPn (min_freq) */
 983        if (IS_GEN9_LP(i915)) {
 984                u32 rp_state_cap = intel_uncore_read(uncore, BXT_RP_STATE_CAP);
 985
 986                rps->rp0_freq = (rp_state_cap >> 16) & 0xff;
 987                rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
 988                rps->min_freq = (rp_state_cap >>  0) & 0xff;
 989        } else {
 990                u32 rp_state_cap = intel_uncore_read(uncore, GEN6_RP_STATE_CAP);
 991
 992                rps->rp0_freq = (rp_state_cap >>  0) & 0xff;
 993                rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
 994                rps->min_freq = (rp_state_cap >> 16) & 0xff;
 995        }
 996
 997        /* hw_max = RP0 until we check for overclocking */
 998        rps->max_freq = rps->rp0_freq;
 999
1000        rps->efficient_freq = rps->rp1_freq;
1001        if (IS_HASWELL(i915) || IS_BROADWELL(i915) ||
1002            IS_GEN9_BC(i915) || INTEL_GEN(i915) >= 10) {
1003                u32 ddcc_status = 0;
1004
1005                if (sandybridge_pcode_read(i915,
1006                                           HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
1007                                           &ddcc_status, NULL) == 0)
1008                        rps->efficient_freq =
1009                                clamp_t(u8,
1010                                        (ddcc_status >> 8) & 0xff,
1011                                        rps->min_freq,
1012                                        rps->max_freq);
1013        }
1014
1015        if (IS_GEN9_BC(i915) || INTEL_GEN(i915) >= 10) {
 1016                /* Store the frequency values in 16.66 MHz units, which is
 1017                 * the natural hardware unit for SKL
 1018                 */
1019                rps->rp0_freq *= GEN9_FREQ_SCALER;
1020                rps->rp1_freq *= GEN9_FREQ_SCALER;
1021                rps->min_freq *= GEN9_FREQ_SCALER;
1022                rps->max_freq *= GEN9_FREQ_SCALER;
1023                rps->efficient_freq *= GEN9_FREQ_SCALER;
1024        }
1025}
1026
1027static bool rps_reset(struct intel_rps *rps)
1028{
1029        struct drm_i915_private *i915 = rps_to_i915(rps);
1030
1031        /* force a reset */
1032        rps->power.mode = -1;
1033        rps->last_freq = -1;
1034
1035        if (rps_set(rps, rps->min_freq, true)) {
1036                drm_err(&i915->drm, "Failed to reset RPS to initial values\n");
1037                return false;
1038        }
1039
1040        rps->cur_freq = rps->min_freq;
1041        return true;
1042}
1043
1044/* See the Gen9_GT_PM_Programming_Guide doc for the below */
1045static bool gen9_rps_enable(struct intel_rps *rps)
1046{
1047        struct intel_gt *gt = rps_to_gt(rps);
1048        struct intel_uncore *uncore = gt->uncore;
1049
1050        /* Program defaults and thresholds for RPS */
1051        if (IS_GEN(gt->i915, 9))
1052                intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ,
1053                                      GEN9_FREQUENCY(rps->rp1_freq));
1054
1055        intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 0xa);
1056
1057        rps->pm_events = GEN6_PM_RP_UP_THRESHOLD | GEN6_PM_RP_DOWN_THRESHOLD;
1058
1059        return rps_reset(rps);
1060}
1061
1062static bool gen8_rps_enable(struct intel_rps *rps)
1063{
1064        struct intel_uncore *uncore = rps_to_uncore(rps);
1065
1066        intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ,
1067                              HSW_FREQUENCY(rps->rp1_freq));
1068
1069        intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
1070
1071        rps->pm_events = GEN6_PM_RP_UP_THRESHOLD | GEN6_PM_RP_DOWN_THRESHOLD;
1072
1073        return rps_reset(rps);
1074}
1075
1076static bool gen6_rps_enable(struct intel_rps *rps)
1077{
1078        struct intel_uncore *uncore = rps_to_uncore(rps);
1079
1080        /* Power down if completely idle for over 50ms */
1081        intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 50000);
1082        intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
1083
1084        rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD |
1085                          GEN6_PM_RP_DOWN_THRESHOLD |
1086                          GEN6_PM_RP_DOWN_TIMEOUT);
1087
1088        return rps_reset(rps);
1089}
1090
1091static int chv_rps_max_freq(struct intel_rps *rps)
1092{
1093        struct drm_i915_private *i915 = rps_to_i915(rps);
1094        struct intel_gt *gt = rps_to_gt(rps);
1095        u32 val;
1096
1097        val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE);
1098
1099        switch (gt->info.sseu.eu_total) {
1100        case 8:
1101                /* (2 * 4) config */
1102                val >>= FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT;
1103                break;
1104        case 12:
1105                /* (2 * 6) config */
1106                val >>= FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT;
1107                break;
1108        case 16:
1109                /* (2 * 8) config */
1110        default:
1111                /* Setting (2 * 8) Min RP0 for any other combination */
1112                val >>= FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT;
1113                break;
1114        }
1115
1116        return val & FB_GFX_FREQ_FUSE_MASK;
1117}
1118
1119static int chv_rps_rpe_freq(struct intel_rps *rps)
1120{
1121        struct drm_i915_private *i915 = rps_to_i915(rps);
1122        u32 val;
1123
1124        val = vlv_punit_read(i915, PUNIT_GPU_DUTYCYCLE_REG);
1125        val >>= PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT;
1126
1127        return val & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
1128}
1129
1130static int chv_rps_guar_freq(struct intel_rps *rps)
1131{
1132        struct drm_i915_private *i915 = rps_to_i915(rps);
1133        u32 val;
1134
1135        val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE);
1136
1137        return val & FB_GFX_FREQ_FUSE_MASK;
1138}
1139
1140static u32 chv_rps_min_freq(struct intel_rps *rps)
1141{
1142        struct drm_i915_private *i915 = rps_to_i915(rps);
1143        u32 val;
1144
1145        val = vlv_punit_read(i915, FB_GFX_FMIN_AT_VMIN_FUSE);
1146        val >>= FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT;
1147
1148        return val & FB_GFX_FREQ_FUSE_MASK;
1149}
1150
1151static bool chv_rps_enable(struct intel_rps *rps)
1152{
1153        struct intel_uncore *uncore = rps_to_uncore(rps);
1154        struct drm_i915_private *i915 = rps_to_i915(rps);
1155        u32 val;
1156
 1157        /* 1: Program defaults and thresholds for RPS */
1158        intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000);
1159        intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400);
1160        intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000);
1161        intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000);
1162        intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000);
1163
1164        intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
1165
1166        /* 2: Enable RPS */
1167        intel_uncore_write_fw(uncore, GEN6_RP_CONTROL,
1168                              GEN6_RP_MEDIA_HW_NORMAL_MODE |
1169                              GEN6_RP_MEDIA_IS_GFX |
1170                              GEN6_RP_ENABLE |
1171                              GEN6_RP_UP_BUSY_AVG |
1172                              GEN6_RP_DOWN_IDLE_AVG);
1173
1174        rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD |
1175                          GEN6_PM_RP_DOWN_THRESHOLD |
1176                          GEN6_PM_RP_DOWN_TIMEOUT);
1177
1178        /* Setting Fixed Bias */
1179        vlv_punit_get(i915);
1180
1181        val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50;
1182        vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val);
1183
1184        val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
1185
1186        vlv_punit_put(i915);
1187
1188        /* RPS code assumes GPLL is used */
1189        drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0,
1190                      "GPLL not enabled\n");
1191
1192        drm_dbg(&i915->drm, "GPLL enabled? %s\n", yesno(val & GPLLENABLE));
1193        drm_dbg(&i915->drm, "GPU status: 0x%08x\n", val);
1194
1195        return rps_reset(rps);
1196}
1197
1198static int vlv_rps_guar_freq(struct intel_rps *rps)
1199{
1200        struct drm_i915_private *i915 = rps_to_i915(rps);
1201        u32 val, rp1;
1202
1203        val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE);
1204
1205        rp1 = val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK;
1206        rp1 >>= FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
1207
1208        return rp1;
1209}
1210
1211static int vlv_rps_max_freq(struct intel_rps *rps)
1212{
1213        struct drm_i915_private *i915 = rps_to_i915(rps);
1214        u32 val, rp0;
1215
1216        val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE);
1217
1218        rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
1219        /* Clamp to max */
1220        rp0 = min_t(u32, rp0, 0xea);
1221
1222        return rp0;
1223}
1224
1225static int vlv_rps_rpe_freq(struct intel_rps *rps)
1226{
1227        struct drm_i915_private *i915 = rps_to_i915(rps);
1228        u32 val, rpe;
1229
1230        val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
1231        rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
1232        val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
1233        rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
1234
1235        return rpe;
1236}
1237
1238static int vlv_rps_min_freq(struct intel_rps *rps)
1239{
1240        struct drm_i915_private *i915 = rps_to_i915(rps);
1241        u32 val;
1242
1243        val = vlv_punit_read(i915, PUNIT_REG_GPU_LFM) & 0xff;
1244        /*
1245         * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value
1246         * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on
1247         * a BYT-M B0 the above register contains 0xbf. Moreover when setting
 1248         * a frequency Punit will not allow values below 0xc0. Clamp it to 0xc0
1249         * to make sure it matches what Punit accepts.
1250         */
1251        return max_t(u32, val, 0xc0);
1252}
1253
1254static bool vlv_rps_enable(struct intel_rps *rps)
1255{
1256        struct intel_uncore *uncore = rps_to_uncore(rps);
1257        struct drm_i915_private *i915 = rps_to_i915(rps);
1258        u32 val;
1259
1260        intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000);
1261        intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400);
1262        intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000);
1263        intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000);
1264        intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000);
1265
1266        intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
1267
1268        intel_uncore_write_fw(uncore, GEN6_RP_CONTROL,
1269                              GEN6_RP_MEDIA_TURBO |
1270                              GEN6_RP_MEDIA_HW_NORMAL_MODE |
1271                              GEN6_RP_MEDIA_IS_GFX |
1272                              GEN6_RP_ENABLE |
1273                              GEN6_RP_UP_BUSY_AVG |
1274                              GEN6_RP_DOWN_IDLE_CONT);
1275
1276        /* WaGsvRC0ResidencyMethod:vlv */
1277        rps->pm_events = GEN6_PM_RP_UP_EI_EXPIRED;
1278
1279        vlv_punit_get(i915);
1280
1281        /* Setting Fixed Bias */
1282        val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875;
1283        vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val);
1284
1285        val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
1286
1287        vlv_punit_put(i915);
1288
1289        /* RPS code assumes GPLL is used */
1290        drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0,
1291                      "GPLL not enabled\n");
1292
1293        drm_dbg(&i915->drm, "GPLL enabled? %s\n", yesno(val & GPLLENABLE));
1294        drm_dbg(&i915->drm, "GPU status: 0x%08x\n", val);
1295
1296        return rps_reset(rps);
1297}
1298
1299static unsigned long __ips_gfx_val(struct intel_ips *ips)
1300{
1301        struct intel_rps *rps = container_of(ips, typeof(*rps), ips);
1302        struct intel_uncore *uncore = rps_to_uncore(rps);
1303        unsigned int t, state1, state2;
1304        u32 pxvid, ext_v;
1305        u64 corr, corr2;
1306
1307        lockdep_assert_held(&mchdev_lock);
1308
1309        pxvid = intel_uncore_read(uncore, PXVFREQ(rps->cur_freq));
1310        pxvid = (pxvid >> 24) & 0x7f;
1311        ext_v = pvid_to_extvid(rps_to_i915(rps), pxvid);
1312
1313        state1 = ext_v;
1314
1315        /* Revel in the empirically derived constants */
1316
1317        /* Correction factor in 1/100000 units */
1318        t = ips_mch_val(uncore);
1319        if (t > 80)
1320                corr = t * 2349 + 135940;
1321        else if (t >= 50)
1322                corr = t * 964 + 29317;
1323        else /* < 50 */
1324                corr = t * 301 + 1004;
1325
1326        corr = div_u64(corr * 150142 * state1, 10000) - 78642;
1327        corr2 = div_u64(corr, 100000) * ips->corr;
1328
1329        state2 = div_u64(corr2 * state1, 10000);
1330        state2 /= 100; /* convert to mW */
1331
1332        __gen5_ips_update(ips);
1333
1334        return ips->gfx_power + state2;
1335}
1336
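     /*
      * Manual RPS evaluation (rps_timer) needs per-engine busyness; if any
      * engine cannot report stats we fall back to interrupt-driven evaluation.
      */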
1337static bool has_busy_stats(struct intel_rps *rps)
1338{
1339        struct intel_engine_cs *engine;
1340        enum intel_engine_id id;
1341
1342        for_each_engine(engine, rps_to_gt(rps), id) {
1343                if (!intel_engine_supports_stats(engine))
1344                        return false;
1345        }
1346
1347        return true;
1348}
1349
1350void intel_rps_enable(struct intel_rps *rps)
1351{
1352        struct drm_i915_private *i915 = rps_to_i915(rps);
1353        struct intel_uncore *uncore = rps_to_uncore(rps);
1354        bool enabled = false;
1355
1356        if (!HAS_RPS(i915))
1357                return;
1358
1359        intel_gt_check_clock_frequency(rps_to_gt(rps));
1360
1361        intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
1362        if (rps->max_freq <= rps->min_freq)
1363                /* leave disabled, no room for dynamic reclocking */;
1364        else if (IS_CHERRYVIEW(i915))
1365                enabled = chv_rps_enable(rps);
1366        else if (IS_VALLEYVIEW(i915))
1367                enabled = vlv_rps_enable(rps);
1368        else if (INTEL_GEN(i915) >= 9)
1369                enabled = gen9_rps_enable(rps);
1370        else if (INTEL_GEN(i915) >= 8)
1371                enabled = gen8_rps_enable(rps);
1372        else if (INTEL_GEN(i915) >= 6)
1373                enabled = gen6_rps_enable(rps);
1374        else if (IS_IRONLAKE_M(i915))
1375                enabled = gen5_rps_enable(rps);
1376        else
1377                MISSING_CASE(INTEL_GEN(i915));
1378        intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
1379        if (!enabled)
1380                return;
1381
1382        GT_TRACE(rps_to_gt(rps),
1383                 "min:%x, max:%x, freq:[%d, %d]\n",
1384                 rps->min_freq, rps->max_freq,
1385                 intel_gpu_freq(rps, rps->min_freq),
1386                 intel_gpu_freq(rps, rps->max_freq));
1387
1388        GEM_BUG_ON(rps->max_freq < rps->min_freq);
1389        GEM_BUG_ON(rps->idle_freq > rps->max_freq);
1390
1391        GEM_BUG_ON(rps->efficient_freq < rps->min_freq);
1392        GEM_BUG_ON(rps->efficient_freq > rps->max_freq);
1393
1394        if (has_busy_stats(rps))
1395                intel_rps_set_timer(rps);
1396        else if (INTEL_GEN(i915) >= 6)
1397                intel_rps_set_interrupts(rps);
1398        else
1399                /* Ironlake currently uses intel_ips.ko */ {}
1400
1401        intel_rps_set_enabled(rps);
1402}
1403
1404static void gen6_rps_disable(struct intel_rps *rps)
1405{
1406        set(rps_to_uncore(rps), GEN6_RP_CONTROL, 0);
1407}
1408
1409void intel_rps_disable(struct intel_rps *rps)
1410{
1411        struct drm_i915_private *i915 = rps_to_i915(rps);
1412
1413        intel_rps_clear_enabled(rps);
1414        intel_rps_clear_interrupts(rps);
1415        intel_rps_clear_timer(rps);
1416
1417        if (INTEL_GEN(i915) >= 6)
1418                gen6_rps_disable(rps);
1419        else if (IS_IRONLAKE_M(i915))
1420                gen5_rps_disable(rps);
1421}
1422
1423static int byt_gpu_freq(struct intel_rps *rps, int val)
1424{
1425        /*
1426         * N = val - 0xb7
1427         * Slow = Fast = GPLL ref * N
1428         */
1429        return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000);
1430}
1431
1432static int byt_freq_opcode(struct intel_rps *rps, int val)
1433{
1434        return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7;
1435}
1436
1437static int chv_gpu_freq(struct intel_rps *rps, int val)
1438{
1439        /*
1440         * N = val / 2
1441         * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2
1442         */
1443        return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000);
1444}
1445
1446static int chv_freq_opcode(struct intel_rps *rps, int val)
1447{
1448        /* CHV needs even values */
1449        return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2;
1450}
1451
1452int intel_gpu_freq(struct intel_rps *rps, int val)
1453{
1454        struct drm_i915_private *i915 = rps_to_i915(rps);
1455
1456        if (INTEL_GEN(i915) >= 9)
1457                return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
1458                                         GEN9_FREQ_SCALER);
1459        else if (IS_CHERRYVIEW(i915))
1460                return chv_gpu_freq(rps, val);
1461        else if (IS_VALLEYVIEW(i915))
1462                return byt_gpu_freq(rps, val);
1463        else if (INTEL_GEN(i915) >= 6)
1464                return val * GT_FREQUENCY_MULTIPLIER;
1465        else
1466                return val;
1467}
1468
1469int intel_freq_opcode(struct intel_rps *rps, int val)
1470{
1471        struct drm_i915_private *i915 = rps_to_i915(rps);
1472
1473        if (INTEL_GEN(i915) >= 9)
1474                return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
1475                                         GT_FREQUENCY_MULTIPLIER);
1476        else if (IS_CHERRYVIEW(i915))
1477                return chv_freq_opcode(rps, val);
1478        else if (IS_VALLEYVIEW(i915))
1479                return byt_freq_opcode(rps, val);
1480        else if (INTEL_GEN(i915) >= 6)
1481                return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
1482        else
1483                return val;
1484}
1485
1486static void vlv_init_gpll_ref_freq(struct intel_rps *rps)
1487{
1488        struct drm_i915_private *i915 = rps_to_i915(rps);
1489
1490        rps->gpll_ref_freq =
1491                vlv_get_cck_clock(i915, "GPLL ref",
1492                                  CCK_GPLL_CLOCK_CONTROL,
1493                                  i915->czclk_freq);
1494
1495        drm_dbg(&i915->drm, "GPLL reference freq: %d kHz\n",
1496                rps->gpll_ref_freq);
1497}
1498
1499static void vlv_rps_init(struct intel_rps *rps)
1500{
1501        struct drm_i915_private *i915 = rps_to_i915(rps);
1502        u32 val;
1503
1504        vlv_iosf_sb_get(i915,
1505                        BIT(VLV_IOSF_SB_PUNIT) |
1506                        BIT(VLV_IOSF_SB_NC) |
1507                        BIT(VLV_IOSF_SB_CCK));
1508
1509        vlv_init_gpll_ref_freq(rps);
1510
1511        val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
1512        switch ((val >> 6) & 3) {
1513        case 0:
1514        case 1:
1515                i915->mem_freq = 800;
1516                break;
1517        case 2:
1518                i915->mem_freq = 1066;
1519                break;
1520        case 3:
1521                i915->mem_freq = 1333;
1522                break;
1523        }
1524        drm_dbg(&i915->drm, "DDR speed: %d MHz\n", i915->mem_freq);
1525
1526        rps->max_freq = vlv_rps_max_freq(rps);
1527        rps->rp0_freq = rps->max_freq;
1528        drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n",
1529                intel_gpu_freq(rps, rps->max_freq), rps->max_freq);
1530
1531        rps->efficient_freq = vlv_rps_rpe_freq(rps);
1532        drm_dbg(&i915->drm, "RPe GPU freq: %d MHz (%u)\n",
1533                intel_gpu_freq(rps, rps->efficient_freq), rps->efficient_freq);
1534
1535        rps->rp1_freq = vlv_rps_guar_freq(rps);
1536        drm_dbg(&i915->drm, "RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
1537                intel_gpu_freq(rps, rps->rp1_freq), rps->rp1_freq);
1538
1539        rps->min_freq = vlv_rps_min_freq(rps);
1540        drm_dbg(&i915->drm, "min GPU freq: %d MHz (%u)\n",
1541                intel_gpu_freq(rps, rps->min_freq), rps->min_freq);
1542
1543        vlv_iosf_sb_put(i915,
1544                        BIT(VLV_IOSF_SB_PUNIT) |
1545                        BIT(VLV_IOSF_SB_NC) |
1546                        BIT(VLV_IOSF_SB_CCK));
1547}
1548
1549static void chv_rps_init(struct intel_rps *rps)
1550{
1551        struct drm_i915_private *i915 = rps_to_i915(rps);
1552        u32 val;
1553
1554        vlv_iosf_sb_get(i915,
1555                        BIT(VLV_IOSF_SB_PUNIT) |
1556                        BIT(VLV_IOSF_SB_NC) |
1557                        BIT(VLV_IOSF_SB_CCK));
1558
1559        vlv_init_gpll_ref_freq(rps);
1560
1561        val = vlv_cck_read(i915, CCK_FUSE_REG);
1562
1563        switch ((val >> 2) & 0x7) {
1564        case 3:
1565                i915->mem_freq = 2000;
1566                break;
1567        default:
1568                i915->mem_freq = 1600;
1569                break;
1570        }
1571        drm_dbg(&i915->drm, "DDR speed: %d MHz\n", i915->mem_freq);
1572
1573        rps->max_freq = chv_rps_max_freq(rps);
1574        rps->rp0_freq = rps->max_freq;
1575        drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n",
1576                intel_gpu_freq(rps, rps->max_freq), rps->max_freq);
1577
1578        rps->efficient_freq = chv_rps_rpe_freq(rps);
1579        drm_dbg(&i915->drm, "RPe GPU freq: %d MHz (%u)\n",
1580                intel_gpu_freq(rps, rps->efficient_freq), rps->efficient_freq);
1581
1582        rps->rp1_freq = chv_rps_guar_freq(rps);
1583        drm_dbg(&i915->drm, "RP1(Guar) GPU freq: %d MHz (%u)\n",
1584                intel_gpu_freq(rps, rps->rp1_freq), rps->rp1_freq);
1585
1586        rps->min_freq = chv_rps_min_freq(rps);
1587        drm_dbg(&i915->drm, "min GPU freq: %d MHz (%u)\n",
1588                intel_gpu_freq(rps, rps->min_freq), rps->min_freq);
1589
1590        vlv_iosf_sb_put(i915,
1591                        BIT(VLV_IOSF_SB_PUNIT) |
1592                        BIT(VLV_IOSF_SB_NC) |
1593                        BIT(VLV_IOSF_SB_CCK));
1594
1595        drm_WARN_ONCE(&i915->drm, (rps->max_freq | rps->efficient_freq |
1596                                   rps->rp1_freq | rps->min_freq) & 1,
1597                      "Odd GPU freq values\n");
1598}
1599
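    /* Snapshot the render/media C0 residency counters along with a timestamp */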
1600static void vlv_c0_read(struct intel_uncore *uncore, struct intel_rps_ei *ei)
1601{
1602        ei->ktime = ktime_get_raw();
1603        ei->render_c0 = intel_uncore_read(uncore, VLV_RENDER_C0_COUNT);
1604        ei->media_c0 = intel_uncore_read(uncore, VLV_MEDIA_C0_COUNT);
1605}
1606
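    /*
     * VLV/CHV workaround: whenever the up-EI-expired interrupt fires, derive
     * an up/down threshold event by hand from the render/media C0 residency
     * accumulated over the interval, and feed it back into the normal RPS
     * event handling alongside any hardware-generated events.
     */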
1607static u32 vlv_wa_c0_ei(struct intel_rps *rps, u32 pm_iir)
1608{
1609        struct intel_uncore *uncore = rps_to_uncore(rps);
1610        const struct intel_rps_ei *prev = &rps->ei;
1611        struct intel_rps_ei now;
1612        u32 events = 0;
1613
1614        if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
1615                return 0;
1616
1617        vlv_c0_read(uncore, &now);
1618
1619        if (prev->ktime) {
1620                u64 time, c0;
1621                u32 render, media;
1622
1623                time = ktime_us_delta(now.ktime, prev->ktime);
1624
1625                time *= rps_to_i915(rps)->czclk_freq;
1626
1627                /* Workload can be split between render + media,
1628                 * e.g. SwapBuffers being blitted in X after being rendered in
1629                 * mesa. To account for this we take the busier of the two
1630                 * engines as our activity counter for the interval.
1631                 */
1632                render = now.render_c0 - prev->render_c0;
1633                media = now.media_c0 - prev->media_c0;
1634                c0 = max(render, media);
1635                c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */
1636
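                    /* Raise an up/down event if busyness crosses a threshold% */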
1637                if (c0 > time * rps->power.up_threshold)
1638                        events = GEN6_PM_RP_UP_THRESHOLD;
1639                else if (c0 < time * rps->power.down_threshold)
1640                        events = GEN6_PM_RP_DOWN_THRESHOLD;
1641        }
1642
1643        rps->ei = now;
1644        return events;
1645}
1646
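    /*
     * Bottom half of the RPS interrupt: gather the pending up/down threshold
     * events and any client waitboost requests, then choose and apply a new
     * frequency within the current soft limits.
     */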
1647static void rps_work(struct work_struct *work)
1648{
1649        struct intel_rps *rps = container_of(work, typeof(*rps), work);
1650        struct intel_gt *gt = rps_to_gt(rps);
1651        struct drm_i915_private *i915 = rps_to_i915(rps);
1652        bool client_boost = false;
1653        int new_freq, adj, min, max;
1654        u32 pm_iir = 0;
1655
1656        spin_lock_irq(&gt->irq_lock);
1657        pm_iir = fetch_and_zero(&rps->pm_iir) & rps->pm_events;
1658        client_boost = atomic_read(&rps->num_waiters);
1659        spin_unlock_irq(&gt->irq_lock);
1660
1661        /* Make sure we didn't queue anything we're not going to process. */
1662        if (!pm_iir && !client_boost)
1663                goto out;
1664
1665        mutex_lock(&rps->lock);
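            /* RPS may have been parked since this work was queued */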
1666        if (!intel_rps_is_active(rps)) {
1667                mutex_unlock(&rps->lock);
1668                return;
1669        }
1670
1671        pm_iir |= vlv_wa_c0_ei(rps, pm_iir);
1672
1673        adj = rps->last_adj;
1674        new_freq = rps->cur_freq;
1675        min = rps->min_freq_softlimit;
1676        max = rps->max_freq_softlimit;
1677        if (client_boost)
1678                max = rps->max_freq;
1679
1680        GT_TRACE(gt,
1681                 "pm_iir:%x, client_boost:%s, last:%d, cur:%x, min:%x, max:%x\n",
1682                 pm_iir, yesno(client_boost),
1683                 adj, new_freq, min, max);
1684
1685        if (client_boost && new_freq < rps->boost_freq) {
1686                new_freq = rps->boost_freq;
1687                adj = 0;
1688        } else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
1689                if (adj > 0)
1690                        adj *= 2;
1691                else /* CHV needs even encode values */
1692                        adj = IS_CHERRYVIEW(gt->i915) ? 2 : 1;
1693
1694                if (new_freq >= rps->max_freq_softlimit)
1695                        adj = 0;
1696        } else if (client_boost) {
1697                adj = 0;
1698        } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
1699                if (rps->cur_freq > rps->efficient_freq)
1700                        new_freq = rps->efficient_freq;
1701                else if (rps->cur_freq > rps->min_freq_softlimit)
1702                        new_freq = rps->min_freq_softlimit;
1703                adj = 0;
1704        } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
1705                if (adj < 0)
1706                        adj *= 2;
1707                else /* CHV needs even encode values */
1708                        adj = IS_CHERRYVIEW(gt->i915) ? -2 : -1;
1709
1710                if (new_freq <= rps->min_freq_softlimit)
1711                        adj = 0;
1712        } else { /* unknown event */
1713                adj = 0;
1714        }
1715
1716        /*
1717         * sysfs frequency limits may have snuck in while
1718         * servicing the interrupt
1719         */
1720        new_freq += adj;
1721        new_freq = clamp_t(int, new_freq, min, max);
1722
1723        if (intel_rps_set(rps, new_freq)) {
1724                drm_dbg(&i915->drm, "Failed to set new GPU frequency\n");
1725                adj = 0;
1726        }
1727        rps->last_adj = adj;
1728
1729        mutex_unlock(&rps->lock);
1730
1731out:
1732        spin_lock_irq(&gt->irq_lock);
1733        gen6_gt_pm_unmask_irq(gt, rps->pm_events);
1734        spin_unlock_irq(&gt->irq_lock);
1735}
1736
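    /*
     * Gen11+: called with gt->irq_lock already held; mask the events we are
     * about to process and defer the actual reclocking to rps_work().
     */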
1737void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
1738{
1739        struct intel_gt *gt = rps_to_gt(rps);
1740        const u32 events = rps->pm_events & pm_iir;
1741
1742        lockdep_assert_held(&gt->irq_lock);
1743
1744        if (unlikely(!events))
1745                return;
1746
1747        GT_TRACE(gt, "irq events:%x\n", events);
1748
1749        gen6_gt_pm_mask_irq(gt, events);
1750
1751        rps->pm_iir |= events;
1752        schedule_work(&rps->work);
1753}
1754
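    /*
     * On gen6-7 the PM interrupt register also carries the VEBOX user and
     * command-parser error interrupts, so service those here for gen < 8.
     */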
1755void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
1756{
1757        struct intel_gt *gt = rps_to_gt(rps);
1758        u32 events;
1759
1760        events = pm_iir & rps->pm_events;
1761        if (events) {
1762                spin_lock(&gt->irq_lock);
1763
1764                GT_TRACE(gt, "irq events:%x\n", events);
1765
1766                gen6_gt_pm_mask_irq(gt, events);
1767                rps->pm_iir |= events;
1768
1769                schedule_work(&rps->work);
1770                spin_unlock(&gt->irq_lock);
1771        }
1772
1773        if (INTEL_GEN(gt->i915) >= 8)
1774                return;
1775
1776        if (pm_iir & PM_VEBOX_USER_INTERRUPT)
1777                intel_engine_signal_breadcrumbs(gt->engine[VECS0]);
1778
1779        if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT)
1780                DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir);
1781}
1782
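    /*
     * Ironlake: hardware signals MEMINT_EVAL_CHG when its busyness averages
     * cross the programmed limits; nudge the frequency up or down one step
     * within the soft limits in response.
     */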
1783void gen5_rps_irq_handler(struct intel_rps *rps)
1784{
1785        struct intel_uncore *uncore = rps_to_uncore(rps);
1786        u32 busy_up, busy_down, max_avg, min_avg;
1787        u8 new_freq;
1788
1789        spin_lock(&mchdev_lock);
1790
1791        intel_uncore_write16(uncore,
1792                             MEMINTRSTS,
1793                             intel_uncore_read(uncore, MEMINTRSTS));
1794
1795        intel_uncore_write16(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);
1796        busy_up = intel_uncore_read(uncore, RCPREVBSYTUPAVG);
1797        busy_down = intel_uncore_read(uncore, RCPREVBSYTDNAVG);
1798        max_avg = intel_uncore_read(uncore, RCBMAXAVG);
1799        min_avg = intel_uncore_read(uncore, RCBMINAVG);
1800
1801        /* Handle RCS change request from hw */
1802        new_freq = rps->cur_freq;
1803        if (busy_up > max_avg)
1804                new_freq++;
1805        else if (busy_down < min_avg)
1806                new_freq--;
1807        new_freq = clamp(new_freq,
1808                         rps->min_freq_softlimit,
1809                         rps->max_freq_softlimit);
1810
1811        if (new_freq != rps->cur_freq && !__gen5_rps_set(rps, new_freq))
1812                rps->cur_freq = new_freq;
1813
1814        spin_unlock(&mchdev_lock);
1815}
1816
1817void intel_rps_init_early(struct intel_rps *rps)
1818{
1819        mutex_init(&rps->lock);
1820        mutex_init(&rps->power.mutex);
1821
1822        INIT_WORK(&rps->work, rps_work);
1823        timer_setup(&rps->timer, rps_timer, 0);
1824
1825        atomic_set(&rps->num_waiters, 0);
1826}
1827
1828void intel_rps_init(struct intel_rps *rps)
1829{
1830        struct drm_i915_private *i915 = rps_to_i915(rps);
1831
1832        if (IS_CHERRYVIEW(i915))
1833                chv_rps_init(rps);
1834        else if (IS_VALLEYVIEW(i915))
1835                vlv_rps_init(rps);
1836        else if (INTEL_GEN(i915) >= 6)
1837                gen6_rps_init(rps);
1838        else if (IS_IRONLAKE_M(i915))
1839                gen5_rps_init(rps);
1840
1841        /* Derive initial user preferences/limits from the hardware limits */
1842        rps->max_freq_softlimit = rps->max_freq;
1843        rps->min_freq_softlimit = rps->min_freq;
1844
1845        /* After setting max-softlimit, find the overclock max freq */
1846        if (IS_GEN(i915, 6) || IS_IVYBRIDGE(i915) || IS_HASWELL(i915)) {
1847                u32 params = 0;
1848
1849                sandybridge_pcode_read(i915, GEN6_READ_OC_PARAMS,
1850                                       &params, NULL);
1851                if (params & BIT(31)) { /* OC supported */
1852                        drm_dbg(&i915->drm,
1853                                "Overclocking supported, max: %dMHz, overclock: %dMHz\n",
1854                                (rps->max_freq & 0xff) * 50,
1855                                (params & 0xff) * 50);
1856                        rps->max_freq = params & 0xff;
1857                }
1858        }
1859
1860        /* Finally allow us to boost to max by default */
1861        rps->boost_freq = rps->max_freq;
1862        rps->idle_freq = rps->min_freq;
1863
1864        /* Start in the middle, from here we will autotune based on workload */
1865        rps->cur_freq = rps->efficient_freq;
1866
1867        rps->pm_intrmsk_mbz = 0;
1868
1869        /*
1870         * SNB, IVB and HSW can hard hang on a looping batchbuffer if
1871         * GEN6_PM_RP_UP_EI_EXPIRED is masked; VLV and CHV may do so as well.
1872         *
1873         * TODO: verify whether this can be reproduced on VLV, CHV.
1874         */
1875        if (INTEL_GEN(i915) <= 7)
1876                rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED;
1877
1878        if (INTEL_GEN(i915) >= 8 && INTEL_GEN(i915) < 11)
1879                rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
1880}
1881
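    /* Keep RPS interrupts quiesced until RPS is explicitly (re)enabled */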
1882void intel_rps_sanitize(struct intel_rps *rps)
1883{
1884        if (INTEL_GEN(rps_to_i915(rps)) >= 6)
1885                rps_disable_interrupts(rps);
1886}
1887
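    /*
     * Decode the CAGF (current actual frequency) field from the
     * platform-specific RPSTAT/MEMSTAT register layout.
     */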
1888u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
1889{
1890        struct drm_i915_private *i915 = rps_to_i915(rps);
1891        u32 cagf;
1892
1893        if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
1894                cagf = (rpstat >> 8) & 0xff;
1895        else if (INTEL_GEN(i915) >= 9)
1896                cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
1897        else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
1898                cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT;
1899        else if (INTEL_GEN(i915) >= 6)
1900                cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;
1901        else
1902                cagf = gen5_invert_freq(rps, (rpstat & MEMSTAT_PSTATE_MASK) >>
1903                                        MEMSTAT_PSTATE_SHIFT);
1904
1905        return cagf;
1906}
1907
1908static u32 read_cagf(struct intel_rps *rps)
1909{
1910        struct drm_i915_private *i915 = rps_to_i915(rps);
1911        struct intel_uncore *uncore = rps_to_uncore(rps);
1912        u32 freq;
1913
1914        if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
1915                vlv_punit_get(i915);
1916                freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
1917                vlv_punit_put(i915);
1918        } else if (INTEL_GEN(i915) >= 6) {
1919                freq = intel_uncore_read(uncore, GEN6_RPSTAT1);
1920        } else {
1921                freq = intel_uncore_read(uncore, MEMSTAT_ILK);
1922        }
1923
1924        return intel_rps_get_cagf(rps, freq);
1925}
1926
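    /*
     * Report the current actual GPU frequency in MHz, or 0 if the device is
     * runtime suspended (the hardware is only sampled while it is awake).
     */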
1927u32 intel_rps_read_actual_frequency(struct intel_rps *rps)
1928{
1929        struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm;
1930        intel_wakeref_t wakeref;
1931        u32 freq = 0;
1932
1933        with_intel_runtime_pm_if_in_use(rpm, wakeref)
1934                freq = intel_gpu_freq(rps, read_cagf(rps));
1935
1936        return freq;
1937}
1938
1939/* External interface for intel_ips.ko */
1940
1941static struct drm_i915_private __rcu *ips_mchdev;
1942
1943/**
1944 * ips_ping_for_i915_load - tell intel_ips that the i915 driver is now loaded
1945 *
     * Tells the intel_ips driver that the i915 driver is now loaded, if
     * IPS got loaded first.
1946 *
1947 * This awkward dance is so that neither module has to depend on the
1948 * other in order for IPS to do the appropriate communication of
1949 * GPU turbo limits to i915.
1950 */
1951static void
1952ips_ping_for_i915_load(void)
1953{
1954        void (*link)(void);
1955
1956        link = symbol_get(ips_link_to_i915_driver);
1957        if (link) {
1958                link();
1959                symbol_put(ips_link_to_i915_driver);
1960        }
1961}
1962
1963void intel_rps_driver_register(struct intel_rps *rps)
1964{
1965        struct intel_gt *gt = rps_to_gt(rps);
1966
1967        /*
1968         * We only register the i915 ips part with intel-ips once everything is
1969         * set up, to avoid intel-ips sneaking in and reading bogus values.
1970         */
1971        if (IS_GEN(gt->i915, 5)) {
1972                GEM_BUG_ON(ips_mchdev);
1973                rcu_assign_pointer(ips_mchdev, gt->i915);
1974                ips_ping_for_i915_load();
1975        }
1976}
1977
1978void intel_rps_driver_unregister(struct intel_rps *rps)
1979{
1980        if (rcu_access_pointer(ips_mchdev) == rps_to_i915(rps))
1981                rcu_assign_pointer(ips_mchdev, NULL);
1982}
1983
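    /*
     * Look up the i915 device registered with IPS and acquire a reference,
     * or return NULL if none is registered or the driver is being unloaded.
     */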
1984static struct drm_i915_private *mchdev_get(void)
1985{
1986        struct drm_i915_private *i915;
1987
1988        rcu_read_lock();
1989        i915 = rcu_dereference(ips_mchdev);
1990        if (i915 && !kref_get_unless_zero(&i915->drm.ref))
1991                i915 = NULL;
1992        rcu_read_unlock();
1993
1994        return i915;
1995}
1996
1997/**
1998 * i915_read_mch_val - return value for IPS use
1999 *
2000 * Calculate and return a value for the IPS driver to use when deciding whether
2001 * we have thermal and power headroom to increase CPU or GPU power budget.
2002 */
2003unsigned long i915_read_mch_val(void)
2004{
2005        struct drm_i915_private *i915;
2006        unsigned long chipset_val = 0;
2007        unsigned long graphics_val = 0;
2008        intel_wakeref_t wakeref;
2009
2010        i915 = mchdev_get();
2011        if (!i915)
2012                return 0;
2013
2014        with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
2015                struct intel_ips *ips = &i915->gt.rps.ips;
2016
2017                spin_lock_irq(&mchdev_lock);
2018                chipset_val = __ips_chipset_val(ips);
2019                graphics_val = __ips_gfx_val(ips);
2020                spin_unlock_irq(&mchdev_lock);
2021        }
2022
2023        drm_dev_put(&i915->drm);
2024        return chipset_val + graphics_val;
2025}
2026EXPORT_SYMBOL_GPL(i915_read_mch_val);
2027
2028/**
2029 * i915_gpu_raise - raise GPU frequency limit
2030 *
2031 * Raise the limit; IPS indicates we have thermal headroom.
2032 */
2033bool i915_gpu_raise(void)
2034{
2035        struct drm_i915_private *i915;
2036        struct intel_rps *rps;
2037
2038        i915 = mchdev_get();
2039        if (!i915)
2040                return false;
2041
2042        rps = &i915->gt.rps;
2043
2044        spin_lock_irq(&mchdev_lock);
2045        if (rps->max_freq_softlimit < rps->max_freq)
2046                rps->max_freq_softlimit++;
2047        spin_unlock_irq(&mchdev_lock);
2048
2049        drm_dev_put(&i915->drm);
2050        return true;
2051}
2052EXPORT_SYMBOL_GPL(i915_gpu_raise);
2053
2054/**
2055 * i915_gpu_lower - lower GPU frequency limit
2056 *
2057 * IPS indicates we're close to a thermal limit, so throttle back the GPU
2058 * frequency maximum.
2059 */
2060bool i915_gpu_lower(void)
2061{
2062        struct drm_i915_private *i915;
2063        struct intel_rps *rps;
2064
2065        i915 = mchdev_get();
2066        if (!i915)
2067                return false;
2068
2069        rps = &i915->gt.rps;
2070
2071        spin_lock_irq(&mchdev_lock);
2072        if (rps->max_freq_softlimit > rps->min_freq)
2073                rps->max_freq_softlimit--;
2074        spin_unlock_irq(&mchdev_lock);
2075
2076        drm_dev_put(&i915->drm);
2077        return true;
2078}
2079EXPORT_SYMBOL_GPL(i915_gpu_lower);
2080
2081/**
2082 * i915_gpu_busy - indicate GPU busyness to IPS
2083 *
2084 * Tell the IPS driver whether or not the GPU is busy.
2085 */
2086bool i915_gpu_busy(void)
2087{
2088        struct drm_i915_private *i915;
2089        bool ret;
2090
2091        i915 = mchdev_get();
2092        if (!i915)
2093                return false;
2094
2095        ret = i915->gt.awake;
2096
2097        drm_dev_put(&i915->drm);
2098        return ret;
2099}
2100EXPORT_SYMBOL_GPL(i915_gpu_busy);
2101
2102/**
2103 * i915_gpu_turbo_disable - disable graphics turbo
2104 *
2105 * Disable graphics turbo by clamping the maximum soft limit to the minimum
2106 * frequency and dropping the current frequency to match.
2107 */
2108bool i915_gpu_turbo_disable(void)
2109{
2110        struct drm_i915_private *i915;
2111        struct intel_rps *rps;
2112        bool ret;
2113
2114        i915 = mchdev_get();
2115        if (!i915)
2116                return false;
2117
2118        rps = &i915->gt.rps;
2119
2120        spin_lock_irq(&mchdev_lock);
2121        rps->max_freq_softlimit = rps->min_freq;
2122        ret = !__gen5_rps_set(&i915->gt.rps, rps->min_freq);
2123        spin_unlock_irq(&mchdev_lock);
2124
2125        drm_dev_put(&i915->drm);
2126        return ret;
2127}
2128EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
2129
2130#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
2131#include "selftest_rps.c"
2132#endif
2133