linux/drivers/gpu/drm/i915/gt/selftest_rps.c
<<
>>
Prefs
   1// SPDX-License-Identifier: MIT
   2/*
   3 * Copyright © 2020 Intel Corporation
   4 */
   5
   6#include <linux/pm_qos.h>
   7#include <linux/sort.h>
   8
   9#include "intel_engine_heartbeat.h"
  10#include "intel_engine_pm.h"
  11#include "intel_gpu_commands.h"
  12#include "intel_gt_clock_utils.h"
  13#include "intel_gt_pm.h"
  14#include "intel_rc6.h"
  15#include "selftest_engine_heartbeat.h"
  16#include "selftest_rps.h"
  17#include "selftests/igt_flush_test.h"
  18#include "selftests/igt_spinner.h"
  19#include "selftests/librapl.h"
  20
/* Try to isolate the impact of cstates from determining frequency response */
  22#define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */
  23
  24static void dummy_rps_work(struct work_struct *wrk)
  25{
  26}
  27
  28static int cmp_u64(const void *A, const void *B)
  29{
  30        const u64 *a = A, *b = B;
  31
  32        if (*a < *b)
  33                return -1;
  34        else if (*a > *b)
  35                return 1;
  36        else
  37                return 0;
  38}
  39
  40static int cmp_u32(const void *A, const void *B)
  41{
  42        const u32 *a = A, *b = B;
  43
  44        if (*a < *b)
  45                return -1;
  46        else if (*a > *b)
  47                return 1;
  48        else
  49                return 0;
  50}
  51
  52static struct i915_vma *
  53create_spin_counter(struct intel_engine_cs *engine,
  54                    struct i915_address_space *vm,
  55                    bool srm,
  56                    u32 **cancel,
  57                    u32 **counter)
  58{
  59        enum {
  60                COUNT,
  61                INC,
  62                __NGPR__,
  63        };
  64#define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
  65        struct drm_i915_gem_object *obj;
  66        struct i915_vma *vma;
  67        unsigned long end;
  68        u32 *base, *cs;
  69        int loop, i;
  70        int err;
  71
  72        obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
  73        if (IS_ERR(obj))
  74                return ERR_CAST(obj);
  75
  76        end = obj->base.size / sizeof(u32) - 1;
  77
  78        vma = i915_vma_instance(obj, vm, NULL);
  79        if (IS_ERR(vma)) {
  80                err = PTR_ERR(vma);
  81                goto err_put;
  82        }
  83
  84        err = i915_vma_pin(vma, 0, 0, PIN_USER);
  85        if (err)
  86                goto err_unlock;
  87
  88        i915_vma_lock(vma);
  89
  90        base = i915_gem_object_pin_map(obj, I915_MAP_WC);
  91        if (IS_ERR(base)) {
  92                err = PTR_ERR(base);
  93                goto err_unpin;
  94        }
  95        cs = base;
  96
  97        *cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
  98        for (i = 0; i < __NGPR__; i++) {
  99                *cs++ = i915_mmio_reg_offset(CS_GPR(i));
 100                *cs++ = 0;
 101                *cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
 102                *cs++ = 0;
 103        }
 104
 105        *cs++ = MI_LOAD_REGISTER_IMM(1);
 106        *cs++ = i915_mmio_reg_offset(CS_GPR(INC));
 107        *cs++ = 1;
 108
 109        loop = cs - base;
 110
 111        /* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
 112        for (i = 0; i < 1024; i++) {
 113                *cs++ = MI_MATH(4);
 114                *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
 115                *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
 116                *cs++ = MI_MATH_ADD;
 117                *cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);
 118
 119                if (srm) {
 120                        *cs++ = MI_STORE_REGISTER_MEM_GEN8;
 121                        *cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
 122                        *cs++ = lower_32_bits(vma->node.start + end * sizeof(*cs));
 123                        *cs++ = upper_32_bits(vma->node.start + end * sizeof(*cs));
 124                }
 125        }
 126
 127        *cs++ = MI_BATCH_BUFFER_START_GEN8;
 128        *cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
 129        *cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
 130        GEM_BUG_ON(cs - base > end);
 131
 132        i915_gem_object_flush_map(obj);
 133
 134        *cancel = base + loop;
 135        *counter = srm ? memset32(base + end, 0, 1) : NULL;
 136        return vma;
 137
 138err_unpin:
 139        i915_vma_unpin(vma);
 140err_unlock:
 141        i915_vma_unlock(vma);
 142err_put:
 143        i915_gem_object_put(obj);
 144        return ERR_PTR(err);
 145}
 146
 147static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
 148{
 149        u8 history[64], i;
 150        unsigned long end;
 151        int sleep;
 152
 153        i = 0;
 154        memset(history, freq, sizeof(history));
 155        sleep = 20;
 156
 157        /* The PCU does not change instantly, but drifts towards the goal? */
 158        end = jiffies + msecs_to_jiffies(timeout_ms);
 159        do {
 160                u8 act;
 161
 162                act = read_cagf(rps);
 163                if (time_after(jiffies, end))
 164                        return act;
 165
 166                /* Target acquired */
 167                if (act == freq)
 168                        return act;
 169
 170                /* Any change within the last N samples? */
 171                if (!memchr_inv(history, act, sizeof(history)))
 172                        return act;
 173
 174                history[i] = act;
 175                i = (i + 1) % ARRAY_SIZE(history);
 176
 177                usleep_range(sleep, 2 * sleep);
 178                sleep *= 2;
 179                if (sleep > timeout_ms * 20)
 180                        sleep = timeout_ms * 20;
 181        } while (1);
 182}
 183
 184static u8 rps_set_check(struct intel_rps *rps, u8 freq)
 185{
 186        mutex_lock(&rps->lock);
 187        GEM_BUG_ON(!intel_rps_is_active(rps));
 188        intel_rps_set(rps, freq);
 189        GEM_BUG_ON(rps->last_freq != freq);
 190        mutex_unlock(&rps->lock);
 191
 192        return wait_for_freq(rps, freq, 50);
 193}
 194
 195static void show_pstate_limits(struct intel_rps *rps)
 196{
 197        struct drm_i915_private *i915 = rps_to_i915(rps);
 198
 199        if (IS_BROXTON(i915)) {
 200                pr_info("P_STATE_CAP[%x]: 0x%08x\n",
 201                        i915_mmio_reg_offset(BXT_RP_STATE_CAP),
 202                        intel_uncore_read(rps_to_uncore(rps),
 203                                          BXT_RP_STATE_CAP));
 204        } else if (IS_GEN(i915, 9)) {
 205                pr_info("P_STATE_LIMITS[%x]: 0x%08x\n",
 206                        i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS),
 207                        intel_uncore_read(rps_to_uncore(rps),
 208                                          GEN9_RP_STATE_LIMITS));
 209        }
 210}
 211
 212int live_rps_clock_interval(void *arg)
 213{
 214        struct intel_gt *gt = arg;
 215        struct intel_rps *rps = &gt->rps;
 216        void (*saved_work)(struct work_struct *wrk);
 217        struct intel_engine_cs *engine;
 218        enum intel_engine_id id;
 219        struct igt_spinner spin;
 220        int err = 0;
 221
 222        if (!intel_rps_is_enabled(rps))
 223                return 0;
 224
 225        if (igt_spinner_init(&spin, gt))
 226                return -ENOMEM;
 227
 228        intel_gt_pm_wait_for_idle(gt);
 229        saved_work = rps->work.func;
 230        rps->work.func = dummy_rps_work;
 231
 232        intel_gt_pm_get(gt);
 233        intel_rps_disable(&gt->rps);
 234
 235        intel_gt_check_clock_frequency(gt);
 236
 237        for_each_engine(engine, gt, id) {
 238                struct i915_request *rq;
 239                u32 cycles;
 240                u64 dt;
 241
 242                if (!intel_engine_can_store_dword(engine))
 243                        continue;
 244
 245                st_engine_heartbeat_disable(engine);
 246
 247                rq = igt_spinner_create_request(&spin,
 248                                                engine->kernel_context,
 249                                                MI_NOOP);
 250                if (IS_ERR(rq)) {
 251                        st_engine_heartbeat_enable(engine);
 252                        err = PTR_ERR(rq);
 253                        break;
 254                }
 255
 256                i915_request_add(rq);
 257
 258                if (!igt_wait_for_spinner(&spin, rq)) {
 259                        pr_err("%s: RPS spinner did not start\n",
 260                               engine->name);
 261                        igt_spinner_end(&spin);
 262                        st_engine_heartbeat_enable(engine);
 263                        intel_gt_set_wedged(engine->gt);
 264                        err = -EIO;
 265                        break;
 266                }
 267
 268                intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
 269
 270                intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0);
 271
 272                /* Set the evaluation interval to infinity! */
 273                intel_uncore_write_fw(gt->uncore,
 274                                      GEN6_RP_UP_EI, 0xffffffff);
 275                intel_uncore_write_fw(gt->uncore,
 276                                      GEN6_RP_UP_THRESHOLD, 0xffffffff);
 277
 278                intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL,
 279                                      GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG);
 280
 281                if (wait_for(intel_uncore_read_fw(gt->uncore,
 282                                                  GEN6_RP_CUR_UP_EI),
 283                             10)) {
 284                        /* Just skip the test; assume lack of HW support */
 285                        pr_notice("%s: rps evaluation interval not ticking\n",
 286                                  engine->name);
 287                        err = -ENODEV;
 288                } else {
 289                        ktime_t dt_[5];
 290                        u32 cycles_[5];
 291                        int i;
 292
 293                        for (i = 0; i < 5; i++) {
 294                                preempt_disable();
 295
 296                                dt_[i] = ktime_get();
 297                                cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
 298
 299                                udelay(1000);
 300
 301                                dt_[i] = ktime_sub(ktime_get(), dt_[i]);
 302                                cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
 303
 304                                preempt_enable();
 305                        }
 306
 307                        /* Use the median of both cycle/dt; close enough */
 308                        sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL);
 309                        cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4;
 310                        sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL);
 311                        dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4);
 312                }
 313
 314                intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0);
 315                intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
 316
 317                igt_spinner_end(&spin);
 318                st_engine_heartbeat_enable(engine);
 319
 320                if (err == 0) {
 321                        u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
 322                        u32 expected =
 323                                intel_gt_ns_to_pm_interval(gt, dt);
 324
 325                        pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n",
 326                                engine->name, cycles, time, dt, expected,
 327                                gt->clock_frequency / 1000);
 328
 329                        if (10 * time < 8 * dt ||
 330                            8 * time > 10 * dt) {
 331                                pr_err("%s: rps clock time does not match walltime!\n",
 332                                       engine->name);
 333                                err = -EINVAL;
 334                        }
 335
 336                        if (10 * expected < 8 * cycles ||
 337                            8 * expected > 10 * cycles) {
 338                                pr_err("%s: walltime does not match rps clock ticks!\n",
 339                                       engine->name);
 340                                err = -EINVAL;
 341                        }
 342                }
 343
 344                if (igt_flush_test(gt->i915))
 345                        err = -EIO;
 346
 347                break; /* once is enough */
 348        }
 349
 350        intel_rps_enable(&gt->rps);
 351        intel_gt_pm_put(gt);
 352
 353        igt_spinner_fini(&spin);
 354
 355        intel_gt_pm_wait_for_idle(gt);
 356        rps->work.func = saved_work;
 357
 358        if (err == -ENODEV) /* skipped, don't report a fail */
 359                err = 0;
 360
 361        return err;
 362}
 363
 364int live_rps_control(void *arg)
 365{
 366        struct intel_gt *gt = arg;
 367        struct intel_rps *rps = &gt->rps;
 368        void (*saved_work)(struct work_struct *wrk);
 369        struct intel_engine_cs *engine;
 370        enum intel_engine_id id;
 371        struct igt_spinner spin;
 372        int err = 0;
 373
 374        /*
 375         * Check that the actual frequency matches our requested frequency,
 376         * to verify our control mechanism. We have to be careful that the
 377         * PCU may throttle the GPU in which case the actual frequency used
 378         * will be lowered than requested.
 379         */
 380
 381        if (!intel_rps_is_enabled(rps))
 382                return 0;
 383
 384        if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
 385                return 0;
 386
 387        if (igt_spinner_init(&spin, gt))
 388                return -ENOMEM;
 389
 390        intel_gt_pm_wait_for_idle(gt);
 391        saved_work = rps->work.func;
 392        rps->work.func = dummy_rps_work;
 393
 394        intel_gt_pm_get(gt);
 395        for_each_engine(engine, gt, id) {
 396                struct i915_request *rq;
 397                ktime_t min_dt, max_dt;
 398                int f, limit;
 399                int min, max;
 400
 401                if (!intel_engine_can_store_dword(engine))
 402                        continue;
 403
 404                st_engine_heartbeat_disable(engine);
 405
 406                rq = igt_spinner_create_request(&spin,
 407                                                engine->kernel_context,
 408                                                MI_NOOP);
 409                if (IS_ERR(rq)) {
 410                        err = PTR_ERR(rq);
 411                        break;
 412                }
 413
 414                i915_request_add(rq);
 415
 416                if (!igt_wait_for_spinner(&spin, rq)) {
 417                        pr_err("%s: RPS spinner did not start\n",
 418                               engine->name);
 419                        igt_spinner_end(&spin);
 420                        st_engine_heartbeat_enable(engine);
 421                        intel_gt_set_wedged(engine->gt);
 422                        err = -EIO;
 423                        break;
 424                }
 425
 426                if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
 427                        pr_err("%s: could not set minimum frequency [%x], only %x!\n",
 428                               engine->name, rps->min_freq, read_cagf(rps));
 429                        igt_spinner_end(&spin);
 430                        st_engine_heartbeat_enable(engine);
 431                        show_pstate_limits(rps);
 432                        err = -EINVAL;
 433                        break;
 434                }
 435
 436                for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
 437                        if (rps_set_check(rps, f) < f)
 438                                break;
 439                }
 440
 441                limit = rps_set_check(rps, f);
 442
 443                if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
 444                        pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
 445                               engine->name, rps->min_freq, read_cagf(rps));
 446                        igt_spinner_end(&spin);
 447                        st_engine_heartbeat_enable(engine);
 448                        show_pstate_limits(rps);
 449                        err = -EINVAL;
 450                        break;
 451                }
 452
 453                max_dt = ktime_get();
 454                max = rps_set_check(rps, limit);
 455                max_dt = ktime_sub(ktime_get(), max_dt);
 456
 457                min_dt = ktime_get();
 458                min = rps_set_check(rps, rps->min_freq);
 459                min_dt = ktime_sub(ktime_get(), min_dt);
 460
 461                igt_spinner_end(&spin);
 462                st_engine_heartbeat_enable(engine);
 463
 464                pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
 465                        engine->name,
 466                        rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
 467                        rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
 468                        limit, intel_gpu_freq(rps, limit),
 469                        min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));
 470
 471                if (limit == rps->min_freq) {
 472                        pr_err("%s: GPU throttled to minimum!\n",
 473                               engine->name);
 474                        show_pstate_limits(rps);
 475                        err = -ENODEV;
 476                        break;
 477                }
 478
 479                if (igt_flush_test(gt->i915)) {
 480                        err = -EIO;
 481                        break;
 482                }
 483        }
 484        intel_gt_pm_put(gt);
 485
 486        igt_spinner_fini(&spin);
 487
 488        intel_gt_pm_wait_for_idle(gt);
 489        rps->work.func = saved_work;
 490
 491        return err;
 492}
 493
 494static void show_pcu_config(struct intel_rps *rps)
 495{
 496        struct drm_i915_private *i915 = rps_to_i915(rps);
 497        unsigned int max_gpu_freq, min_gpu_freq;
 498        intel_wakeref_t wakeref;
 499        int gpu_freq;
 500
 501        if (!HAS_LLC(i915))
 502                return;
 503
 504        min_gpu_freq = rps->min_freq;
 505        max_gpu_freq = rps->max_freq;
 506        if (INTEL_GEN(i915) >= 9) {
 507                /* Convert GT frequency to 50 HZ units */
 508                min_gpu_freq /= GEN9_FREQ_SCALER;
 509                max_gpu_freq /= GEN9_FREQ_SCALER;
 510        }
 511
 512        wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm);
 513
 514        pr_info("%5s  %5s  %5s\n", "GPU", "eCPU", "eRing");
 515        for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
 516                int ia_freq = gpu_freq;
 517
 518                sandybridge_pcode_read(i915,
 519                                       GEN6_PCODE_READ_MIN_FREQ_TABLE,
 520                                       &ia_freq, NULL);
 521
 522                pr_info("%5d  %5d  %5d\n",
 523                        gpu_freq * 50,
 524                        ((ia_freq >> 0) & 0xff) * 100,
 525                        ((ia_freq >> 8) & 0xff) * 100);
 526        }
 527
 528        intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref);
 529}
 530
 531static u64 __measure_frequency(u32 *cntr, int duration_ms)
 532{
 533        u64 dc, dt;
 534
 535        dt = ktime_get();
 536        dc = READ_ONCE(*cntr);
 537        usleep_range(1000 * duration_ms, 2000 * duration_ms);
 538        dc = READ_ONCE(*cntr) - dc;
 539        dt = ktime_get() - dt;
 540
 541        return div64_u64(1000 * 1000 * dc, dt);
 542}
 543
 544static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
 545{
 546        u64 x[5];
 547        int i;
 548
 549        *freq = rps_set_check(rps, *freq);
 550        for (i = 0; i < 5; i++)
 551                x[i] = __measure_frequency(cntr, 2);
 552        *freq = (*freq + read_cagf(rps)) / 2;
 553
 554        /* A simple triangle filter for better result stability */
 555        sort(x, 5, sizeof(*x), cmp_u64, NULL);
 556        return div_u64(x[1] + 2 * x[2] + x[3], 4);
 557}
 558
 559static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
 560                                  int duration_ms)
 561{
 562        u64 dc, dt;
 563
 564        dt = ktime_get();
 565        dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
 566        usleep_range(1000 * duration_ms, 2000 * duration_ms);
 567        dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
 568        dt = ktime_get() - dt;
 569
 570        return div64_u64(1000 * 1000 * dc, dt);
 571}
 572
 573static u64 measure_cs_frequency_at(struct intel_rps *rps,
 574                                   struct intel_engine_cs *engine,
 575                                   int *freq)
 576{
 577        u64 x[5];
 578        int i;
 579
 580        *freq = rps_set_check(rps, *freq);
 581        for (i = 0; i < 5; i++)
 582                x[i] = __measure_cs_frequency(engine, 2);
 583        *freq = (*freq + read_cagf(rps)) / 2;
 584
 585        /* A simple triangle filter for better result stability */
 586        sort(x, 5, sizeof(*x), cmp_u64, NULL);
 587        return div_u64(x[1] + 2 * x[2] + x[3], 4);
 588}
 589
 590static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
 591{
 592        return f_d * x > f_n * y && f_n * x < f_d * y;
 593}
 594
 595int live_rps_frequency_cs(void *arg)
 596{
 597        void (*saved_work)(struct work_struct *wrk);
 598        struct intel_gt *gt = arg;
 599        struct intel_rps *rps = &gt->rps;
 600        struct intel_engine_cs *engine;
 601        struct pm_qos_request qos;
 602        enum intel_engine_id id;
 603        int err = 0;
 604
 605        /*
 606         * The premise is that the GPU does change freqency at our behest.
 607         * Let's check there is a correspondence between the requested
 608         * frequency, the actual frequency, and the observed clock rate.
 609         */
 610
 611        if (!intel_rps_is_enabled(rps))
 612                return 0;
 613
 614        if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
 615                return 0;
 616
 617        if (CPU_LATENCY >= 0)
 618                cpu_latency_qos_add_request(&qos, CPU_LATENCY);
 619
 620        intel_gt_pm_wait_for_idle(gt);
 621        saved_work = rps->work.func;
 622        rps->work.func = dummy_rps_work;
 623
 624        for_each_engine(engine, gt, id) {
 625                struct i915_request *rq;
 626                struct i915_vma *vma;
 627                u32 *cancel, *cntr;
 628                struct {
 629                        u64 count;
 630                        int freq;
 631                } min, max;
 632
 633                st_engine_heartbeat_disable(engine);
 634
 635                vma = create_spin_counter(engine,
 636                                          engine->kernel_context->vm, false,
 637                                          &cancel, &cntr);
 638                if (IS_ERR(vma)) {
 639                        err = PTR_ERR(vma);
 640                        st_engine_heartbeat_enable(engine);
 641                        break;
 642                }
 643
 644                rq = intel_engine_create_kernel_request(engine);
 645                if (IS_ERR(rq)) {
 646                        err = PTR_ERR(rq);
 647                        goto err_vma;
 648                }
 649
 650                err = i915_request_await_object(rq, vma->obj, false);
 651                if (!err)
 652                        err = i915_vma_move_to_active(vma, rq, 0);
 653                if (!err)
 654                        err = rq->engine->emit_bb_start(rq,
 655                                                        vma->node.start,
 656                                                        PAGE_SIZE, 0);
 657                i915_request_add(rq);
 658                if (err)
 659                        goto err_vma;
 660
 661                if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
 662                             10)) {
 663                        pr_err("%s: timed loop did not start\n",
 664                               engine->name);
 665                        goto err_vma;
 666                }
 667
 668                min.freq = rps->min_freq;
 669                min.count = measure_cs_frequency_at(rps, engine, &min.freq);
 670
 671                max.freq = rps->max_freq;
 672                max.count = measure_cs_frequency_at(rps, engine, &max.freq);
 673
 674                pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
 675                        engine->name,
 676                        min.count, intel_gpu_freq(rps, min.freq),
 677                        max.count, intel_gpu_freq(rps, max.freq),
 678                        (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
 679                                                     max.freq * min.count));
 680
 681                if (!scaled_within(max.freq * min.count,
 682                                   min.freq * max.count,
 683                                   2, 3)) {
 684                        int f;
 685
 686                        pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
 687                               engine->name,
 688                               max.freq * min.count,
 689                               min.freq * max.count);
 690                        show_pcu_config(rps);
 691
 692                        for (f = min.freq + 1; f <= rps->max_freq; f++) {
 693                                int act = f;
 694                                u64 count;
 695
 696                                count = measure_cs_frequency_at(rps, engine, &act);
 697                                if (act < f)
 698                                        break;
 699
 700                                pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
 701                                        engine->name,
 702                                        act, intel_gpu_freq(rps, act), count,
 703                                        (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
 704                                                                     act * min.count));
 705
 706                                f = act; /* may skip ahead [pcu granularity] */
 707                        }
 708
 709                        err = -EINTR; /* ignore error, continue on with test */
 710                }
 711
 712err_vma:
 713                *cancel = MI_BATCH_BUFFER_END;
 714                i915_gem_object_flush_map(vma->obj);
 715                i915_gem_object_unpin_map(vma->obj);
 716                i915_vma_unpin(vma);
 717                i915_vma_unlock(vma);
 718                i915_vma_put(vma);
 719
 720                st_engine_heartbeat_enable(engine);
 721                if (igt_flush_test(gt->i915))
 722                        err = -EIO;
 723                if (err)
 724                        break;
 725        }
 726
 727        intel_gt_pm_wait_for_idle(gt);
 728        rps->work.func = saved_work;
 729
 730        if (CPU_LATENCY >= 0)
 731                cpu_latency_qos_remove_request(&qos);
 732
 733        return err;
 734}
 735
 736int live_rps_frequency_srm(void *arg)
 737{
 738        void (*saved_work)(struct work_struct *wrk);
 739        struct intel_gt *gt = arg;
 740        struct intel_rps *rps = &gt->rps;
 741        struct intel_engine_cs *engine;
 742        struct pm_qos_request qos;
 743        enum intel_engine_id id;
 744        int err = 0;
 745
 746        /*
 747         * The premise is that the GPU does change freqency at our behest.
 748         * Let's check there is a correspondence between the requested
 749         * frequency, the actual frequency, and the observed clock rate.
 750         */
 751
 752        if (!intel_rps_is_enabled(rps))
 753                return 0;
 754
 755        if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
 756                return 0;
 757
 758        if (CPU_LATENCY >= 0)
 759                cpu_latency_qos_add_request(&qos, CPU_LATENCY);
 760
 761        intel_gt_pm_wait_for_idle(gt);
 762        saved_work = rps->work.func;
 763        rps->work.func = dummy_rps_work;
 764
 765        for_each_engine(engine, gt, id) {
 766                struct i915_request *rq;
 767                struct i915_vma *vma;
 768                u32 *cancel, *cntr;
 769                struct {
 770                        u64 count;
 771                        int freq;
 772                } min, max;
 773
 774                st_engine_heartbeat_disable(engine);
 775
 776                vma = create_spin_counter(engine,
 777                                          engine->kernel_context->vm, true,
 778                                          &cancel, &cntr);
 779                if (IS_ERR(vma)) {
 780                        err = PTR_ERR(vma);
 781                        st_engine_heartbeat_enable(engine);
 782                        break;
 783                }
 784
 785                rq = intel_engine_create_kernel_request(engine);
 786                if (IS_ERR(rq)) {
 787                        err = PTR_ERR(rq);
 788                        goto err_vma;
 789                }
 790
 791                err = i915_request_await_object(rq, vma->obj, false);
 792                if (!err)
 793                        err = i915_vma_move_to_active(vma, rq, 0);
 794                if (!err)
 795                        err = rq->engine->emit_bb_start(rq,
 796                                                        vma->node.start,
 797                                                        PAGE_SIZE, 0);
 798                i915_request_add(rq);
 799                if (err)
 800                        goto err_vma;
 801
 802                if (wait_for(READ_ONCE(*cntr), 10)) {
 803                        pr_err("%s: timed loop did not start\n",
 804                               engine->name);
 805                        goto err_vma;
 806                }
 807
 808                min.freq = rps->min_freq;
 809                min.count = measure_frequency_at(rps, cntr, &min.freq);
 810
 811                max.freq = rps->max_freq;
 812                max.count = measure_frequency_at(rps, cntr, &max.freq);
 813
 814                pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
 815                        engine->name,
 816                        min.count, intel_gpu_freq(rps, min.freq),
 817                        max.count, intel_gpu_freq(rps, max.freq),
 818                        (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
 819                                                     max.freq * min.count));
 820
 821                if (!scaled_within(max.freq * min.count,
 822                                   min.freq * max.count,
 823                                   1, 2)) {
 824                        int f;
 825
 826                        pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
 827                               engine->name,
 828                               max.freq * min.count,
 829                               min.freq * max.count);
 830                        show_pcu_config(rps);
 831
 832                        for (f = min.freq + 1; f <= rps->max_freq; f++) {
 833                                int act = f;
 834                                u64 count;
 835
 836                                count = measure_frequency_at(rps, cntr, &act);
 837                                if (act < f)
 838                                        break;
 839
 840                                pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
 841                                        engine->name,
 842                                        act, intel_gpu_freq(rps, act), count,
 843                                        (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
 844                                                                     act * min.count));
 845
 846                                f = act; /* may skip ahead [pcu granularity] */
 847                        }
 848
 849                        err = -EINTR; /* ignore error, continue on with test */
 850                }
 851
 852err_vma:
 853                *cancel = MI_BATCH_BUFFER_END;
 854                i915_gem_object_flush_map(vma->obj);
 855                i915_gem_object_unpin_map(vma->obj);
 856                i915_vma_unpin(vma);
 857                i915_vma_unlock(vma);
 858                i915_vma_put(vma);
 859
 860                st_engine_heartbeat_enable(engine);
 861                if (igt_flush_test(gt->i915))
 862                        err = -EIO;
 863                if (err)
 864                        break;
 865        }
 866
 867        intel_gt_pm_wait_for_idle(gt);
 868        rps->work.func = saved_work;
 869
 870        if (CPU_LATENCY >= 0)
 871                cpu_latency_qos_remove_request(&qos);
 872
 873        return err;
 874}
 875
 876static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
 877{
 878        /* Flush any previous EI */
 879        usleep_range(timeout_us, 2 * timeout_us);
 880
 881        /* Reset the interrupt status */
 882        rps_disable_interrupts(rps);
 883        GEM_BUG_ON(rps->pm_iir);
 884        rps_enable_interrupts(rps);
 885
 886        /* And then wait for the timeout, for real this time */
 887        usleep_range(2 * timeout_us, 3 * timeout_us);
 888}
 889
 890static int __rps_up_interrupt(struct intel_rps *rps,
 891                              struct intel_engine_cs *engine,
 892                              struct igt_spinner *spin)
 893{
 894        struct intel_uncore *uncore = engine->uncore;
 895        struct i915_request *rq;
 896        u32 timeout;
 897
 898        if (!intel_engine_can_store_dword(engine))
 899                return 0;
 900
 901        rps_set_check(rps, rps->min_freq);
 902
 903        rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
 904        if (IS_ERR(rq))
 905                return PTR_ERR(rq);
 906
 907        i915_request_get(rq);
 908        i915_request_add(rq);
 909
 910        if (!igt_wait_for_spinner(spin, rq)) {
 911                pr_err("%s: RPS spinner did not start\n",
 912                       engine->name);
 913                i915_request_put(rq);
 914                intel_gt_set_wedged(engine->gt);
 915                return -EIO;
 916        }
 917
 918        if (!intel_rps_is_active(rps)) {
 919                pr_err("%s: RPS not enabled on starting spinner\n",
 920                       engine->name);
 921                igt_spinner_end(spin);
 922                i915_request_put(rq);
 923                return -EINVAL;
 924        }
 925
 926        if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
 927                pr_err("%s: RPS did not register UP interrupt\n",
 928                       engine->name);
 929                i915_request_put(rq);
 930                return -EINVAL;
 931        }
 932
 933        if (rps->last_freq != rps->min_freq) {
 934                pr_err("%s: RPS did not program min frequency\n",
 935                       engine->name);
 936                i915_request_put(rq);
 937                return -EINVAL;
 938        }
 939
 940        timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
 941        timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
 942        timeout = DIV_ROUND_UP(timeout, 1000);
 943
 944        sleep_for_ei(rps, timeout);
 945        GEM_BUG_ON(i915_request_completed(rq));
 946
 947        igt_spinner_end(spin);
 948        i915_request_put(rq);
 949
 950        if (rps->cur_freq != rps->min_freq) {
 951                pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
 952                       engine->name, intel_rps_read_actual_frequency(rps));
 953                return -EINVAL;
 954        }
 955
 956        if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
 957                pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
 958                       engine->name, rps->pm_iir,
 959                       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
 960                       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
 961                       intel_uncore_read(uncore, GEN6_RP_UP_EI));
 962                return -EINVAL;
 963        }
 964
 965        return 0;
 966}
 967
 968static int __rps_down_interrupt(struct intel_rps *rps,
 969                                struct intel_engine_cs *engine)
 970{
 971        struct intel_uncore *uncore = engine->uncore;
 972        u32 timeout;
 973
 974        rps_set_check(rps, rps->max_freq);
 975
 976        if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
 977                pr_err("%s: RPS did not register DOWN interrupt\n",
 978                       engine->name);
 979                return -EINVAL;
 980        }
 981
 982        if (rps->last_freq != rps->max_freq) {
 983                pr_err("%s: RPS did not program max frequency\n",
 984                       engine->name);
 985                return -EINVAL;
 986        }
 987
 988        timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
 989        timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
 990        timeout = DIV_ROUND_UP(timeout, 1000);
 991
 992        sleep_for_ei(rps, timeout);
 993
 994        if (rps->cur_freq != rps->max_freq) {
 995                pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
 996                       engine->name,
 997                       intel_rps_read_actual_frequency(rps));
 998                return -EINVAL;
 999        }
1000
1001        if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
1002                pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
1003                       engine->name, rps->pm_iir,
1004                       intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
1005                       intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
1006                       intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
1007                       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
1008                       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
1009                       intel_uncore_read(uncore, GEN6_RP_UP_EI));
1010                return -EINVAL;
1011        }
1012
1013        return 0;
1014}
1015
1016int live_rps_interrupt(void *arg)
1017{
1018        struct intel_gt *gt = arg;
1019        struct intel_rps *rps = &gt->rps;
1020        void (*saved_work)(struct work_struct *wrk);
1021        struct intel_engine_cs *engine;
1022        enum intel_engine_id id;
1023        struct igt_spinner spin;
1024        u32 pm_events;
1025        int err = 0;
1026
1027        /*
1028         * First, let's check whether or not we are receiving interrupts.
1029         */
1030
1031        if (!intel_rps_has_interrupts(rps))
1032                return 0;
1033
1034        intel_gt_pm_get(gt);
1035        pm_events = rps->pm_events;
1036        intel_gt_pm_put(gt);
1037        if (!pm_events) {
1038                pr_err("No RPS PM events registered, but RPS is enabled?\n");
1039                return -ENODEV;
1040        }
1041
1042        if (igt_spinner_init(&spin, gt))
1043                return -ENOMEM;
1044
1045        intel_gt_pm_wait_for_idle(gt);
1046        saved_work = rps->work.func;
1047        rps->work.func = dummy_rps_work;
1048
1049        for_each_engine(engine, gt, id) {
1050                /* Keep the engine busy with a spinner; expect an UP! */
1051                if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
1052                        intel_gt_pm_wait_for_idle(engine->gt);
1053                        GEM_BUG_ON(intel_rps_is_active(rps));
1054
1055                        st_engine_heartbeat_disable(engine);
1056
1057                        err = __rps_up_interrupt(rps, engine, &spin);
1058
1059                        st_engine_heartbeat_enable(engine);
1060                        if (err)
1061                                goto out;
1062
1063                        intel_gt_pm_wait_for_idle(engine->gt);
1064                }
1065
1066                /* Keep the engine awake but idle and check for DOWN */
1067                if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
1068                        st_engine_heartbeat_disable(engine);
1069                        intel_rc6_disable(&gt->rc6);
1070
1071                        err = __rps_down_interrupt(rps, engine);
1072
1073                        intel_rc6_enable(&gt->rc6);
1074                        st_engine_heartbeat_enable(engine);
1075                        if (err)
1076                                goto out;
1077                }
1078        }
1079
1080out:
1081        if (igt_flush_test(gt->i915))
1082                err = -EIO;
1083
1084        igt_spinner_fini(&spin);
1085
1086        intel_gt_pm_wait_for_idle(gt);
1087        rps->work.func = saved_work;
1088
1089        return err;
1090}
1091
1092static u64 __measure_power(int duration_ms)
1093{
1094        u64 dE, dt;
1095
1096        dt = ktime_get();
1097        dE = librapl_energy_uJ();
1098        usleep_range(1000 * duration_ms, 2000 * duration_ms);
1099        dE = librapl_energy_uJ() - dE;
1100        dt = ktime_get() - dt;
1101
1102        return div64_u64(1000 * 1000 * dE, dt);
1103}
1104
1105static u64 measure_power_at(struct intel_rps *rps, int *freq)
1106{
1107        u64 x[5];
1108        int i;
1109
1110        *freq = rps_set_check(rps, *freq);
1111        for (i = 0; i < 5; i++)
1112                x[i] = __measure_power(5);
1113        *freq = (*freq + read_cagf(rps)) / 2;
1114
1115        /* A simple triangle filter for better result stability */
1116        sort(x, 5, sizeof(*x), cmp_u64, NULL);
1117        return div_u64(x[1] + 2 * x[2] + x[3], 4);
1118}
1119
1120int live_rps_power(void *arg)
1121{
1122        struct intel_gt *gt = arg;
1123        struct intel_rps *rps = &gt->rps;
1124        void (*saved_work)(struct work_struct *wrk);
1125        struct intel_engine_cs *engine;
1126        enum intel_engine_id id;
1127        struct igt_spinner spin;
1128        int err = 0;
1129
1130        /*
1131         * Our fundamental assumption is that running at lower frequency
1132         * actually saves power. Let's see if our RAPL measurement support
1133         * that theory.
1134         */
1135
1136        if (!intel_rps_is_enabled(rps))
1137                return 0;
1138
1139        if (!librapl_energy_uJ())
1140                return 0;
1141
1142        if (igt_spinner_init(&spin, gt))
1143                return -ENOMEM;
1144
1145        intel_gt_pm_wait_for_idle(gt);
1146        saved_work = rps->work.func;
1147        rps->work.func = dummy_rps_work;
1148
1149        for_each_engine(engine, gt, id) {
1150                struct i915_request *rq;
1151                struct {
1152                        u64 power;
1153                        int freq;
1154                } min, max;
1155
1156                if (!intel_engine_can_store_dword(engine))
1157                        continue;
1158
1159                st_engine_heartbeat_disable(engine);
1160
1161                rq = igt_spinner_create_request(&spin,
1162                                                engine->kernel_context,
1163                                                MI_NOOP);
1164                if (IS_ERR(rq)) {
1165                        st_engine_heartbeat_enable(engine);
1166                        err = PTR_ERR(rq);
1167                        break;
1168                }
1169
1170                i915_request_add(rq);
1171
1172                if (!igt_wait_for_spinner(&spin, rq)) {
1173                        pr_err("%s: RPS spinner did not start\n",
1174                               engine->name);
1175                        igt_spinner_end(&spin);
1176                        st_engine_heartbeat_enable(engine);
1177                        intel_gt_set_wedged(engine->gt);
1178                        err = -EIO;
1179                        break;
1180                }
1181
1182                max.freq = rps->max_freq;
1183                max.power = measure_power_at(rps, &max.freq);
1184
1185                min.freq = rps->min_freq;
1186                min.power = measure_power_at(rps, &min.freq);
1187
1188                igt_spinner_end(&spin);
1189                st_engine_heartbeat_enable(engine);
1190
1191                pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
1192                        engine->name,
1193                        min.power, intel_gpu_freq(rps, min.freq),
1194                        max.power, intel_gpu_freq(rps, max.freq));
1195
1196                if (10 * min.freq >= 9 * max.freq) {
1197                        pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMhz]\n",
1198                                  min.freq, intel_gpu_freq(rps, min.freq),
1199                                  max.freq, intel_gpu_freq(rps, max.freq));
1200                        continue;
1201                }
1202
1203                if (11 * min.power > 10 * max.power) {
1204                        pr_err("%s: did not conserve power when setting lower frequency!\n",
1205                               engine->name);
1206                        err = -EINVAL;
1207                        break;
1208                }
1209
1210                if (igt_flush_test(gt->i915)) {
1211                        err = -EIO;
1212                        break;
1213                }
1214        }
1215
1216        igt_spinner_fini(&spin);
1217
1218        intel_gt_pm_wait_for_idle(gt);
1219        rps->work.func = saved_work;
1220
1221        return err;
1222}
1223
1224int live_rps_dynamic(void *arg)
1225{
1226        struct intel_gt *gt = arg;
1227        struct intel_rps *rps = &gt->rps;
1228        struct intel_engine_cs *engine;
1229        enum intel_engine_id id;
1230        struct igt_spinner spin;
1231        int err = 0;
1232
1233        /*
1234         * We've looked at the bascs, and have established that we
1235         * can change the clock frequency and that the HW will generate
1236         * interrupts based on load. Now we check how we integrate those
1237         * moving parts into dynamic reclocking based on load.
1238         */
1239
1240        if (!intel_rps_is_enabled(rps))
1241                return 0;
1242
1243        if (igt_spinner_init(&spin, gt))
1244                return -ENOMEM;
1245
1246        if (intel_rps_has_interrupts(rps))
1247                pr_info("RPS has interrupt support\n");
1248        if (intel_rps_uses_timer(rps))
1249                pr_info("RPS has timer support\n");
1250
1251        for_each_engine(engine, gt, id) {
1252                struct i915_request *rq;
1253                struct {
1254                        ktime_t dt;
1255                        u8 freq;
1256                } min, max;
1257
1258                if (!intel_engine_can_store_dword(engine))
1259                        continue;
1260
1261                intel_gt_pm_wait_for_idle(gt);
1262                GEM_BUG_ON(intel_rps_is_active(rps));
1263                rps->cur_freq = rps->min_freq;
1264
1265                intel_engine_pm_get(engine);
1266                intel_rc6_disable(&gt->rc6);
1267                GEM_BUG_ON(rps->last_freq != rps->min_freq);
1268
1269                rq = igt_spinner_create_request(&spin,
1270                                                engine->kernel_context,
1271                                                MI_NOOP);
1272                if (IS_ERR(rq)) {
1273                        err = PTR_ERR(rq);
1274                        goto err;
1275                }
1276
1277                i915_request_add(rq);
1278
1279                max.dt = ktime_get();
1280                max.freq = wait_for_freq(rps, rps->max_freq, 500);
1281                max.dt = ktime_sub(ktime_get(), max.dt);
1282
1283                igt_spinner_end(&spin);
1284
1285                min.dt = ktime_get();
1286                min.freq = wait_for_freq(rps, rps->min_freq, 2000);
1287                min.dt = ktime_sub(ktime_get(), min.dt);
1288
1289                pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n",
1290                        engine->name,
1291                        max.freq, intel_gpu_freq(rps, max.freq),
1292                        ktime_to_ns(max.dt),
1293                        min.freq, intel_gpu_freq(rps, min.freq),
1294                        ktime_to_ns(min.dt));
1295                if (min.freq >= max.freq) {
1296                        pr_err("%s: dynamic reclocking of spinner failed\n!",
1297                               engine->name);
1298                        err = -EINVAL;
1299                }
1300
1301err:
1302                intel_rc6_enable(&gt->rc6);
1303                intel_engine_pm_put(engine);
1304
1305                if (igt_flush_test(gt->i915))
1306                        err = -EIO;
1307                if (err)
1308                        break;
1309        }
1310
1311        igt_spinner_fini(&spin);
1312
1313        return err;
1314}
1315