linux/drivers/gpu/drm/i915/gt/selftest_rps.c
<<
>>
Prefs
   1// SPDX-License-Identifier: MIT
   2/*
   3 * Copyright © 2020 Intel Corporation
   4 */
   5
   6#include <linux/pm_qos.h>
   7#include <linux/sort.h>
   8
   9#include "intel_engine_heartbeat.h"
  10#include "intel_engine_pm.h"
  11#include "intel_gpu_commands.h"
  12#include "intel_gt_clock_utils.h"
  13#include "intel_gt_pm.h"
  14#include "intel_rc6.h"
  15#include "selftest_engine_heartbeat.h"
  16#include "selftest_rps.h"
  17#include "selftests/igt_flush_test.h"
  18#include "selftests/igt_spinner.h"
  19#include "selftests/librapl.h"
  20
/* Try to isolate the impact of cstates from determining frequency response */
  22#define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */
  23
  24static void dummy_rps_work(struct work_struct *wrk)
  25{
  26}
  27
  28static int cmp_u64(const void *A, const void *B)
  29{
  30        const u64 *a = A, *b = B;
  31
  32        if (*a < *b)
  33                return -1;
  34        else if (*a > *b)
  35                return 1;
  36        else
  37                return 0;
  38}
  39
  40static int cmp_u32(const void *A, const void *B)
  41{
  42        const u32 *a = A, *b = B;
  43
  44        if (*a < *b)
  45                return -1;
  46        else if (*a > *b)
  47                return 1;
  48        else
  49                return 0;
  50}
  51
  52static struct i915_vma *
  53create_spin_counter(struct intel_engine_cs *engine,
  54                    struct i915_address_space *vm,
  55                    bool srm,
  56                    u32 **cancel,
  57                    u32 **counter)
  58{
  59        enum {
  60                COUNT,
  61                INC,
  62                __NGPR__,
  63        };
  64#define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
  65        struct drm_i915_gem_object *obj;
  66        struct i915_vma *vma;
  67        unsigned long end;
  68        u32 *base, *cs;
  69        int loop, i;
  70        int err;
  71
  72        obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
  73        if (IS_ERR(obj))
  74                return ERR_CAST(obj);
  75
  76        end = obj->base.size / sizeof(u32) - 1;
  77
  78        vma = i915_vma_instance(obj, vm, NULL);
  79        if (IS_ERR(vma)) {
  80                err = PTR_ERR(vma);
  81                goto err_put;
  82        }
  83
  84        err = i915_vma_pin(vma, 0, 0, PIN_USER);
  85        if (err)
  86                goto err_unlock;
  87
  88        i915_vma_lock(vma);
  89
  90        base = i915_gem_object_pin_map(obj, I915_MAP_WC);
  91        if (IS_ERR(base)) {
  92                err = PTR_ERR(base);
  93                goto err_unpin;
  94        }
  95        cs = base;
  96
  97        *cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
  98        for (i = 0; i < __NGPR__; i++) {
  99                *cs++ = i915_mmio_reg_offset(CS_GPR(i));
 100                *cs++ = 0;
 101                *cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
 102                *cs++ = 0;
 103        }
 104
 105        *cs++ = MI_LOAD_REGISTER_IMM(1);
 106        *cs++ = i915_mmio_reg_offset(CS_GPR(INC));
 107        *cs++ = 1;
 108
 109        loop = cs - base;
 110
 111        /* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
 112        for (i = 0; i < 1024; i++) {
 113                *cs++ = MI_MATH(4);
 114                *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
 115                *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
 116                *cs++ = MI_MATH_ADD;
 117                *cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);
 118
 119                if (srm) {
 120                        *cs++ = MI_STORE_REGISTER_MEM_GEN8;
 121                        *cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
 122                        *cs++ = lower_32_bits(vma->node.start + end * sizeof(*cs));
 123                        *cs++ = upper_32_bits(vma->node.start + end * sizeof(*cs));
 124                }
 125        }
 126
 127        *cs++ = MI_BATCH_BUFFER_START_GEN8;
 128        *cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
 129        *cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
 130        GEM_BUG_ON(cs - base > end);
 131
 132        i915_gem_object_flush_map(obj);
 133
 134        *cancel = base + loop;
 135        *counter = srm ? memset32(base + end, 0, 1) : NULL;
 136        return vma;
 137
 138err_unpin:
 139        i915_vma_unpin(vma);
 140err_unlock:
 141        i915_vma_unlock(vma);
 142err_put:
 143        i915_gem_object_put(obj);
 144        return ERR_PTR(err);
 145}
 146
 147static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
 148{
 149        u8 history[64], i;
 150        unsigned long end;
 151        int sleep;
 152
 153        i = 0;
 154        memset(history, freq, sizeof(history));
 155        sleep = 20;
 156
 157        /* The PCU does not change instantly, but drifts towards the goal? */
 158        end = jiffies + msecs_to_jiffies(timeout_ms);
 159        do {
 160                u8 act;
 161
 162                act = read_cagf(rps);
 163                if (time_after(jiffies, end))
 164                        return act;
 165
 166                /* Target acquired */
 167                if (act == freq)
 168                        return act;
 169
 170                /* Any change within the last N samples? */
 171                if (!memchr_inv(history, act, sizeof(history)))
 172                        return act;
 173
 174                history[i] = act;
 175                i = (i + 1) % ARRAY_SIZE(history);
 176
 177                usleep_range(sleep, 2 * sleep);
 178                sleep *= 2;
 179                if (sleep > timeout_ms * 20)
 180                        sleep = timeout_ms * 20;
 181        } while (1);
 182}
 183
 184static u8 rps_set_check(struct intel_rps *rps, u8 freq)
 185{
 186        mutex_lock(&rps->lock);
 187        GEM_BUG_ON(!intel_rps_is_active(rps));
 188        if (wait_for(!intel_rps_set(rps, freq), 50)) {
 189                mutex_unlock(&rps->lock);
 190                return 0;
 191        }
 192        GEM_BUG_ON(rps->last_freq != freq);
 193        mutex_unlock(&rps->lock);
 194
 195        return wait_for_freq(rps, freq, 50);
 196}
 197
 198static void show_pstate_limits(struct intel_rps *rps)
 199{
 200        struct drm_i915_private *i915 = rps_to_i915(rps);
 201
 202        if (IS_BROXTON(i915)) {
 203                pr_info("P_STATE_CAP[%x]: 0x%08x\n",
 204                        i915_mmio_reg_offset(BXT_RP_STATE_CAP),
 205                        intel_uncore_read(rps_to_uncore(rps),
 206                                          BXT_RP_STATE_CAP));
 207        } else if (GRAPHICS_VER(i915) == 9) {
 208                pr_info("P_STATE_LIMITS[%x]: 0x%08x\n",
 209                        i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS),
 210                        intel_uncore_read(rps_to_uncore(rps),
 211                                          GEN9_RP_STATE_LIMITS));
 212        }
 213}
 214
 215int live_rps_clock_interval(void *arg)
 216{
 217        struct intel_gt *gt = arg;
 218        struct intel_rps *rps = &gt->rps;
 219        void (*saved_work)(struct work_struct *wrk);
 220        struct intel_engine_cs *engine;
 221        enum intel_engine_id id;
 222        struct igt_spinner spin;
 223        int err = 0;
 224
 225        if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
 226                return 0;
 227
 228        if (igt_spinner_init(&spin, gt))
 229                return -ENOMEM;
 230
 231        intel_gt_pm_wait_for_idle(gt);
 232        saved_work = rps->work.func;
 233        rps->work.func = dummy_rps_work;
 234
 235        intel_gt_pm_get(gt);
 236        intel_rps_disable(&gt->rps);
 237
 238        intel_gt_check_clock_frequency(gt);
 239
 240        for_each_engine(engine, gt, id) {
 241                struct i915_request *rq;
 242                u32 cycles;
 243                u64 dt;
 244
 245                if (!intel_engine_can_store_dword(engine))
 246                        continue;
 247
 248                st_engine_heartbeat_disable(engine);
 249
 250                rq = igt_spinner_create_request(&spin,
 251                                                engine->kernel_context,
 252                                                MI_NOOP);
 253                if (IS_ERR(rq)) {
 254                        st_engine_heartbeat_enable(engine);
 255                        err = PTR_ERR(rq);
 256                        break;
 257                }
 258
 259                i915_request_add(rq);
 260
 261                if (!igt_wait_for_spinner(&spin, rq)) {
 262                        pr_err("%s: RPS spinner did not start\n",
 263                               engine->name);
 264                        igt_spinner_end(&spin);
 265                        st_engine_heartbeat_enable(engine);
 266                        intel_gt_set_wedged(engine->gt);
 267                        err = -EIO;
 268                        break;
 269                }
 270
 271                intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
 272
 273                intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0);
 274
 275                /* Set the evaluation interval to infinity! */
 276                intel_uncore_write_fw(gt->uncore,
 277                                      GEN6_RP_UP_EI, 0xffffffff);
 278                intel_uncore_write_fw(gt->uncore,
 279                                      GEN6_RP_UP_THRESHOLD, 0xffffffff);
 280
 281                intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL,
 282                                      GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG);
 283
 284                if (wait_for(intel_uncore_read_fw(gt->uncore,
 285                                                  GEN6_RP_CUR_UP_EI),
 286                             10)) {
 287                        /* Just skip the test; assume lack of HW support */
 288                        pr_notice("%s: rps evaluation interval not ticking\n",
 289                                  engine->name);
 290                        err = -ENODEV;
 291                } else {
 292                        ktime_t dt_[5];
 293                        u32 cycles_[5];
 294                        int i;
 295
 296                        for (i = 0; i < 5; i++) {
 297                                preempt_disable();
 298
 299                                dt_[i] = ktime_get();
 300                                cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
 301
 302                                udelay(1000);
 303
 304                                dt_[i] = ktime_sub(ktime_get(), dt_[i]);
 305                                cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
 306
 307                                preempt_enable();
 308                        }
 309
 310                        /* Use the median of both cycle/dt; close enough */
 311                        sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL);
 312                        cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4;
 313                        sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL);
 314                        dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4);
 315                }
 316
 317                intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0);
 318                intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
 319
 320                igt_spinner_end(&spin);
 321                st_engine_heartbeat_enable(engine);
 322
 323                if (err == 0) {
 324                        u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
 325                        u32 expected =
 326                                intel_gt_ns_to_pm_interval(gt, dt);
 327
 328                        pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n",
 329                                engine->name, cycles, time, dt, expected,
 330                                gt->clock_frequency / 1000);
 331
 332                        if (10 * time < 8 * dt ||
 333                            8 * time > 10 * dt) {
 334                                pr_err("%s: rps clock time does not match walltime!\n",
 335                                       engine->name);
 336                                err = -EINVAL;
 337                        }
 338
 339                        if (10 * expected < 8 * cycles ||
 340                            8 * expected > 10 * cycles) {
 341                                pr_err("%s: walltime does not match rps clock ticks!\n",
 342                                       engine->name);
 343                                err = -EINVAL;
 344                        }
 345                }
 346
 347                if (igt_flush_test(gt->i915))
 348                        err = -EIO;
 349
 350                break; /* once is enough */
 351        }
 352
 353        intel_rps_enable(&gt->rps);
 354        intel_gt_pm_put(gt);
 355
 356        igt_spinner_fini(&spin);
 357
 358        intel_gt_pm_wait_for_idle(gt);
 359        rps->work.func = saved_work;
 360
 361        if (err == -ENODEV) /* skipped, don't report a fail */
 362                err = 0;
 363
 364        return err;
 365}
 366
 367int live_rps_control(void *arg)
 368{
 369        struct intel_gt *gt = arg;
 370        struct intel_rps *rps = &gt->rps;
 371        void (*saved_work)(struct work_struct *wrk);
 372        struct intel_engine_cs *engine;
 373        enum intel_engine_id id;
 374        struct igt_spinner spin;
 375        int err = 0;
 376
 377        /*
 378         * Check that the actual frequency matches our requested frequency,
 379         * to verify our control mechanism. We have to be careful that the
 380         * PCU may throttle the GPU in which case the actual frequency used
 381         * will be lowered than requested.
 382         */
 383
 384        if (!intel_rps_is_enabled(rps))
 385                return 0;
 386
 387        if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
 388                return 0;
 389
 390        if (igt_spinner_init(&spin, gt))
 391                return -ENOMEM;
 392
 393        intel_gt_pm_wait_for_idle(gt);
 394        saved_work = rps->work.func;
 395        rps->work.func = dummy_rps_work;
 396
 397        intel_gt_pm_get(gt);
 398        for_each_engine(engine, gt, id) {
 399                struct i915_request *rq;
 400                ktime_t min_dt, max_dt;
 401                int f, limit;
 402                int min, max;
 403
 404                if (!intel_engine_can_store_dword(engine))
 405                        continue;
 406
 407                st_engine_heartbeat_disable(engine);
 408
 409                rq = igt_spinner_create_request(&spin,
 410                                                engine->kernel_context,
 411                                                MI_NOOP);
 412                if (IS_ERR(rq)) {
 413                        err = PTR_ERR(rq);
 414                        break;
 415                }
 416
 417                i915_request_add(rq);
 418
 419                if (!igt_wait_for_spinner(&spin, rq)) {
 420                        pr_err("%s: RPS spinner did not start\n",
 421                               engine->name);
 422                        igt_spinner_end(&spin);
 423                        st_engine_heartbeat_enable(engine);
 424                        intel_gt_set_wedged(engine->gt);
 425                        err = -EIO;
 426                        break;
 427                }
 428
 429                if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
 430                        pr_err("%s: could not set minimum frequency [%x], only %x!\n",
 431                               engine->name, rps->min_freq, read_cagf(rps));
 432                        igt_spinner_end(&spin);
 433                        st_engine_heartbeat_enable(engine);
 434                        show_pstate_limits(rps);
 435                        err = -EINVAL;
 436                        break;
 437                }
 438
 439                for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
 440                        if (rps_set_check(rps, f) < f)
 441                                break;
 442                }
 443
 444                limit = rps_set_check(rps, f);
 445
 446                if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
 447                        pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
 448                               engine->name, rps->min_freq, read_cagf(rps));
 449                        igt_spinner_end(&spin);
 450                        st_engine_heartbeat_enable(engine);
 451                        show_pstate_limits(rps);
 452                        err = -EINVAL;
 453                        break;
 454                }
 455
 456                max_dt = ktime_get();
 457                max = rps_set_check(rps, limit);
 458                max_dt = ktime_sub(ktime_get(), max_dt);
 459
 460                min_dt = ktime_get();
 461                min = rps_set_check(rps, rps->min_freq);
 462                min_dt = ktime_sub(ktime_get(), min_dt);
 463
 464                igt_spinner_end(&spin);
 465                st_engine_heartbeat_enable(engine);
 466
 467                pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
 468                        engine->name,
 469                        rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
 470                        rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
 471                        limit, intel_gpu_freq(rps, limit),
 472                        min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));
 473
 474                if (limit == rps->min_freq) {
 475                        pr_err("%s: GPU throttled to minimum!\n",
 476                               engine->name);
 477                        show_pstate_limits(rps);
 478                        err = -ENODEV;
 479                        break;
 480                }
 481
 482                if (igt_flush_test(gt->i915)) {
 483                        err = -EIO;
 484                        break;
 485                }
 486        }
 487        intel_gt_pm_put(gt);
 488
 489        igt_spinner_fini(&spin);
 490
 491        intel_gt_pm_wait_for_idle(gt);
 492        rps->work.func = saved_work;
 493
 494        return err;
 495}
 496
 497static void show_pcu_config(struct intel_rps *rps)
 498{
 499        struct drm_i915_private *i915 = rps_to_i915(rps);
 500        unsigned int max_gpu_freq, min_gpu_freq;
 501        intel_wakeref_t wakeref;
 502        int gpu_freq;
 503
 504        if (!HAS_LLC(i915))
 505                return;
 506
 507        min_gpu_freq = rps->min_freq;
 508        max_gpu_freq = rps->max_freq;
 509        if (GRAPHICS_VER(i915) >= 9) {
 510                /* Convert GT frequency to 50 HZ units */
 511                min_gpu_freq /= GEN9_FREQ_SCALER;
 512                max_gpu_freq /= GEN9_FREQ_SCALER;
 513        }
 514
 515        wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm);
 516
 517        pr_info("%5s  %5s  %5s\n", "GPU", "eCPU", "eRing");
 518        for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
 519                int ia_freq = gpu_freq;
 520
 521                sandybridge_pcode_read(i915,
 522                                       GEN6_PCODE_READ_MIN_FREQ_TABLE,
 523                                       &ia_freq, NULL);
 524
 525                pr_info("%5d  %5d  %5d\n",
 526                        gpu_freq * 50,
 527                        ((ia_freq >> 0) & 0xff) * 100,
 528                        ((ia_freq >> 8) & 0xff) * 100);
 529        }
 530
 531        intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref);
 532}
 533
 534static u64 __measure_frequency(u32 *cntr, int duration_ms)
 535{
 536        u64 dc, dt;
 537
 538        dt = ktime_get();
 539        dc = READ_ONCE(*cntr);
 540        usleep_range(1000 * duration_ms, 2000 * duration_ms);
 541        dc = READ_ONCE(*cntr) - dc;
 542        dt = ktime_get() - dt;
 543
 544        return div64_u64(1000 * 1000 * dc, dt);
 545}
 546
 547static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
 548{
 549        u64 x[5];
 550        int i;
 551
 552        *freq = rps_set_check(rps, *freq);
 553        for (i = 0; i < 5; i++)
 554                x[i] = __measure_frequency(cntr, 2);
 555        *freq = (*freq + read_cagf(rps)) / 2;
 556
 557        /* A simple triangle filter for better result stability */
 558        sort(x, 5, sizeof(*x), cmp_u64, NULL);
 559        return div_u64(x[1] + 2 * x[2] + x[3], 4);
 560}
 561
 562static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
 563                                  int duration_ms)
 564{
 565        u64 dc, dt;
 566
 567        dt = ktime_get();
 568        dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
 569        usleep_range(1000 * duration_ms, 2000 * duration_ms);
 570        dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
 571        dt = ktime_get() - dt;
 572
 573        return div64_u64(1000 * 1000 * dc, dt);
 574}
 575
 576static u64 measure_cs_frequency_at(struct intel_rps *rps,
 577                                   struct intel_engine_cs *engine,
 578                                   int *freq)
 579{
 580        u64 x[5];
 581        int i;
 582
 583        *freq = rps_set_check(rps, *freq);
 584        for (i = 0; i < 5; i++)
 585                x[i] = __measure_cs_frequency(engine, 2);
 586        *freq = (*freq + read_cagf(rps)) / 2;
 587
 588        /* A simple triangle filter for better result stability */
 589        sort(x, 5, sizeof(*x), cmp_u64, NULL);
 590        return div_u64(x[1] + 2 * x[2] + x[3], 4);
 591}
 592
 593static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
 594{
 595        return f_d * x > f_n * y && f_n * x < f_d * y;
 596}
 597
 598int live_rps_frequency_cs(void *arg)
 599{
 600        void (*saved_work)(struct work_struct *wrk);
 601        struct intel_gt *gt = arg;
 602        struct intel_rps *rps = &gt->rps;
 603        struct intel_engine_cs *engine;
 604        struct pm_qos_request qos;
 605        enum intel_engine_id id;
 606        int err = 0;
 607
 608        /*
 609         * The premise is that the GPU does change frequency at our behest.
 610         * Let's check there is a correspondence between the requested
 611         * frequency, the actual frequency, and the observed clock rate.
 612         */
 613
 614        if (!intel_rps_is_enabled(rps))
 615                return 0;
 616
 617        if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
 618                return 0;
 619
 620        if (CPU_LATENCY >= 0)
 621                cpu_latency_qos_add_request(&qos, CPU_LATENCY);
 622
 623        intel_gt_pm_wait_for_idle(gt);
 624        saved_work = rps->work.func;
 625        rps->work.func = dummy_rps_work;
 626
 627        for_each_engine(engine, gt, id) {
 628                struct i915_request *rq;
 629                struct i915_vma *vma;
 630                u32 *cancel, *cntr;
 631                struct {
 632                        u64 count;
 633                        int freq;
 634                } min, max;
 635
 636                st_engine_heartbeat_disable(engine);
 637
 638                vma = create_spin_counter(engine,
 639                                          engine->kernel_context->vm, false,
 640                                          &cancel, &cntr);
 641                if (IS_ERR(vma)) {
 642                        err = PTR_ERR(vma);
 643                        st_engine_heartbeat_enable(engine);
 644                        break;
 645                }
 646
 647                rq = intel_engine_create_kernel_request(engine);
 648                if (IS_ERR(rq)) {
 649                        err = PTR_ERR(rq);
 650                        goto err_vma;
 651                }
 652
 653                err = i915_request_await_object(rq, vma->obj, false);
 654                if (!err)
 655                        err = i915_vma_move_to_active(vma, rq, 0);
 656                if (!err)
 657                        err = rq->engine->emit_bb_start(rq,
 658                                                        vma->node.start,
 659                                                        PAGE_SIZE, 0);
 660                i915_request_add(rq);
 661                if (err)
 662                        goto err_vma;
 663
 664                if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
 665                             10)) {
 666                        pr_err("%s: timed loop did not start\n",
 667                               engine->name);
 668                        goto err_vma;
 669                }
 670
 671                min.freq = rps->min_freq;
 672                min.count = measure_cs_frequency_at(rps, engine, &min.freq);
 673
 674                max.freq = rps->max_freq;
 675                max.count = measure_cs_frequency_at(rps, engine, &max.freq);
 676
 677                pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
 678                        engine->name,
 679                        min.count, intel_gpu_freq(rps, min.freq),
 680                        max.count, intel_gpu_freq(rps, max.freq),
 681                        (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
 682                                                     max.freq * min.count));
 683
 684                if (!scaled_within(max.freq * min.count,
 685                                   min.freq * max.count,
 686                                   2, 3)) {
 687                        int f;
 688
 689                        pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
 690                               engine->name,
 691                               max.freq * min.count,
 692                               min.freq * max.count);
 693                        show_pcu_config(rps);
 694
 695                        for (f = min.freq + 1; f <= rps->max_freq; f++) {
 696                                int act = f;
 697                                u64 count;
 698
 699                                count = measure_cs_frequency_at(rps, engine, &act);
 700                                if (act < f)
 701                                        break;
 702
 703                                pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
 704                                        engine->name,
 705                                        act, intel_gpu_freq(rps, act), count,
 706                                        (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
 707                                                                     act * min.count));
 708
 709                                f = act; /* may skip ahead [pcu granularity] */
 710                        }
 711
 712                        err = -EINTR; /* ignore error, continue on with test */
 713                }
 714
 715err_vma:
 716                *cancel = MI_BATCH_BUFFER_END;
 717                i915_gem_object_flush_map(vma->obj);
 718                i915_gem_object_unpin_map(vma->obj);
 719                i915_vma_unpin(vma);
 720                i915_vma_unlock(vma);
 721                i915_vma_put(vma);
 722
 723                st_engine_heartbeat_enable(engine);
 724                if (igt_flush_test(gt->i915))
 725                        err = -EIO;
 726                if (err)
 727                        break;
 728        }
 729
 730        intel_gt_pm_wait_for_idle(gt);
 731        rps->work.func = saved_work;
 732
 733        if (CPU_LATENCY >= 0)
 734                cpu_latency_qos_remove_request(&qos);
 735
 736        return err;
 737}
 738
 739int live_rps_frequency_srm(void *arg)
 740{
 741        void (*saved_work)(struct work_struct *wrk);
 742        struct intel_gt *gt = arg;
 743        struct intel_rps *rps = &gt->rps;
 744        struct intel_engine_cs *engine;
 745        struct pm_qos_request qos;
 746        enum intel_engine_id id;
 747        int err = 0;
 748
 749        /*
 750         * The premise is that the GPU does change frequency at our behest.
 751         * Let's check there is a correspondence between the requested
 752         * frequency, the actual frequency, and the observed clock rate.
 753         */
 754
 755        if (!intel_rps_is_enabled(rps))
 756                return 0;
 757
 758        if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
 759                return 0;
 760
 761        if (CPU_LATENCY >= 0)
 762                cpu_latency_qos_add_request(&qos, CPU_LATENCY);
 763
 764        intel_gt_pm_wait_for_idle(gt);
 765        saved_work = rps->work.func;
 766        rps->work.func = dummy_rps_work;
 767
 768        for_each_engine(engine, gt, id) {
 769                struct i915_request *rq;
 770                struct i915_vma *vma;
 771                u32 *cancel, *cntr;
 772                struct {
 773                        u64 count;
 774                        int freq;
 775                } min, max;
 776
 777                st_engine_heartbeat_disable(engine);
 778
 779                vma = create_spin_counter(engine,
 780                                          engine->kernel_context->vm, true,
 781                                          &cancel, &cntr);
 782                if (IS_ERR(vma)) {
 783                        err = PTR_ERR(vma);
 784                        st_engine_heartbeat_enable(engine);
 785                        break;
 786                }
 787
 788                rq = intel_engine_create_kernel_request(engine);
 789                if (IS_ERR(rq)) {
 790                        err = PTR_ERR(rq);
 791                        goto err_vma;
 792                }
 793
 794                err = i915_request_await_object(rq, vma->obj, false);
 795                if (!err)
 796                        err = i915_vma_move_to_active(vma, rq, 0);
 797                if (!err)
 798                        err = rq->engine->emit_bb_start(rq,
 799                                                        vma->node.start,
 800                                                        PAGE_SIZE, 0);
 801                i915_request_add(rq);
 802                if (err)
 803                        goto err_vma;
 804
 805                if (wait_for(READ_ONCE(*cntr), 10)) {
 806                        pr_err("%s: timed loop did not start\n",
 807                               engine->name);
 808                        goto err_vma;
 809                }
 810
 811                min.freq = rps->min_freq;
 812                min.count = measure_frequency_at(rps, cntr, &min.freq);
 813
 814                max.freq = rps->max_freq;
 815                max.count = measure_frequency_at(rps, cntr, &max.freq);
 816
 817                pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
 818                        engine->name,
 819                        min.count, intel_gpu_freq(rps, min.freq),
 820                        max.count, intel_gpu_freq(rps, max.freq),
 821                        (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
 822                                                     max.freq * min.count));
 823
 824                if (!scaled_within(max.freq * min.count,
 825                                   min.freq * max.count,
 826                                   1, 2)) {
 827                        int f;
 828
 829                        pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
 830                               engine->name,
 831                               max.freq * min.count,
 832                               min.freq * max.count);
 833                        show_pcu_config(rps);
 834
 835                        for (f = min.freq + 1; f <= rps->max_freq; f++) {
 836                                int act = f;
 837                                u64 count;
 838
 839                                count = measure_frequency_at(rps, cntr, &act);
 840                                if (act < f)
 841                                        break;
 842
 843                                pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
 844                                        engine->name,
 845                                        act, intel_gpu_freq(rps, act), count,
 846                                        (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
 847                                                                     act * min.count));
 848
 849                                f = act; /* may skip ahead [pcu granularity] */
 850                        }
 851
 852                        err = -EINTR; /* ignore error, continue on with test */
 853                }
 854
 855err_vma:
 856                *cancel = MI_BATCH_BUFFER_END;
 857                i915_gem_object_flush_map(vma->obj);
 858                i915_gem_object_unpin_map(vma->obj);
 859                i915_vma_unpin(vma);
 860                i915_vma_unlock(vma);
 861                i915_vma_put(vma);
 862
 863                st_engine_heartbeat_enable(engine);
 864                if (igt_flush_test(gt->i915))
 865                        err = -EIO;
 866                if (err)
 867                        break;
 868        }
 869
 870        intel_gt_pm_wait_for_idle(gt);
 871        rps->work.func = saved_work;
 872
 873        if (CPU_LATENCY >= 0)
 874                cpu_latency_qos_remove_request(&qos);
 875
 876        return err;
 877}
 878
 879static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
 880{
 881        /* Flush any previous EI */
 882        usleep_range(timeout_us, 2 * timeout_us);
 883
 884        /* Reset the interrupt status */
 885        rps_disable_interrupts(rps);
 886        GEM_BUG_ON(rps->pm_iir);
 887        rps_enable_interrupts(rps);
 888
 889        /* And then wait for the timeout, for real this time */
 890        usleep_range(2 * timeout_us, 3 * timeout_us);
 891}
 892
 893static int __rps_up_interrupt(struct intel_rps *rps,
 894                              struct intel_engine_cs *engine,
 895                              struct igt_spinner *spin)
 896{
 897        struct intel_uncore *uncore = engine->uncore;
 898        struct i915_request *rq;
 899        u32 timeout;
 900
 901        if (!intel_engine_can_store_dword(engine))
 902                return 0;
 903
 904        rps_set_check(rps, rps->min_freq);
 905
 906        rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
 907        if (IS_ERR(rq))
 908                return PTR_ERR(rq);
 909
 910        i915_request_get(rq);
 911        i915_request_add(rq);
 912
 913        if (!igt_wait_for_spinner(spin, rq)) {
 914                pr_err("%s: RPS spinner did not start\n",
 915                       engine->name);
 916                i915_request_put(rq);
 917                intel_gt_set_wedged(engine->gt);
 918                return -EIO;
 919        }
 920
 921        if (!intel_rps_is_active(rps)) {
 922                pr_err("%s: RPS not enabled on starting spinner\n",
 923                       engine->name);
 924                igt_spinner_end(spin);
 925                i915_request_put(rq);
 926                return -EINVAL;
 927        }
 928
 929        if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
 930                pr_err("%s: RPS did not register UP interrupt\n",
 931                       engine->name);
 932                i915_request_put(rq);
 933                return -EINVAL;
 934        }
 935
 936        if (rps->last_freq != rps->min_freq) {
 937                pr_err("%s: RPS did not program min frequency\n",
 938                       engine->name);
 939                i915_request_put(rq);
 940                return -EINVAL;
 941        }
 942
 943        timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
 944        timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
 945        timeout = DIV_ROUND_UP(timeout, 1000);
 946
 947        sleep_for_ei(rps, timeout);
 948        GEM_BUG_ON(i915_request_completed(rq));
 949
 950        igt_spinner_end(spin);
 951        i915_request_put(rq);
 952
 953        if (rps->cur_freq != rps->min_freq) {
 954                pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
 955                       engine->name, intel_rps_read_actual_frequency(rps));
 956                return -EINVAL;
 957        }
 958
 959        if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
 960                pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
 961                       engine->name, rps->pm_iir,
 962                       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
 963                       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
 964                       intel_uncore_read(uncore, GEN6_RP_UP_EI));
 965                return -EINVAL;
 966        }
 967
 968        return 0;
 969}
 970
 971static int __rps_down_interrupt(struct intel_rps *rps,
 972                                struct intel_engine_cs *engine)
 973{
 974        struct intel_uncore *uncore = engine->uncore;
 975        u32 timeout;
 976
 977        rps_set_check(rps, rps->max_freq);
 978
 979        if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
 980                pr_err("%s: RPS did not register DOWN interrupt\n",
 981                       engine->name);
 982                return -EINVAL;
 983        }
 984
 985        if (rps->last_freq != rps->max_freq) {
 986                pr_err("%s: RPS did not program max frequency\n",
 987                       engine->name);
 988                return -EINVAL;
 989        }
 990
 991        timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
 992        timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
 993        timeout = DIV_ROUND_UP(timeout, 1000);
 994
 995        sleep_for_ei(rps, timeout);
 996
 997        if (rps->cur_freq != rps->max_freq) {
 998                pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
 999                       engine->name,
1000                       intel_rps_read_actual_frequency(rps));
1001                return -EINVAL;
1002        }
1003
1004        if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
1005                pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
1006                       engine->name, rps->pm_iir,
1007                       intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
1008                       intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
1009                       intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
1010                       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
1011                       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
1012                       intel_uncore_read(uncore, GEN6_RP_UP_EI));
1013                return -EINVAL;
1014        }
1015
1016        return 0;
1017}
1018
1019int live_rps_interrupt(void *arg)
1020{
1021        struct intel_gt *gt = arg;
1022        struct intel_rps *rps = &gt->rps;
1023        void (*saved_work)(struct work_struct *wrk);
1024        struct intel_engine_cs *engine;
1025        enum intel_engine_id id;
1026        struct igt_spinner spin;
1027        u32 pm_events;
1028        int err = 0;
1029
1030        /*
1031         * First, let's check whether or not we are receiving interrupts.
1032         */
1033
1034        if (!intel_rps_has_interrupts(rps) || GRAPHICS_VER(gt->i915) < 6)
1035                return 0;
1036
1037        intel_gt_pm_get(gt);
1038        pm_events = rps->pm_events;
1039        intel_gt_pm_put(gt);
1040        if (!pm_events) {
1041                pr_err("No RPS PM events registered, but RPS is enabled?\n");
1042                return -ENODEV;
1043        }
1044
1045        if (igt_spinner_init(&spin, gt))
1046                return -ENOMEM;
1047
1048        intel_gt_pm_wait_for_idle(gt);
1049        saved_work = rps->work.func;
1050        rps->work.func = dummy_rps_work;
1051
1052        for_each_engine(engine, gt, id) {
1053                /* Keep the engine busy with a spinner; expect an UP! */
1054                if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
1055                        intel_gt_pm_wait_for_idle(engine->gt);
1056                        GEM_BUG_ON(intel_rps_is_active(rps));
1057
1058                        st_engine_heartbeat_disable(engine);
1059
1060                        err = __rps_up_interrupt(rps, engine, &spin);
1061
1062                        st_engine_heartbeat_enable(engine);
1063                        if (err)
1064                                goto out;
1065
1066                        intel_gt_pm_wait_for_idle(engine->gt);
1067                }
1068
1069                /* Keep the engine awake but idle and check for DOWN */
1070                if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
1071                        st_engine_heartbeat_disable(engine);
1072                        intel_rc6_disable(&gt->rc6);
1073
1074                        err = __rps_down_interrupt(rps, engine);
1075
1076                        intel_rc6_enable(&gt->rc6);
1077                        st_engine_heartbeat_enable(engine);
1078                        if (err)
1079                                goto out;
1080                }
1081        }
1082
1083out:
1084        if (igt_flush_test(gt->i915))
1085                err = -EIO;
1086
1087        igt_spinner_fini(&spin);
1088
1089        intel_gt_pm_wait_for_idle(gt);
1090        rps->work.func = saved_work;
1091
1092        return err;
1093}
1094
1095static u64 __measure_power(int duration_ms)
1096{
1097        u64 dE, dt;
1098
1099        dt = ktime_get();
1100        dE = librapl_energy_uJ();
1101        usleep_range(1000 * duration_ms, 2000 * duration_ms);
1102        dE = librapl_energy_uJ() - dE;
1103        dt = ktime_get() - dt;
1104
1105        return div64_u64(1000 * 1000 * dE, dt);
1106}
1107
1108static u64 measure_power_at(struct intel_rps *rps, int *freq)
1109{
1110        u64 x[5];
1111        int i;
1112
1113        *freq = rps_set_check(rps, *freq);
1114        for (i = 0; i < 5; i++)
1115                x[i] = __measure_power(5);
1116        *freq = (*freq + read_cagf(rps)) / 2;
1117
1118        /* A simple triangle filter for better result stability */
1119        sort(x, 5, sizeof(*x), cmp_u64, NULL);
1120        return div_u64(x[1] + 2 * x[2] + x[3], 4);
1121}
1122
1123int live_rps_power(void *arg)
1124{
1125        struct intel_gt *gt = arg;
1126        struct intel_rps *rps = &gt->rps;
1127        void (*saved_work)(struct work_struct *wrk);
1128        struct intel_engine_cs *engine;
1129        enum intel_engine_id id;
1130        struct igt_spinner spin;
1131        int err = 0;
1132
1133        /*
1134         * Our fundamental assumption is that running at lower frequency
1135         * actually saves power. Let's see if our RAPL measurement support
1136         * that theory.
1137         */
1138
1139        if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
1140                return 0;
1141
1142        if (!librapl_supported(gt->i915))
1143                return 0;
1144
1145        if (igt_spinner_init(&spin, gt))
1146                return -ENOMEM;
1147
1148        intel_gt_pm_wait_for_idle(gt);
1149        saved_work = rps->work.func;
1150        rps->work.func = dummy_rps_work;
1151
1152        for_each_engine(engine, gt, id) {
1153                struct i915_request *rq;
1154                struct {
1155                        u64 power;
1156                        int freq;
1157                } min, max;
1158
1159                if (!intel_engine_can_store_dword(engine))
1160                        continue;
1161
1162                st_engine_heartbeat_disable(engine);
1163
1164                rq = igt_spinner_create_request(&spin,
1165                                                engine->kernel_context,
1166                                                MI_NOOP);
1167                if (IS_ERR(rq)) {
1168                        st_engine_heartbeat_enable(engine);
1169                        err = PTR_ERR(rq);
1170                        break;
1171                }
1172
1173                i915_request_add(rq);
1174
1175                if (!igt_wait_for_spinner(&spin, rq)) {
1176                        pr_err("%s: RPS spinner did not start\n",
1177                               engine->name);
1178                        igt_spinner_end(&spin);
1179                        st_engine_heartbeat_enable(engine);
1180                        intel_gt_set_wedged(engine->gt);
1181                        err = -EIO;
1182                        break;
1183                }
1184
1185                max.freq = rps->max_freq;
1186                max.power = measure_power_at(rps, &max.freq);
1187
1188                min.freq = rps->min_freq;
1189                min.power = measure_power_at(rps, &min.freq);
1190
1191                igt_spinner_end(&spin);
1192                st_engine_heartbeat_enable(engine);
1193
1194                pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
1195                        engine->name,
1196                        min.power, intel_gpu_freq(rps, min.freq),
1197                        max.power, intel_gpu_freq(rps, max.freq));
1198
1199                if (10 * min.freq >= 9 * max.freq) {
1200                        pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMhz]\n",
1201                                  min.freq, intel_gpu_freq(rps, min.freq),
1202                                  max.freq, intel_gpu_freq(rps, max.freq));
1203                        continue;
1204                }
1205
1206                if (11 * min.power > 10 * max.power) {
1207                        pr_err("%s: did not conserve power when setting lower frequency!\n",
1208                               engine->name);
1209                        err = -EINVAL;
1210                        break;
1211                }
1212
1213                if (igt_flush_test(gt->i915)) {
1214                        err = -EIO;
1215                        break;
1216                }
1217        }
1218
1219        igt_spinner_fini(&spin);
1220
1221        intel_gt_pm_wait_for_idle(gt);
1222        rps->work.func = saved_work;
1223
1224        return err;
1225}
1226
1227int live_rps_dynamic(void *arg)
1228{
1229        struct intel_gt *gt = arg;
1230        struct intel_rps *rps = &gt->rps;
1231        struct intel_engine_cs *engine;
1232        enum intel_engine_id id;
1233        struct igt_spinner spin;
1234        int err = 0;
1235
1236        /*
1237         * We've looked at the bascs, and have established that we
1238         * can change the clock frequency and that the HW will generate
1239         * interrupts based on load. Now we check how we integrate those
1240         * moving parts into dynamic reclocking based on load.
1241         */
1242
1243        if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
1244                return 0;
1245
1246        if (igt_spinner_init(&spin, gt))
1247                return -ENOMEM;
1248
1249        if (intel_rps_has_interrupts(rps))
1250                pr_info("RPS has interrupt support\n");
1251        if (intel_rps_uses_timer(rps))
1252                pr_info("RPS has timer support\n");
1253
1254        for_each_engine(engine, gt, id) {
1255                struct i915_request *rq;
1256                struct {
1257                        ktime_t dt;
1258                        u8 freq;
1259                } min, max;
1260
1261                if (!intel_engine_can_store_dword(engine))
1262                        continue;
1263
1264                intel_gt_pm_wait_for_idle(gt);
1265                GEM_BUG_ON(intel_rps_is_active(rps));
1266                rps->cur_freq = rps->min_freq;
1267
1268                intel_engine_pm_get(engine);
1269                intel_rc6_disable(&gt->rc6);
1270                GEM_BUG_ON(rps->last_freq != rps->min_freq);
1271
1272                rq = igt_spinner_create_request(&spin,
1273                                                engine->kernel_context,
1274                                                MI_NOOP);
1275                if (IS_ERR(rq)) {
1276                        err = PTR_ERR(rq);
1277                        goto err;
1278                }
1279
1280                i915_request_add(rq);
1281
1282                max.dt = ktime_get();
1283                max.freq = wait_for_freq(rps, rps->max_freq, 500);
1284                max.dt = ktime_sub(ktime_get(), max.dt);
1285
1286                igt_spinner_end(&spin);
1287
1288                min.dt = ktime_get();
1289                min.freq = wait_for_freq(rps, rps->min_freq, 2000);
1290                min.dt = ktime_sub(ktime_get(), min.dt);
1291
1292                pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n",
1293                        engine->name,
1294                        max.freq, intel_gpu_freq(rps, max.freq),
1295                        ktime_to_ns(max.dt),
1296                        min.freq, intel_gpu_freq(rps, min.freq),
1297                        ktime_to_ns(min.dt));
1298                if (min.freq >= max.freq) {
1299                        pr_err("%s: dynamic reclocking of spinner failed\n!",
1300                               engine->name);
1301                        err = -EINVAL;
1302                }
1303
1304err:
1305                intel_rc6_enable(&gt->rc6);
1306                intel_engine_pm_put(engine);
1307
1308                if (igt_flush_test(gt->i915))
1309                        err = -EIO;
1310                if (err)
1311                        break;
1312        }
1313
1314        igt_spinner_fini(&spin);
1315
1316        return err;
1317}
1318