linux/drivers/gpu/drm/i915/gt/selftest_rps.c
<<
>>
Prefs
   1// SPDX-License-Identifier: MIT
   2/*
   3 * Copyright © 2020 Intel Corporation
   4 */
   5
   6#include <linux/pm_qos.h>
   7#include <linux/sort.h>
   8
   9#include "gem/i915_gem_internal.h"
  10
  11#include "intel_engine_heartbeat.h"
  12#include "intel_engine_pm.h"
  13#include "intel_engine_regs.h"
  14#include "intel_gpu_commands.h"
  15#include "intel_gt_clock_utils.h"
  16#include "intel_gt_pm.h"
  17#include "intel_rc6.h"
  18#include "selftest_engine_heartbeat.h"
  19#include "selftest_rps.h"
  20#include "selftests/igt_flush_test.h"
  21#include "selftests/igt_spinner.h"
  22#include "selftests/librapl.h"
  23
  24/* Try to isolate the impact of cstates from determing frequency response */
  25#define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */
  26
  27static void dummy_rps_work(struct work_struct *wrk)
  28{
  29}
  30
  31static int cmp_u64(const void *A, const void *B)
  32{
  33        const u64 *a = A, *b = B;
  34
  35        if (*a < *b)
  36                return -1;
  37        else if (*a > *b)
  38                return 1;
  39        else
  40                return 0;
  41}
  42
  43static int cmp_u32(const void *A, const void *B)
  44{
  45        const u32 *a = A, *b = B;
  46
  47        if (*a < *b)
  48                return -1;
  49        else if (*a > *b)
  50                return 1;
  51        else
  52                return 0;
  53}
  54
  55static struct i915_vma *
  56create_spin_counter(struct intel_engine_cs *engine,
  57                    struct i915_address_space *vm,
  58                    bool srm,
  59                    u32 **cancel,
  60                    u32 **counter)
  61{
  62        enum {
  63                COUNT,
  64                INC,
  65                __NGPR__,
  66        };
  67#define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
  68        struct drm_i915_gem_object *obj;
  69        struct i915_vma *vma;
  70        unsigned long end;
  71        u32 *base, *cs;
  72        int loop, i;
  73        int err;
  74
  75        obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
  76        if (IS_ERR(obj))
  77                return ERR_CAST(obj);
  78
  79        end = obj->base.size / sizeof(u32) - 1;
  80
  81        vma = i915_vma_instance(obj, vm, NULL);
  82        if (IS_ERR(vma)) {
  83                err = PTR_ERR(vma);
  84                goto err_put;
  85        }
  86
  87        err = i915_vma_pin(vma, 0, 0, PIN_USER);
  88        if (err)
  89                goto err_unlock;
  90
  91        i915_vma_lock(vma);
  92
  93        base = i915_gem_object_pin_map(obj, I915_MAP_WC);
  94        if (IS_ERR(base)) {
  95                err = PTR_ERR(base);
  96                goto err_unpin;
  97        }
  98        cs = base;
  99
 100        *cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
 101        for (i = 0; i < __NGPR__; i++) {
 102                *cs++ = i915_mmio_reg_offset(CS_GPR(i));
 103                *cs++ = 0;
 104                *cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
 105                *cs++ = 0;
 106        }
 107
 108        *cs++ = MI_LOAD_REGISTER_IMM(1);
 109        *cs++ = i915_mmio_reg_offset(CS_GPR(INC));
 110        *cs++ = 1;
 111
 112        loop = cs - base;
 113
 114        /* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
 115        for (i = 0; i < 1024; i++) {
 116                *cs++ = MI_MATH(4);
 117                *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
 118                *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
 119                *cs++ = MI_MATH_ADD;
 120                *cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);
 121
 122                if (srm) {
 123                        *cs++ = MI_STORE_REGISTER_MEM_GEN8;
 124                        *cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
 125                        *cs++ = lower_32_bits(vma->node.start + end * sizeof(*cs));
 126                        *cs++ = upper_32_bits(vma->node.start + end * sizeof(*cs));
 127                }
 128        }
 129
 130        *cs++ = MI_BATCH_BUFFER_START_GEN8;
 131        *cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
 132        *cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
 133        GEM_BUG_ON(cs - base > end);
 134
 135        i915_gem_object_flush_map(obj);
 136
 137        *cancel = base + loop;
 138        *counter = srm ? memset32(base + end, 0, 1) : NULL;
 139        return vma;
 140
 141err_unpin:
 142        i915_vma_unpin(vma);
 143err_unlock:
 144        i915_vma_unlock(vma);
 145err_put:
 146        i915_gem_object_put(obj);
 147        return ERR_PTR(err);
 148}
 149
 150static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
 151{
 152        u8 history[64], i;
 153        unsigned long end;
 154        int sleep;
 155
 156        i = 0;
 157        memset(history, freq, sizeof(history));
 158        sleep = 20;
 159
 160        /* The PCU does not change instantly, but drifts towards the goal? */
 161        end = jiffies + msecs_to_jiffies(timeout_ms);
 162        do {
 163                u8 act;
 164
 165                act = read_cagf(rps);
 166                if (time_after(jiffies, end))
 167                        return act;
 168
 169                /* Target acquired */
 170                if (act == freq)
 171                        return act;
 172
 173                /* Any change within the last N samples? */
 174                if (!memchr_inv(history, act, sizeof(history)))
 175                        return act;
 176
 177                history[i] = act;
 178                i = (i + 1) % ARRAY_SIZE(history);
 179
 180                usleep_range(sleep, 2 * sleep);
 181                sleep *= 2;
 182                if (sleep > timeout_ms * 20)
 183                        sleep = timeout_ms * 20;
 184        } while (1);
 185}
 186
 187static u8 rps_set_check(struct intel_rps *rps, u8 freq)
 188{
 189        mutex_lock(&rps->lock);
 190        GEM_BUG_ON(!intel_rps_is_active(rps));
 191        if (wait_for(!intel_rps_set(rps, freq), 50)) {
 192                mutex_unlock(&rps->lock);
 193                return 0;
 194        }
 195        GEM_BUG_ON(rps->last_freq != freq);
 196        mutex_unlock(&rps->lock);
 197
 198        return wait_for_freq(rps, freq, 50);
 199}
 200
 201static void show_pstate_limits(struct intel_rps *rps)
 202{
 203        struct drm_i915_private *i915 = rps_to_i915(rps);
 204
 205        if (IS_BROXTON(i915)) {
 206                pr_info("P_STATE_CAP[%x]: 0x%08x\n",
 207                        i915_mmio_reg_offset(BXT_RP_STATE_CAP),
 208                        intel_uncore_read(rps_to_uncore(rps),
 209                                          BXT_RP_STATE_CAP));
 210        } else if (GRAPHICS_VER(i915) == 9) {
 211                pr_info("P_STATE_LIMITS[%x]: 0x%08x\n",
 212                        i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS),
 213                        intel_uncore_read(rps_to_uncore(rps),
 214                                          GEN9_RP_STATE_LIMITS));
 215        }
 216}
 217
 218int live_rps_clock_interval(void *arg)
 219{
 220        struct intel_gt *gt = arg;
 221        struct intel_rps *rps = &gt->rps;
 222        void (*saved_work)(struct work_struct *wrk);
 223        struct intel_engine_cs *engine;
 224        enum intel_engine_id id;
 225        struct igt_spinner spin;
 226        int err = 0;
 227
 228        if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
 229                return 0;
 230
 231        if (igt_spinner_init(&spin, gt))
 232                return -ENOMEM;
 233
 234        intel_gt_pm_wait_for_idle(gt);
 235        saved_work = rps->work.func;
 236        rps->work.func = dummy_rps_work;
 237
 238        intel_gt_pm_get(gt);
 239        intel_rps_disable(&gt->rps);
 240
 241        intel_gt_check_clock_frequency(gt);
 242
 243        for_each_engine(engine, gt, id) {
 244                struct i915_request *rq;
 245                u32 cycles;
 246                u64 dt;
 247
 248                if (!intel_engine_can_store_dword(engine))
 249                        continue;
 250
 251                st_engine_heartbeat_disable(engine);
 252
 253                rq = igt_spinner_create_request(&spin,
 254                                                engine->kernel_context,
 255                                                MI_NOOP);
 256                if (IS_ERR(rq)) {
 257                        st_engine_heartbeat_enable(engine);
 258                        err = PTR_ERR(rq);
 259                        break;
 260                }
 261
 262                i915_request_add(rq);
 263
 264                if (!igt_wait_for_spinner(&spin, rq)) {
 265                        pr_err("%s: RPS spinner did not start\n",
 266                               engine->name);
 267                        igt_spinner_end(&spin);
 268                        st_engine_heartbeat_enable(engine);
 269                        intel_gt_set_wedged(engine->gt);
 270                        err = -EIO;
 271                        break;
 272                }
 273
 274                intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
 275
 276                intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0);
 277
 278                /* Set the evaluation interval to infinity! */
 279                intel_uncore_write_fw(gt->uncore,
 280                                      GEN6_RP_UP_EI, 0xffffffff);
 281                intel_uncore_write_fw(gt->uncore,
 282                                      GEN6_RP_UP_THRESHOLD, 0xffffffff);
 283
 284                intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL,
 285                                      GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG);
 286
 287                if (wait_for(intel_uncore_read_fw(gt->uncore,
 288                                                  GEN6_RP_CUR_UP_EI),
 289                             10)) {
 290                        /* Just skip the test; assume lack of HW support */
 291                        pr_notice("%s: rps evaluation interval not ticking\n",
 292                                  engine->name);
 293                        err = -ENODEV;
 294                } else {
 295                        ktime_t dt_[5];
 296                        u32 cycles_[5];
 297                        int i;
 298
 299                        for (i = 0; i < 5; i++) {
 300                                preempt_disable();
 301
 302                                dt_[i] = ktime_get();
 303                                cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
 304
 305                                udelay(1000);
 306
 307                                dt_[i] = ktime_sub(ktime_get(), dt_[i]);
 308                                cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
 309
 310                                preempt_enable();
 311                        }
 312
 313                        /* Use the median of both cycle/dt; close enough */
 314                        sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL);
 315                        cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4;
 316                        sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL);
 317                        dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4);
 318                }
 319
 320                intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0);
 321                intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
 322
 323                igt_spinner_end(&spin);
 324                st_engine_heartbeat_enable(engine);
 325
 326                if (err == 0) {
 327                        u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
 328                        u32 expected =
 329                                intel_gt_ns_to_pm_interval(gt, dt);
 330
 331                        pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n",
 332                                engine->name, cycles, time, dt, expected,
 333                                gt->clock_frequency / 1000);
 334
 335                        if (10 * time < 8 * dt ||
 336                            8 * time > 10 * dt) {
 337                                pr_err("%s: rps clock time does not match walltime!\n",
 338                                       engine->name);
 339                                err = -EINVAL;
 340                        }
 341
 342                        if (10 * expected < 8 * cycles ||
 343                            8 * expected > 10 * cycles) {
 344                                pr_err("%s: walltime does not match rps clock ticks!\n",
 345                                       engine->name);
 346                                err = -EINVAL;
 347                        }
 348                }
 349
 350                if (igt_flush_test(gt->i915))
 351                        err = -EIO;
 352
 353                break; /* once is enough */
 354        }
 355
 356        intel_rps_enable(&gt->rps);
 357        intel_gt_pm_put(gt);
 358
 359        igt_spinner_fini(&spin);
 360
 361        intel_gt_pm_wait_for_idle(gt);
 362        rps->work.func = saved_work;
 363
 364        if (err == -ENODEV) /* skipped, don't report a fail */
 365                err = 0;
 366
 367        return err;
 368}
 369
 370int live_rps_control(void *arg)
 371{
 372        struct intel_gt *gt = arg;
 373        struct intel_rps *rps = &gt->rps;
 374        void (*saved_work)(struct work_struct *wrk);
 375        struct intel_engine_cs *engine;
 376        enum intel_engine_id id;
 377        struct igt_spinner spin;
 378        int err = 0;
 379
 380        /*
 381         * Check that the actual frequency matches our requested frequency,
 382         * to verify our control mechanism. We have to be careful that the
 383         * PCU may throttle the GPU in which case the actual frequency used
 384         * will be lowered than requested.
 385         */
 386
 387        if (!intel_rps_is_enabled(rps))
 388                return 0;
 389
 390        if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
 391                return 0;
 392
 393        if (igt_spinner_init(&spin, gt))
 394                return -ENOMEM;
 395
 396        intel_gt_pm_wait_for_idle(gt);
 397        saved_work = rps->work.func;
 398        rps->work.func = dummy_rps_work;
 399
 400        intel_gt_pm_get(gt);
 401        for_each_engine(engine, gt, id) {
 402                struct i915_request *rq;
 403                ktime_t min_dt, max_dt;
 404                int f, limit;
 405                int min, max;
 406
 407                if (!intel_engine_can_store_dword(engine))
 408                        continue;
 409
 410                st_engine_heartbeat_disable(engine);
 411
 412                rq = igt_spinner_create_request(&spin,
 413                                                engine->kernel_context,
 414                                                MI_NOOP);
 415                if (IS_ERR(rq)) {
 416                        err = PTR_ERR(rq);
 417                        break;
 418                }
 419
 420                i915_request_add(rq);
 421
 422                if (!igt_wait_for_spinner(&spin, rq)) {
 423                        pr_err("%s: RPS spinner did not start\n",
 424                               engine->name);
 425                        igt_spinner_end(&spin);
 426                        st_engine_heartbeat_enable(engine);
 427                        intel_gt_set_wedged(engine->gt);
 428                        err = -EIO;
 429                        break;
 430                }
 431
 432                if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
 433                        pr_err("%s: could not set minimum frequency [%x], only %x!\n",
 434                               engine->name, rps->min_freq, read_cagf(rps));
 435                        igt_spinner_end(&spin);
 436                        st_engine_heartbeat_enable(engine);
 437                        show_pstate_limits(rps);
 438                        err = -EINVAL;
 439                        break;
 440                }
 441
 442                for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
 443                        if (rps_set_check(rps, f) < f)
 444                                break;
 445                }
 446
 447                limit = rps_set_check(rps, f);
 448
 449                if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
 450                        pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
 451                               engine->name, rps->min_freq, read_cagf(rps));
 452                        igt_spinner_end(&spin);
 453                        st_engine_heartbeat_enable(engine);
 454                        show_pstate_limits(rps);
 455                        err = -EINVAL;
 456                        break;
 457                }
 458
 459                max_dt = ktime_get();
 460                max = rps_set_check(rps, limit);
 461                max_dt = ktime_sub(ktime_get(), max_dt);
 462
 463                min_dt = ktime_get();
 464                min = rps_set_check(rps, rps->min_freq);
 465                min_dt = ktime_sub(ktime_get(), min_dt);
 466
 467                igt_spinner_end(&spin);
 468                st_engine_heartbeat_enable(engine);
 469
 470                pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
 471                        engine->name,
 472                        rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
 473                        rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
 474                        limit, intel_gpu_freq(rps, limit),
 475                        min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));
 476
 477                if (limit == rps->min_freq) {
 478                        pr_err("%s: GPU throttled to minimum!\n",
 479                               engine->name);
 480                        show_pstate_limits(rps);
 481                        err = -ENODEV;
 482                        break;
 483                }
 484
 485                if (igt_flush_test(gt->i915)) {
 486                        err = -EIO;
 487                        break;
 488                }
 489        }
 490        intel_gt_pm_put(gt);
 491
 492        igt_spinner_fini(&spin);
 493
 494        intel_gt_pm_wait_for_idle(gt);
 495        rps->work.func = saved_work;
 496
 497        return err;
 498}
 499
 500static void show_pcu_config(struct intel_rps *rps)
 501{
 502        struct drm_i915_private *i915 = rps_to_i915(rps);
 503        unsigned int max_gpu_freq, min_gpu_freq;
 504        intel_wakeref_t wakeref;
 505        int gpu_freq;
 506
 507        if (!HAS_LLC(i915))
 508                return;
 509
 510        min_gpu_freq = rps->min_freq;
 511        max_gpu_freq = rps->max_freq;
 512        if (GRAPHICS_VER(i915) >= 9) {
 513                /* Convert GT frequency to 50 HZ units */
 514                min_gpu_freq /= GEN9_FREQ_SCALER;
 515                max_gpu_freq /= GEN9_FREQ_SCALER;
 516        }
 517
 518        wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm);
 519
 520        pr_info("%5s  %5s  %5s\n", "GPU", "eCPU", "eRing");
 521        for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
 522                int ia_freq = gpu_freq;
 523
 524                snb_pcode_read(i915, GEN6_PCODE_READ_MIN_FREQ_TABLE,
 525                               &ia_freq, NULL);
 526
 527                pr_info("%5d  %5d  %5d\n",
 528                        gpu_freq * 50,
 529                        ((ia_freq >> 0) & 0xff) * 100,
 530                        ((ia_freq >> 8) & 0xff) * 100);
 531        }
 532
 533        intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref);
 534}
 535
 536static u64 __measure_frequency(u32 *cntr, int duration_ms)
 537{
 538        u64 dc, dt;
 539
 540        dt = ktime_get();
 541        dc = READ_ONCE(*cntr);
 542        usleep_range(1000 * duration_ms, 2000 * duration_ms);
 543        dc = READ_ONCE(*cntr) - dc;
 544        dt = ktime_get() - dt;
 545
 546        return div64_u64(1000 * 1000 * dc, dt);
 547}
 548
 549static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
 550{
 551        u64 x[5];
 552        int i;
 553
 554        *freq = rps_set_check(rps, *freq);
 555        for (i = 0; i < 5; i++)
 556                x[i] = __measure_frequency(cntr, 2);
 557        *freq = (*freq + read_cagf(rps)) / 2;
 558
 559        /* A simple triangle filter for better result stability */
 560        sort(x, 5, sizeof(*x), cmp_u64, NULL);
 561        return div_u64(x[1] + 2 * x[2] + x[3], 4);
 562}
 563
 564static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
 565                                  int duration_ms)
 566{
 567        u64 dc, dt;
 568
 569        dt = ktime_get();
 570        dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
 571        usleep_range(1000 * duration_ms, 2000 * duration_ms);
 572        dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
 573        dt = ktime_get() - dt;
 574
 575        return div64_u64(1000 * 1000 * dc, dt);
 576}
 577
 578static u64 measure_cs_frequency_at(struct intel_rps *rps,
 579                                   struct intel_engine_cs *engine,
 580                                   int *freq)
 581{
 582        u64 x[5];
 583        int i;
 584
 585        *freq = rps_set_check(rps, *freq);
 586        for (i = 0; i < 5; i++)
 587                x[i] = __measure_cs_frequency(engine, 2);
 588        *freq = (*freq + read_cagf(rps)) / 2;
 589
 590        /* A simple triangle filter for better result stability */
 591        sort(x, 5, sizeof(*x), cmp_u64, NULL);
 592        return div_u64(x[1] + 2 * x[2] + x[3], 4);
 593}
 594
 595static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
 596{
 597        return f_d * x > f_n * y && f_n * x < f_d * y;
 598}
 599
 600int live_rps_frequency_cs(void *arg)
 601{
 602        void (*saved_work)(struct work_struct *wrk);
 603        struct intel_gt *gt = arg;
 604        struct intel_rps *rps = &gt->rps;
 605        struct intel_engine_cs *engine;
 606        struct pm_qos_request qos;
 607        enum intel_engine_id id;
 608        int err = 0;
 609
 610        /*
 611         * The premise is that the GPU does change frequency at our behest.
 612         * Let's check there is a correspondence between the requested
 613         * frequency, the actual frequency, and the observed clock rate.
 614         */
 615
 616        if (!intel_rps_is_enabled(rps))
 617                return 0;
 618
 619        if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
 620                return 0;
 621
 622        if (CPU_LATENCY >= 0)
 623                cpu_latency_qos_add_request(&qos, CPU_LATENCY);
 624
 625        intel_gt_pm_wait_for_idle(gt);
 626        saved_work = rps->work.func;
 627        rps->work.func = dummy_rps_work;
 628
 629        for_each_engine(engine, gt, id) {
 630                struct i915_request *rq;
 631                struct i915_vma *vma;
 632                u32 *cancel, *cntr;
 633                struct {
 634                        u64 count;
 635                        int freq;
 636                } min, max;
 637
 638                st_engine_heartbeat_disable(engine);
 639
 640                vma = create_spin_counter(engine,
 641                                          engine->kernel_context->vm, false,
 642                                          &cancel, &cntr);
 643                if (IS_ERR(vma)) {
 644                        err = PTR_ERR(vma);
 645                        st_engine_heartbeat_enable(engine);
 646                        break;
 647                }
 648
 649                rq = intel_engine_create_kernel_request(engine);
 650                if (IS_ERR(rq)) {
 651                        err = PTR_ERR(rq);
 652                        goto err_vma;
 653                }
 654
 655                err = i915_request_await_object(rq, vma->obj, false);
 656                if (!err)
 657                        err = i915_vma_move_to_active(vma, rq, 0);
 658                if (!err)
 659                        err = rq->engine->emit_bb_start(rq,
 660                                                        vma->node.start,
 661                                                        PAGE_SIZE, 0);
 662                i915_request_add(rq);
 663                if (err)
 664                        goto err_vma;
 665
 666                if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
 667                             10)) {
 668                        pr_err("%s: timed loop did not start\n",
 669                               engine->name);
 670                        goto err_vma;
 671                }
 672
 673                min.freq = rps->min_freq;
 674                min.count = measure_cs_frequency_at(rps, engine, &min.freq);
 675
 676                max.freq = rps->max_freq;
 677                max.count = measure_cs_frequency_at(rps, engine, &max.freq);
 678
 679                pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
 680                        engine->name,
 681                        min.count, intel_gpu_freq(rps, min.freq),
 682                        max.count, intel_gpu_freq(rps, max.freq),
 683                        (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
 684                                                     max.freq * min.count));
 685
 686                if (!scaled_within(max.freq * min.count,
 687                                   min.freq * max.count,
 688                                   2, 3)) {
 689                        int f;
 690
 691                        pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
 692                               engine->name,
 693                               max.freq * min.count,
 694                               min.freq * max.count);
 695                        show_pcu_config(rps);
 696
 697                        for (f = min.freq + 1; f <= rps->max_freq; f++) {
 698                                int act = f;
 699                                u64 count;
 700
 701                                count = measure_cs_frequency_at(rps, engine, &act);
 702                                if (act < f)
 703                                        break;
 704
 705                                pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
 706                                        engine->name,
 707                                        act, intel_gpu_freq(rps, act), count,
 708                                        (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
 709                                                                     act * min.count));
 710
 711                                f = act; /* may skip ahead [pcu granularity] */
 712                        }
 713
 714                        err = -EINTR; /* ignore error, continue on with test */
 715                }
 716
 717err_vma:
 718                *cancel = MI_BATCH_BUFFER_END;
 719                i915_gem_object_flush_map(vma->obj);
 720                i915_gem_object_unpin_map(vma->obj);
 721                i915_vma_unpin(vma);
 722                i915_vma_unlock(vma);
 723                i915_vma_put(vma);
 724
 725                st_engine_heartbeat_enable(engine);
 726                if (igt_flush_test(gt->i915))
 727                        err = -EIO;
 728                if (err)
 729                        break;
 730        }
 731
 732        intel_gt_pm_wait_for_idle(gt);
 733        rps->work.func = saved_work;
 734
 735        if (CPU_LATENCY >= 0)
 736                cpu_latency_qos_remove_request(&qos);
 737
 738        return err;
 739}
 740
 741int live_rps_frequency_srm(void *arg)
 742{
 743        void (*saved_work)(struct work_struct *wrk);
 744        struct intel_gt *gt = arg;
 745        struct intel_rps *rps = &gt->rps;
 746        struct intel_engine_cs *engine;
 747        struct pm_qos_request qos;
 748        enum intel_engine_id id;
 749        int err = 0;
 750
 751        /*
 752         * The premise is that the GPU does change frequency at our behest.
 753         * Let's check there is a correspondence between the requested
 754         * frequency, the actual frequency, and the observed clock rate.
 755         */
 756
 757        if (!intel_rps_is_enabled(rps))
 758                return 0;
 759
 760        if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
 761                return 0;
 762
 763        if (CPU_LATENCY >= 0)
 764                cpu_latency_qos_add_request(&qos, CPU_LATENCY);
 765
 766        intel_gt_pm_wait_for_idle(gt);
 767        saved_work = rps->work.func;
 768        rps->work.func = dummy_rps_work;
 769
 770        for_each_engine(engine, gt, id) {
 771                struct i915_request *rq;
 772                struct i915_vma *vma;
 773                u32 *cancel, *cntr;
 774                struct {
 775                        u64 count;
 776                        int freq;
 777                } min, max;
 778
 779                st_engine_heartbeat_disable(engine);
 780
 781                vma = create_spin_counter(engine,
 782                                          engine->kernel_context->vm, true,
 783                                          &cancel, &cntr);
 784                if (IS_ERR(vma)) {
 785                        err = PTR_ERR(vma);
 786                        st_engine_heartbeat_enable(engine);
 787                        break;
 788                }
 789
 790                rq = intel_engine_create_kernel_request(engine);
 791                if (IS_ERR(rq)) {
 792                        err = PTR_ERR(rq);
 793                        goto err_vma;
 794                }
 795
 796                err = i915_request_await_object(rq, vma->obj, false);
 797                if (!err)
 798                        err = i915_vma_move_to_active(vma, rq, 0);
 799                if (!err)
 800                        err = rq->engine->emit_bb_start(rq,
 801                                                        vma->node.start,
 802                                                        PAGE_SIZE, 0);
 803                i915_request_add(rq);
 804                if (err)
 805                        goto err_vma;
 806
 807                if (wait_for(READ_ONCE(*cntr), 10)) {
 808                        pr_err("%s: timed loop did not start\n",
 809                               engine->name);
 810                        goto err_vma;
 811                }
 812
 813                min.freq = rps->min_freq;
 814                min.count = measure_frequency_at(rps, cntr, &min.freq);
 815
 816                max.freq = rps->max_freq;
 817                max.count = measure_frequency_at(rps, cntr, &max.freq);
 818
 819                pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
 820                        engine->name,
 821                        min.count, intel_gpu_freq(rps, min.freq),
 822                        max.count, intel_gpu_freq(rps, max.freq),
 823                        (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
 824                                                     max.freq * min.count));
 825
 826                if (!scaled_within(max.freq * min.count,
 827                                   min.freq * max.count,
 828                                   1, 2)) {
 829                        int f;
 830
 831                        pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
 832                               engine->name,
 833                               max.freq * min.count,
 834                               min.freq * max.count);
 835                        show_pcu_config(rps);
 836
 837                        for (f = min.freq + 1; f <= rps->max_freq; f++) {
 838                                int act = f;
 839                                u64 count;
 840
 841                                count = measure_frequency_at(rps, cntr, &act);
 842                                if (act < f)
 843                                        break;
 844
 845                                pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
 846                                        engine->name,
 847                                        act, intel_gpu_freq(rps, act), count,
 848                                        (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
 849                                                                     act * min.count));
 850
 851                                f = act; /* may skip ahead [pcu granularity] */
 852                        }
 853
 854                        err = -EINTR; /* ignore error, continue on with test */
 855                }
 856
 857err_vma:
 858                *cancel = MI_BATCH_BUFFER_END;
 859                i915_gem_object_flush_map(vma->obj);
 860                i915_gem_object_unpin_map(vma->obj);
 861                i915_vma_unpin(vma);
 862                i915_vma_unlock(vma);
 863                i915_vma_put(vma);
 864
 865                st_engine_heartbeat_enable(engine);
 866                if (igt_flush_test(gt->i915))
 867                        err = -EIO;
 868                if (err)
 869                        break;
 870        }
 871
 872        intel_gt_pm_wait_for_idle(gt);
 873        rps->work.func = saved_work;
 874
 875        if (CPU_LATENCY >= 0)
 876                cpu_latency_qos_remove_request(&qos);
 877
 878        return err;
 879}
 880
 881static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
 882{
 883        /* Flush any previous EI */
 884        usleep_range(timeout_us, 2 * timeout_us);
 885
 886        /* Reset the interrupt status */
 887        rps_disable_interrupts(rps);
 888        GEM_BUG_ON(rps->pm_iir);
 889        rps_enable_interrupts(rps);
 890
 891        /* And then wait for the timeout, for real this time */
 892        usleep_range(2 * timeout_us, 3 * timeout_us);
 893}
 894
 895static int __rps_up_interrupt(struct intel_rps *rps,
 896                              struct intel_engine_cs *engine,
 897                              struct igt_spinner *spin)
 898{
 899        struct intel_uncore *uncore = engine->uncore;
 900        struct i915_request *rq;
 901        u32 timeout;
 902
 903        if (!intel_engine_can_store_dword(engine))
 904                return 0;
 905
 906        rps_set_check(rps, rps->min_freq);
 907
 908        rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
 909        if (IS_ERR(rq))
 910                return PTR_ERR(rq);
 911
 912        i915_request_get(rq);
 913        i915_request_add(rq);
 914
 915        if (!igt_wait_for_spinner(spin, rq)) {
 916                pr_err("%s: RPS spinner did not start\n",
 917                       engine->name);
 918                i915_request_put(rq);
 919                intel_gt_set_wedged(engine->gt);
 920                return -EIO;
 921        }
 922
 923        if (!intel_rps_is_active(rps)) {
 924                pr_err("%s: RPS not enabled on starting spinner\n",
 925                       engine->name);
 926                igt_spinner_end(spin);
 927                i915_request_put(rq);
 928                return -EINVAL;
 929        }
 930
 931        if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
 932                pr_err("%s: RPS did not register UP interrupt\n",
 933                       engine->name);
 934                i915_request_put(rq);
 935                return -EINVAL;
 936        }
 937
 938        if (rps->last_freq != rps->min_freq) {
 939                pr_err("%s: RPS did not program min frequency\n",
 940                       engine->name);
 941                i915_request_put(rq);
 942                return -EINVAL;
 943        }
 944
 945        timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
 946        timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
 947        timeout = DIV_ROUND_UP(timeout, 1000);
 948
 949        sleep_for_ei(rps, timeout);
 950        GEM_BUG_ON(i915_request_completed(rq));
 951
 952        igt_spinner_end(spin);
 953        i915_request_put(rq);
 954
 955        if (rps->cur_freq != rps->min_freq) {
 956                pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
 957                       engine->name, intel_rps_read_actual_frequency(rps));
 958                return -EINVAL;
 959        }
 960
 961        if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
 962                pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
 963                       engine->name, rps->pm_iir,
 964                       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
 965                       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
 966                       intel_uncore_read(uncore, GEN6_RP_UP_EI));
 967                return -EINVAL;
 968        }
 969
 970        return 0;
 971}
 972
 973static int __rps_down_interrupt(struct intel_rps *rps,
 974                                struct intel_engine_cs *engine)
 975{
 976        struct intel_uncore *uncore = engine->uncore;
 977        u32 timeout;
 978
 979        rps_set_check(rps, rps->max_freq);
 980
 981        if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
 982                pr_err("%s: RPS did not register DOWN interrupt\n",
 983                       engine->name);
 984                return -EINVAL;
 985        }
 986
 987        if (rps->last_freq != rps->max_freq) {
 988                pr_err("%s: RPS did not program max frequency\n",
 989                       engine->name);
 990                return -EINVAL;
 991        }
 992
 993        timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
 994        timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
 995        timeout = DIV_ROUND_UP(timeout, 1000);
 996
 997        sleep_for_ei(rps, timeout);
 998
 999        if (rps->cur_freq != rps->max_freq) {
1000                pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
1001                       engine->name,
1002                       intel_rps_read_actual_frequency(rps));
1003                return -EINVAL;
1004        }
1005
1006        if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
1007                pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
1008                       engine->name, rps->pm_iir,
1009                       intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
1010                       intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
1011                       intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
1012                       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
1013                       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
1014                       intel_uncore_read(uncore, GEN6_RP_UP_EI));
1015                return -EINVAL;
1016        }
1017
1018        return 0;
1019}
1020
1021int live_rps_interrupt(void *arg)
1022{
1023        struct intel_gt *gt = arg;
1024        struct intel_rps *rps = &gt->rps;
1025        void (*saved_work)(struct work_struct *wrk);
1026        struct intel_engine_cs *engine;
1027        enum intel_engine_id id;
1028        struct igt_spinner spin;
1029        u32 pm_events;
1030        int err = 0;
1031
1032        /*
1033         * First, let's check whether or not we are receiving interrupts.
1034         */
1035
1036        if (!intel_rps_has_interrupts(rps) || GRAPHICS_VER(gt->i915) < 6)
1037                return 0;
1038
1039        intel_gt_pm_get(gt);
1040        pm_events = rps->pm_events;
1041        intel_gt_pm_put(gt);
1042        if (!pm_events) {
1043                pr_err("No RPS PM events registered, but RPS is enabled?\n");
1044                return -ENODEV;
1045        }
1046
1047        if (igt_spinner_init(&spin, gt))
1048                return -ENOMEM;
1049
1050        intel_gt_pm_wait_for_idle(gt);
1051        saved_work = rps->work.func;
1052        rps->work.func = dummy_rps_work;
1053
1054        for_each_engine(engine, gt, id) {
1055                /* Keep the engine busy with a spinner; expect an UP! */
1056                if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
1057                        intel_gt_pm_wait_for_idle(engine->gt);
1058                        GEM_BUG_ON(intel_rps_is_active(rps));
1059
1060                        st_engine_heartbeat_disable(engine);
1061
1062                        err = __rps_up_interrupt(rps, engine, &spin);
1063
1064                        st_engine_heartbeat_enable(engine);
1065                        if (err)
1066                                goto out;
1067
1068                        intel_gt_pm_wait_for_idle(engine->gt);
1069                }
1070
1071                /* Keep the engine awake but idle and check for DOWN */
1072                if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
1073                        st_engine_heartbeat_disable(engine);
1074                        intel_rc6_disable(&gt->rc6);
1075
1076                        err = __rps_down_interrupt(rps, engine);
1077
1078                        intel_rc6_enable(&gt->rc6);
1079                        st_engine_heartbeat_enable(engine);
1080                        if (err)
1081                                goto out;
1082                }
1083        }
1084
1085out:
1086        if (igt_flush_test(gt->i915))
1087                err = -EIO;
1088
1089        igt_spinner_fini(&spin);
1090
1091        intel_gt_pm_wait_for_idle(gt);
1092        rps->work.func = saved_work;
1093
1094        return err;
1095}
1096
1097static u64 __measure_power(int duration_ms)
1098{
1099        u64 dE, dt;
1100
1101        dt = ktime_get();
1102        dE = librapl_energy_uJ();
1103        usleep_range(1000 * duration_ms, 2000 * duration_ms);
1104        dE = librapl_energy_uJ() - dE;
1105        dt = ktime_get() - dt;
1106
1107        return div64_u64(1000 * 1000 * dE, dt);
1108}
1109
1110static u64 measure_power_at(struct intel_rps *rps, int *freq)
1111{
1112        u64 x[5];
1113        int i;
1114
1115        *freq = rps_set_check(rps, *freq);
1116        for (i = 0; i < 5; i++)
1117                x[i] = __measure_power(5);
1118        *freq = (*freq + read_cagf(rps)) / 2;
1119
1120        /* A simple triangle filter for better result stability */
1121        sort(x, 5, sizeof(*x), cmp_u64, NULL);
1122        return div_u64(x[1] + 2 * x[2] + x[3], 4);
1123}
1124
1125int live_rps_power(void *arg)
1126{
1127        struct intel_gt *gt = arg;
1128        struct intel_rps *rps = &gt->rps;
1129        void (*saved_work)(struct work_struct *wrk);
1130        struct intel_engine_cs *engine;
1131        enum intel_engine_id id;
1132        struct igt_spinner spin;
1133        int err = 0;
1134
1135        /*
1136         * Our fundamental assumption is that running at lower frequency
1137         * actually saves power. Let's see if our RAPL measurement support
1138         * that theory.
1139         */
1140
1141        if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
1142                return 0;
1143
1144        if (!librapl_supported(gt->i915))
1145                return 0;
1146
1147        if (igt_spinner_init(&spin, gt))
1148                return -ENOMEM;
1149
1150        intel_gt_pm_wait_for_idle(gt);
1151        saved_work = rps->work.func;
1152        rps->work.func = dummy_rps_work;
1153
1154        for_each_engine(engine, gt, id) {
1155                struct i915_request *rq;
1156                struct {
1157                        u64 power;
1158                        int freq;
1159                } min, max;
1160
1161                if (!intel_engine_can_store_dword(engine))
1162                        continue;
1163
1164                st_engine_heartbeat_disable(engine);
1165
1166                rq = igt_spinner_create_request(&spin,
1167                                                engine->kernel_context,
1168                                                MI_NOOP);
1169                if (IS_ERR(rq)) {
1170                        st_engine_heartbeat_enable(engine);
1171                        err = PTR_ERR(rq);
1172                        break;
1173                }
1174
1175                i915_request_add(rq);
1176
1177                if (!igt_wait_for_spinner(&spin, rq)) {
1178                        pr_err("%s: RPS spinner did not start\n",
1179                               engine->name);
1180                        igt_spinner_end(&spin);
1181                        st_engine_heartbeat_enable(engine);
1182                        intel_gt_set_wedged(engine->gt);
1183                        err = -EIO;
1184                        break;
1185                }
1186
1187                max.freq = rps->max_freq;
1188                max.power = measure_power_at(rps, &max.freq);
1189
1190                min.freq = rps->min_freq;
1191                min.power = measure_power_at(rps, &min.freq);
1192
1193                igt_spinner_end(&spin);
1194                st_engine_heartbeat_enable(engine);
1195
1196                pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
1197                        engine->name,
1198                        min.power, intel_gpu_freq(rps, min.freq),
1199                        max.power, intel_gpu_freq(rps, max.freq));
1200
1201                if (10 * min.freq >= 9 * max.freq) {
1202                        pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMhz]\n",
1203                                  min.freq, intel_gpu_freq(rps, min.freq),
1204                                  max.freq, intel_gpu_freq(rps, max.freq));
1205                        continue;
1206                }
1207
1208                if (11 * min.power > 10 * max.power) {
1209                        pr_err("%s: did not conserve power when setting lower frequency!\n",
1210                               engine->name);
1211                        err = -EINVAL;
1212                        break;
1213                }
1214
1215                if (igt_flush_test(gt->i915)) {
1216                        err = -EIO;
1217                        break;
1218                }
1219        }
1220
1221        igt_spinner_fini(&spin);
1222
1223        intel_gt_pm_wait_for_idle(gt);
1224        rps->work.func = saved_work;
1225
1226        return err;
1227}
1228
1229int live_rps_dynamic(void *arg)
1230{
1231        struct intel_gt *gt = arg;
1232        struct intel_rps *rps = &gt->rps;
1233        struct intel_engine_cs *engine;
1234        enum intel_engine_id id;
1235        struct igt_spinner spin;
1236        int err = 0;
1237
1238        /*
1239         * We've looked at the bascs, and have established that we
1240         * can change the clock frequency and that the HW will generate
1241         * interrupts based on load. Now we check how we integrate those
1242         * moving parts into dynamic reclocking based on load.
1243         */
1244
1245        if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
1246                return 0;
1247
1248        if (igt_spinner_init(&spin, gt))
1249                return -ENOMEM;
1250
1251        if (intel_rps_has_interrupts(rps))
1252                pr_info("RPS has interrupt support\n");
1253        if (intel_rps_uses_timer(rps))
1254                pr_info("RPS has timer support\n");
1255
1256        for_each_engine(engine, gt, id) {
1257                struct i915_request *rq;
1258                struct {
1259                        ktime_t dt;
1260                        u8 freq;
1261                } min, max;
1262
1263                if (!intel_engine_can_store_dword(engine))
1264                        continue;
1265
1266                intel_gt_pm_wait_for_idle(gt);
1267                GEM_BUG_ON(intel_rps_is_active(rps));
1268                rps->cur_freq = rps->min_freq;
1269
1270                intel_engine_pm_get(engine);
1271                intel_rc6_disable(&gt->rc6);
1272                GEM_BUG_ON(rps->last_freq != rps->min_freq);
1273
1274                rq = igt_spinner_create_request(&spin,
1275                                                engine->kernel_context,
1276                                                MI_NOOP);
1277                if (IS_ERR(rq)) {
1278                        err = PTR_ERR(rq);
1279                        goto err;
1280                }
1281
1282                i915_request_add(rq);
1283
1284                max.dt = ktime_get();
1285                max.freq = wait_for_freq(rps, rps->max_freq, 500);
1286                max.dt = ktime_sub(ktime_get(), max.dt);
1287
1288                igt_spinner_end(&spin);
1289
1290                min.dt = ktime_get();
1291                min.freq = wait_for_freq(rps, rps->min_freq, 2000);
1292                min.dt = ktime_sub(ktime_get(), min.dt);
1293
1294                pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n",
1295                        engine->name,
1296                        max.freq, intel_gpu_freq(rps, max.freq),
1297                        ktime_to_ns(max.dt),
1298                        min.freq, intel_gpu_freq(rps, min.freq),
1299                        ktime_to_ns(min.dt));
1300                if (min.freq >= max.freq) {
1301                        pr_err("%s: dynamic reclocking of spinner failed\n!",
1302                               engine->name);
1303                        err = -EINVAL;
1304                }
1305
1306err:
1307                intel_rc6_enable(&gt->rc6);
1308                intel_engine_pm_put(engine);
1309
1310                if (igt_flush_test(gt->i915))
1311                        err = -EIO;
1312                if (err)
1313                        break;
1314        }
1315
1316        igt_spinner_fini(&spin);
1317
1318        return err;
1319}
1320