linux/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
<<
>>
Prefs
   1// SPDX-License-Identifier: MIT
   2/*
   3 * Copyright © 2018 Intel Corporation
   4 */
   5
   6#include <linux/sort.h>
   7
   8#include "i915_drv.h"
   9
  10#include "intel_gt_requests.h"
  11#include "i915_selftest.h"
  12#include "selftest_engine_heartbeat.h"
  13
  14static void reset_heartbeat(struct intel_engine_cs *engine)
  15{
  16        intel_engine_set_heartbeat(engine,
  17                                   engine->defaults.heartbeat_interval_ms);
  18}
  19
  20static int timeline_sync(struct intel_timeline *tl)
  21{
  22        struct dma_fence *fence;
  23        long timeout;
  24
  25        fence = i915_active_fence_get(&tl->last_request);
  26        if (!fence)
  27                return 0;
  28
  29        timeout = dma_fence_wait_timeout(fence, true, HZ / 2);
  30        dma_fence_put(fence);
  31        if (timeout < 0)
  32                return timeout;
  33
  34        return 0;
  35}
  36
  37static int engine_sync_barrier(struct intel_engine_cs *engine)
  38{
  39        return timeline_sync(engine->kernel_context->timeline);
  40}
  41
  42struct pulse {
  43        struct i915_active active;
  44        struct kref kref;
  45};
  46
  47static int pulse_active(struct i915_active *active)
  48{
  49        kref_get(&container_of(active, struct pulse, active)->kref);
  50        return 0;
  51}
  52
  53static void pulse_free(struct kref *kref)
  54{
  55        struct pulse *p = container_of(kref, typeof(*p), kref);
  56
  57        i915_active_fini(&p->active);
  58        kfree(p);
  59}
  60
  61static void pulse_put(struct pulse *p)
  62{
  63        kref_put(&p->kref, pulse_free);
  64}
  65
  66static void pulse_retire(struct i915_active *active)
  67{
  68        pulse_put(container_of(active, struct pulse, active));
  69}
  70
  71static struct pulse *pulse_create(void)
  72{
  73        struct pulse *p;
  74
  75        p = kmalloc(sizeof(*p), GFP_KERNEL);
  76        if (!p)
  77                return p;
  78
  79        kref_init(&p->kref);
  80        i915_active_init(&p->active, pulse_active, pulse_retire, 0);
  81
  82        return p;
  83}
  84
  85static void pulse_unlock_wait(struct pulse *p)
  86{
  87        i915_active_unlock_wait(&p->active);
  88}
  89
  90static int __live_idle_pulse(struct intel_engine_cs *engine,
  91                             int (*fn)(struct intel_engine_cs *cs))
  92{
  93        struct pulse *p;
  94        int err;
  95
  96        GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
  97
  98        p = pulse_create();
  99        if (!p)
 100                return -ENOMEM;
 101
 102        err = i915_active_acquire(&p->active);
 103        if (err)
 104                goto out;
 105
 106        err = i915_active_acquire_preallocate_barrier(&p->active, engine);
 107        if (err) {
 108                i915_active_release(&p->active);
 109                goto out;
 110        }
 111
 112        i915_active_acquire_barrier(&p->active);
 113        i915_active_release(&p->active);
 114
 115        GEM_BUG_ON(i915_active_is_idle(&p->active));
 116        GEM_BUG_ON(llist_empty(&engine->barrier_tasks));
 117
 118        err = fn(engine);
 119        if (err)
 120                goto out;
 121
 122        GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
 123
 124        if (engine_sync_barrier(engine)) {
 125                struct drm_printer m = drm_err_printer("pulse");
 126
 127                pr_err("%s: no heartbeat pulse?\n", engine->name);
 128                intel_engine_dump(engine, &m, "%s", engine->name);
 129
 130                err = -ETIME;
 131                goto out;
 132        }
 133
 134        GEM_BUG_ON(READ_ONCE(engine->serial) != engine->wakeref_serial);
 135
 136        pulse_unlock_wait(p); /* synchronize with the retirement callback */
 137
 138        if (!i915_active_is_idle(&p->active)) {
 139                struct drm_printer m = drm_err_printer("pulse");
 140
 141                pr_err("%s: heartbeat pulse did not flush idle tasks\n",
 142                       engine->name);
 143                i915_active_print(&p->active, &m);
 144
 145                err = -EINVAL;
 146                goto out;
 147        }
 148
 149out:
 150        pulse_put(p);
 151        return err;
 152}
 153
 154static int live_idle_flush(void *arg)
 155{
 156        struct intel_gt *gt = arg;
 157        struct intel_engine_cs *engine;
 158        enum intel_engine_id id;
 159        int err = 0;
 160
 161        /* Check that we can flush the idle barriers */
 162
 163        for_each_engine(engine, gt, id) {
 164                st_engine_heartbeat_disable(engine);
 165                err = __live_idle_pulse(engine, intel_engine_flush_barriers);
 166                st_engine_heartbeat_enable(engine);
 167                if (err)
 168                        break;
 169        }
 170
 171        return err;
 172}
 173
 174static int live_idle_pulse(void *arg)
 175{
 176        struct intel_gt *gt = arg;
 177        struct intel_engine_cs *engine;
 178        enum intel_engine_id id;
 179        int err = 0;
 180
 181        /* Check that heartbeat pulses flush the idle barriers */
 182
 183        for_each_engine(engine, gt, id) {
 184                st_engine_heartbeat_disable(engine);
 185                err = __live_idle_pulse(engine, intel_engine_pulse);
 186                st_engine_heartbeat_enable(engine);
 187                if (err && err != -ENODEV)
 188                        break;
 189
 190                err = 0;
 191        }
 192
 193        return err;
 194}
 195
 196static int cmp_u32(const void *_a, const void *_b)
 197{
 198        const u32 *a = _a, *b = _b;
 199
 200        return *a - *b;
 201}
 202
 203static int __live_heartbeat_fast(struct intel_engine_cs *engine)
 204{
 205        const unsigned int error_threshold = max(20000u, jiffies_to_usecs(6));
 206        struct intel_context *ce;
 207        struct i915_request *rq;
 208        ktime_t t0, t1;
 209        u32 times[5];
 210        int err;
 211        int i;
 212
 213        ce = intel_context_create(engine);
 214        if (IS_ERR(ce))
 215                return PTR_ERR(ce);
 216
 217        intel_engine_pm_get(engine);
 218
 219        err = intel_engine_set_heartbeat(engine, 1);
 220        if (err)
 221                goto err_pm;
 222
 223        for (i = 0; i < ARRAY_SIZE(times); i++) {
 224                do {
 225                        /* Manufacture a tick */
 226                        intel_engine_park_heartbeat(engine);
 227                        GEM_BUG_ON(engine->heartbeat.systole);
 228                        engine->serial++; /*  pretend we are not idle! */
 229                        intel_engine_unpark_heartbeat(engine);
 230
 231                        flush_delayed_work(&engine->heartbeat.work);
 232                        if (!delayed_work_pending(&engine->heartbeat.work)) {
 233                                pr_err("%s: heartbeat %d did not start\n",
 234                                       engine->name, i);
 235                                err = -EINVAL;
 236                                goto err_pm;
 237                        }
 238
 239                        rcu_read_lock();
 240                        rq = READ_ONCE(engine->heartbeat.systole);
 241                        if (rq)
 242                                rq = i915_request_get_rcu(rq);
 243                        rcu_read_unlock();
 244                } while (!rq);
 245
 246                t0 = ktime_get();
 247                while (rq == READ_ONCE(engine->heartbeat.systole))
 248                        yield(); /* work is on the local cpu! */
 249                t1 = ktime_get();
 250
 251                i915_request_put(rq);
 252                times[i] = ktime_us_delta(t1, t0);
 253        }
 254
 255        sort(times, ARRAY_SIZE(times), sizeof(times[0]), cmp_u32, NULL);
 256
 257        pr_info("%s: Heartbeat delay: %uus [%u, %u]\n",
 258                engine->name,
 259                times[ARRAY_SIZE(times) / 2],
 260                times[0],
 261                times[ARRAY_SIZE(times) - 1]);
 262
 263        /*
 264         * Ideally, the upper bound on min work delay would be something like
 265         * 2 * 2 (worst), +1 for scheduling, +1 for slack. In practice, we
 266         * are, even with system_wq_highpri, at the mercy of the CPU scheduler
 267         * and may be stuck behind some slow work for many millisecond. Such
 268         * as our very own display workers.
 269         */
 270        if (times[ARRAY_SIZE(times) / 2] > error_threshold) {
 271                pr_err("%s: Heartbeat delay was %uus, expected less than %dus\n",
 272                       engine->name,
 273                       times[ARRAY_SIZE(times) / 2],
 274                       error_threshold);
 275                err = -EINVAL;
 276        }
 277
 278        reset_heartbeat(engine);
 279err_pm:
 280        intel_engine_pm_put(engine);
 281        intel_context_put(ce);
 282        return err;
 283}
 284
 285static int live_heartbeat_fast(void *arg)
 286{
 287        struct intel_gt *gt = arg;
 288        struct intel_engine_cs *engine;
 289        enum intel_engine_id id;
 290        int err = 0;
 291
 292        /* Check that the heartbeat ticks at the desired rate. */
 293        if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL))
 294                return 0;
 295
 296        for_each_engine(engine, gt, id) {
 297                err = __live_heartbeat_fast(engine);
 298                if (err)
 299                        break;
 300        }
 301
 302        return err;
 303}
 304
 305static int __live_heartbeat_off(struct intel_engine_cs *engine)
 306{
 307        int err;
 308
 309        intel_engine_pm_get(engine);
 310
 311        engine->serial++;
 312        flush_delayed_work(&engine->heartbeat.work);
 313        if (!delayed_work_pending(&engine->heartbeat.work)) {
 314                pr_err("%s: heartbeat not running\n",
 315                       engine->name);
 316                err = -EINVAL;
 317                goto err_pm;
 318        }
 319
 320        err = intel_engine_set_heartbeat(engine, 0);
 321        if (err)
 322                goto err_pm;
 323
 324        engine->serial++;
 325        flush_delayed_work(&engine->heartbeat.work);
 326        if (delayed_work_pending(&engine->heartbeat.work)) {
 327                pr_err("%s: heartbeat still running\n",
 328                       engine->name);
 329                err = -EINVAL;
 330                goto err_beat;
 331        }
 332
 333        if (READ_ONCE(engine->heartbeat.systole)) {
 334                pr_err("%s: heartbeat still allocated\n",
 335                       engine->name);
 336                err = -EINVAL;
 337                goto err_beat;
 338        }
 339
 340err_beat:
 341        reset_heartbeat(engine);
 342err_pm:
 343        intel_engine_pm_put(engine);
 344        return err;
 345}
 346
 347static int live_heartbeat_off(void *arg)
 348{
 349        struct intel_gt *gt = arg;
 350        struct intel_engine_cs *engine;
 351        enum intel_engine_id id;
 352        int err = 0;
 353
 354        /* Check that we can turn off heartbeat and not interrupt VIP */
 355        if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL))
 356                return 0;
 357
 358        for_each_engine(engine, gt, id) {
 359                if (!intel_engine_has_preemption(engine))
 360                        continue;
 361
 362                err = __live_heartbeat_off(engine);
 363                if (err)
 364                        break;
 365        }
 366
 367        return err;
 368}
 369
 370int intel_heartbeat_live_selftests(struct drm_i915_private *i915)
 371{
 372        static const struct i915_subtest tests[] = {
 373                SUBTEST(live_idle_flush),
 374                SUBTEST(live_idle_pulse),
 375                SUBTEST(live_heartbeat_fast),
 376                SUBTEST(live_heartbeat_off),
 377        };
 378        int saved_hangcheck;
 379        int err;
 380
 381        if (intel_gt_is_wedged(&i915->gt))
 382                return 0;
 383
 384        saved_hangcheck = i915->params.enable_hangcheck;
 385        i915->params.enable_hangcheck = INT_MAX;
 386
 387        err = intel_gt_live_subtests(tests, &i915->gt);
 388
 389        i915->params.enable_hangcheck = saved_hangcheck;
 390        return err;
 391}
 392
 393void st_engine_heartbeat_disable(struct intel_engine_cs *engine)
 394{
 395        engine->props.heartbeat_interval_ms = 0;
 396
 397        intel_engine_pm_get(engine);
 398        intel_engine_park_heartbeat(engine);
 399}
 400
 401void st_engine_heartbeat_enable(struct intel_engine_cs *engine)
 402{
 403        intel_engine_pm_put(engine);
 404
 405        engine->props.heartbeat_interval_ms =
 406                engine->defaults.heartbeat_interval_ms;
 407}
 408
 409void st_engine_heartbeat_disable_no_pm(struct intel_engine_cs *engine)
 410{
 411        engine->props.heartbeat_interval_ms = 0;
 412
 413        /*
 414         * Park the heartbeat but without holding the PM lock as that
 415         * makes the engines appear not-idle. Note that if/when unpark
 416         * is called due to the PM lock being acquired later the
 417         * heartbeat still won't be enabled because of the above = 0.
 418         */
 419        if (intel_engine_pm_get_if_awake(engine)) {
 420                intel_engine_park_heartbeat(engine);
 421                intel_engine_pm_put(engine);
 422        }
 423}
 424
 425void st_engine_heartbeat_enable_no_pm(struct intel_engine_cs *engine)
 426{
 427        engine->props.heartbeat_interval_ms =
 428                engine->defaults.heartbeat_interval_ms;
 429}
 430