linux/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
<<
>>
Prefs
   1/*
   2 * SPDX-License-Identifier: MIT
   3 *
   4 * Copyright © 2018 Intel Corporation
   5 */
   6
   7#include <linux/sort.h>
   8
   9#include "i915_drv.h"
  10
  11#include "intel_gt_requests.h"
  12#include "i915_selftest.h"
  13#include "selftest_engine_heartbeat.h"
  14
  15static int timeline_sync(struct intel_timeline *tl)
  16{
  17        struct dma_fence *fence;
  18        long timeout;
  19
  20        fence = i915_active_fence_get(&tl->last_request);
  21        if (!fence)
  22                return 0;
  23
  24        timeout = dma_fence_wait_timeout(fence, true, HZ / 2);
  25        dma_fence_put(fence);
  26        if (timeout < 0)
  27                return timeout;
  28
  29        return 0;
  30}
  31
  32static int engine_sync_barrier(struct intel_engine_cs *engine)
  33{
  34        return timeline_sync(engine->kernel_context->timeline);
  35}
  36
/*
 * Test fixture pairing an i915_active tracker with a reference count.
 * Instances are allocated by pulse_create() and handed to kfree() by
 * pulse_free() when the last reference is dropped.
 */
struct pulse {
	struct i915_active active; /* initialised via i915_active_init() */
	struct kref kref; /* controls the lifetime of this allocation */
};
  41
  42static int pulse_active(struct i915_active *active)
  43{
  44        kref_get(&container_of(active, struct pulse, active)->kref);
  45        return 0;
  46}
  47
  48static void pulse_free(struct kref *kref)
  49{
  50        kfree(container_of(kref, struct pulse, kref));
  51}
  52
  53static void pulse_put(struct pulse *p)
  54{
  55        kref_put(&p->kref, pulse_free);
  56}
  57
  58static void pulse_retire(struct i915_active *active)
  59{
  60        pulse_put(container_of(active, struct pulse, active));
  61}
  62
  63static struct pulse *pulse_create(void)
  64{
  65        struct pulse *p;
  66
  67        p = kmalloc(sizeof(*p), GFP_KERNEL);
  68        if (!p)
  69                return p;
  70
  71        kref_init(&p->kref);
  72        i915_active_init(&p->active, pulse_active, pulse_retire);
  73
  74        return p;
  75}
  76
  77static void pulse_unlock_wait(struct pulse *p)
  78{
  79        i915_active_unlock_wait(&p->active);
  80}
  81
  82static int __live_idle_pulse(struct intel_engine_cs *engine,
  83                             int (*fn)(struct intel_engine_cs *cs))
  84{
  85        struct pulse *p;
  86        int err;
  87
  88        GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
  89
  90        p = pulse_create();
  91        if (!p)
  92                return -ENOMEM;
  93
  94        err = i915_active_acquire(&p->active);
  95        if (err)
  96                goto out;
  97
  98        err = i915_active_acquire_preallocate_barrier(&p->active, engine);
  99        if (err) {
 100                i915_active_release(&p->active);
 101                goto out;
 102        }
 103
 104        i915_active_acquire_barrier(&p->active);
 105        i915_active_release(&p->active);
 106
 107        GEM_BUG_ON(i915_active_is_idle(&p->active));
 108        GEM_BUG_ON(llist_empty(&engine->barrier_tasks));
 109
 110        err = fn(engine);
 111        if (err)
 112                goto out;
 113
 114        GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
 115
 116        if (engine_sync_barrier(engine)) {
 117                struct drm_printer m = drm_err_printer("pulse");
 118
 119                pr_err("%s: no heartbeat pulse?\n", engine->name);
 120                intel_engine_dump(engine, &m, "%s", engine->name);
 121
 122                err = -ETIME;
 123                goto out;
 124        }
 125
 126        GEM_BUG_ON(READ_ONCE(engine->serial) != engine->wakeref_serial);
 127
 128        pulse_unlock_wait(p); /* synchronize with the retirement callback */
 129
 130        if (!i915_active_is_idle(&p->active)) {
 131                struct drm_printer m = drm_err_printer("pulse");
 132
 133                pr_err("%s: heartbeat pulse did not flush idle tasks\n",
 134                       engine->name);
 135                i915_active_print(&p->active, &m);
 136
 137                err = -EINVAL;
 138                goto out;
 139        }
 140
 141out:
 142        pulse_put(p);
 143        return err;
 144}
 145
 146static int live_idle_flush(void *arg)
 147{
 148        struct intel_gt *gt = arg;
 149        struct intel_engine_cs *engine;
 150        enum intel_engine_id id;
 151        int err = 0;
 152
 153        /* Check that we can flush the idle barriers */
 154
 155        for_each_engine(engine, gt, id) {
 156                st_engine_heartbeat_disable(engine);
 157                err = __live_idle_pulse(engine, intel_engine_flush_barriers);
 158                st_engine_heartbeat_enable(engine);
 159                if (err)
 160                        break;
 161        }
 162
 163        return err;
 164}
 165
 166static int live_idle_pulse(void *arg)
 167{
 168        struct intel_gt *gt = arg;
 169        struct intel_engine_cs *engine;
 170        enum intel_engine_id id;
 171        int err = 0;
 172
 173        /* Check that heartbeat pulses flush the idle barriers */
 174
 175        for_each_engine(engine, gt, id) {
 176                st_engine_heartbeat_disable(engine);
 177                err = __live_idle_pulse(engine, intel_engine_pulse);
 178                st_engine_heartbeat_enable(engine);
 179                if (err && err != -ENODEV)
 180                        break;
 181
 182                err = 0;
 183        }
 184
 185        return err;
 186}
 187
 188static int cmp_u32(const void *_a, const void *_b)
 189{
 190        const u32 *a = _a, *b = _b;
 191
 192        return *a - *b;
 193}
 194
 195static int __live_heartbeat_fast(struct intel_engine_cs *engine)
 196{
 197        struct intel_context *ce;
 198        struct i915_request *rq;
 199        ktime_t t0, t1;
 200        u32 times[5];
 201        int err;
 202        int i;
 203
 204        ce = intel_context_create(engine);
 205        if (IS_ERR(ce))
 206                return PTR_ERR(ce);
 207
 208        intel_engine_pm_get(engine);
 209
 210        err = intel_engine_set_heartbeat(engine, 1);
 211        if (err)
 212                goto err_pm;
 213
 214        for (i = 0; i < ARRAY_SIZE(times); i++) {
 215                /* Manufacture a tick */
 216                do {
 217                        while (READ_ONCE(engine->heartbeat.systole))
 218                                flush_delayed_work(&engine->heartbeat.work);
 219
 220                        engine->serial++; /* quick, pretend we are not idle! */
 221                        flush_delayed_work(&engine->heartbeat.work);
 222                        if (!delayed_work_pending(&engine->heartbeat.work)) {
 223                                pr_err("%s: heartbeat did not start\n",
 224                                       engine->name);
 225                                err = -EINVAL;
 226                                goto err_pm;
 227                        }
 228
 229                        rcu_read_lock();
 230                        rq = READ_ONCE(engine->heartbeat.systole);
 231                        if (rq)
 232                                rq = i915_request_get_rcu(rq);
 233                        rcu_read_unlock();
 234                } while (!rq);
 235
 236                t0 = ktime_get();
 237                while (rq == READ_ONCE(engine->heartbeat.systole))
 238                        yield(); /* work is on the local cpu! */
 239                t1 = ktime_get();
 240
 241                i915_request_put(rq);
 242                times[i] = ktime_us_delta(t1, t0);
 243        }
 244
 245        sort(times, ARRAY_SIZE(times), sizeof(times[0]), cmp_u32, NULL);
 246
 247        pr_info("%s: Heartbeat delay: %uus [%u, %u]\n",
 248                engine->name,
 249                times[ARRAY_SIZE(times) / 2],
 250                times[0],
 251                times[ARRAY_SIZE(times) - 1]);
 252
 253        /* Min work delay is 2 * 2 (worst), +1 for scheduling, +1 for slack */
 254        if (times[ARRAY_SIZE(times) / 2] > jiffies_to_usecs(6)) {
 255                pr_err("%s: Heartbeat delay was %uus, expected less than %dus\n",
 256                       engine->name,
 257                       times[ARRAY_SIZE(times) / 2],
 258                       jiffies_to_usecs(6));
 259                err = -EINVAL;
 260        }
 261
 262        intel_engine_set_heartbeat(engine, CONFIG_DRM_I915_HEARTBEAT_INTERVAL);
 263err_pm:
 264        intel_engine_pm_put(engine);
 265        intel_context_put(ce);
 266        return err;
 267}
 268
 269static int live_heartbeat_fast(void *arg)
 270{
 271        struct intel_gt *gt = arg;
 272        struct intel_engine_cs *engine;
 273        enum intel_engine_id id;
 274        int err = 0;
 275
 276        /* Check that the heartbeat ticks at the desired rate. */
 277        if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
 278                return 0;
 279
 280        for_each_engine(engine, gt, id) {
 281                err = __live_heartbeat_fast(engine);
 282                if (err)
 283                        break;
 284        }
 285
 286        return err;
 287}
 288
 289static int __live_heartbeat_off(struct intel_engine_cs *engine)
 290{
 291        int err;
 292
 293        intel_engine_pm_get(engine);
 294
 295        engine->serial++;
 296        flush_delayed_work(&engine->heartbeat.work);
 297        if (!delayed_work_pending(&engine->heartbeat.work)) {
 298                pr_err("%s: heartbeat not running\n",
 299                       engine->name);
 300                err = -EINVAL;
 301                goto err_pm;
 302        }
 303
 304        err = intel_engine_set_heartbeat(engine, 0);
 305        if (err)
 306                goto err_pm;
 307
 308        engine->serial++;
 309        flush_delayed_work(&engine->heartbeat.work);
 310        if (delayed_work_pending(&engine->heartbeat.work)) {
 311                pr_err("%s: heartbeat still running\n",
 312                       engine->name);
 313                err = -EINVAL;
 314                goto err_beat;
 315        }
 316
 317        if (READ_ONCE(engine->heartbeat.systole)) {
 318                pr_err("%s: heartbeat still allocated\n",
 319                       engine->name);
 320                err = -EINVAL;
 321                goto err_beat;
 322        }
 323
 324err_beat:
 325        intel_engine_set_heartbeat(engine, CONFIG_DRM_I915_HEARTBEAT_INTERVAL);
 326err_pm:
 327        intel_engine_pm_put(engine);
 328        return err;
 329}
 330
 331static int live_heartbeat_off(void *arg)
 332{
 333        struct intel_gt *gt = arg;
 334        struct intel_engine_cs *engine;
 335        enum intel_engine_id id;
 336        int err = 0;
 337
 338        /* Check that we can turn off heartbeat and not interrupt VIP */
 339        if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
 340                return 0;
 341
 342        for_each_engine(engine, gt, id) {
 343                if (!intel_engine_has_preemption(engine))
 344                        continue;
 345
 346                err = __live_heartbeat_off(engine);
 347                if (err)
 348                        break;
 349        }
 350
 351        return err;
 352}
 353
 354int intel_heartbeat_live_selftests(struct drm_i915_private *i915)
 355{
 356        static const struct i915_subtest tests[] = {
 357                SUBTEST(live_idle_flush),
 358                SUBTEST(live_idle_pulse),
 359                SUBTEST(live_heartbeat_fast),
 360                SUBTEST(live_heartbeat_off),
 361        };
 362        int saved_hangcheck;
 363        int err;
 364
 365        if (intel_gt_is_wedged(&i915->gt))
 366                return 0;
 367
 368        saved_hangcheck = i915->params.enable_hangcheck;
 369        i915->params.enable_hangcheck = INT_MAX;
 370
 371        err = intel_gt_live_subtests(tests, &i915->gt);
 372
 373        i915->params.enable_hangcheck = saved_hangcheck;
 374        return err;
 375}
 376
/*
 * Selftest helper: zero @engine's heartbeat interval property, take an
 * engine-pm reference and park the heartbeat. The reference taken here is
 * dropped again by st_engine_heartbeat_enable().
 */
void st_engine_heartbeat_disable(struct intel_engine_cs *engine)
{
	/* Clear the interval before parking */
	engine->props.heartbeat_interval_ms = 0;

	intel_engine_pm_get(engine);
	intel_engine_park_heartbeat(engine);
}
 384
/*
 * Selftest helper, counterpart of st_engine_heartbeat_disable(): drop an
 * engine-pm reference and restore @engine's heartbeat interval property
 * from its recorded default.
 */
void st_engine_heartbeat_enable(struct intel_engine_cs *engine)
{
	intel_engine_pm_put(engine);

	/* Undo the zeroing done by st_engine_heartbeat_disable() */
	engine->props.heartbeat_interval_ms =
		engine->defaults.heartbeat_interval_ms;
}
 392