linux/drivers/gpu/drm/i915/selftests/i915_perf.c
<<
>>
Prefs
   1/*
   2 * SPDX-License-Identifier: MIT
   3 *
   4 * Copyright © 2019 Intel Corporation
   5 */
   6
   7#include <linux/kref.h>
   8
   9#include "gem/i915_gem_pm.h"
  10#include "gt/intel_gt.h"
  11
  12#include "i915_selftest.h"
  13
  14#include "igt_flush_test.h"
  15#include "lib_sw_fence.h"
  16
  17#define TEST_OA_CONFIG_UUID "12345678-1234-1234-1234-1234567890ab"
  18
  19static int
  20alloc_empty_config(struct i915_perf *perf)
  21{
  22        struct i915_oa_config *oa_config;
  23
  24        oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL);
  25        if (!oa_config)
  26                return -ENOMEM;
  27
  28        oa_config->perf = perf;
  29        kref_init(&oa_config->ref);
  30
  31        strlcpy(oa_config->uuid, TEST_OA_CONFIG_UUID, sizeof(oa_config->uuid));
  32
  33        mutex_lock(&perf->metrics_lock);
  34
  35        oa_config->id = idr_alloc(&perf->metrics_idr, oa_config, 2, 0, GFP_KERNEL);
  36        if (oa_config->id < 0)  {
  37                mutex_unlock(&perf->metrics_lock);
  38                i915_oa_config_put(oa_config);
  39                return -ENOMEM;
  40        }
  41
  42        mutex_unlock(&perf->metrics_lock);
  43
  44        return 0;
  45}
  46
  47static void
  48destroy_empty_config(struct i915_perf *perf)
  49{
  50        struct i915_oa_config *oa_config = NULL, *tmp;
  51        int id;
  52
  53        mutex_lock(&perf->metrics_lock);
  54
  55        idr_for_each_entry(&perf->metrics_idr, tmp, id) {
  56                if (!strcmp(tmp->uuid, TEST_OA_CONFIG_UUID)) {
  57                        oa_config = tmp;
  58                        break;
  59                }
  60        }
  61
  62        if (oa_config)
  63                idr_remove(&perf->metrics_idr, oa_config->id);
  64
  65        mutex_unlock(&perf->metrics_lock);
  66
  67        if (oa_config)
  68                i915_oa_config_put(oa_config);
  69}
  70
  71static struct i915_oa_config *
  72get_empty_config(struct i915_perf *perf)
  73{
  74        struct i915_oa_config *oa_config = NULL, *tmp;
  75        int id;
  76
  77        mutex_lock(&perf->metrics_lock);
  78
  79        idr_for_each_entry(&perf->metrics_idr, tmp, id) {
  80                if (!strcmp(tmp->uuid, TEST_OA_CONFIG_UUID)) {
  81                        oa_config = i915_oa_config_get(tmp);
  82                        break;
  83                }
  84        }
  85
  86        mutex_unlock(&perf->metrics_lock);
  87
  88        return oa_config;
  89}
  90
  91static struct i915_perf_stream *
  92test_stream(struct i915_perf *perf)
  93{
  94        struct drm_i915_perf_open_param param = {};
  95        struct i915_oa_config *oa_config = get_empty_config(perf);
  96        struct perf_open_properties props = {
  97                .engine = intel_engine_lookup_user(perf->i915,
  98                                                   I915_ENGINE_CLASS_RENDER,
  99                                                   0),
 100                .sample_flags = SAMPLE_OA_REPORT,
 101                .oa_format = IS_GEN(perf->i915, 12) ?
 102                I915_OA_FORMAT_A32u40_A4u32_B8_C8 : I915_OA_FORMAT_C4_B8,
 103        };
 104        struct i915_perf_stream *stream;
 105
 106        if (!oa_config)
 107                return NULL;
 108
 109        props.metrics_set = oa_config->id;
 110
 111        stream = kzalloc(sizeof(*stream), GFP_KERNEL);
 112        if (!stream) {
 113                i915_oa_config_put(oa_config);
 114                return NULL;
 115        }
 116
 117        stream->perf = perf;
 118
 119        mutex_lock(&perf->lock);
 120        if (i915_oa_stream_init(stream, &param, &props)) {
 121                kfree(stream);
 122                stream =  NULL;
 123        }
 124        mutex_unlock(&perf->lock);
 125
 126        i915_oa_config_put(oa_config);
 127
 128        return stream;
 129}
 130
 131static void stream_destroy(struct i915_perf_stream *stream)
 132{
 133        struct i915_perf *perf = stream->perf;
 134
 135        mutex_lock(&perf->lock);
 136        i915_perf_destroy_locked(stream);
 137        mutex_unlock(&perf->lock);
 138}
 139
 140static int live_sanitycheck(void *arg)
 141{
 142        struct drm_i915_private *i915 = arg;
 143        struct i915_perf_stream *stream;
 144
 145        /* Quick check we can create a perf stream */
 146
 147        stream = test_stream(&i915->perf);
 148        if (!stream)
 149                return -EINVAL;
 150
 151        stream_destroy(stream);
 152        return 0;
 153}
 154
 155static int write_timestamp(struct i915_request *rq, int slot)
 156{
 157        u32 *cs;
 158        int len;
 159
 160        cs = intel_ring_begin(rq, 6);
 161        if (IS_ERR(cs))
 162                return PTR_ERR(cs);
 163
 164        len = 5;
 165        if (INTEL_GEN(rq->engine->i915) >= 8)
 166                len++;
 167
 168        *cs++ = GFX_OP_PIPE_CONTROL(len);
 169        *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB |
 170                PIPE_CONTROL_STORE_DATA_INDEX |
 171                PIPE_CONTROL_WRITE_TIMESTAMP;
 172        *cs++ = slot * sizeof(u32);
 173        *cs++ = 0;
 174        *cs++ = 0;
 175        *cs++ = 0;
 176
 177        intel_ring_advance(rq, cs);
 178
 179        return 0;
 180}
 181
 182static ktime_t poll_status(struct i915_request *rq, int slot)
 183{
 184        while (!intel_read_status_page(rq->engine, slot) &&
 185               !i915_request_completed(rq))
 186                cpu_relax();
 187
 188        return ktime_get();
 189}
 190
 191static int live_noa_delay(void *arg)
 192{
 193        struct drm_i915_private *i915 = arg;
 194        struct i915_perf_stream *stream;
 195        struct i915_request *rq;
 196        ktime_t t0, t1;
 197        u64 expected;
 198        u32 delay;
 199        int err;
 200        int i;
 201
 202        /* Check that the GPU delays matches expectations */
 203
 204        stream = test_stream(&i915->perf);
 205        if (!stream)
 206                return -ENOMEM;
 207
 208        expected = atomic64_read(&stream->perf->noa_programming_delay);
 209
 210        if (stream->engine->class != RENDER_CLASS) {
 211                err = -ENODEV;
 212                goto out;
 213        }
 214
 215        for (i = 0; i < 4; i++)
 216                intel_write_status_page(stream->engine, 0x100 + i, 0);
 217
 218        rq = intel_engine_create_kernel_request(stream->engine);
 219        if (IS_ERR(rq)) {
 220                err = PTR_ERR(rq);
 221                goto out;
 222        }
 223
 224        if (rq->engine->emit_init_breadcrumb) {
 225                err = rq->engine->emit_init_breadcrumb(rq);
 226                if (err) {
 227                        i915_request_add(rq);
 228                        goto out;
 229                }
 230        }
 231
 232        err = write_timestamp(rq, 0x100);
 233        if (err) {
 234                i915_request_add(rq);
 235                goto out;
 236        }
 237
 238        err = rq->engine->emit_bb_start(rq,
 239                                        i915_ggtt_offset(stream->noa_wait), 0,
 240                                        I915_DISPATCH_SECURE);
 241        if (err) {
 242                i915_request_add(rq);
 243                goto out;
 244        }
 245
 246        err = write_timestamp(rq, 0x102);
 247        if (err) {
 248                i915_request_add(rq);
 249                goto out;
 250        }
 251
 252        i915_request_get(rq);
 253        i915_request_add(rq);
 254
 255        preempt_disable();
 256        t0 = poll_status(rq, 0x100);
 257        t1 = poll_status(rq, 0x102);
 258        preempt_enable();
 259
 260        pr_info("CPU delay: %lluns, expected %lluns\n",
 261                ktime_sub(t1, t0), expected);
 262
 263        delay = intel_read_status_page(stream->engine, 0x102);
 264        delay -= intel_read_status_page(stream->engine, 0x100);
 265        delay = intel_gt_clock_interval_to_ns(stream->engine->gt, delay);
 266        pr_info("GPU delay: %uns, expected %lluns\n",
 267                delay, expected);
 268
 269        if (4 * delay < 3 * expected || 2 * delay > 3 * expected) {
 270                pr_err("GPU delay [%uus] outside of expected threshold! [%lluus, %lluus]\n",
 271                       delay / 1000,
 272                       div_u64(3 * expected, 4000),
 273                       div_u64(3 * expected, 2000));
 274                err = -EINVAL;
 275        }
 276
 277        i915_request_put(rq);
 278out:
 279        stream_destroy(stream);
 280        return err;
 281}
 282
 283static int live_noa_gpr(void *arg)
 284{
 285        struct drm_i915_private *i915 = arg;
 286        struct i915_perf_stream *stream;
 287        struct intel_context *ce;
 288        struct i915_request *rq;
 289        u32 *cs, *store;
 290        void *scratch;
 291        u32 gpr0;
 292        int err;
 293        int i;
 294
 295        /* Check that the delay does not clobber user context state (GPR) */
 296
 297        stream = test_stream(&i915->perf);
 298        if (!stream)
 299                return -ENOMEM;
 300
 301        gpr0 = i915_mmio_reg_offset(GEN8_RING_CS_GPR(stream->engine->mmio_base, 0));
 302
 303        ce = intel_context_create(stream->engine);
 304        if (IS_ERR(ce)) {
 305                err = PTR_ERR(ce);
 306                goto out;
 307        }
 308
 309        /* Poison the ce->vm so we detect writes not to the GGTT gt->scratch */
 310        scratch = kmap(__px_page(ce->vm->scratch[0]));
 311        memset(scratch, POISON_FREE, PAGE_SIZE);
 312
 313        rq = intel_context_create_request(ce);
 314        if (IS_ERR(rq)) {
 315                err = PTR_ERR(rq);
 316                goto out_ce;
 317        }
 318        i915_request_get(rq);
 319
 320        if (rq->engine->emit_init_breadcrumb) {
 321                err = rq->engine->emit_init_breadcrumb(rq);
 322                if (err) {
 323                        i915_request_add(rq);
 324                        goto out_rq;
 325                }
 326        }
 327
 328        /* Fill the 16 qword [32 dword] GPR with a known unlikely value */
 329        cs = intel_ring_begin(rq, 2 * 32 + 2);
 330        if (IS_ERR(cs)) {
 331                err = PTR_ERR(cs);
 332                i915_request_add(rq);
 333                goto out_rq;
 334        }
 335
 336        *cs++ = MI_LOAD_REGISTER_IMM(32);
 337        for (i = 0; i < 32; i++) {
 338                *cs++ = gpr0 + i * sizeof(u32);
 339                *cs++ = STACK_MAGIC;
 340        }
 341        *cs++ = MI_NOOP;
 342        intel_ring_advance(rq, cs);
 343
 344        /* Execute the GPU delay */
 345        err = rq->engine->emit_bb_start(rq,
 346                                        i915_ggtt_offset(stream->noa_wait), 0,
 347                                        I915_DISPATCH_SECURE);
 348        if (err) {
 349                i915_request_add(rq);
 350                goto out_rq;
 351        }
 352
 353        /* Read the GPR back, using the pinned global HWSP for convenience */
 354        store = memset32(rq->engine->status_page.addr + 512, 0, 32);
 355        for (i = 0; i < 32; i++) {
 356                u32 cmd;
 357
 358                cs = intel_ring_begin(rq, 4);
 359                if (IS_ERR(cs)) {
 360                        err = PTR_ERR(cs);
 361                        i915_request_add(rq);
 362                        goto out_rq;
 363                }
 364
 365                cmd = MI_STORE_REGISTER_MEM;
 366                if (INTEL_GEN(i915) >= 8)
 367                        cmd++;
 368                cmd |= MI_USE_GGTT;
 369
 370                *cs++ = cmd;
 371                *cs++ = gpr0 + i * sizeof(u32);
 372                *cs++ = i915_ggtt_offset(rq->engine->status_page.vma) +
 373                        offset_in_page(store) +
 374                        i * sizeof(u32);
 375                *cs++ = 0;
 376                intel_ring_advance(rq, cs);
 377        }
 378
 379        i915_request_add(rq);
 380
 381        if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, HZ / 2) < 0) {
 382                pr_err("noa_wait timed out\n");
 383                intel_gt_set_wedged(stream->engine->gt);
 384                err = -EIO;
 385                goto out_rq;
 386        }
 387
 388        /* Verify that the GPR contain our expected values */
 389        for (i = 0; i < 32; i++) {
 390                if (store[i] == STACK_MAGIC)
 391                        continue;
 392
 393                pr_err("GPR[%d] lost, found:%08x, expected:%08x!\n",
 394                       i, store[i], STACK_MAGIC);
 395                err = -EINVAL;
 396        }
 397
 398        /* Verify that the user's scratch page was not used for GPR storage */
 399        if (memchr_inv(scratch, POISON_FREE, PAGE_SIZE)) {
 400                pr_err("Scratch page overwritten!\n");
 401                igt_hexdump(scratch, 4096);
 402                err = -EINVAL;
 403        }
 404
 405out_rq:
 406        i915_request_put(rq);
 407out_ce:
 408        kunmap(__px_page(ce->vm->scratch[0]));
 409        intel_context_put(ce);
 410out:
 411        stream_destroy(stream);
 412        return err;
 413}
 414
 415int i915_perf_live_selftests(struct drm_i915_private *i915)
 416{
 417        static const struct i915_subtest tests[] = {
 418                SUBTEST(live_sanitycheck),
 419                SUBTEST(live_noa_delay),
 420                SUBTEST(live_noa_gpr),
 421        };
 422        struct i915_perf *perf = &i915->perf;
 423        int err;
 424
 425        if (!perf->metrics_kobj || !perf->ops.enable_metric_set)
 426                return 0;
 427
 428        if (intel_gt_is_wedged(&i915->gt))
 429                return 0;
 430
 431        err = alloc_empty_config(&i915->perf);
 432        if (err)
 433                return err;
 434
 435        err = i915_subtests(tests, i915);
 436
 437        destroy_empty_config(&i915->perf);
 438
 439        return err;
 440}
 441