linux/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c
/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include "../i915_selftest.h"
#include "i915_random.h"

#include "mock_gem_device.h"
#include "mock_timeline.h"

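/*
 * One step of a table-driven sync check: @seqno is queried (and, when
 * @set is true, subsequently recorded) for a context, with @expected
 * giving the required result of the is-later query. @name labels the
 * step in error messages.
 */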
struct __igt_sync {
        const char *name;
        u32 seqno;
        bool expected;
        bool set;
};

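/* Apply a single __igt_sync step to the timeline for one context id */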
static int __igt_sync(struct intel_timeline *tl,
                      u64 ctx,
                      const struct __igt_sync *p,
                      const char *name)
{
        int ret;

        if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) {
                pr_err("%s: %s(ctx=%llu, seqno=%u) expected is_later() to report %s\n",
                       name, p->name, ctx, p->seqno, yesno(p->expected));
                return -EINVAL;
        }

        if (p->set) {
                ret = __intel_timeline_sync_set(tl, ctx, p->seqno);
                if (ret)
                        return ret;
        }

        return 0;
}

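/*
 * Walk the pass[] table to check the u32 wraparound semantics of seqno
 * comparison: a seqno is already "later" when the signed (s32) delta
 * between the last recorded value and the query is non-negative, so
 * e.g. INT_MAX+1 passes INT_MAX, but INT_MAX does not pass INT_MAX+1.
 * The table is run twice: first applying each step to every context id
 * (spanning all bit orders up to 64) before advancing, then replaying
 * the whole table per context.
 */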
static int igt_sync(void *arg)
{
        const struct __igt_sync pass[] = {
                { "unset", 0, false, false },
                { "new", 0, false, true },
                { "0a", 0, true, true },
                { "1a", 1, false, true },
                { "1b", 1, true, true },
                { "0b", 0, true, false },
                { "2a", 2, false, true },
                { "4", 4, false, true },
                { "INT_MAX", INT_MAX, false, true },
                { "INT_MAX-1", INT_MAX-1, true, false },
                { "INT_MAX+1", (u32)INT_MAX+1, false, true },
                { "INT_MAX", INT_MAX, true, false },
                { "UINT_MAX", UINT_MAX, false, true },
                { "wrap", 0, false, true },
                { "unwrap", UINT_MAX, true, false },
                {},
        }, *p;
        struct intel_timeline *tl;
        int order, offset;
        int ret = -ENODEV;

        tl = mock_timeline(0);
        if (!tl)
                return -ENOMEM;

        for (p = pass; p->name; p++) {
                for (order = 1; order < 64; order++) {
                        for (offset = -1; offset <= (order > 1); offset++) {
                                u64 ctx = BIT_ULL(order) + offset;

                                ret = __igt_sync(tl, ctx, p, "1");
                                if (ret)
                                        goto out;
                        }
                }
        }
        mock_timeline_destroy(tl);

        tl = mock_timeline(0);
        if (!tl)
                return -ENOMEM;

        for (order = 1; order < 64; order++) {
                for (offset = -1; offset <= (order > 1); offset++) {
                        u64 ctx = BIT_ULL(order) + offset;

                        for (p = pass; p->name; p++) {
                                ret = __igt_sync(tl, ctx, p, "2");
                                if (ret)
                                        goto out;
                        }
                }
        }

out:
        mock_timeline_destroy(tl);
        return ret;
}

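/* Pick one of the engines at random to use as a small, dense context id */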
static unsigned int random_engine(struct rnd_state *rnd)
{
        return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd);
}

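/*
 * Measure the cost of the timeline's sync tracking under different access
 * patterns: purely random u64 context ids, strictly in-order ids, a small
 * dense set of ids (one per engine), and cyclic sweeps through power-of-two
 * ranges. Each timed loop runs for about 100ms (HZ/10) and reports the mean
 * cost per operation, compensated for prng overhead where random ids are
 * used.
 */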
static int bench_sync(void *arg)
{
        struct rnd_state prng;
        struct intel_timeline *tl;
        unsigned long end_time, count;
        u64 prng32_1M;
        ktime_t kt;
        int order, last_order;

        tl = mock_timeline(0);
        if (!tl)
                return -ENOMEM;

        /* Lookups from cache are very fast and so the random number generation
         * and the loop itself become significant factors in the per-iteration
         * timings. We try to compensate for this by measuring the overhead of
         * the prng and subtracting it from the reported results.
         */
        prandom_seed_state(&prng, i915_selftest.random_seed);
        count = 0;
        kt = ktime_get();
        end_time = jiffies + HZ/10;
        do {
                u32 x;

                /* Make sure the compiler doesn't optimise away the prng call */
                WRITE_ONCE(x, prandom_u32_state(&prng));

                count++;
        } while (!time_after(jiffies, end_time));
        kt = ktime_sub(ktime_get(), kt);
        pr_debug("%s: %lu random evaluations, %lluns/prng\n",
                 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
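        /* Record the prng cost scaled by 2^20 (~1M calls) to keep sub-ns precision */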
        prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count);

        /* Benchmark (only) setting random context ids */
        prandom_seed_state(&prng, i915_selftest.random_seed);
        count = 0;
        kt = ktime_get();
        end_time = jiffies + HZ/10;
        do {
                u64 id = i915_prandom_u64_state(&prng);

                __intel_timeline_sync_set(tl, id, 0);
                count++;
        } while (!time_after(jiffies, end_time));
        kt = ktime_sub(ktime_get(), kt);
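        /* Each u64 id above costs two prng32 draws; subtract that overhead */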
        kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
        pr_info("%s: %lu random insertions, %lluns/insert\n",
                __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

        /* Benchmark looking up the exact same context ids as we just set */
        prandom_seed_state(&prng, i915_selftest.random_seed);
        end_time = count;
        kt = ktime_get();
        while (end_time--) {
                u64 id = i915_prandom_u64_state(&prng);

                if (!__intel_timeline_sync_is_later(tl, id, 0)) {
                        mock_timeline_destroy(tl);
                        pr_err("Lookup of %llu failed\n", id);
                        return -EINVAL;
                }
        }
        kt = ktime_sub(ktime_get(), kt);
        kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
        pr_info("%s: %lu random lookups, %lluns/lookup\n",
                __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

        mock_timeline_destroy(tl);
        cond_resched();

        tl = mock_timeline(0);
        if (!tl)
                return -ENOMEM;

        /* Benchmark setting the first N (in order) contexts */
        count = 0;
        kt = ktime_get();
        end_time = jiffies + HZ/10;
        do {
                __intel_timeline_sync_set(tl, count++, 0);
        } while (!time_after(jiffies, end_time));
        kt = ktime_sub(ktime_get(), kt);
        pr_info("%s: %lu in-order insertions, %lluns/insert\n",
                __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

        /* Benchmark looking up the exact same context ids as we just set */
        end_time = count;
        kt = ktime_get();
        while (end_time--) {
                if (!__intel_timeline_sync_is_later(tl, end_time, 0)) {
                        pr_err("Lookup of %lu failed\n", end_time);
                        mock_timeline_destroy(tl);
                        return -EINVAL;
                }
        }
        kt = ktime_sub(ktime_get(), kt);
        pr_info("%s: %lu in-order lookups, %lluns/lookup\n",
                __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

        mock_timeline_destroy(tl);
        cond_resched();

        tl = mock_timeline(0);
        if (!tl)
                return -ENOMEM;

        /* Benchmark searching for a random context id and maybe changing it */
        prandom_seed_state(&prng, i915_selftest.random_seed);
        count = 0;
        kt = ktime_get();
        end_time = jiffies + HZ/10;
        do {
                u32 id = random_engine(&prng);
                u32 seqno = prandom_u32_state(&prng);

                if (!__intel_timeline_sync_is_later(tl, id, seqno))
                        __intel_timeline_sync_set(tl, id, seqno);

                count++;
        } while (!time_after(jiffies, end_time));
        kt = ktime_sub(ktime_get(), kt);
        kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
        pr_info("%s: %lu repeated insert/lookups, %lluns/op\n",
                __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
        mock_timeline_destroy(tl);
        cond_resched();

        /* Benchmark searching for a known context id and changing the seqno */
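        /*
         * Step the cycle size through a Fibonacci sequence of orders
         * (1, 2, 3, 5, 8, ...) to sample a spread of working-set sizes
         * without assuming the phase boundaries of the underlying map.
         */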
        for (last_order = 1, order = 1; order < 32;
             ({ int tmp = last_order; last_order = order; order += tmp; })) {
                unsigned int mask = BIT(order) - 1;

                tl = mock_timeline(0);
                if (!tl)
                        return -ENOMEM;

                count = 0;
                kt = ktime_get();
                end_time = jiffies + HZ/10;
                do {
                        /* Without assuming too many details of the underlying
                         * implementation, try to identify its phase-changes
                         * (if any)!
                         */
                        u64 id = (u64)(count & mask) << order;

                        __intel_timeline_sync_is_later(tl, id, 0);
                        __intel_timeline_sync_set(tl, id, 0);

                        count++;
                } while (!time_after(jiffies, end_time));
                kt = ktime_sub(ktime_get(), kt);
                pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n",
                        __func__, count, order,
                        (long long)div64_ul(ktime_to_ns(kt), count));
                mock_timeline_destroy(tl);
                cond_resched();
        }

        return 0;
}

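/*
 * Both subtests run against a mock timeline only and so can be exercised
 * without any hardware.
 */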
int i915_gem_timeline_mock_selftests(void)
{
        static const struct i915_subtest tests[] = {
                SUBTEST(igt_sync),
                SUBTEST(bench_sync),
        };

        return i915_subtests(tests, NULL);
}