linux/drivers/gpu/drm/i915/i915_syncmap.c
/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/slab.h>

#include "i915_syncmap.h"

#include "i915_gem.h" /* GEM_BUG_ON() */
#include "i915_selftest.h"

#define SHIFT ilog2(KSYNCMAP)
#define MASK (KSYNCMAP - 1)
/*
 * struct i915_syncmap is a layer of a radixtree that maps a u64 fence
 * context id to the last u32 fence seqno waited upon from that context.
 * Unlike lib/radixtree it uses a parent pointer that allows traversal back to
 * the root. This allows us to access the whole tree via a single pointer
 * to the most recently used layer. We expect fence contexts to be dense,
 * with most reuse coming from the same i915_gem_context but on neighbouring
 * engines (i.e. on adjacent contexts) that share the same leaf, making it a
 * very effective lookup cache. If the new lookup is not on the same leaf, we
 * expect it to be on the neighbouring branch.
 *
 * A leaf holds an array of u32 seqno, and has height 0. The bitmap field
 * allows us to store whether a particular seqno is valid (i.e. allows us
 * to distinguish unset from 0).
 *
 * A branch holds an array of layer pointers, and has height > 0, and always
 * has at least 2 layers (either branches or leaves) below it.
 *
 * For example,
 *      for x in
 *        0 1 2 0x10 0x11 0x200 0x201
 *        0x500000 0x500001 0x503000 0x503001
 *        0xE<<60:
 *              i915_syncmap_set(&sync, x, lower_32_bits(x));
 * will build a tree like:
 *      0xXXXXXXXXXXXXXXXX
 *      0-> 0x0000000000XXXXXX
 *      |   0-> 0x0000000000000XXX
 *      |   |   0-> 0x00000000000000XX
 *      |   |   |   0-> 0x000000000000000X 0:0, 1:1, 2:2
 *      |   |   |   1-> 0x000000000000001X 0:10, 1:11
 *      |   |   2-> 0x000000000000020X 0:200, 1:201
 *      |   5-> 0x000000000050XXXX
 *      |       0-> 0x000000000050000X 0:500000, 1:500001
 *      |       3-> 0x000000000050300X 0:503000, 1:503001
 *      e-> 0xe00000000000000X e:e
 */
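
/*
 * A note on the geometry above (an illustrative aside, not part of the
 * algorithm): each layer consumes SHIFT bits of the id, and with
 * KSYNCMAP == 16 that means SHIFT == 4 and MASK == 0xf, i.e. one nibble
 * per layer and at most 16 layers for a full u64. The X digits in the
 * diagram mark the part of the id that a layer and its descendants still
 * resolve: a layer of height h selects a child using the nibble at bits
 * [h, h + SHIFT) and keeps everything above that as its prefix
 * (id >> h >> SHIFT).
 */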

struct i915_syncmap {
        u64 prefix;
        unsigned int height;
        unsigned int bitmap;
        struct i915_syncmap *parent;
        /*
         * Following this header is an array of either seqno or child pointers:
         * union {
         *      u32 seqno[KSYNCMAP];
         *      struct i915_syncmap *child[KSYNCMAP];
         * };
         */
};
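
/*
 * A minimal sketch (mirroring __sync_alloc_leaf() and __sync_seqno() below)
 * of how the trailing array is carved out for a leaf:
 *
 *      p = kmalloc(sizeof(*p) + KSYNCMAP * sizeof(u32), GFP_KERNEL);
 *      seqno = (u32 *)(p + 1);
 *      seqno[id & MASK] = value;
 *
 * A branch is allocated the same way, but with KSYNCMAP child pointers
 * (see the kzalloc() in __sync_set()) instead of KSYNCMAP seqno slots.
 */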

/**
 * i915_syncmap_init -- initialise the #i915_syncmap
 * @root: pointer to the #i915_syncmap
 */
void i915_syncmap_init(struct i915_syncmap **root)
{
        BUILD_BUG_ON_NOT_POWER_OF_2(KSYNCMAP);
        BUILD_BUG_ON_NOT_POWER_OF_2(SHIFT);
        BUILD_BUG_ON(KSYNCMAP > BITS_PER_TYPE((*root)->bitmap));
        *root = NULL;
}

static inline u32 *__sync_seqno(struct i915_syncmap *p)
{
        GEM_BUG_ON(p->height);
        return (u32 *)(p + 1);
}

static inline struct i915_syncmap **__sync_child(struct i915_syncmap *p)
{
        GEM_BUG_ON(!p->height);
        return (struct i915_syncmap **)(p + 1);
}

static inline unsigned int
__sync_branch_idx(const struct i915_syncmap *p, u64 id)
{
        return (id >> p->height) & MASK;
}

static inline unsigned int
__sync_leaf_idx(const struct i915_syncmap *p, u64 id)
{
        GEM_BUG_ON(p->height);
        return id & MASK;
}

static inline u64 __sync_branch_prefix(const struct i915_syncmap *p, u64 id)
{
        return id >> p->height >> SHIFT;
}

static inline u64 __sync_leaf_prefix(const struct i915_syncmap *p, u64 id)
{
        GEM_BUG_ON(p->height);
        return id >> SHIFT;
}
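
/*
 * Worked values for the helpers above (a sketch using the tree from the
 * header comment): for a branch p with p->height == 12 and id == 0x503001,
 *
 *      __sync_branch_idx(p, id)    == (id >> 12) & MASK   == 0x3
 *      __sync_branch_prefix(p, id) == id >> 12 >> SHIFT   == 0x50
 *
 * while for the height-0 leaf that ends up holding the id,
 *
 *      __sync_leaf_idx(p, id)      == id & MASK           == 0x1
 *      __sync_leaf_prefix(p, id)   == id >> SHIFT         == 0x50300
 */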

static inline bool seqno_later(u32 a, u32 b)
{
        return (s32)(a - b) >= 0;
}
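
/*
 * The comparison is circular: interpreting the u32 difference as s32 means
 * anything up to 2^31 - 1 ahead of b counts as later, so the test keeps
 * working across a seqno wrap. A few spot checks (illustrative only):
 *
 *      seqno_later(2, 1)           -> (s32)1          >= 0 -> true
 *      seqno_later(1, 0xffffffff)  -> (s32)2          >= 0 -> true
 *      seqno_later(1, 0x80000002)  -> (s32)0x7fffffff >= 0 -> true
 *      seqno_later(0, 0x7fffffff)  -> (s32)0x80000001 <  0 -> false
 */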

/**
 * i915_syncmap_is_later -- compare against the last known sync point
 * @root: pointer to the #i915_syncmap
 * @id: the context id (other timeline) we are synchronising to
 * @seqno: the sequence number along the other timeline
 *
 * If we have already synchronised this @root timeline with another (@id), then
 * we can omit any repeated or earlier synchronisation requests. If the two
 * timelines are already coupled, we can also omit the dependency between the
 * two as that is already known via the timeline.
 *
 * Returns true if the two timelines are already synchronised with respect to
 * @seqno, false if not and the synchronisation must be emitted.
 */
bool i915_syncmap_is_later(struct i915_syncmap **root, u64 id, u32 seqno)
{
        struct i915_syncmap *p;
        unsigned int idx;

        p = *root;
        if (!p)
                return false;

        if (likely(__sync_leaf_prefix(p, id) == p->prefix))
                goto found;

        /* First climb the tree back to a parent branch */
        do {
                p = p->parent;
                if (!p)
                        return false;

                if (__sync_branch_prefix(p, id) == p->prefix)
                        break;
        } while (1);

        /* And then descend again until we find our leaf */
        do {
                if (!p->height)
                        break;

                p = __sync_child(p)[__sync_branch_idx(p, id)];
                if (!p)
                        return false;

                if (__sync_branch_prefix(p, id) != p->prefix)
                        return false;
        } while (1);

        *root = p;
found:
        idx = __sync_leaf_idx(p, id);
        if (!(p->bitmap & BIT(idx)))
                return false;

        return seqno_later(__sync_seqno(p)[idx], seqno);
}
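
/*
 * To illustrate the caching behaviour (a sketch based on the example tree
 * in the header comment): after a hit on the 0x50000X leaf, *root points
 * at that leaf. A later query for id 0x503001 misses that leaf's prefix,
 * climbs one parent to the height-12 branch (prefix 0x50, which matches),
 * descends through slot 3 to the 0x50300X leaf, and caches that leaf in
 * *root for the next query. A query for an id nowhere in the tree climbs
 * back towards the root and returns false; lookups never allocate.
 */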

static struct i915_syncmap *
__sync_alloc_leaf(struct i915_syncmap *parent, u64 id)
{
        struct i915_syncmap *p;

        p = kmalloc(sizeof(*p) + KSYNCMAP * sizeof(u32), GFP_KERNEL);
        if (unlikely(!p))
                return NULL;

        p->parent = parent;
        p->height = 0;
        p->bitmap = 0;
        p->prefix = __sync_leaf_prefix(p, id);
        return p;
}

static inline void __sync_set_seqno(struct i915_syncmap *p, u64 id, u32 seqno)
{
        unsigned int idx = __sync_leaf_idx(p, id);

        p->bitmap |= BIT(idx);
        __sync_seqno(p)[idx] = seqno;
}

static inline void __sync_set_child(struct i915_syncmap *p,
                                    unsigned int idx,
                                    struct i915_syncmap *child)
{
        p->bitmap |= BIT(idx);
        __sync_child(p)[idx] = child;
}

static noinline int __sync_set(struct i915_syncmap **root, u64 id, u32 seqno)
{
        struct i915_syncmap *p = *root;
        unsigned int idx;

        if (!p) {
                p = __sync_alloc_leaf(NULL, id);
                if (unlikely(!p))
                        return -ENOMEM;

                goto found;
        }

        /* Caller handled the likely cached case */
        GEM_BUG_ON(__sync_leaf_prefix(p, id) == p->prefix);

        /* Climb back up the tree until we find a common prefix */
        do {
                if (!p->parent)
                        break;

                p = p->parent;

                if (__sync_branch_prefix(p, id) == p->prefix)
                        break;
        } while (1);

        /*
         * No shortcut, we have to descend the tree to find the right layer
         * containing this fence.
         *
         * Each layer in the tree holds 16 (KSYNCMAP) pointers, either fences
         * or lower layers. Leaf nodes (height = 0) contain the fences, all
         * other nodes (height > 0) are internal layers that point to a lower
         * node. Each internal layer has at least 2 descendants.
         *
         * Starting at the top, we check whether the current prefix matches. If
         * it doesn't, we have gone past our target and need to insert a join
         * into the tree, and a new leaf node for the target as a descendant
         * of the join, as well as the original layer.
         *
         * The matching prefix means we are still following the right branch
         * of the tree. If it has height 0, we have found our leaf and just
         * need to replace the fence slot with ourselves. If the height is
         * not zero, our slot contains the next layer in the tree (unless
         * it is empty, in which case we can add ourselves as a new leaf).
         * As we descend the tree, the prefix grows (and the height decreases).
         */
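        /*
         * A worked instance of the join insertion below (illustrative only,
         * reusing ids from the header comment): if p is the 0x50000X leaf
         * (height 0, prefix 0x50000) and we insert id 0x503000, then
         *
         *      __sync_branch_prefix(p, id) ^ p->prefix  == 0x50300 ^ 0x50000
         *                                               == 0x300
         *      above        == round_up(fls64(0x300), SHIFT) == 12
         *      next->height == above + p->height        == 12
         *      next->prefix == id >> 12 >> SHIFT        == 0x50
         *
         * The old leaf is re-attached at slot
         * p->prefix >> (above - SHIFT) & MASK == 0x500 & 0xf == 0, while the
         * new id then descends into slot (id >> 12) & MASK == 3.
         */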
        do {
                struct i915_syncmap *next;

                if (__sync_branch_prefix(p, id) != p->prefix) {
                        unsigned int above;

                        /* Insert a join above the current layer */
                        next = kzalloc(sizeof(*next) + KSYNCMAP * sizeof(next),
                                       GFP_KERNEL);
                        if (unlikely(!next))
                                return -ENOMEM;

                        /* Compute the height at which these two diverge */
                        above = fls64(__sync_branch_prefix(p, id) ^ p->prefix);
                        above = round_up(above, SHIFT);
                        next->height = above + p->height;
                        next->prefix = __sync_branch_prefix(next, id);

                        /* Insert the join into the parent */
                        if (p->parent) {
                                idx = __sync_branch_idx(p->parent, id);
                                __sync_child(p->parent)[idx] = next;
                                GEM_BUG_ON(!(p->parent->bitmap & BIT(idx)));
                        }
                        next->parent = p->parent;

                        /* Compute the idx of the other branch, not our id! */
                        idx = p->prefix >> (above - SHIFT) & MASK;
                        __sync_set_child(next, idx, p);
                        p->parent = next;

                        /* Ascend to the join */
                        p = next;
                } else {
                        if (!p->height)
                                break;
                }

                /* Descend into the next layer */
                GEM_BUG_ON(!p->height);
                idx = __sync_branch_idx(p, id);
                next = __sync_child(p)[idx];
                if (!next) {
                        next = __sync_alloc_leaf(p, id);
                        if (unlikely(!next))
                                return -ENOMEM;

                        __sync_set_child(p, idx, next);
                        p = next;
                        break;
                }

                p = next;
        } while (1);

found:
        GEM_BUG_ON(p->prefix != __sync_leaf_prefix(p, id));
        __sync_set_seqno(p, id, seqno);
        *root = p;
        return 0;
}

/**
 * i915_syncmap_set -- mark the most recent syncpoint between contexts
 * @root: pointer to the #i915_syncmap
 * @id: the context id (other timeline) we have synchronised to
 * @seqno: the sequence number along the other timeline
 *
 * When we synchronise this @root timeline with another (@id), we also know
 * that we have synchronised with all previous seqno along that timeline. If
 * we then have a request to synchronise with the same seqno or older, we can
 * omit it; see i915_syncmap_is_later().
 *
 * Returns 0 on success, or a negative error code.
 */
int i915_syncmap_set(struct i915_syncmap **root, u64 id, u32 seqno)
{
        struct i915_syncmap *p = *root;

        /*
         * We expect to be called in sequence following is_later(id), which
         * should have preloaded the root for us.
         */
        if (likely(p && __sync_leaf_prefix(p, id) == p->prefix)) {
                __sync_set_seqno(p, id, seqno);
                return 0;
        }

        return __sync_set(root, id, seqno);
}
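
/*
 * A typical caller pairs the two entry points, e.g. (a hypothetical sketch,
 * not code from this driver; emit_wait() is a made-up helper):
 *
 *      struct i915_syncmap *sync;
 *
 *      i915_syncmap_init(&sync);
 *      ...
 *      if (!i915_syncmap_is_later(&sync, ctx, seqno)) {
 *              err = emit_wait(ctx, seqno);
 *              if (!err)
 *                      err = i915_syncmap_set(&sync, ctx, seqno);
 *      }
 *      ...
 *      i915_syncmap_free(&sync);
 *
 * Because i915_syncmap_is_later() leaves *root pointing at the leaf it just
 * probed, the following i915_syncmap_set() for the same id usually takes the
 * cached fast path above and avoids __sync_set() entirely.
 */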

static void __sync_free(struct i915_syncmap *p)
{
        if (p->height) {
                unsigned int i;

                /*
                 * Walk the set bits of the bitmap: ffs() returns the
                 * 1-based index of the lowest child present; clear that
                 * bit (and the already-clear bits below it) before
                 * recursing into the corresponding child.
                 */
                while ((i = ffs(p->bitmap))) {
                        p->bitmap &= ~0u << i;
                        __sync_free(__sync_child(p)[i - 1]);
                }
        }

        kfree(p);
}

/**
 * i915_syncmap_free -- free all memory associated with the syncmap
 * @root: pointer to the #i915_syncmap
 *
 * Either when the timeline is to be freed and we no longer need the sync
 * point tracking, or when the fences are all known to be signaled and the
 * sync point tracking is redundant, we can free the #i915_syncmap to recover
 * its allocations.
 *
 * Will reinitialise the @root pointer so that the #i915_syncmap is ready for
 * reuse.
 */
void i915_syncmap_free(struct i915_syncmap **root)
{
        struct i915_syncmap *p;

        p = *root;
        if (!p)
                return;

        while (p->parent)
                p = p->parent;

        __sync_free(p);
        *root = NULL;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_syncmap.c"
#endif