linux/drivers/gpu/drm/i915/i915_active.c
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include "gt/intel_engine_pm.h"

#include "i915_drv.h"
#include "i915_active.h"
#include "i915_globals.h"

#define BKL(ref) (&(ref)->i915->drm.struct_mutex)

/*
 * Active refs memory management
 *
 * To be more economical with memory, we reap all the i915_active trees as
 * they idle (when we know the active requests are inactive) and allocate the
 * nodes from a local slab cache to hopefully reduce the fragmentation.
 */
static struct i915_global_active {
        struct i915_global base;
        struct kmem_cache *slab_cache;
} global;

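/*
 * Each active_node tracks the most recent request submitted on one
 * timeline (keyed by its fence context) and links back to the owning
 * i915_active so that retiring the request can drop its reference.
 */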
struct active_node {
        struct i915_active_request base;
        struct i915_active *ref;
        struct rb_node node;
        u64 timeline;
};

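/* Return every (now idle) node to the slab cache and reset the tree. */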
static void
__active_park(struct i915_active *ref)
{
        struct active_node *it, *n;

        rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
                GEM_BUG_ON(i915_active_request_isset(&it->base));
                kmem_cache_free(global.slab_cache, it);
        }
        ref->tree = RB_ROOT;
}

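/*
 * Drop one reference; on the final release, park the tree and notify
 * the owner via its retire() callback.
 */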
static void
__active_retire(struct i915_active *ref)
{
        GEM_BUG_ON(!ref->count);
        if (--ref->count)
                return;

        /* Return the unused nodes to our slab cache */
        __active_park(ref);

        ref->retire(ref);
}

static void
node_retire(struct i915_active_request *base, struct i915_request *rq)
{
        __active_retire(container_of(base, struct active_node, base)->ref);
}

static void
last_retire(struct i915_active_request *base, struct i915_request *rq)
{
        __active_retire(container_of(base, struct i915_active, last));
}

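/*
 * Prepare the 'last' slot for use by timeline @idx: if it currently
 * holds a request from a different timeline, that request is first
 * moved into (or merged with) an rbtree node keyed by its fence
 * context. May allocate, and so may fail with -ENOMEM.
 */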
static struct i915_active_request *
active_instance(struct i915_active *ref, u64 idx)
{
        struct active_node *node;
        struct rb_node **p, *parent;
        struct i915_request *old;

        /*
         * We track the most recently used timeline to skip an rbtree search
         * for the common case; under typical loads we never need the rbtree
         * at all. We can reuse the last slot if it is empty, that is,
         * after the previous activity has been retired, or if it matches the
         * current timeline.
         *
         * Note that we allow the timeline to be active simultaneously in
         * the rbtree and the last cache. We do this to avoid having
         * to search and replace the rbtree element for a new timeline, with
         * the cost being that we must be aware that the ref may be retired
         * twice for the same timeline (as the older rbtree element will be
         * retired before the new request added to last).
         */
        old = i915_active_request_raw(&ref->last, BKL(ref));
        if (!old || old->fence.context == idx)
                goto out;

        /* Move the currently active fence into the rbtree */
        idx = old->fence.context;

        parent = NULL;
        p = &ref->tree.rb_node;
        while (*p) {
                parent = *p;

                node = rb_entry(parent, struct active_node, node);
                if (node->timeline == idx)
                        goto replace;

                if (node->timeline < idx)
                        p = &parent->rb_right;
                else
                        p = &parent->rb_left;
        }

        node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);

        /* The allocation may retire ref->last (thanks, shrinker)! */
        if (unlikely(!i915_active_request_raw(&ref->last, BKL(ref)))) {
                if (node) /* kmem_cache_free() does not accept NULL */
                        kmem_cache_free(global.slab_cache, node);
                goto out;
        }

        if (unlikely(!node))
                return ERR_PTR(-ENOMEM);

        i915_active_request_init(&node->base, NULL, node_retire);
        node->ref = ref;
        node->timeline = idx;

        rb_link_node(&node->node, parent, p);
        rb_insert_color(&node->node, &ref->tree);

replace:
        /*
         * Overwrite the previous active slot in the rbtree with last,
         * leaving last zeroed. If the previous slot is still active,
         * we must be careful as we now only expect to receive one retire
         * callback, not two, and so must undo the active counting for the
         * overwritten slot.
         */
        if (i915_active_request_isset(&node->base)) {
                /* Retire ourselves from the old rq->active_list */
                __list_del_entry(&node->base.link);
                ref->count--;
                GEM_BUG_ON(!ref->count);
        }
        GEM_BUG_ON(list_empty(&ref->last.link));
        list_replace_init(&ref->last.link, &node->base.link);
        node->base.request = fetch_and_zero(&ref->last.request);

out:
        return &ref->last;
}

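/*
 * A minimal usage sketch; "obj" and "obj_retire" are illustrative names
 * only. Embed a struct i915_active in the object to be tracked, mark it
 * active for every request that uses the object, and wait for idleness
 * before teardown. All calls are made under struct_mutex (see BKL() and
 * the lockdep assertions below); the timeline id is compared against
 * rq->fence.context in active_instance() above.
 *
 *      i915_active_init(i915, &obj->active, obj_retire);
 *      ...
 *      err = i915_active_ref(&obj->active, rq->fence.context, rq);
 *      ...
 *      err = i915_active_wait(&obj->active);
 *      ...
 *      i915_active_fini(&obj->active);
 */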
void i915_active_init(struct drm_i915_private *i915,
                      struct i915_active *ref,
                      void (*retire)(struct i915_active *ref))
{
        ref->i915 = i915;
        ref->retire = retire;
        ref->tree = RB_ROOT;
        i915_active_request_init(&ref->last, NULL, last_retire);
        init_llist_head(&ref->barriers);
        ref->count = 0;
}

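/*
 * Mark @ref as active for @rq on @timeline. Each timeline slot that
 * becomes active takes a reference that is only dropped once its request
 * is retired. Returns 0 on success, or a negative error code if the
 * tracking node could not be allocated.
 */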
int i915_active_ref(struct i915_active *ref,
                    u64 timeline,
                    struct i915_request *rq)
{
        struct i915_active_request *active;
        int err = 0;

        /* Prevent reaping in case we malloc/wait while building the tree */
        i915_active_acquire(ref);

        active = active_instance(ref, timeline);
        if (IS_ERR(active)) {
                err = PTR_ERR(active);
                goto out;
        }

        if (!i915_active_request_isset(active))
                ref->count++;
        __i915_active_request_set(active, rq);

        GEM_BUG_ON(!ref->count);
out:
        i915_active_release(ref);
        return err;
}

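/*
 * i915_active_acquire()/i915_active_release() bracket a sequence of
 * i915_active_ref() calls, pinning @ref so that the tree cannot be
 * reaped while we may sleep or allocate. acquire() returns true if the
 * ref was previously idle.
 */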
bool i915_active_acquire(struct i915_active *ref)
{
        lockdep_assert_held(BKL(ref));
        return !ref->count++;
}

void i915_active_release(struct i915_active *ref)
{
        lockdep_assert_held(BKL(ref));
        __active_retire(ref);
}

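/*
 * Wait upon and retire every request tracked by @ref, returning 0 on
 * success or the first error encountered while retiring.
 */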
int i915_active_wait(struct i915_active *ref)
{
        struct active_node *it, *n;
        int ret = 0;

        if (i915_active_acquire(ref))
                goto out_release;

        ret = i915_active_request_retire(&ref->last, BKL(ref));
        if (ret)
                goto out_release;

        rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
                ret = i915_active_request_retire(&it->base, BKL(ref));
                if (ret)
                        break;
        }

out_release:
        i915_active_release(ref);
        return ret;
}

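/*
 * The await helpers below queue @rq behind the request tracked by an
 * i915_active_request, or behind every request tracked by a whole
 * i915_active, so that @rq is not executed until the tracked work has
 * completed.
 */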
int i915_request_await_active_request(struct i915_request *rq,
                                      struct i915_active_request *active)
{
        struct i915_request *barrier =
                i915_active_request_raw(active, &rq->i915->drm.struct_mutex);

        return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
}

int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
{
        struct active_node *it, *n;
        int err = 0;

        /* await allocates and so we need to avoid hitting the shrinker */
        if (i915_active_acquire(ref))
                goto out; /* was idle */

        err = i915_request_await_active_request(rq, &ref->last);
        if (err)
                goto out;

        rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
                err = i915_request_await_active_request(rq, &it->base);
                if (err)
                        goto out;
        }

out:
        i915_active_release(ref);
        return err;
}

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
void i915_active_fini(struct i915_active *ref)
{
        GEM_BUG_ON(i915_active_request_isset(&ref->last));
        GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
        GEM_BUG_ON(ref->count);
}
#endif

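/*
 * Barrier handling: i915_active_acquire_preallocate_barrier() reserves
 * one node per engine in @engine->mask, keyed by that engine's kernel
 * context timeline and holding an engine-pm wakeref.
 * i915_active_acquire_barrier() then inserts those nodes into the tree
 * and hands them to each engine's barrier_tasks list, and
 * i915_request_add_barriers() splices them onto a request so that the
 * extra references on @ref are only dropped once that request retires.
 */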
int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
                                            struct intel_engine_cs *engine)
{
        struct drm_i915_private *i915 = engine->i915;
        struct llist_node *pos, *next;
        unsigned long tmp;
        int err;

        GEM_BUG_ON(!engine->mask);
        for_each_engine_masked(engine, i915, engine->mask, tmp) {
                struct intel_context *kctx = engine->kernel_context;
                struct active_node *node;

                node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
                if (unlikely(!node)) {
                        err = -ENOMEM;
                        goto unwind;
                }

                i915_active_request_init(&node->base,
                                         (void *)engine, node_retire);
                node->timeline = kctx->ring->timeline->fence_context;
                node->ref = ref;
                ref->count++;

                intel_engine_pm_get(engine);
                llist_add((struct llist_node *)&node->base.link,
                          &ref->barriers);
        }

        return 0;

unwind:
        llist_for_each_safe(pos, next, llist_del_all(&ref->barriers)) {
                struct active_node *node;

                node = container_of((struct list_head *)pos,
                                    typeof(*node), base.link);
                engine = (void *)rcu_access_pointer(node->base.request);

                intel_engine_pm_put(engine);
                kmem_cache_free(global.slab_cache, node);
        }
        return err;
}

void i915_active_acquire_barrier(struct i915_active *ref)
{
        struct llist_node *pos, *next;

        i915_active_acquire(ref);

        llist_for_each_safe(pos, next, llist_del_all(&ref->barriers)) {
                struct intel_engine_cs *engine;
                struct active_node *node;
                struct rb_node **p, *parent;

                node = container_of((struct list_head *)pos,
                                    typeof(*node), base.link);

                engine = (void *)rcu_access_pointer(node->base.request);
                RCU_INIT_POINTER(node->base.request, ERR_PTR(-EAGAIN));

                parent = NULL;
                p = &ref->tree.rb_node;
                while (*p) {
                        parent = *p;
                        if (rb_entry(parent,
                                     struct active_node,
                                     node)->timeline < node->timeline)
                                p = &parent->rb_right;
                        else
                                p = &parent->rb_left;
                }
                rb_link_node(&node->node, parent, p);
                rb_insert_color(&node->node, &ref->tree);

                llist_add((struct llist_node *)&node->base.link,
                          &engine->barrier_tasks);
                intel_engine_pm_put(engine);
        }
        i915_active_release(ref);
}

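/*
 * Take over the engine's accumulated barrier tasks, placing them on
 * @rq's active_list so that they are retired (and their i915_active
 * references dropped) when @rq itself is retired.
 */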
void i915_request_add_barriers(struct i915_request *rq)
{
        struct intel_engine_cs *engine = rq->engine;
        struct llist_node *node, *next;

        llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks))
                list_add_tail((struct list_head *)node, &rq->active_list);
}

int i915_active_request_set(struct i915_active_request *active,
                            struct i915_request *rq)
{
        int err;

        /* Must maintain ordering wrt previous active requests */
        err = i915_request_await_active_request(rq, active);
        if (err)
                return err;

        __i915_active_request_set(active, rq);
        return 0;
}

void i915_active_retire_noop(struct i915_active_request *active,
                             struct i915_request *request)
{
        /* Space left intentionally blank */
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_active.c"
#endif

static void i915_global_active_shrink(void)
{
        kmem_cache_shrink(global.slab_cache);
}

static void i915_global_active_exit(void)
{
        kmem_cache_destroy(global.slab_cache);
}

static struct i915_global_active global = { {
        .shrink = i915_global_active_shrink,
        .exit = i915_global_active_exit,
} };

int __init i915_global_active_init(void)
{
        global.slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN);
        if (!global.slab_cache)
                return -ENOMEM;

        i915_global_register(&global.base);
        return 0;
}