linux/drivers/gpu/drm/i915/i915_active.h
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#ifndef _I915_ACTIVE_H_
#define _I915_ACTIVE_H_

#include <linux/lockdep.h>

#include "i915_active_types.h"
#include "i915_request.h"

/*
 * We treat requests as fences. This is not to be confused with our
 * "fence registers" but pipeline synchronisation objects a la GL_ARB_sync.
 * We use the fences to synchronize access from the CPU with activity on the
 * GPU; for example, we should not rewrite an object's PTEs whilst the GPU
 * is reading them. We also track fences at a higher level to provide
 * implicit synchronisation around GEM objects, e.g. set-domain will wait
 * for outstanding GPU rendering before marking the object ready for CPU
 * access, or a pageflip will wait until the GPU is complete before showing
 * the frame on the scanout.
 *
 * In order to use a fence, the object must track the fence it needs to
 * serialise with. For example, GEM objects want to track both read and
 * write access so that we can perform concurrent read operations between
 * the CPU and GPU engines, as well as waiting for all rendering to
 * complete, or waiting for the last GPU user of a "fence register". The
 * object then embeds a #i915_active_request to track the most recent (in
 * retirement order) request relevant for the desired mode of access.
 * The #i915_active_request is updated with i915_active_request_set() to
 * track the most recent fence request; typically this is done as part of
 * i915_vma_move_to_active().
 *
 * When the #i915_active_request completes (is retired), it will
 * signal its completion to the owner through a callback as well as mark
 * itself as idle (i915_active_request.request == NULL). The owner
 * can then perform any action, such as delayed freeing of an active
 * resource including itself.
 */

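/*
 * For illustration, an owner following the pattern above might embed the
 * tracker and release itself from its retirement callback. The structure
 * and function names in this sketch are hypothetical; only the
 * i915_active_request types and callback signature are real:
 *
 *        struct hypothetical_resource {
 *                struct i915_active_request active;
 *        };
 *
 *        static void hypothetical_retire(struct i915_active_request *active,
 *                                        struct i915_request *rq)
 *        {
 *                struct hypothetical_resource *res =
 *                        container_of(active, typeof(*res), active);
 *
 *                kfree(res);
 *        }
 *
 * Initialising and updating such a tracker is sketched alongside
 * i915_active_request_init() and i915_active_request_set() below.
 */
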
void i915_active_retire_noop(struct i915_active_request *active,
                             struct i915_request *request);

/**
 * i915_active_request_init - prepares the activity tracker for use
 * @active - the active tracker
 * @rq - initial request to track, can be NULL
 * @retire - a callback invoked when the tracker is retired (becomes idle),
 *           can be NULL
 *
 * i915_active_request_init() prepares the embedded @active struct for use as
 * an activity tracker, that is for tracking the last known active request
 * associated with it. When the last request becomes idle (is retired after
 * completion), the optional callback @retire is invoked.
 */
static inline void
i915_active_request_init(struct i915_active_request *active,
                         struct i915_request *rq,
                         i915_active_retire_fn retire)
{
        RCU_INIT_POINTER(active->request, rq);
        INIT_LIST_HEAD(&active->link);
        active->retire = retire ?: i915_active_retire_noop;
}

#define INIT_ACTIVE_REQUEST(name) i915_active_request_init((name), NULL, NULL)

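/*
 * Continuing the hypothetical_resource sketch from the introduction, the
 * embedded tracker would be initialised exactly once before use, either
 * idle and without a retirement callback:
 *
 *        INIT_ACTIVE_REQUEST(&res->active);
 *
 * or idle but with the (hypothetical) callback attached:
 *
 *        i915_active_request_init(&res->active, NULL, hypothetical_retire);
 */
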
/**
 * __i915_active_request_set - updates the tracker to watch the current request
 * @active - the active tracker
 * @request - the request to watch
 *
 * __i915_active_request_set() watches the given @request for completion. Whilst
 * that @request is busy, the @active reports busy. When that @request is
 * retired, the @active tracker is updated to report idle.
 */
static inline void
__i915_active_request_set(struct i915_active_request *active,
                          struct i915_request *request)
{
        list_move(&active->link, &request->active_list);
        rcu_assign_pointer(active->request, request);
}

int __must_check
i915_active_request_set(struct i915_active_request *active,
                        struct i915_request *rq);

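/*
 * Continuing the sketch, a submission path that wants the hypothetical
 * resource tracked by @rq (with the caller assumed to hold struct_mutex)
 * would use the checked setter:
 *
 *        err = i915_active_request_set(&res->active, rq);
 *        if (err)
 *                return err;
 *
 * The lower-level __i915_active_request_set() performs only the list and
 * pointer update and cannot report failure, whereas the int-returning
 * i915_active_request_set() may fail and so must be checked.
 */
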
/**
 * i915_active_request_set_retire_fn - updates the retirement callback
 * @active - the active tracker
 * @fn - the routine called when the request is retired
 * @mutex - struct_mutex used to guard retirements
 *
 * i915_active_request_set_retire_fn() updates the function pointer that
 * is called when the final request associated with the @active tracker
 * is retired.
 */
static inline void
i915_active_request_set_retire_fn(struct i915_active_request *active,
                                  i915_active_retire_fn fn,
                                  struct mutex *mutex)
{
        lockdep_assert_held(mutex);
        active->retire = fn ?: i915_active_retire_noop;
}

/**
 * i915_active_request_raw - return the active request
 * @active - the active tracker
 *
 * i915_active_request_raw() returns the current request being tracked, or NULL.
 * It does not obtain a reference on the request for the caller, so the caller
 * must hold struct_mutex.
 */
static inline struct i915_request *
i915_active_request_raw(const struct i915_active_request *active,
                        struct mutex *mutex)
{
        return rcu_dereference_protected(active->request,
                                         lockdep_is_held(mutex));
}

/**
 * i915_active_request_peek - report the active request being monitored
 * @active - the active tracker
 *
 * i915_active_request_peek() returns the current request being tracked if
 * still active, or NULL. It does not obtain a reference on the request
 * for the caller, so the caller must hold struct_mutex.
 */
static inline struct i915_request *
i915_active_request_peek(const struct i915_active_request *active,
                         struct mutex *mutex)
{
        struct i915_request *request;

        request = i915_active_request_raw(active, mutex);
        if (!request || i915_request_completed(request))
                return NULL;

        return request;
}

/**
 * i915_active_request_get - return a reference to the active request
 * @active - the active tracker
 *
 * i915_active_request_get() returns a reference to the active request, or NULL
 * if the active tracker is idle. The caller must hold struct_mutex.
 */
static inline struct i915_request *
i915_active_request_get(const struct i915_active_request *active,
                        struct mutex *mutex)
{
        return i915_request_get(i915_active_request_peek(active, mutex));
}

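/*
 * As a sketch, a caller holding struct_mutex can take a reference to the
 * tracked request and then continue using it after dropping the lock
 * (res and mutex again being hypothetical stand-ins for the owner and the
 * struct mutex guarding it):
 *
 *        struct i915_request *rq;
 *
 *        mutex_lock(mutex);
 *        rq = i915_active_request_get(&res->active, mutex);
 *        mutex_unlock(mutex);
 *
 *        if (rq) {
 *                ... use the request outside the lock ...
 *                i915_request_put(rq);
 *        }
 *
 * i915_active_request_peek() can be used instead when no reference needs
 * to be kept beyond the critical section.
 */
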
/**
 * __i915_active_request_get_rcu - return a reference to the active request
 * @active - the active tracker
 *
 * __i915_active_request_get_rcu() returns a reference to the active request,
 * or NULL if the active tracker is idle. The caller must hold the RCU read
 * lock, but the returned pointer is safe to use outside of RCU.
 */
static inline struct i915_request *
__i915_active_request_get_rcu(const struct i915_active_request *active)
{
        /*
         * Performing a lockless retrieval of the active request is super
         * tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing
         * slab of request objects will not be freed whilst we hold the
         * RCU read lock. It does not guarantee that the request itself
         * will not be freed and then *reused*. Viz,
         *
         * Thread A                     Thread B
         *
         * rq = active.request
         *                              retire(rq) -> free(rq);
         *                              (rq is now first on the slab freelist)
         *                              active.request = NULL
         *
         *                              rq = new submission on a new object
         * ref(rq)
         *
         * To prevent the request from being reused whilst the caller
         * uses it, we take a reference like normal. Whilst acquiring
         * the reference we check that it is not in a destroyed state
         * (refcnt == 0). That prevents the request being reallocated
         * whilst the caller holds on to it. To check that the request
         * was not reallocated as we acquired the reference we have to
         * check that our request remains the active request across
         * the lookup, in the same manner as a seqlock. The visibility
         * of the pointer versus the reference counting is controlled
         * by using RCU barriers (rcu_dereference and rcu_assign_pointer).
         *
         * In the middle of all that, we inspect whether the request is
         * complete. Retiring is lazy so the request may be completed long
         * before the active tracker is updated. Querying whether the
         * request is complete is far cheaper (as it involves no locked
         * instructions setting cachelines to exclusive) than acquiring
         * the reference, so we do it first. The RCU read lock ensures the
         * pointer dereference is valid, but does not ensure that the
         * seqno nor HWS is the right one! However, if the request was
         * reallocated, that means the active tracker's request was complete.
         * If the new request is also complete, then both are and we can
         * just report the active tracker is idle. If the new request is
         * incomplete, then we acquire a reference on it and check that
         * it remained the active request.
         *
         * It is then imperative that we do not zero the request on
         * reallocation, so that we can chase the dangling pointers!
         * See i915_request_alloc().
         */
        do {
                struct i915_request *request;

                request = rcu_dereference(active->request);
                if (!request || i915_request_completed(request))
                        return NULL;

                /*
                 * An especially silly compiler could decide to recompute the
                 * result of i915_request_completed, more specifically
                 * re-emit the load for request->fence.seqno. A race would catch
                 * a later seqno value, which could flip the result from true to
                 * false. Which means part of the instructions below might not
                 * be executed, while later on instructions are executed. Due to
                 * barriers within the refcounting the inconsistency can't reach
                 * past the call to i915_request_get_rcu, but not executing
                 * that while still executing i915_request_put() creates
                 * havoc enough. Prevent this with a compiler barrier.
                 */
                barrier();

                request = i915_request_get_rcu(request);

                /*
                 * What stops the following rcu_access_pointer() from occurring
                 * before the above i915_request_get_rcu()? If we were
                 * to read the value before pausing to get the reference to
                 * the request, we may not notice a change in the active
                 * tracker.
                 *
                 * The rcu_access_pointer() is a mere compiler barrier, which
                 * means both the CPU and compiler are free to perform the
                 * memory read without constraint. The compiler only has to
                 * ensure that any operations after the rcu_access_pointer()
                 * occur afterwards in program order. This means the read may
                 * be performed earlier by an out-of-order CPU, or adventurous
                 * compiler.
                 *
                 * The atomic operation at the heart of
                 * i915_request_get_rcu(), see dma_fence_get_rcu(), is
                 * atomic_inc_not_zero() which is only a full memory barrier
                 * when successful. That is, if i915_request_get_rcu()
                 * returns the request (and so with the reference count
                 * incremented) then the following read for rcu_access_pointer()
                 * must occur after the atomic operation and so confirm
                 * that this request is the one currently being tracked.
                 *
                 * The corresponding write barrier is part of
                 * rcu_assign_pointer().
                 */
                if (!request || request == rcu_access_pointer(active->request))
                        return rcu_pointer_handoff(request);

                i915_request_put(request);
        } while (1);
}

/**
 * i915_active_request_get_unlocked - return a reference to the active request
 * @active - the active tracker
 *
 * i915_active_request_get_unlocked() returns a reference to the active request,
 * or NULL if the active tracker is idle. The reference is obtained under RCU,
 * so no locking is required by the caller.
 *
 * The reference should be freed with i915_request_put().
 */
static inline struct i915_request *
i915_active_request_get_unlocked(const struct i915_active_request *active)
{
        struct i915_request *request;

        rcu_read_lock();
        request = __i915_active_request_get_rcu(active);
        rcu_read_unlock();

        return request;
}

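/*
 * A sketch of waiting on the tracker without holding struct_mutex, built
 * on the unlocked getter (res is the hypothetical owner from the earlier
 * sketches):
 *
 *        struct i915_request *rq;
 *        long timeout = 0;
 *
 *        rq = i915_active_request_get_unlocked(&res->active);
 *        if (rq) {
 *                timeout = i915_request_wait(rq,
 *                                            I915_WAIT_INTERRUPTIBLE,
 *                                            MAX_SCHEDULE_TIMEOUT);
 *                i915_request_put(rq);
 *        }
 *
 * A negative return from i915_request_wait() indicates the wait failed or
 * was interrupted; the reference is dropped with i915_request_put() in
 * either case.
 */
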
/**
 * i915_active_request_isset - report whether the active tracker is assigned
 * @active - the active tracker
 *
 * i915_active_request_isset() returns true if the active tracker is currently
 * assigned to a request. Due to the lazy retiring, that request may be idle
 * and this may report stale information.
 */
static inline bool
i915_active_request_isset(const struct i915_active_request *active)
{
        return rcu_access_pointer(active->request);
}

/**
 * i915_active_request_retire - waits until the request is retired
 * @active - the active tracker on which to wait
 * @mutex - struct_mutex guarding the tracker
 *
 * i915_active_request_retire() waits until the request is completed,
 * and then ensures that at least the retirement handler for this
 * @active tracker is called before returning. If the @active
 * tracker is idle, the function returns immediately.
 */
static inline int __must_check
i915_active_request_retire(struct i915_active_request *active,
                           struct mutex *mutex)
{
        struct i915_request *request;
        long ret;

        request = i915_active_request_raw(active, mutex);
        if (!request)
                return 0;

        ret = i915_request_wait(request,
                                I915_WAIT_INTERRUPTIBLE,
                                MAX_SCHEDULE_TIMEOUT);
        if (ret < 0)
                return ret;

        list_del_init(&active->link);
        RCU_INIT_POINTER(active->request, NULL);

        active->retire(active, request);

        return 0;
}

/*
 * GPU activity tracking
 *
 * Each set of commands submitted to the GPU comprises a single request that
 * signals a fence upon completion. struct i915_request combines the
 * command submission, scheduling and fence signaling roles. If we want to see
 * if a particular task is complete, we need to grab the fence (struct
 * i915_request) for that task and check or wait for it to be signaled. More
 * often though we want to track the status of a bunch of tasks, for example
 * to wait for the GPU to finish accessing some memory across a variety of
 * different command pipelines from different clients. We could choose to
 * track every single request associated with the task, but knowing that
 * each request belongs to an ordered timeline (later requests within a
 * timeline must wait for earlier requests), we need only track the
 * latest request in each timeline to determine the overall status of the
 * task.
 *
 * struct i915_active provides this tracking across timelines. It builds a
 * composite shared-fence, and is updated as new work is submitted to the task,
 * forming a snapshot of the current status. It should be embedded into the
 * different resources that need to track their associated GPU activity to
 * provide a callback when that GPU activity has ceased, or otherwise to
 * provide a serialisation point either for request submission or for CPU
 * synchronisation.
 */

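/*
 * As a rough sketch of the intended flow: everything other than the
 * i915_active API below is hypothetical, including res->activity (an
 * embedded struct i915_active), the retire callback, the choice of
 * rq->fence.context as the timeline identifier, and the omission of any
 * acquire/release and locking details:
 *
 *        i915_active_init(i915, &res->activity, hypothetical_active_retire);
 *
 * then for each request that uses the resource:
 *
 *        err = i915_active_ref(&res->activity, rq->fence.context, rq);
 *
 * to block until every tracked timeline is idle:
 *
 *        err = i915_active_wait(&res->activity);
 *
 * and once the tracker itself is no longer needed:
 *
 *        i915_active_fini(&res->activity);
 *
 * where hypothetical_active_retire() matches the void (*)(struct i915_active *)
 * callback type passed to i915_active_init().
 */
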
void i915_active_init(struct drm_i915_private *i915,
                      struct i915_active *ref,
                      void (*retire)(struct i915_active *ref));

int i915_active_ref(struct i915_active *ref,
                    u64 timeline,
                    struct i915_request *rq);

int i915_active_wait(struct i915_active *ref);

int i915_request_await_active(struct i915_request *rq,
                              struct i915_active *ref);
int i915_request_await_active_request(struct i915_request *rq,
                                      struct i915_active_request *active);

bool i915_active_acquire(struct i915_active *ref);

static inline void i915_active_cancel(struct i915_active *ref)
{
        GEM_BUG_ON(ref->count != 1);
        ref->count = 0;
}

void i915_active_release(struct i915_active *ref);

static inline bool
i915_active_is_idle(const struct i915_active *ref)
{
        return !ref->count;
}

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
void i915_active_fini(struct i915_active *ref);
#else
static inline void i915_active_fini(struct i915_active *ref) { }
#endif

int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
                                            struct intel_engine_cs *engine);
void i915_active_acquire_barrier(struct i915_active *ref);
void i915_request_add_barriers(struct i915_request *rq);

#endif /* _I915_ACTIVE_H_ */