linux/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Christian König <christian.koenig@amd.com>
 */

#include <linux/dma-fence-chain.h>

#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"

struct amdgpu_sync_entry {
        struct hlist_node       node;
        struct dma_fence        *fence;
};

static struct kmem_cache *amdgpu_sync_slab;

/**
 * amdgpu_sync_create - zero init sync object
 *
 * @sync: sync object to initialize
 *
 * Just clear the sync object for now.
 */
void amdgpu_sync_create(struct amdgpu_sync *sync)
{
        hash_init(sync->fences);
        sync->last_vm_update = NULL;
}
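
/*
 * Illustrative usage sketch (an assumption about a typical caller, not code
 * taken from this driver): the expected lifecycle is create, collect fences,
 * wait or hand them out, free. "adev", "bo", "owner" and "r" below are
 * hypothetical placeholders, and bo->tbo.base.resv is assumed to be the
 * buffer's locked reservation object:
 *
 *      struct amdgpu_sync sync;
 *
 *      amdgpu_sync_create(&sync);
 *      r = amdgpu_sync_resv(adev, &sync, bo->tbo.base.resv,
 *                           AMDGPU_SYNC_NE_OWNER, owner);
 *      if (!r)
 *              r = amdgpu_sync_wait(&sync, true);
 *      amdgpu_sync_free(&sync);
 */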

/**
 * amdgpu_sync_same_dev - test if fence belongs to us
 *
 * @adev: amdgpu device to use for the test
 * @f: fence to test
 *
 * Test if the fence was issued by us.
 */
static bool amdgpu_sync_same_dev(struct amdgpu_device *adev,
                                 struct dma_fence *f)
{
        struct drm_sched_fence *s_fence = to_drm_sched_fence(f);

        if (s_fence) {
                struct amdgpu_ring *ring;

                ring = container_of(s_fence->sched, struct amdgpu_ring, sched);
                return ring->adev == adev;
        }

        return false;
}

/**
 * amdgpu_sync_get_owner - extract the owner of a fence
 *
 * @f: fence to get the owner from
 *
 * Extract who originally created the fence.
 */
static void *amdgpu_sync_get_owner(struct dma_fence *f)
{
        struct drm_sched_fence *s_fence;
        struct amdgpu_amdkfd_fence *kfd_fence;

        if (!f)
                return AMDGPU_FENCE_OWNER_UNDEFINED;

        s_fence = to_drm_sched_fence(f);
        if (s_fence)
                return s_fence->owner;

        kfd_fence = to_amdgpu_amdkfd_fence(f);
        if (kfd_fence)
                return AMDGPU_FENCE_OWNER_KFD;

        return AMDGPU_FENCE_OWNER_UNDEFINED;
}

/**
 * amdgpu_sync_keep_later - Keep the later fence
 *
 * @keep: existing fence to test
 * @fence: new fence
 *
 * Either keep the existing fence or the new one, depending on which one
 * is later.
 */
static void amdgpu_sync_keep_later(struct dma_fence **keep,
                                   struct dma_fence *fence)
{
        if (*keep && dma_fence_is_later(*keep, fence))
                return;

        dma_fence_put(*keep);
        *keep = dma_fence_get(fence);
}
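
/*
 * For example (illustrative only): if *keep holds a fence with seqno 10 and
 * @fence comes from the same dma_fence context with seqno 12,
 * dma_fence_is_later() reports the existing fence as earlier, so the
 * reference to seqno 10 is dropped and seqno 12 is kept instead.
 */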

/**
 * amdgpu_sync_add_later - add the fence to the hash
 *
 * @sync: sync object to add the fence to
 * @f: fence to add
 *
 * Tries to add the fence to an existing hash entry. Returns true when an entry
 * was found, false otherwise.
 */
static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f)
{
        struct amdgpu_sync_entry *e;

        hash_for_each_possible(sync->fences, e, node, f->context) {
                if (unlikely(e->fence->context != f->context))
                        continue;

                amdgpu_sync_keep_later(&e->fence, f);
                return true;
        }
        return false;
}

/**
 * amdgpu_sync_fence - remember to sync to this fence
 *
 * @sync: sync object to add fence to
 * @f: fence to sync to
 *
 * Add the fence to the sync object.
 */
int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f)
{
        struct amdgpu_sync_entry *e;

        if (!f)
                return 0;

        if (amdgpu_sync_add_later(sync, f))
                return 0;

        e = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL);
        if (!e)
                return -ENOMEM;

        hash_add(sync->fences, &e->node, f->context);
        e->fence = dma_fence_get(f);
        return 0;
}
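
/*
 * Illustrative sketch (an assumption about a typical caller, not code from
 * this file): a submission path could record the finished fence of a job it
 * already owns, where "job" is a hypothetical struct amdgpu_job pointer and
 * "error_free" a hypothetical error label:
 *
 *      r = amdgpu_sync_fence(&sync, &job->base.s_fence->finished);
 *      if (r)
 *              goto error_free;
 *
 * Adding a second fence from the same context only keeps the later one, see
 * amdgpu_sync_add_later() above.
 */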

/**
 * amdgpu_sync_vm_fence - remember to sync to this VM fence
 *
 * @sync: sync object to add fence to
 * @fence: the VM fence to add
 *
 * Add the fence to the sync object and remember it as the last VM update.
 */
int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence)
{
        if (!fence)
                return 0;

        amdgpu_sync_keep_later(&sync->last_vm_update, fence);
        return amdgpu_sync_fence(sync, fence);
}

/* Determine based on the owner and mode if we should sync to a fence or not */
static bool amdgpu_sync_test_fence(struct amdgpu_device *adev,
                                   enum amdgpu_sync_mode mode,
                                   void *owner, struct dma_fence *f)
{
        void *fence_owner = amdgpu_sync_get_owner(f);

        /* Always sync to moves, no matter what */
        if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED)
                return true;

        /* We only want to trigger KFD eviction fences on
         * evict or move jobs. Skip KFD fences otherwise.
         */
        if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
            owner != AMDGPU_FENCE_OWNER_UNDEFINED)
                return false;

        /* Never sync to VM updates either. */
        if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
            owner != AMDGPU_FENCE_OWNER_UNDEFINED)
                return false;

        /* Ignore fences depending on the sync mode */
        switch (mode) {
        case AMDGPU_SYNC_ALWAYS:
                return true;

        case AMDGPU_SYNC_NE_OWNER:
                if (amdgpu_sync_same_dev(adev, f) &&
                    fence_owner == owner)
                        return false;
                break;

        case AMDGPU_SYNC_EQ_OWNER:
                if (amdgpu_sync_same_dev(adev, f) &&
                    fence_owner != owner)
                        return false;
                break;

        case AMDGPU_SYNC_EXPLICIT:
                return false;
        }

        WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
             "Adding eviction fence to sync obj");
        return true;
}
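
/*
 * Summary of the sync modes handled above (descriptive only, derived from
 * amdgpu_sync_test_fence()):
 *
 *      AMDGPU_SYNC_ALWAYS   sync to every fence that survives the owner
 *                           checks above
 *      AMDGPU_SYNC_NE_OWNER skip fences from the same device and the same
 *                           owner
 *      AMDGPU_SYNC_EQ_OWNER skip fences from the same device but a
 *                           different owner
 *      AMDGPU_SYNC_EXPLICIT skip all implicit fences; moves (undefined
 *                           owner) are still synced to
 */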

/**
 * amdgpu_sync_resv - sync to a reservation object
 *
 * @adev: amdgpu device
 * @sync: sync object to add fences from reservation object to
 * @resv: reservation object with embedded fence
 * @mode: how owner affects which fences we sync to
 * @owner: owner of the planned job submission
 *
 * Sync to all fences in the reservation object that match the given mode
 * and owner.
 */
int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
                     struct dma_resv *resv, enum amdgpu_sync_mode mode,
                     void *owner)
{
        struct dma_resv_list *flist;
        struct dma_fence *f;
        unsigned i;
        int r = 0;

        if (resv == NULL)
                return -EINVAL;

        /* always sync to the exclusive fence */
        f = dma_resv_excl_fence(resv);
        dma_fence_chain_for_each(f, f) {
                struct dma_fence_chain *chain = to_dma_fence_chain(f);

                if (amdgpu_sync_test_fence(adev, mode, owner, chain ?
                                           chain->fence : f)) {
                        r = amdgpu_sync_fence(sync, f);
                        dma_fence_put(f);
                        if (r)
                                return r;
                        break;
                }
        }

        flist = dma_resv_shared_list(resv);
        if (!flist)
                return 0;

        for (i = 0; i < flist->shared_count; ++i) {
                f = rcu_dereference_protected(flist->shared[i],
                                              dma_resv_held(resv));

                if (amdgpu_sync_test_fence(adev, mode, owner, f)) {
                        r = amdgpu_sync_fence(sync, f);
                        if (r)
                                return r;
                }
        }
        return 0;
}
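
/*
 * Illustrative sketch (an assumption about a typical caller, not lifted from
 * the CS code): a command submission that wants to respect implicit fencing
 * on a buffer object could collect its dependencies roughly like this, with
 * the reservation object assumed to be locked and "bo" and "vm" standing for
 * the caller's amdgpu_bo and amdgpu_vm:
 *
 *      r = amdgpu_sync_resv(adev, &sync, bo->tbo.base.resv,
 *                           AMDGPU_SYNC_NE_OWNER, vm);
 *      if (r)
 *              return r;
 *
 * AMDGPU_SYNC_NE_OWNER keeps submissions from the same owner on the same
 * device free of implicit sync against each other, while still waiting for
 * everyone else.
 */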

/**
 * amdgpu_sync_peek_fence - get the next fence not signaled yet
 *
 * @sync: the sync object
 * @ring: optional ring to use for test
 *
 * Returns the next fence not signaled yet without removing it from the sync
 * object.
 */
struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
                                         struct amdgpu_ring *ring)
{
        struct amdgpu_sync_entry *e;
        struct hlist_node *tmp;
        int i;

        hash_for_each_safe(sync->fences, i, tmp, e, node) {
                struct dma_fence *f = e->fence;
                struct drm_sched_fence *s_fence = to_drm_sched_fence(f);

                if (dma_fence_is_signaled(f)) {
                        hash_del(&e->node);
                        dma_fence_put(f);
                        kmem_cache_free(amdgpu_sync_slab, e);
                        continue;
                }
                if (ring && s_fence) {
                        /* For fences from the same ring it is sufficient
                         * when they are scheduled.
                         */
                        if (s_fence->sched == &ring->sched) {
                                if (dma_fence_is_signaled(&s_fence->scheduled))
                                        continue;

                                return &s_fence->scheduled;
                        }
                }

                return f;
        }

        return NULL;
}
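
/*
 * Illustrative sketch (an assumption about a typical caller such as a job
 * dependency callback, not code from this file): peeking lets the caller hand
 * an unsignaled fence back without draining the sync object:
 *
 *      struct dma_fence *fence = amdgpu_sync_peek_fence(&sync, ring);
 *
 *      if (fence)
 *              return dma_fence_get(fence);
 *
 * For fences produced by @ring itself only the scheduled fence is considered,
 * because the ring executes its own jobs in submission order anyway.
 */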

/**
 * amdgpu_sync_get_fence - get the next fence from the sync object
 *
 * @sync: sync object to use
 *
 * Gets and removes the next fence from the sync object that is not signaled
 * yet.
 */
struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
{
        struct amdgpu_sync_entry *e;
        struct hlist_node *tmp;
        struct dma_fence *f;
        int i;

        hash_for_each_safe(sync->fences, i, tmp, e, node) {
                f = e->fence;

                hash_del(&e->node);
                kmem_cache_free(amdgpu_sync_slab, e);

                if (!dma_fence_is_signaled(f))
                        return f;

                dma_fence_put(f);
        }
        return NULL;
}
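
/*
 * Illustrative sketch (assumption, not a caller in this file): draining all
 * remaining dependencies transfers one fence reference per call to the
 * caller, which therefore has to drop it again. "do_something_with" is a
 * hypothetical placeholder for whatever the caller does with each fence:
 *
 *      struct dma_fence *fence;
 *
 *      while ((fence = amdgpu_sync_get_fence(&sync))) {
 *              r = do_something_with(fence);
 *              dma_fence_put(fence);
 *              if (r)
 *                      break;
 *      }
 */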

/**
 * amdgpu_sync_clone - clone a sync object
 *
 * @source: sync object to clone
 * @clone: pointer to destination sync object
 *
 * Adds references to all unsignaled fences in @source to @clone. Also
 * removes signaled fences from @source while at it.
 */
int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
{
        struct amdgpu_sync_entry *e;
        struct hlist_node *tmp;
        struct dma_fence *f;
        int i, r;

        hash_for_each_safe(source->fences, i, tmp, e, node) {
                f = e->fence;
                if (!dma_fence_is_signaled(f)) {
                        r = amdgpu_sync_fence(clone, f);
                        if (r)
                                return r;
                } else {
                        hash_del(&e->node);
                        dma_fence_put(f);
                        kmem_cache_free(amdgpu_sync_slab, e);
                }
        }

        dma_fence_put(clone->last_vm_update);
        clone->last_vm_update = dma_fence_get(source->last_vm_update);

        return 0;
}

/**
 * amdgpu_sync_wait - wait for all fences in the sync object
 *
 * @sync: sync object to wait for
 * @intr: if true, the wait is interruptible
 *
 * Wait for every fence in the sync object to signal, removing each one once
 * it has. Returns 0 on success or a negative error code if a wait was
 * interrupted or failed.
 */
int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr)
{
        struct amdgpu_sync_entry *e;
        struct hlist_node *tmp;
        int i, r;

        hash_for_each_safe(sync->fences, i, tmp, e, node) {
                r = dma_fence_wait(e->fence, intr);
                if (r)
                        return r;

                hash_del(&e->node);
                dma_fence_put(e->fence);
                kmem_cache_free(amdgpu_sync_slab, e);
        }

        return 0;
}

/**
 * amdgpu_sync_free - free the sync object
 *
 * @sync: sync object to use
 *
 * Free the sync object.
 */
void amdgpu_sync_free(struct amdgpu_sync *sync)
{
        struct amdgpu_sync_entry *e;
        struct hlist_node *tmp;
        unsigned i;

        hash_for_each_safe(sync->fences, i, tmp, e, node) {
                hash_del(&e->node);
                dma_fence_put(e->fence);
                kmem_cache_free(amdgpu_sync_slab, e);
        }

        dma_fence_put(sync->last_vm_update);
}

/**
 * amdgpu_sync_init - init sync object subsystem
 *
 * Allocate the slab allocator.
 */
int amdgpu_sync_init(void)
{
        amdgpu_sync_slab = kmem_cache_create(
                "amdgpu_sync", sizeof(struct amdgpu_sync_entry), 0,
                SLAB_HWCACHE_ALIGN, NULL);
        if (!amdgpu_sync_slab)
                return -ENOMEM;

        return 0;
}

/**
 * amdgpu_sync_fini - fini sync object subsystem
 *
 * Free the slab allocator.
 */
void amdgpu_sync_fini(void)
{
        kmem_cache_destroy(amdgpu_sync_slab);
}