linux/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2020-2021 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/types.h>
#include <linux/hmm.h>
#include <linux/dma-direction.h>
#include <linux/dma-mapping.h>
#include "amdgpu_sync.h"
#include "amdgpu_object.h"
#include "amdgpu_vm.h"
#include "amdgpu_mn.h"
#include "amdgpu_res_cursor.h"
#include "kfd_priv.h"
#include "kfd_svm.h"
#include "kfd_migrate.h"

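/*
 * Note on addressing (editor's summary of the code below): a "direct mapping"
 * VRAM address is the page's offset within local VRAM plus the start of the
 * TTM_PL_VRAM domain in the GPU address space, so the copy engine can address
 * the VRAM page directly. System pages, in contrast, are reached through the
 * temporary GART mapping created by svm_migrate_gart_map().
 */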
static uint64_t
svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, uint64_t addr)
{
        return addr + amdgpu_ttm_domain_start(adev, TTM_PL_VRAM);
}

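/*
 * svm_migrate_gart_map() builds a single indirect buffer that carries both
 * the SDMA copy command and its payload: the first num_dw dwords hold the
 * copy packet, and the npages * 8 bytes that follow hold the GART PTEs
 * generated by amdgpu_gart_map(). The copy packet writes those PTEs into the
 * GART table (GART window 0), after which the system pages appear as one
 * contiguous range at *gart_addr for the subsequent data copy.
 */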
static int
svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages,
                     dma_addr_t *addr, uint64_t *gart_addr, uint64_t flags)
{
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_job *job;
        unsigned int num_dw, num_bytes;
        struct dma_fence *fence;
        uint64_t src_addr, dst_addr;
        uint64_t pte_flags;
        void *cpu_addr;
        int r;

        /* use gart window 0 */
        *gart_addr = adev->gmc.gart_start;

        num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
        num_bytes = npages * 8;

        r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes,
                                     AMDGPU_IB_POOL_DELAYED, &job);
        if (r)
                return r;

        src_addr = num_dw * 4;
        src_addr += job->ibs[0].gpu_addr;

        dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
        amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
                                dst_addr, num_bytes, false);

        amdgpu_ring_pad_ib(ring, &job->ibs[0]);
        WARN_ON(job->ibs[0].length_dw > num_dw);

        pte_flags = AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
        pte_flags |= AMDGPU_PTE_SYSTEM | AMDGPU_PTE_SNOOPED;
        if (!(flags & KFD_IOCTL_SVM_FLAG_GPU_RO))
                pte_flags |= AMDGPU_PTE_WRITEABLE;
        pte_flags |= adev->gart.gart_pte_flags;

        cpu_addr = &job->ibs[0].ptr[num_dw];

        r = amdgpu_gart_map(adev, 0, npages, addr, pte_flags, cpu_addr);
        if (r)
                goto error_free;

        r = amdgpu_job_submit(job, &adev->mman.entity,
                              AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
        if (r)
                goto error_free;

        dma_fence_put(fence);

        return r;

error_free:
        amdgpu_job_free(job);
        return r;
}

/**
 * svm_migrate_copy_memory_gart - copy data between ram and vram using sdma
 *
 * @adev: amdgpu device the sdma ring is running on
 * @sys: system (ram) dma address array
 * @vram: vram page address array
 * @npages: number of pages to copy
 * @direction: enum MIGRATION_COPY_DIR
 * @mfence: output, sdma fence to signal after sdma is done
 *
 * The ram side is addressed through contiguous GART table entries that map the
 * ram pages; the vram side uses the direct mapping of the vram pages, which
 * must be npages contiguous pages.
 * The GART update and the data copy share the same buffer-copy ring. The copy
 * is split into transfers of at most GTT_MAX_PAGES pages; all sdma operations
 * are serialized, and the fence of the last transfer is returned so the caller
 * can wait for the whole copy to complete.
 *
 * Context: Process context, takes and releases gtt_window_lock
 *
 * Return:
 * 0 - OK, otherwise error code
 */
static int
svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
                             uint64_t *vram, uint64_t npages,
                             enum MIGRATION_COPY_DIR direction,
                             struct dma_fence **mfence)
{
        const uint64_t GTT_MAX_PAGES = AMDGPU_GTT_MAX_TRANSFER_SIZE;
        struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
        uint64_t gart_s, gart_d;
        struct dma_fence *next;
        uint64_t size;
        int r = 0;

        mutex_lock(&adev->mman.gtt_window_lock);

        while (npages) {
                size = min(GTT_MAX_PAGES, npages);

                if (direction == FROM_VRAM_TO_RAM) {
                        gart_s = svm_migrate_direct_mapping_addr(adev, *vram);
                        r = svm_migrate_gart_map(ring, size, sys, &gart_d, 0);

                } else if (direction == FROM_RAM_TO_VRAM) {
                        r = svm_migrate_gart_map(ring, size, sys, &gart_s,
                                                 KFD_IOCTL_SVM_FLAG_GPU_RO);
                        gart_d = svm_migrate_direct_mapping_addr(adev, *vram);
                }
                if (r) {
                        pr_debug("failed %d to create gart mapping\n", r);
                        goto out_unlock;
                }

                r = amdgpu_copy_buffer(ring, gart_s, gart_d, size * PAGE_SIZE,
                                       NULL, &next, false, true, false);
                if (r) {
                        pr_debug("failed %d to copy memory\n", r);
                        goto out_unlock;
                }

                dma_fence_put(*mfence);
                *mfence = next;
                npages -= size;
                if (npages) {
                        sys += size;
                        vram += size;
                }
        }

out_unlock:
        mutex_unlock(&adev->mman.gtt_window_lock);

        return r;
}

/**
 * svm_migrate_copy_done - wait for the sdma memory copy to finish
 *
 * @adev: amdgpu device the sdma memory copy is executing on
 * @mfence: migrate fence
 *
 * Wait until the dma fence is signaled. If the copy was split into multiple
 * sdma operations, this is the fence of the last sdma operation.
 *
 * Context: called after svm_migrate_copy_memory
 *
 * Return:
 * 0            - success
 * otherwise    - error code from dma fence signal
 */
static int
svm_migrate_copy_done(struct amdgpu_device *adev, struct dma_fence *mfence)
{
        int r = 0;

        if (mfence) {
                r = dma_fence_wait(mfence, false);
                dma_fence_put(mfence);
                pr_debug("sdma copy memory fence done\n");
        }

        return r;
}

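/*
 * VRAM offsets and device-private page frame numbers are related through the
 * pgmap range registered in svm_migrate_init(): a VRAM offset becomes a pfn
 * by adding pgmap.range.start and shifting by PAGE_SHIFT, and
 * svm_migrate_addr() below performs the inverse. Illustrative example with
 * made-up numbers: if pgmap.range.start were 0x800000000 and the VRAM offset
 * 0x2000, the pfn would be (0x800000000 + 0x2000) >> 12 = 0x800002.
 */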
unsigned long
svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr)
{
        return (addr + adev->kfd.dev->pgmap.range.start) >> PAGE_SHIFT;
}

static void
svm_migrate_get_vram_page(struct svm_range *prange, unsigned long pfn)
{
        struct page *page;

        page = pfn_to_page(pfn);
        svm_range_bo_ref(prange->svm_bo);
        page->zone_device_data = prange->svm_bo;
        get_page(page);
        lock_page(page);
}

static void
svm_migrate_put_vram_page(struct amdgpu_device *adev, unsigned long addr)
{
        struct page *page;

        page = pfn_to_page(svm_migrate_addr_to_pfn(adev, addr));
        unlock_page(page);
        put_page(page);
}

static unsigned long
svm_migrate_addr(struct amdgpu_device *adev, struct page *page)
{
        unsigned long addr;

        addr = page_to_pfn(page) << PAGE_SHIFT;
        return (addr - adev->kfd.dev->pgmap.range.start);
}

static struct page *
svm_migrate_get_sys_page(struct vm_area_struct *vma, unsigned long addr)
{
        struct page *page;

        page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
        if (page)
                lock_page(page);

        return page;
}

static void svm_migrate_put_sys_page(unsigned long addr)
{
        struct page *page;

        page = pfn_to_page(addr >> PAGE_SHIFT);
        unlock_page(page);
        put_page(page);
}

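/*
 * Layout of the scratch buffer used by svm_migrate_copy_to_vram(): the first
 * npages entries (src) hold the DMA addresses of the system pages mapped with
 * dma_map_page(), the following npages uint64_t entries (dst) hold the VRAM
 * offsets taken from the range's ttm resource cursor. Pages are copied in
 * batches: a batch is flushed whenever a source page cannot be migrated or
 * the current VRAM cursor block runs out, so each
 * svm_migrate_copy_memory_gart() call sees physically contiguous VRAM on the
 * destination side.
 */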
static int
svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
                         struct migrate_vma *migrate, struct dma_fence **mfence,
                         dma_addr_t *scratch)
{
        uint64_t npages = migrate->cpages;
        struct device *dev = adev->dev;
        struct amdgpu_res_cursor cursor;
        dma_addr_t *src;
        uint64_t *dst;
        uint64_t i, j;
        int r;

        pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
                 prange->last);

        src = scratch;
        dst = (uint64_t *)(scratch + npages);

        r = svm_range_vram_node_new(adev, prange, true);
        if (r) {
                pr_debug("failed %d get 0x%llx pages from vram\n", r, npages);
                goto out;
        }

        amdgpu_res_first(prange->ttm_res, prange->offset << PAGE_SHIFT,
                         npages << PAGE_SHIFT, &cursor);
        for (i = j = 0; i < npages; i++) {
                struct page *spage;

                spage = migrate_pfn_to_page(migrate->src[i]);
                if (spage && !is_zone_device_page(spage)) {
                        dst[i] = cursor.start + (j << PAGE_SHIFT);
                        migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]);
                        svm_migrate_get_vram_page(prange, migrate->dst[i]);
                        migrate->dst[i] = migrate_pfn(migrate->dst[i]);
                        migrate->dst[i] |= MIGRATE_PFN_LOCKED;
                        src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE,
                                              DMA_TO_DEVICE);
                        r = dma_mapping_error(dev, src[i]);
                        if (r) {
                                pr_debug("failed %d dma_map_page\n", r);
                                goto out_free_vram_pages;
                        }
                } else {
                        if (j) {
                                r = svm_migrate_copy_memory_gart(
                                                adev, src + i - j,
                                                dst + i - j, j,
                                                FROM_RAM_TO_VRAM,
                                                mfence);
                                if (r)
                                        goto out_free_vram_pages;
                                amdgpu_res_next(&cursor, j << PAGE_SHIFT);
                                j = 0;
                        } else {
                                amdgpu_res_next(&cursor, PAGE_SIZE);
                        }
                        continue;
                }

                pr_debug("dma mapping src to 0x%llx, page_to_pfn 0x%lx\n",
                         src[i] >> PAGE_SHIFT, page_to_pfn(spage));

                if (j >= (cursor.size >> PAGE_SHIFT) - 1 && i < npages - 1) {
                        r = svm_migrate_copy_memory_gart(adev, src + i - j,
                                                         dst + i - j, j + 1,
                                                         FROM_RAM_TO_VRAM,
                                                         mfence);
                        if (r)
                                goto out_free_vram_pages;
                        amdgpu_res_next(&cursor, (j + 1) * PAGE_SIZE);
                        j = 0;
                } else {
                        j++;
                }
        }

        r = svm_migrate_copy_memory_gart(adev, src + i - j, dst + i - j, j,
                                         FROM_RAM_TO_VRAM, mfence);

out_free_vram_pages:
        if (r) {
                pr_debug("failed %d to copy memory to vram\n", r);
                while (i--) {
                        svm_migrate_put_vram_page(adev, dst[i]);
                        migrate->dst[i] = 0;
                }
        }

#ifdef DEBUG_FORCE_MIXED_DOMAINS
        for (i = 0, j = 0; i < npages; i += 4, j++) {
                if (j & 1)
                        continue;
                svm_migrate_put_vram_page(adev, dst[i]);
                migrate->dst[i] = 0;
                svm_migrate_put_vram_page(adev, dst[i + 1]);
                migrate->dst[i + 1] = 0;
                svm_migrate_put_vram_page(adev, dst[i + 2]);
                migrate->dst[i + 2] = 0;
                svm_migrate_put_vram_page(adev, dst[i + 3]);
                migrate->dst[i + 3] = 0;
        }
#endif
out:
        return r;
}

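/*
 * Per-page bookkeeping for one migrate_vma pass. A single kvmalloc'ed buffer
 * provides four arrays of npages entries each: migrate.src and migrate.dst
 * (2 * sizeof(*migrate.src)), plus the scratch area used by the copy routines
 * (one dma_addr_t and one uint64_t per page), which is why the allocation
 * size is (2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t))
 * * npages.
 */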
static int
svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
                        struct vm_area_struct *vma, uint64_t start,
                        uint64_t end)
{
        uint64_t npages = (end - start) >> PAGE_SHIFT;
        struct kfd_process_device *pdd;
        struct dma_fence *mfence = NULL;
        struct migrate_vma migrate;
        dma_addr_t *scratch;
        size_t size;
        void *buf;
        int r = -ENOMEM;

        memset(&migrate, 0, sizeof(migrate));
        migrate.vma = vma;
        migrate.start = start;
        migrate.end = end;
        migrate.flags = MIGRATE_VMA_SELECT_SYSTEM;
        migrate.pgmap_owner = SVM_ADEV_PGMAP_OWNER(adev);

        size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t);
        size *= npages;
        buf = kvmalloc(size, GFP_KERNEL | __GFP_ZERO);
        if (!buf)
                goto out;

        migrate.src = buf;
        migrate.dst = migrate.src + npages;
        scratch = (dma_addr_t *)(migrate.dst + npages);

        r = migrate_vma_setup(&migrate);
        if (r) {
                pr_debug("failed %d prepare migrate svms 0x%p [0x%lx 0x%lx]\n",
                         r, prange->svms, prange->start, prange->last);
                goto out_free;
        }
        if (migrate.cpages != npages) {
                pr_debug("Partial migration. 0x%lx/0x%llx pages can be migrated\n",
                         migrate.cpages,
                         npages);
        }

        if (migrate.cpages) {
                r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence,
                                             scratch);
                migrate_vma_pages(&migrate);
                svm_migrate_copy_done(adev, mfence);
                migrate_vma_finalize(&migrate);
        }

        svm_range_dma_unmap(adev->dev, scratch, 0, npages);
        svm_range_free_dma_mappings(prange);

out_free:
        kvfree(buf);
out:
        if (!r) {
                pdd = svm_range_get_pdd_by_adev(prange, adev);
                if (pdd)
                        WRITE_ONCE(pdd->page_in, pdd->page_in + migrate.cpages);
        }

        return r;
}

/**
 * svm_migrate_ram_to_vram - migrate svm range from system to device
 * @prange: range structure
 * @best_loc: the device to migrate to
 * @mm: the process mm structure
 *
 * Context: Process context, caller holds mmap read lock, svms lock, prange lock
 *
 * Return:
 * 0 - OK, otherwise error code
 */
static int
svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
                        struct mm_struct *mm)
{
        unsigned long addr, start, end;
        struct vm_area_struct *vma;
        struct amdgpu_device *adev;
        int r = 0;

        if (prange->actual_loc == best_loc) {
                pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
                         prange->svms, prange->start, prange->last, best_loc);
                return 0;
        }

        adev = svm_range_get_adev_by_id(prange, best_loc);
        if (!adev) {
                pr_debug("failed to get device by id 0x%x\n", best_loc);
                return -ENODEV;
        }

        pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms,
                 prange->start, prange->last, best_loc);

        /* FIXME: workaround for page locking bug with invalid pages */
        svm_range_prefault(prange, mm, SVM_ADEV_PGMAP_OWNER(adev));

        start = prange->start << PAGE_SHIFT;
        end = (prange->last + 1) << PAGE_SHIFT;

        for (addr = start; addr < end;) {
                unsigned long next;

                vma = find_vma(mm, addr);
                if (!vma || addr < vma->vm_start)
                        break;

                next = min(vma->vm_end, end);
                r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next);
                if (r) {
                        pr_debug("failed to migrate\n");
                        break;
                }
                addr = next;
        }

        if (!r)
                prange->actual_loc = best_loc;

        return r;
}

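/*
 * Called by the core mm when the last reference to a device-private page is
 * dropped. page->zone_device_data carries the svm_range_bo reference taken in
 * svm_migrate_get_vram_page(); releasing it here is what eventually allows
 * the backing VRAM buffer object to be freed.
 */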
static void svm_migrate_page_free(struct page *page)
{
        struct svm_range_bo *svm_bo = page->zone_device_data;

        if (svm_bo) {
                pr_debug("svm_bo ref left: %d\n", kref_read(&svm_bo->kref));
                svm_range_bo_unref(svm_bo);
        }
}

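/*
 * Reverse direction of svm_migrate_copy_to_vram(): here the scratch area is
 * split into dst (DMA addresses of newly allocated system pages) followed by
 * src (VRAM offsets of the device pages being evicted). The copy is again
 * batched, with a flush whenever a page is skipped or the VRAM source stops
 * being physically contiguous, since each sdma transfer needs a contiguous
 * VRAM range.
 */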
static int
svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
                        struct migrate_vma *migrate, struct dma_fence **mfence,
                        dma_addr_t *scratch, uint64_t npages)
{
        struct device *dev = adev->dev;
        uint64_t *src;
        dma_addr_t *dst;
        struct page *dpage;
        uint64_t i = 0, j;
        uint64_t addr;
        int r = 0;

        pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
                 prange->last);

        addr = prange->start << PAGE_SHIFT;

        src = (uint64_t *)(scratch + npages);
        dst = scratch;

        for (i = 0, j = 0; i < npages; i++, addr += PAGE_SIZE) {
                struct page *spage;

                spage = migrate_pfn_to_page(migrate->src[i]);
                if (!spage || !is_zone_device_page(spage)) {
                        pr_debug("invalid page. Could be in CPU already svms 0x%p [0x%lx 0x%lx]\n",
                                 prange->svms, prange->start, prange->last);
                        if (j) {
                                r = svm_migrate_copy_memory_gart(adev, dst + i - j,
                                                                 src + i - j, j,
                                                                 FROM_VRAM_TO_RAM,
                                                                 mfence);
                                if (r)
                                        goto out_oom;
                                j = 0;
                        }
                        continue;
                }
                src[i] = svm_migrate_addr(adev, spage);
                if (i > 0 && src[i] != src[i - 1] + PAGE_SIZE) {
                        r = svm_migrate_copy_memory_gart(adev, dst + i - j,
                                                         src + i - j, j,
                                                         FROM_VRAM_TO_RAM,
                                                         mfence);
                        if (r)
                                goto out_oom;
                        j = 0;
                }

                dpage = svm_migrate_get_sys_page(migrate->vma, addr);
                if (!dpage) {
                        pr_debug("failed get page svms 0x%p [0x%lx 0x%lx]\n",
                                 prange->svms, prange->start, prange->last);
                        r = -ENOMEM;
                        goto out_oom;
                }

                dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_FROM_DEVICE);
                r = dma_mapping_error(dev, dst[i]);
                if (r) {
                        pr_debug("failed %d dma_map_page\n", r);
                        goto out_oom;
                }

                pr_debug("dma mapping dst to 0x%llx, page_to_pfn 0x%lx\n",
                         dst[i] >> PAGE_SHIFT, page_to_pfn(dpage));

                migrate->dst[i] = migrate_pfn(page_to_pfn(dpage));
                migrate->dst[i] |= MIGRATE_PFN_LOCKED;
                j++;
        }

        r = svm_migrate_copy_memory_gart(adev, dst + i - j, src + i - j, j,
                                         FROM_VRAM_TO_RAM, mfence);

out_oom:
        if (r) {
                pr_debug("failed %d copy to ram\n", r);
                while (i--) {
                        svm_migrate_put_sys_page(dst[i]);
                        migrate->dst[i] = 0;
                }
        }

        return r;
}

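/*
 * Counterpart of svm_migrate_vma_to_vram() for eviction back to system
 * memory: the same buffer layout is used, but migrate_vma_setup() is asked
 * for MIGRATE_VMA_SELECT_DEVICE_PRIVATE pages owned by this device, and the
 * successfully collected pages are accounted in pdd->page_out instead of
 * pdd->page_in.
 */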
static int
svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
                       struct vm_area_struct *vma, uint64_t start, uint64_t end)
{
        uint64_t npages = (end - start) >> PAGE_SHIFT;
        struct kfd_process_device *pdd;
        struct dma_fence *mfence = NULL;
        struct migrate_vma migrate;
        dma_addr_t *scratch;
        size_t size;
        void *buf;
        int r = -ENOMEM;

        memset(&migrate, 0, sizeof(migrate));
        migrate.vma = vma;
        migrate.start = start;
        migrate.end = end;
        migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
        migrate.pgmap_owner = SVM_ADEV_PGMAP_OWNER(adev);

        size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t);
        size *= npages;
        buf = kvmalloc(size, GFP_KERNEL | __GFP_ZERO);
        if (!buf)
                goto out;

        migrate.src = buf;
        migrate.dst = migrate.src + npages;
        scratch = (dma_addr_t *)(migrate.dst + npages);

        r = migrate_vma_setup(&migrate);
        if (r) {
                pr_debug("failed %d prepare migrate svms 0x%p [0x%lx 0x%lx]\n",
                         r, prange->svms, prange->start, prange->last);
                goto out_free;
        }

        pr_debug("cpages %ld\n", migrate.cpages);

        if (migrate.cpages) {
                r = svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence,
                                            scratch, npages);
                migrate_vma_pages(&migrate);
                svm_migrate_copy_done(adev, mfence);
                migrate_vma_finalize(&migrate);
        } else {
                pr_debug("failed collect migrate device pages [0x%lx 0x%lx]\n",
                         prange->start, prange->last);
        }

        svm_range_dma_unmap(adev->dev, scratch, 0, npages);

out_free:
        kvfree(buf);
out:
        if (!r) {
                pdd = svm_range_get_pdd_by_adev(prange, adev);
                if (pdd)
                        WRITE_ONCE(pdd->page_out,
                                   pdd->page_out + migrate.cpages);
        }
        return r;
}

/**
 * svm_migrate_vram_to_ram - migrate svm range from device to system
 * @prange: range structure
 * @mm: process mm, use current->mm if NULL
 *
 * Context: Process context, caller holds mmap read lock, svms lock, prange lock
 *
 * Return:
 * 0 - OK, otherwise error code
 */
int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
{
        struct amdgpu_device *adev;
        struct vm_area_struct *vma;
        unsigned long addr;
        unsigned long start;
        unsigned long end;
        int r = 0;

        if (!prange->actual_loc) {
                pr_debug("[0x%lx 0x%lx] already migrated to ram\n",
                         prange->start, prange->last);
                return 0;
        }

        adev = svm_range_get_adev_by_id(prange, prange->actual_loc);
        if (!adev) {
                pr_debug("failed to get device by id 0x%x\n",
                         prange->actual_loc);
                return -ENODEV;
        }

        pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] from gpu 0x%x to ram\n",
                 prange->svms, prange, prange->start, prange->last,
                 prange->actual_loc);

        start = prange->start << PAGE_SHIFT;
        end = (prange->last + 1) << PAGE_SHIFT;

        for (addr = start; addr < end;) {
                unsigned long next;

                vma = find_vma(mm, addr);
                if (!vma || addr < vma->vm_start)
                        break;

                next = min(vma->vm_end, end);
                r = svm_migrate_vma_to_ram(adev, prange, vma, addr, next);
                if (r) {
                        pr_debug("failed %d to migrate\n", r);
                        break;
                }
                addr = next;
        }

        if (!r) {
                svm_range_vram_node_free(prange);
                prange->actual_loc = 0;
        }
        return r;
}

/**
 * svm_migrate_vram_to_vram - migrate svm range from device to device
 * @prange: range structure
 * @best_loc: the device to migrate to
 * @mm: process mm, use current->mm if NULL
 *
 * Context: Process context, caller holds mmap read lock, svms lock, prange lock
 *
 * Return:
 * 0 - OK, otherwise error code
 */
static int
svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
                         struct mm_struct *mm)
{
        int r;

        /*
         * TODO: when both devices have a large PCIe BAR or are on the same
         * xgmi hive, skip system memory as the migration bridge.
         */

        pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc);

        r = svm_migrate_vram_to_ram(prange, mm);
        if (r)
                return r;

        return svm_migrate_ram_to_vram(prange, best_loc, mm);
}

int
svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
                    struct mm_struct *mm)
{
        if (!prange->actual_loc)
                return svm_migrate_ram_to_vram(prange, best_loc, mm);
        else
                return svm_migrate_vram_to_vram(prange, best_loc, mm);
}

/**
 * svm_migrate_to_ram - CPU page fault handler
 * @vmf: CPU vm fault vma, address
 *
 * Context: vm fault handler, caller holds the mmap read lock
 *
 * Return:
 * 0 - OK
 * VM_FAULT_SIGBUS - notify the application of a SIGBUS page fault
 */
static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
{
        unsigned long addr = vmf->address;
        struct vm_area_struct *vma;
        enum svm_work_list_ops op;
        struct svm_range *parent;
        struct svm_range *prange;
        struct kfd_process *p;
        struct mm_struct *mm;
        int r = 0;

        vma = vmf->vma;
        mm = vma->vm_mm;

        p = kfd_lookup_process_by_mm(vma->vm_mm);
        if (!p) {
                pr_debug("failed find process at fault address 0x%lx\n", addr);
                return VM_FAULT_SIGBUS;
        }
        addr >>= PAGE_SHIFT;
        pr_debug("CPU page fault svms 0x%p address 0x%lx\n", &p->svms, addr);

        mutex_lock(&p->svms.lock);

        prange = svm_range_from_addr(&p->svms, addr, &parent);
        if (!prange) {
                pr_debug("cannot find svm range at 0x%lx\n", addr);
                r = -EFAULT;
                goto out;
        }

        mutex_lock(&parent->migrate_mutex);
        if (prange != parent)
                mutex_lock_nested(&prange->migrate_mutex, 1);

        if (!prange->actual_loc)
                goto out_unlock_prange;

        svm_range_lock(parent);
        if (prange != parent)
                mutex_lock_nested(&prange->lock, 1);
        r = svm_range_split_by_granularity(p, mm, addr, parent, prange);
        if (prange != parent)
                mutex_unlock(&prange->lock);
        svm_range_unlock(parent);
        if (r) {
                pr_debug("failed %d to split range by granularity\n", r);
                goto out_unlock_prange;
        }

        r = svm_migrate_vram_to_ram(prange, mm);
        if (r)
                pr_debug("failed %d migrate 0x%p [0x%lx 0x%lx] to ram\n", r,
                         prange, prange->start, prange->last);

        /* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */
        if (p->xnack_enabled && parent == prange)
                op = SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP;
        else
                op = SVM_OP_UPDATE_RANGE_NOTIFIER;
        svm_range_add_list_work(&p->svms, parent, mm, op);
        schedule_deferred_list_work(&p->svms);

out_unlock_prange:
        if (prange != parent)
                mutex_unlock(&prange->migrate_mutex);
        mutex_unlock(&parent->migrate_mutex);
out:
        mutex_unlock(&p->svms.lock);
        kfd_unref_process(p);

        pr_debug("CPU fault svms 0x%p address 0x%lx done\n", &p->svms, addr);

        return r ? VM_FAULT_SIGBUS : 0;
}

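/*
 * Hooks invoked by the core mm for the device-private pages registered in
 * svm_migrate_init(): .migrate_to_ram runs when the CPU touches an address
 * whose data currently resides in VRAM and migrates it back to system memory;
 * .page_free runs when the last reference to a device page is dropped.
 */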
static const struct dev_pagemap_ops svm_migrate_pgmap_ops = {
        .page_free              = svm_migrate_page_free,
        .migrate_to_ram         = svm_migrate_to_ram,
};

/* Each VRAM page uses sizeof(struct page) on system memory */
#define SVM_HMM_PAGE_STRUCT_SIZE(size) ((size)/PAGE_SIZE * sizeof(struct page))
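/*
 * Rough cost example (hypothetical numbers; sizeof(struct page) is typically
 * 64 bytes with 4 KiB pages): registering 16 GiB of VRAM needs
 * 16 GiB / 4 KiB * 64 B = 256 MiB of system memory for the page structs,
 * which is what svm_migrate_init() reserves via
 * amdgpu_amdkfd_reserve_system_mem().
 */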

int svm_migrate_init(struct amdgpu_device *adev)
{
        struct kfd_dev *kfddev = adev->kfd.dev;
        struct dev_pagemap *pgmap;
        struct resource *res;
        unsigned long size;
        void *r;

        /* Page migration works on Vega10 or newer */
        if (kfddev->device_info->asic_family < CHIP_VEGA10)
                return -EINVAL;

        pgmap = &kfddev->pgmap;
        memset(pgmap, 0, sizeof(*pgmap));

        /*
         * TODO: all VRAM is registered with HMM for now;
         * the reserved size should eventually be excluded.
         */
        size = ALIGN(adev->gmc.real_vram_size, 2ULL << 20);
        res = devm_request_free_mem_region(adev->dev, &iomem_resource, size);
        if (IS_ERR(res))
                return -ENOMEM;

        pgmap->type = MEMORY_DEVICE_PRIVATE;
        pgmap->nr_range = 1;
        pgmap->range.start = res->start;
        pgmap->range.end = res->end;
        pgmap->ops = &svm_migrate_pgmap_ops;
        pgmap->owner = SVM_ADEV_PGMAP_OWNER(adev);
        pgmap->flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
        r = devm_memremap_pages(adev->dev, pgmap);
        if (IS_ERR(r)) {
                pr_err("failed to register HMM device memory\n");
                devm_release_mem_region(adev->dev, res->start,
                                        res->end - res->start + 1);
                return PTR_ERR(r);
        }

        pr_debug("reserve %ldMB system memory for VRAM pages struct\n",
                 SVM_HMM_PAGE_STRUCT_SIZE(size) >> 20);

        amdgpu_amdkfd_reserve_system_mem(SVM_HMM_PAGE_STRUCT_SIZE(size));

        pr_info("HMM registered %ldMB device memory\n", size >> 20);

        return 0;
}

void svm_migrate_fini(struct amdgpu_device *adev)
{
        struct dev_pagemap *pgmap = &adev->kfd.dev->pgmap;

        devm_memunmap_pages(adev->dev, pgmap);
        devm_release_mem_region(adev->dev, pgmap->range.start,
                                pgmap->range.end - pgmap->range.start + 1);
}