linux/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2020-2021 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include <linux/types.h>
#include <linux/hmm.h>
#include <linux/dma-direction.h>
#include <linux/dma-mapping.h>
#include <linux/migrate.h>
#include "amdgpu_sync.h"
#include "amdgpu_object.h"
#include "amdgpu_vm.h"
#include "amdgpu_mn.h"
#include "amdgpu_res_cursor.h"
#include "kfd_priv.h"
#include "kfd_svm.h"
#include "kfd_migrate.h"

#ifdef dev_fmt
#undef dev_fmt
#endif
#define dev_fmt(fmt) "kfd_migrate: " fmt

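/*
 * VRAM offsets used by the migration code are relative to the start of the
 * VRAM aperture. Translate them into the GPU address space by adding the
 * base of the TTM_PL_VRAM domain, so SDMA can access the pages directly
 * without a GART mapping.
 */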
static uint64_t
svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, uint64_t addr)
{
        return addr + amdgpu_ttm_domain_start(adev, TTM_PL_VRAM);
}

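/*
 * Map the npages system pages whose DMA addresses are in @addr into GART
 * window 0 so the SDMA engine can reach them. The indirect buffer allocated
 * here holds the copy commands (num_dw dwords) followed by the GART PTEs
 * (8 bytes per page); the emitted copy writes those PTEs from the IB into
 * the GART table itself.
 */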
static int
svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages,
                     dma_addr_t *addr, uint64_t *gart_addr, uint64_t flags)
{
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_job *job;
        unsigned int num_dw, num_bytes;
        struct dma_fence *fence;
        uint64_t src_addr, dst_addr;
        uint64_t pte_flags;
        void *cpu_addr;
        int r;

        /* use gart window 0 */
        *gart_addr = adev->gmc.gart_start;

        num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
        num_bytes = npages * 8;

        r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes,
                                     AMDGPU_IB_POOL_DELAYED, &job);
        if (r)
                return r;

        src_addr = num_dw * 4;
        src_addr += job->ibs[0].gpu_addr;

        dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
        amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
                                dst_addr, num_bytes, false);

        amdgpu_ring_pad_ib(ring, &job->ibs[0]);
        WARN_ON(job->ibs[0].length_dw > num_dw);

        pte_flags = AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
        pte_flags |= AMDGPU_PTE_SYSTEM | AMDGPU_PTE_SNOOPED;
        if (!(flags & KFD_IOCTL_SVM_FLAG_GPU_RO))
                pte_flags |= AMDGPU_PTE_WRITEABLE;
        pte_flags |= adev->gart.gart_pte_flags;

        cpu_addr = &job->ibs[0].ptr[num_dw];

        amdgpu_gart_map(adev, 0, npages, addr, pte_flags, cpu_addr);
        r = amdgpu_job_submit(job, &adev->mman.entity,
                              AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
        if (r)
                goto error_free;

        dma_fence_put(fence);

        return r;

error_free:
        amdgpu_job_free(job);
        return r;
}

/**
 * svm_migrate_copy_memory_gart - copy data between ram and vram using sdma
 *
 * @adev: amdgpu device the sdma ring is running on
 * @sys: system DMA pointer to be copied
 * @vram: vram destination DMA pointer
 * @npages: number of pages to copy
 * @direction: direction of the copy, FROM_RAM_TO_VRAM or FROM_VRAM_TO_RAM
 * @mfence: output, sdma fence to signal after sdma is done
 *
 * ram pages are accessed through contiguous GART table entries, while the
 * vram address uses the direct mapping of vram pages, which therefore must
 * be npages contiguous pages.
 * GART updates and sdma copies use the same buffer copy function ring. The
 * copy is split into transfers of at most GTT_MAX_PAGES pages and all sdma
 * operations are serialized, so waiting for the returned fence of the last
 * sdma operation confirms that the whole copy is done.
 *
 * Context: Process context, takes and releases gtt_window_lock
 *
 * Return:
 * 0 - OK, otherwise error code
 */
static int
svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
                             uint64_t *vram, uint64_t npages,
                             enum MIGRATION_COPY_DIR direction,
                             struct dma_fence **mfence)
{
        const uint64_t GTT_MAX_PAGES = AMDGPU_GTT_MAX_TRANSFER_SIZE;
        struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
        uint64_t gart_s, gart_d;
        struct dma_fence *next;
        uint64_t size;
        int r;

        mutex_lock(&adev->mman.gtt_window_lock);

        while (npages) {
                size = min(GTT_MAX_PAGES, npages);

                if (direction == FROM_VRAM_TO_RAM) {
                        gart_s = svm_migrate_direct_mapping_addr(adev, *vram);
                        r = svm_migrate_gart_map(ring, size, sys, &gart_d, 0);

                } else if (direction == FROM_RAM_TO_VRAM) {
                        r = svm_migrate_gart_map(ring, size, sys, &gart_s,
                                                 KFD_IOCTL_SVM_FLAG_GPU_RO);
                        gart_d = svm_migrate_direct_mapping_addr(adev, *vram);
                }
                if (r) {
                        dev_err(adev->dev, "fail %d create gart mapping\n", r);
                        goto out_unlock;
                }

                r = amdgpu_copy_buffer(ring, gart_s, gart_d, size * PAGE_SIZE,
                                       NULL, &next, false, true, false);
                if (r) {
                        dev_err(adev->dev, "fail %d to copy memory\n", r);
                        goto out_unlock;
                }

                dma_fence_put(*mfence);
                *mfence = next;
                npages -= size;
                if (npages) {
                        sys += size;
                        vram += size;
                }
        }

out_unlock:
        mutex_unlock(&adev->mman.gtt_window_lock);

        return r;
}

/**
 * svm_migrate_copy_done - wait for the sdma memory copy to finish
 *
 * @adev: amdgpu device the sdma memory copy is executing on
 * @mfence: migrate fence
 *
 * Wait until the dma fence is signaled. If the copy was split into multiple
 * sdma operations, this is the fence of the last one.
 *
 * Context: called after svm_migrate_copy_memory
 *
 * Return:
 * 0            - success
 * otherwise    - error code from dma fence signal
 */
static int
svm_migrate_copy_done(struct amdgpu_device *adev, struct dma_fence *mfence)
{
        int r = 0;

        if (mfence) {
                r = dma_fence_wait(mfence, false);
                dma_fence_put(mfence);
                pr_debug("sdma copy memory fence done\n");
        }

        return r;
}

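/*
 * VRAM is registered as a linear range of device private pages (see
 * svm_migrate_init), so a VRAM offset and the PFN of its struct page
 * convert back and forth by adding or subtracting pgmap.range.start.
 */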
unsigned long
svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr)
{
        return (addr + adev->kfd.dev->pgmap.range.start) >> PAGE_SHIFT;
}

static void
svm_migrate_get_vram_page(struct svm_range *prange, unsigned long pfn)
{
        struct page *page;

        page = pfn_to_page(pfn);
        svm_range_bo_ref(prange->svm_bo);
        page->zone_device_data = prange->svm_bo;
        lock_page(page);
}

static void
svm_migrate_put_vram_page(struct amdgpu_device *adev, unsigned long addr)
{
        struct page *page;

        page = pfn_to_page(svm_migrate_addr_to_pfn(adev, addr));
        unlock_page(page);
        put_page(page);
}

static unsigned long
svm_migrate_addr(struct amdgpu_device *adev, struct page *page)
{
        unsigned long addr;

        addr = page_to_pfn(page) << PAGE_SHIFT;
        return (addr - adev->kfd.dev->pgmap.range.start);
}

static struct page *
svm_migrate_get_sys_page(struct vm_area_struct *vma, unsigned long addr)
{
        struct page *page;

        page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
        if (page)
                lock_page(page);

        return page;
}

static void svm_migrate_put_sys_page(unsigned long addr)
{
        struct page *page;

        page = pfn_to_page(addr >> PAGE_SHIFT);
        unlock_page(page);
        put_page(page);
}

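/*
 * After migrate_vma_setup(), MIGRATE_PFN_MIGRATE is set in a source entry
 * only if the core mm could isolate the page for migration. Count the
 * collected pages that can and cannot be migrated.
 */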
static unsigned long svm_migrate_successful_pages(struct migrate_vma *migrate)
{
        unsigned long cpages = 0;
        unsigned long i;

        for (i = 0; i < migrate->npages; i++) {
                if (migrate->src[i] & MIGRATE_PFN_VALID &&
                    migrate->src[i] & MIGRATE_PFN_MIGRATE)
                        cpages++;
        }
        return cpages;
}

static unsigned long svm_migrate_unsuccessful_pages(struct migrate_vma *migrate)
{
        unsigned long upages = 0;
        unsigned long i;

        for (i = 0; i < migrate->npages; i++) {
                if (migrate->src[i] & MIGRATE_PFN_VALID &&
                    !(migrate->src[i] & MIGRATE_PFN_MIGRATE))
                        upages++;
        }
        return upages;
}

static int
svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
                         struct migrate_vma *migrate, struct dma_fence **mfence,
                         dma_addr_t *scratch)
{
        uint64_t npages = migrate->npages;
        struct device *dev = adev->dev;
        struct amdgpu_res_cursor cursor;
        dma_addr_t *src;
        uint64_t *dst;
        uint64_t i, j;
        int r;

        pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
                 prange->last);

        src = scratch;
        dst = (uint64_t *)(scratch + npages);

        r = svm_range_vram_node_new(adev, prange, true);
        if (r) {
                dev_dbg(adev->dev, "fail %d to alloc vram\n", r);
                goto out;
        }

        amdgpu_res_first(prange->ttm_res, prange->offset << PAGE_SHIFT,
                         npages << PAGE_SHIFT, &cursor);
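        /*
         * Walk the source pages and batch runs of contiguous allocations:
         * j counts the pages accumulated so far. A run is flushed with one
         * SDMA copy when a page cannot migrate (hole in the run) or when
         * the current VRAM cursor segment runs out of contiguous space.
         */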
        for (i = j = 0; i < npages; i++) {
                struct page *spage;

                spage = migrate_pfn_to_page(migrate->src[i]);
                if (spage && !is_zone_device_page(spage)) {
                        dst[i] = cursor.start + (j << PAGE_SHIFT);
                        migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]);
                        svm_migrate_get_vram_page(prange, migrate->dst[i]);
                        migrate->dst[i] = migrate_pfn(migrate->dst[i]);
                        src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE,
                                              DMA_TO_DEVICE);
                        r = dma_mapping_error(dev, src[i]);
                        if (r) {
                                dev_err(adev->dev, "%s: fail %d dma_map_page\n",
                                        __func__, r);
                                goto out_free_vram_pages;
                        }
                } else {
                        if (j) {
                                r = svm_migrate_copy_memory_gart(
                                                adev, src + i - j,
                                                dst + i - j, j,
                                                FROM_RAM_TO_VRAM,
                                                mfence);
                                if (r)
                                        goto out_free_vram_pages;
                                amdgpu_res_next(&cursor, (j + 1) << PAGE_SHIFT);
                                j = 0;
                        } else {
                                amdgpu_res_next(&cursor, PAGE_SIZE);
                        }
                        continue;
                }

                pr_debug_ratelimited("dma mapping src to 0x%llx, pfn 0x%lx\n",
                                     src[i] >> PAGE_SHIFT, page_to_pfn(spage));

                if (j >= (cursor.size >> PAGE_SHIFT) - 1 && i < npages - 1) {
                        r = svm_migrate_copy_memory_gart(adev, src + i - j,
                                                         dst + i - j, j + 1,
                                                         FROM_RAM_TO_VRAM,
                                                         mfence);
                        if (r)
                                goto out_free_vram_pages;
                        amdgpu_res_next(&cursor, (j + 1) * PAGE_SIZE);
                        j = 0;
                } else {
                        j++;
                }
        }

        r = svm_migrate_copy_memory_gart(adev, src + i - j, dst + i - j, j,
                                         FROM_RAM_TO_VRAM, mfence);

out_free_vram_pages:
        if (r) {
                pr_debug("failed %d to copy memory to vram\n", r);
                while (i--) {
                        svm_migrate_put_vram_page(adev, dst[i]);
                        migrate->dst[i] = 0;
                }
        }

#ifdef DEBUG_FORCE_MIXED_DOMAINS
        for (i = 0, j = 0; i < npages; i += 4, j++) {
                if (j & 1)
                        continue;
                svm_migrate_put_vram_page(adev, dst[i]);
                migrate->dst[i] = 0;
                svm_migrate_put_vram_page(adev, dst[i + 1]);
                migrate->dst[i + 1] = 0;
                svm_migrate_put_vram_page(adev, dst[i + 2]);
                migrate->dst[i + 2] = 0;
                svm_migrate_put_vram_page(adev, dst[i + 3]);
                migrate->dst[i + 3] = 0;
        }
#endif
out:
        return r;
}

static long
svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
                        struct vm_area_struct *vma, uint64_t start,
                        uint64_t end)
{
        uint64_t npages = (end - start) >> PAGE_SHIFT;
        struct kfd_process_device *pdd;
        struct dma_fence *mfence = NULL;
        struct migrate_vma migrate;
        unsigned long cpages = 0;
        dma_addr_t *scratch;
        void *buf;
        int r = -ENOMEM;

        memset(&migrate, 0, sizeof(migrate));
        migrate.vma = vma;
        migrate.start = start;
        migrate.end = end;
        migrate.flags = MIGRATE_VMA_SELECT_SYSTEM;
        migrate.pgmap_owner = SVM_ADEV_PGMAP_OWNER(adev);
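
        /*
         * One allocation backs migrate.src (npages entries), migrate.dst
         * (npages entries) and the scratch area used by the copy code for
         * npages DMA addresses plus npages VRAM addresses, i.e.
         * sizeof(dma_addr_t) + sizeof(uint64_t) extra bytes per page.
         */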
        buf = kvcalloc(npages,
                       2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t),
                       GFP_KERNEL);
        if (!buf)
                goto out;

        migrate.src = buf;
        migrate.dst = migrate.src + npages;
        scratch = (dma_addr_t *)(migrate.dst + npages);

        r = migrate_vma_setup(&migrate);
        if (r) {
                dev_err(adev->dev, "%s: vma setup fail %d range [0x%lx 0x%lx]\n",
                        __func__, r, prange->start, prange->last);
                goto out_free;
        }

        cpages = migrate.cpages;
        if (!cpages) {
                pr_debug("failed collect migrate sys pages [0x%lx 0x%lx]\n",
                         prange->start, prange->last);
                goto out_free;
        }
        if (cpages != npages)
                pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
                         cpages, npages);
        else
                pr_debug("0x%lx pages migrated\n", cpages);

        r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, scratch);
        migrate_vma_pages(&migrate);

        pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
                svm_migrate_successful_pages(&migrate), cpages, migrate.npages);

        svm_migrate_copy_done(adev, mfence);
        migrate_vma_finalize(&migrate);

        svm_range_dma_unmap(adev->dev, scratch, 0, npages);
        svm_range_free_dma_mappings(prange);

out_free:
        kvfree(buf);
out:
        if (!r && cpages) {
                pdd = svm_range_get_pdd_by_adev(prange, adev);
                if (pdd)
                        WRITE_ONCE(pdd->page_in, pdd->page_in + cpages);

                return cpages;
        }
        return r;
}

/**
 * svm_migrate_ram_to_vram - migrate svm range from system to device
 * @prange: range structure
 * @best_loc: the device to migrate to
 * @mm: the process mm structure
 *
 * Context: Process context, caller holds mmap read lock, svms lock and prange lock
 *
 * Return:
 * 0 - OK, otherwise error code
 */
static int
svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
                        struct mm_struct *mm)
{
        unsigned long addr, start, end;
        struct vm_area_struct *vma;
        struct amdgpu_device *adev;
        unsigned long cpages = 0;
        long r = 0;

        if (prange->actual_loc == best_loc) {
                pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
                         prange->svms, prange->start, prange->last, best_loc);
                return 0;
        }

        adev = svm_range_get_adev_by_id(prange, best_loc);
        if (!adev) {
                pr_debug("failed to get device by id 0x%x\n", best_loc);
                return -ENODEV;
        }

        pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms,
                 prange->start, prange->last, best_loc);

        /* FIXME: workaround for page locking bug with invalid pages */
        svm_range_prefault(prange, mm, SVM_ADEV_PGMAP_OWNER(adev));

        start = prange->start << PAGE_SHIFT;
        end = (prange->last + 1) << PAGE_SHIFT;

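        /*
         * migrate_vma_setup() operates on a single VMA, so walk the range
         * and migrate it piecewise at VMA boundaries.
         */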
        for (addr = start; addr < end;) {
                unsigned long next;

                vma = find_vma(mm, addr);
                if (!vma || addr < vma->vm_start)
                        break;

                next = min(vma->vm_end, end);
                r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next);
                if (r < 0) {
                        pr_debug("failed %ld to migrate\n", r);
                        break;
                } else {
                        cpages += r;
                }
                addr = next;
        }

        if (cpages)
                prange->actual_loc = best_loc;

        return r < 0 ? r : 0;
}

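/*
 * Called by the mm core when the last reference to a device private page
 * is dropped. The page holds a reference on its backing svm_bo (taken in
 * svm_migrate_get_vram_page); release it through a worker, as this can be
 * called in contexts where releasing the BO directly is not safe.
 */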
static void svm_migrate_page_free(struct page *page)
{
        struct svm_range_bo *svm_bo = page->zone_device_data;

        if (svm_bo) {
                pr_debug_ratelimited("ref: %d\n", kref_read(&svm_bo->kref));
                svm_range_bo_unref_async(svm_bo);
        }
}

static int
svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
                        struct migrate_vma *migrate, struct dma_fence **mfence,
                        dma_addr_t *scratch, uint64_t npages)
{
        struct device *dev = adev->dev;
        uint64_t *src;
        dma_addr_t *dst;
        struct page *dpage;
        uint64_t i = 0, j;
        uint64_t addr;
        int r = 0;

        pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
                 prange->last);

        addr = prange->start << PAGE_SHIFT;

        src = (uint64_t *)(scratch + npages);
        dst = scratch;

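        /*
         * Batch contiguous VRAM source pages: the run of j pages is flushed
         * with one SDMA copy when a source page is not in VRAM or its VRAM
         * address is not contiguous with the previous page.
         */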
        for (i = 0, j = 0; i < npages; i++, addr += PAGE_SIZE) {
                struct page *spage;

                spage = migrate_pfn_to_page(migrate->src[i]);
                if (!spage || !is_zone_device_page(spage)) {
                        pr_debug("invalid page. Could be in CPU already svms 0x%p [0x%lx 0x%lx]\n",
                                 prange->svms, prange->start, prange->last);
                        if (j) {
                                r = svm_migrate_copy_memory_gart(adev, dst + i - j,
                                                                 src + i - j, j,
                                                                 FROM_VRAM_TO_RAM,
                                                                 mfence);
                                if (r)
                                        goto out_oom;
                                j = 0;
                        }
                        continue;
                }
                src[i] = svm_migrate_addr(adev, spage);
                if (j > 0 && src[i] != src[i - 1] + PAGE_SIZE) {
                        r = svm_migrate_copy_memory_gart(adev, dst + i - j,
                                                         src + i - j, j,
                                                         FROM_VRAM_TO_RAM,
                                                         mfence);
                        if (r)
                                goto out_oom;
                        j = 0;
                }

                dpage = svm_migrate_get_sys_page(migrate->vma, addr);
                if (!dpage) {
                        pr_debug("failed get page svms 0x%p [0x%lx 0x%lx]\n",
                                 prange->svms, prange->start, prange->last);
                        r = -ENOMEM;
                        goto out_oom;
                }

                dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_FROM_DEVICE);
                r = dma_mapping_error(dev, dst[i]);
                if (r) {
                        dev_err(adev->dev, "%s: fail %d dma_map_page\n", __func__, r);
                        goto out_oom;
                }

                pr_debug_ratelimited("dma mapping dst to 0x%llx, pfn 0x%lx\n",
                                     dst[i] >> PAGE_SHIFT, page_to_pfn(dpage));

                migrate->dst[i] = migrate_pfn(page_to_pfn(dpage));
                j++;
        }

        r = svm_migrate_copy_memory_gart(adev, dst + i - j, src + i - j, j,
                                         FROM_VRAM_TO_RAM, mfence);

out_oom:
        if (r) {
                pr_debug("failed %d copy to ram\n", r);
                while (i--) {
                        svm_migrate_put_sys_page(dst[i]);
                        migrate->dst[i] = 0;
                }
        }

        return r;
}

/**
 * svm_migrate_vma_to_ram - migrate range inside one vma from device to system
 *
 * @adev: amdgpu device to migrate from
 * @prange: svm range structure
 * @vma: vm_area_struct that range [start, end] belongs to
 * @start: range start virtual address in bytes
 * @end: range end virtual address in bytes (exclusive)
 *
 * Context: Process context, caller holds mmap read lock and prange->migrate_mutex
 *
 * Return:
 *   0 - success with all pages migrated
 *   negative values - indicate error
 *   positive values - partial migration, number of pages not migrated
 */
static long
svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
                       struct vm_area_struct *vma, uint64_t start, uint64_t end)
{
        uint64_t npages = (end - start) >> PAGE_SHIFT;
        unsigned long upages = npages;
        unsigned long cpages = 0;
        struct kfd_process_device *pdd;
        struct dma_fence *mfence = NULL;
        struct migrate_vma migrate;
        dma_addr_t *scratch;
        void *buf;
        int r = -ENOMEM;

        memset(&migrate, 0, sizeof(migrate));
        migrate.vma = vma;
        migrate.start = start;
        migrate.end = end;
        migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
        migrate.pgmap_owner = SVM_ADEV_PGMAP_OWNER(adev);

        buf = kvcalloc(npages,
                       2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t),
                       GFP_KERNEL);

        if (!buf)
                goto out;

        migrate.src = buf;
        migrate.dst = migrate.src + npages;
        scratch = (dma_addr_t *)(migrate.dst + npages);

        r = migrate_vma_setup(&migrate);
        if (r) {
                dev_err(adev->dev, "%s: vma setup fail %d range [0x%lx 0x%lx]\n",
                        __func__, r, prange->start, prange->last);
                goto out_free;
        }

        cpages = migrate.cpages;
        if (!cpages) {
                pr_debug("failed collect migrate device pages [0x%lx 0x%lx]\n",
                         prange->start, prange->last);
                upages = svm_migrate_unsuccessful_pages(&migrate);
                goto out_free;
        }
        if (cpages != npages)
                pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
                         cpages, npages);
        else
                pr_debug("0x%lx pages migrated\n", cpages);

        r = svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence,
                                    scratch, npages);
        migrate_vma_pages(&migrate);

        upages = svm_migrate_unsuccessful_pages(&migrate);
        pr_debug("unsuccessful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
                 upages, cpages, migrate.npages);

        svm_migrate_copy_done(adev, mfence);
        migrate_vma_finalize(&migrate);
        svm_range_dma_unmap(adev->dev, scratch, 0, npages);

out_free:
        kvfree(buf);
out:
        if (!r && cpages) {
                pdd = svm_range_get_pdd_by_adev(prange, adev);
                if (pdd)
                        WRITE_ONCE(pdd->page_out, pdd->page_out + cpages);
        }
        return r ? r : upages;
}

/**
 * svm_migrate_vram_to_ram - migrate svm range from device to system
 * @prange: range structure
 * @mm: process mm, use current->mm if NULL
 *
 * Context: Process context, caller holds mmap read lock and prange->migrate_mutex
 *
 * Return:
 * 0 - OK, otherwise error code
 */
int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
{
        struct amdgpu_device *adev;
        struct vm_area_struct *vma;
        unsigned long addr;
        unsigned long start;
        unsigned long end;
        unsigned long upages = 0;
        long r = 0;

        if (!prange->actual_loc) {
                pr_debug("[0x%lx 0x%lx] already migrated to ram\n",
                         prange->start, prange->last);
                return 0;
        }

        adev = svm_range_get_adev_by_id(prange, prange->actual_loc);
        if (!adev) {
                pr_debug("failed to get device by id 0x%x\n",
                         prange->actual_loc);
                return -ENODEV;
        }

        pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] from gpu 0x%x to ram\n",
                 prange->svms, prange, prange->start, prange->last,
                 prange->actual_loc);

        start = prange->start << PAGE_SHIFT;
        end = (prange->last + 1) << PAGE_SHIFT;

        for (addr = start; addr < end;) {
                unsigned long next;

                vma = find_vma(mm, addr);
                if (!vma || addr < vma->vm_start) {
                        pr_debug("failed to find vma for prange %p\n", prange);
                        r = -EFAULT;
                        break;
                }

                next = min(vma->vm_end, end);
                r = svm_migrate_vma_to_ram(adev, prange, vma, addr, next);
                if (r < 0) {
                        pr_debug("failed %ld to migrate prange %p\n", r, prange);
                        break;
                } else {
                        upages += r;
                }
                addr = next;
        }

        if (r >= 0 && !upages) {
                svm_range_vram_node_free(prange);
                prange->actual_loc = 0;
        }

        return r < 0 ? r : 0;
}

/**
 * svm_migrate_vram_to_vram - migrate svm range from device to device
 * @prange: range structure
 * @best_loc: the device to migrate to
 * @mm: process mm, use current->mm if NULL
 *
 * Context: Process context, caller holds mmap read lock, svms lock and prange lock
 *
 * Return:
 * 0 - OK, otherwise error code
 */
static int
svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
                         struct mm_struct *mm)
{
        int r, retries = 3;

        /*
         * TODO: if both devices have a PCIe large BAR or are on the same
         * xgmi hive, skip using system memory as the migration bridge
         */

        pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc);

        do {
                r = svm_migrate_vram_to_ram(prange, mm);
                if (r)
                        return r;
        } while (prange->actual_loc && --retries);

        if (prange->actual_loc)
                return -EDEADLK;

        return svm_migrate_ram_to_vram(prange, best_loc, mm);
}

int
svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
                    struct mm_struct *mm)
{
        if (!prange->actual_loc)
                return svm_migrate_ram_to_vram(prange, best_loc, mm);
        else
                return svm_migrate_vram_to_vram(prange, best_loc, mm);
}

/**
 * svm_migrate_to_ram - CPU page fault handler
 * @vmf: CPU vm fault vma, address
 *
 * Context: vm fault handler, caller holds the mmap read lock
 *
 * Return:
 * 0 - OK
 * VM_FAULT_SIGBUS - notify the application of a SIGBUS page fault
 */
static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
{
        unsigned long addr = vmf->address;
        struct vm_area_struct *vma;
        enum svm_work_list_ops op;
        struct svm_range *parent;
        struct svm_range *prange;
        struct kfd_process *p;
        struct mm_struct *mm;
        int r = 0;

        vma = vmf->vma;
        mm = vma->vm_mm;

        p = kfd_lookup_process_by_mm(vma->vm_mm);
        if (!p) {
                pr_debug("failed find process at fault address 0x%lx\n", addr);
                return VM_FAULT_SIGBUS;
        }
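        /*
         * If this CPU fault was raised by the task that is currently handling
         * a GPU fault for the same process, the fault handler itself is
         * touching the range it is migrating. Skip the migration here to
         * avoid recursing into the migration locks.
         */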
        if (READ_ONCE(p->svms.faulting_task) == current) {
                pr_debug("skipping ram migration\n");
                kfd_unref_process(p);
                return 0;
        }
        addr >>= PAGE_SHIFT;
        pr_debug("CPU page fault svms 0x%p address 0x%lx\n", &p->svms, addr);

        mutex_lock(&p->svms.lock);

        prange = svm_range_from_addr(&p->svms, addr, &parent);
        if (!prange) {
                pr_debug("cannot find svm range at 0x%lx\n", addr);
                r = -EFAULT;
                goto out;
        }

        mutex_lock(&parent->migrate_mutex);
        if (prange != parent)
                mutex_lock_nested(&prange->migrate_mutex, 1);

        if (!prange->actual_loc)
                goto out_unlock_prange;

        svm_range_lock(parent);
        if (prange != parent)
                mutex_lock_nested(&prange->lock, 1);
        r = svm_range_split_by_granularity(p, mm, addr, parent, prange);
        if (prange != parent)
                mutex_unlock(&prange->lock);
        svm_range_unlock(parent);
        if (r) {
                pr_debug("failed %d to split range by granularity\n", r);
                goto out_unlock_prange;
        }

        r = svm_migrate_vram_to_ram(prange, mm);
        if (r)
                pr_debug("failed %d migrate 0x%p [0x%lx 0x%lx] to ram\n", r,
                         prange, prange->start, prange->last);

        /* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */
        if (p->xnack_enabled && parent == prange)
                op = SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP;
        else
                op = SVM_OP_UPDATE_RANGE_NOTIFIER;
        svm_range_add_list_work(&p->svms, parent, mm, op);
        schedule_deferred_list_work(&p->svms);

out_unlock_prange:
        if (prange != parent)
                mutex_unlock(&prange->migrate_mutex);
        mutex_unlock(&parent->migrate_mutex);
out:
        mutex_unlock(&p->svms.lock);
        kfd_unref_process(p);

        pr_debug("CPU fault svms 0x%p address 0x%lx done\n", &p->svms, addr);

        return r ? VM_FAULT_SIGBUS : 0;
}

static const struct dev_pagemap_ops svm_migrate_pgmap_ops = {
        .page_free              = svm_migrate_page_free,
        .migrate_to_ram         = svm_migrate_to_ram,
};

/* Each VRAM page uses sizeof(struct page) of system memory */
#define SVM_HMM_PAGE_STRUCT_SIZE(size) ((size)/PAGE_SIZE * sizeof(struct page))

int svm_migrate_init(struct amdgpu_device *adev)
{
        struct kfd_dev *kfddev = adev->kfd.dev;
        struct dev_pagemap *pgmap;
        struct resource *res;
        unsigned long size;
        void *r;

        /* Page migration works on Vega10 or newer */
        if (!KFD_IS_SOC15(kfddev))
                return -EINVAL;

        pgmap = &kfddev->pgmap;
        memset(pgmap, 0, sizeof(*pgmap));

        /* TODO: register all vram to HMM for now.
         * should remove the reserved size
         */
        size = ALIGN(adev->gmc.real_vram_size, 2ULL << 20);
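        /*
         * Reserve a free range of physical address space to represent VRAM.
         * The range is not backed by system memory; it only provides PFNs so
         * that each VRAM page gets a struct page that HMM and migrate_vma
         * can work with.
         */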
        res = devm_request_free_mem_region(adev->dev, &iomem_resource, size);
        if (IS_ERR(res))
                return -ENOMEM;

        pgmap->type = MEMORY_DEVICE_PRIVATE;
        pgmap->nr_range = 1;
        pgmap->range.start = res->start;
        pgmap->range.end = res->end;
        pgmap->ops = &svm_migrate_pgmap_ops;
        pgmap->owner = SVM_ADEV_PGMAP_OWNER(adev);
        pgmap->flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;

        /* Device manager releases device-specific resources, memory region and
         * pgmap when driver disconnects from device.
         */
        r = devm_memremap_pages(adev->dev, pgmap);
        if (IS_ERR(r)) {
                pr_err("failed to register HMM device memory\n");

                /* Disable SVM support capability */
                pgmap->type = 0;
                devm_release_mem_region(adev->dev, res->start, resource_size(res));
                return PTR_ERR(r);
        }

        pr_debug("reserve %ldMB system memory for VRAM pages struct\n",
                 SVM_HMM_PAGE_STRUCT_SIZE(size) >> 20);

        amdgpu_amdkfd_reserve_system_mem(SVM_HMM_PAGE_STRUCT_SIZE(size));

        pr_info("HMM registered %ldMB device memory\n", size >> 20);

        return 0;
}