linux/drivers/gpu/drm/nouveau/nouveau_dmem.c
/*
 * Copyright 2018 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include "nouveau_dmem.h"
#include "nouveau_drv.h"
#include "nouveau_chan.h"
#include "nouveau_dma.h"
#include "nouveau_mem.h"
#include "nouveau_bo.h"

#include <nvif/class.h>
#include <nvif/object.h>
#include <nvif/if500b.h>
#include <nvif/if900b.h>

#include <linux/sched/mm.h>
#include <linux/hmm.h>

/*
 * FIXME: this is ugly. Right now we are using TTM to allocate vram and we
 * pin it in vram while it is in use. We likely want to overhaul memory
 * management for nouveau to be more page-like (not necessarily with the
 * system page size, but a bigger page size) at the lowest level, and have
 * some shim layer on top that would provide the same functionality as TTM.
 */
#define DMEM_CHUNK_SIZE (2UL << 20)
#define DMEM_CHUNK_NPAGES (DMEM_CHUNK_SIZE >> PAGE_SHIFT)

struct nouveau_migrate;

enum nouveau_aper {
        NOUVEAU_APER_VIRT,
        NOUVEAU_APER_VRAM,
        NOUVEAU_APER_HOST,
};

typedef int (*nouveau_migrate_copy_t)(struct nouveau_drm *drm, u64 npages,
                                      enum nouveau_aper, u64 dst_addr,
                                      enum nouveau_aper, u64 src_addr);

struct nouveau_dmem_chunk {
        struct list_head list;
        struct nouveau_bo *bo;
        struct nouveau_drm *drm;
        unsigned long pfn_first;
        unsigned long callocated;
        unsigned long bitmap[BITS_TO_LONGS(DMEM_CHUNK_NPAGES)];
        spinlock_t lock;
};

struct nouveau_dmem_migrate {
        nouveau_migrate_copy_t copy_func;
        struct nouveau_channel *chan;
};

struct nouveau_dmem {
        struct hmm_devmem *devmem;
        struct nouveau_dmem_migrate migrate;
        struct list_head chunk_free;
        struct list_head chunk_full;
        struct list_head chunk_empty;
        struct mutex mutex;
};

struct nouveau_dmem_fault {
        struct nouveau_drm *drm;
        struct nouveau_fence *fence;
        dma_addr_t *dma;
        unsigned long npages;
};

struct nouveau_migrate {
        struct vm_area_struct *vma;
        struct nouveau_drm *drm;
        struct nouveau_fence *fence;
        unsigned long npages;
        dma_addr_t *dma;
        unsigned long dma_nr;
};

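/*
 * HMM devmem free() callback, called when the last reference to a device
 * private page goes away: clear the page's bit in the owning chunk's
 * allocation bitmap and drop the chunk's allocation count.
 */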
static void
nouveau_dmem_free(struct hmm_devmem *devmem, struct page *page)
{
        struct nouveau_dmem_chunk *chunk;
        unsigned long idx;

        chunk = (void *)hmm_devmem_page_get_drvdata(page);
        idx = page_to_pfn(page) - chunk->pfn_first;

        /*
         * FIXME:
         *
         * This is really a bad example, we need to overhaul nouveau memory
         * management to be more page focused and to allow a lighter locking
         * scheme to be used in the process.
         */
        spin_lock(&chunk->lock);
        clear_bit(idx, chunk->bitmap);
        WARN_ON(!chunk->callocated);
        chunk->callocated--;
        /*
         * FIXME: when chunk->callocated reaches 0 we should add the chunk to
         * a reclaim list so that it can be freed in case of memory pressure.
         */
        spin_unlock(&chunk->lock);
}

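/*
 * alloc_and_copy() callback for the CPU fault path: for each device private
 * page being migrated back to system memory, allocate an anonymous page,
 * DMA-map it, and ask the copy engine to copy the data out of VRAM.
 */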
static void
nouveau_dmem_fault_alloc_and_copy(struct vm_area_struct *vma,
                                  const unsigned long *src_pfns,
                                  unsigned long *dst_pfns,
                                  unsigned long start,
                                  unsigned long end,
                                  void *private)
{
        struct nouveau_dmem_fault *fault = private;
        struct nouveau_drm *drm = fault->drm;
        struct device *dev = drm->dev->dev;
        unsigned long addr, i, npages = 0;
        nouveau_migrate_copy_t copy;
        int ret;

        /* First allocate new memory */
        for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) {
                struct page *dpage, *spage;

                dst_pfns[i] = 0;
                spage = migrate_pfn_to_page(src_pfns[i]);
                if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE))
                        continue;

                dpage = hmm_vma_alloc_locked_page(vma, addr);
                if (!dpage) {
                        dst_pfns[i] = MIGRATE_PFN_ERROR;
                        continue;
                }

                dst_pfns[i] = migrate_pfn(page_to_pfn(dpage)) |
                              MIGRATE_PFN_LOCKED;
                npages++;
        }

        /* Allocate storage for DMA addresses, so we can unmap later. */
        fault->dma = kmalloc(sizeof(*fault->dma) * npages, GFP_KERNEL);
        if (!fault->dma)
                goto error;

        /* Copy things over */
        copy = drm->dmem->migrate.copy_func;
        for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) {
                struct nouveau_dmem_chunk *chunk;
                struct page *spage, *dpage;
                u64 src_addr, dst_addr;

                dpage = migrate_pfn_to_page(dst_pfns[i]);
                if (!dpage || dst_pfns[i] == MIGRATE_PFN_ERROR)
                        continue;

                spage = migrate_pfn_to_page(src_pfns[i]);
                if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) {
                        dst_pfns[i] = MIGRATE_PFN_ERROR;
                        __free_page(dpage);
                        continue;
                }

                fault->dma[fault->npages] =
                        dma_map_page_attrs(dev, dpage, 0, PAGE_SIZE,
                                           PCI_DMA_BIDIRECTIONAL,
                                           DMA_ATTR_SKIP_CPU_SYNC);
                if (dma_mapping_error(dev, fault->dma[fault->npages])) {
                        dst_pfns[i] = MIGRATE_PFN_ERROR;
                        __free_page(dpage);
                        continue;
                }

                dst_addr = fault->dma[fault->npages++];

                chunk = (void *)hmm_devmem_page_get_drvdata(spage);
                src_addr = page_to_pfn(spage) - chunk->pfn_first;
                src_addr = (src_addr << PAGE_SHIFT) + chunk->bo->bo.offset;

                ret = copy(drm, 1, NOUVEAU_APER_HOST, dst_addr,
                                   NOUVEAU_APER_VRAM, src_addr);
                if (ret) {
                        dst_pfns[i] = MIGRATE_PFN_ERROR;
                        __free_page(dpage);
                        continue;
                }
        }

        nouveau_fence_new(drm->dmem->migrate.chan, false, &fault->fence);

        return;

error:
        for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, ++i) {
                struct page *page;

                if (!dst_pfns[i] || dst_pfns[i] == MIGRATE_PFN_ERROR)
                        continue;

                page = migrate_pfn_to_page(dst_pfns[i]);
                dst_pfns[i] = MIGRATE_PFN_ERROR;
                if (page == NULL)
                        continue;

                __free_page(page);
        }
}

void nouveau_dmem_fault_finalize_and_map(struct vm_area_struct *vma,
                                         const unsigned long *src_pfns,
                                         const unsigned long *dst_pfns,
                                         unsigned long start,
                                         unsigned long end,
                                         void *private)
{
        struct nouveau_dmem_fault *fault = private;
        struct nouveau_drm *drm = fault->drm;

        if (fault->fence) {
                nouveau_fence_wait(fault->fence, true, false);
                nouveau_fence_unref(&fault->fence);
        } else {
                /*
                 * FIXME: wait for the channel to be IDLE before finalizing
                 * the hmem object below (nouveau_migrate_hmem_fini()).
                 */
        }

        while (fault->npages--) {
                dma_unmap_page(drm->dev->dev, fault->dma[fault->npages],
                               PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
        }
        kfree(fault->dma);
}

static const struct migrate_vma_ops nouveau_dmem_fault_migrate_ops = {
        .alloc_and_copy         = nouveau_dmem_fault_alloc_and_copy,
        .finalize_and_map       = nouveau_dmem_fault_finalize_and_map,
};

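/*
 * HMM devmem fault() callback: a CPU fault hit a device private page, so
 * migrate that single page back to system memory using the migrate_vma()
 * callbacks above.
 */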
static vm_fault_t
nouveau_dmem_fault(struct hmm_devmem *devmem,
                   struct vm_area_struct *vma,
                   unsigned long addr,
                   const struct page *page,
                   unsigned int flags,
                   pmd_t *pmdp)
{
        struct drm_device *drm_dev = dev_get_drvdata(devmem->device);
        unsigned long src[1] = {0}, dst[1] = {0};
        struct nouveau_dmem_fault fault = {0};
        int ret;

        /*
         * FIXME: what we really want is to find some heuristic to migrate
         * more than just one page on CPU fault. When such a fault happens
         * it is very likely that more surrounding pages will CPU fault too.
         */
        fault.drm = nouveau_drm(drm_dev);
        ret = migrate_vma(&nouveau_dmem_fault_migrate_ops, vma, addr,
                          addr + PAGE_SIZE, src, dst, &fault);
        if (ret)
                return VM_FAULT_SIGBUS;

        if (dst[0] == MIGRATE_PFN_ERROR)
                return VM_FAULT_SIGBUS;

        return 0;
}

static const struct hmm_devmem_ops
nouveau_dmem_devmem_ops = {
        .free = nouveau_dmem_free,
        .fault = nouveau_dmem_fault,
};

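/*
 * Take a chunk off the empty list and back it with a DMEM_CHUNK_SIZE VRAM
 * buffer object, pinned in VRAM. On success the chunk is put back at the
 * head of the empty list so the page allocator below can find it.
 */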
static int
nouveau_dmem_chunk_alloc(struct nouveau_drm *drm)
{
        struct nouveau_dmem_chunk *chunk;
        int ret;

        if (drm->dmem == NULL)
                return -EINVAL;

        mutex_lock(&drm->dmem->mutex);
        chunk = list_first_entry_or_null(&drm->dmem->chunk_empty,
                                         struct nouveau_dmem_chunk,
                                         list);
        if (chunk == NULL) {
                mutex_unlock(&drm->dmem->mutex);
                return -ENOMEM;
        }

        list_del(&chunk->list);
        mutex_unlock(&drm->dmem->mutex);

        ret = nouveau_bo_new(&drm->client, DMEM_CHUNK_SIZE, 0,
                             TTM_PL_FLAG_VRAM, 0, 0, NULL, NULL,
                             &chunk->bo);
        if (ret)
                goto out;

        ret = nouveau_bo_pin(chunk->bo, TTM_PL_FLAG_VRAM, false);
        if (ret) {
                nouveau_bo_ref(NULL, &chunk->bo);
                goto out;
        }

        bitmap_zero(chunk->bitmap, DMEM_CHUNK_NPAGES);
        spin_lock_init(&chunk->lock);

out:
        mutex_lock(&drm->dmem->mutex);
        if (chunk->bo)
                list_add(&chunk->list, &drm->dmem->chunk_empty);
        else
                list_add_tail(&chunk->list, &drm->dmem->chunk_empty);
        mutex_unlock(&drm->dmem->mutex);

        return ret;
}

static struct nouveau_dmem_chunk *
nouveau_dmem_chunk_first_free_locked(struct nouveau_drm *drm)
{
        struct nouveau_dmem_chunk *chunk;

        chunk = list_first_entry_or_null(&drm->dmem->chunk_free,
                                         struct nouveau_dmem_chunk,
                                         list);
        if (chunk)
                return chunk;

        chunk = list_first_entry_or_null(&drm->dmem->chunk_empty,
                                         struct nouveau_dmem_chunk,
                                         list);
        if (chunk && chunk->bo)
                return chunk;

        return NULL;
}

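/*
 * Allocate npages device pfns out of the chunks' bitmaps, growing the pool
 * with nouveau_dmem_chunk_alloc() when no backed chunk has a free page left.
 */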
static int
nouveau_dmem_pages_alloc(struct nouveau_drm *drm,
                         unsigned long npages,
                         unsigned long *pages)
{
        struct nouveau_dmem_chunk *chunk;
        unsigned long c;
        int ret;

        memset(pages, 0xff, npages * sizeof(*pages));

        mutex_lock(&drm->dmem->mutex);
        for (c = 0; c < npages;) {
                unsigned long i;

                chunk = nouveau_dmem_chunk_first_free_locked(drm);
                if (chunk == NULL) {
                        mutex_unlock(&drm->dmem->mutex);
                        ret = nouveau_dmem_chunk_alloc(drm);
                        mutex_lock(&drm->dmem->mutex);
                        if (ret) {
                                if (c)
                                        break;
                                mutex_unlock(&drm->dmem->mutex);
                                return ret;
                        }
                        continue;
                }

                spin_lock(&chunk->lock);
                i = find_first_zero_bit(chunk->bitmap, DMEM_CHUNK_NPAGES);
                while (i < DMEM_CHUNK_NPAGES && c < npages) {
                        pages[c] = chunk->pfn_first + i;
                        set_bit(i, chunk->bitmap);
                        chunk->callocated++;
                        c++;

                        i = find_next_zero_bit(chunk->bitmap,
                                        DMEM_CHUNK_NPAGES, i);
                }
                spin_unlock(&chunk->lock);
        }
        mutex_unlock(&drm->dmem->mutex);

        return 0;
}

static struct page *
nouveau_dmem_page_alloc_locked(struct nouveau_drm *drm)
{
        unsigned long pfns[1];
        struct page *page;
        int ret;

        /* FIXME: stop all the API mismatch ... */
        ret = nouveau_dmem_pages_alloc(drm, 1, pfns);
        if (ret)
                return NULL;

        page = pfn_to_page(pfns[0]);
        get_page(page);
        lock_page(page);
        return page;
}

static void
nouveau_dmem_page_free_locked(struct nouveau_drm *drm, struct page *page)
{
        unlock_page(page);
        put_page(page);
}

void
nouveau_dmem_resume(struct nouveau_drm *drm)
{
        struct nouveau_dmem_chunk *chunk;
        int ret;

        if (drm->dmem == NULL)
                return;

        mutex_lock(&drm->dmem->mutex);
        list_for_each_entry (chunk, &drm->dmem->chunk_free, list) {
                ret = nouveau_bo_pin(chunk->bo, TTM_PL_FLAG_VRAM, false);
                /* FIXME handle pin failure */
                WARN_ON(ret);
        }
        list_for_each_entry (chunk, &drm->dmem->chunk_full, list) {
                ret = nouveau_bo_pin(chunk->bo, TTM_PL_FLAG_VRAM, false);
                /* FIXME handle pin failure */
                WARN_ON(ret);
        }
        mutex_unlock(&drm->dmem->mutex);
}

void
nouveau_dmem_suspend(struct nouveau_drm *drm)
{
        struct nouveau_dmem_chunk *chunk;

        if (drm->dmem == NULL)
                return;

        mutex_lock(&drm->dmem->mutex);
        list_for_each_entry (chunk, &drm->dmem->chunk_free, list) {
                nouveau_bo_unpin(chunk->bo);
        }
        list_for_each_entry (chunk, &drm->dmem->chunk_full, list) {
                nouveau_bo_unpin(chunk->bo);
        }
        mutex_unlock(&drm->dmem->mutex);
}

void
nouveau_dmem_fini(struct nouveau_drm *drm)
{
        struct nouveau_dmem_chunk *chunk, *tmp;

        if (drm->dmem == NULL)
                return;

        mutex_lock(&drm->dmem->mutex);

        WARN_ON(!list_empty(&drm->dmem->chunk_free));
        WARN_ON(!list_empty(&drm->dmem->chunk_full));

        list_for_each_entry_safe (chunk, tmp, &drm->dmem->chunk_empty, list) {
                if (chunk->bo) {
                        nouveau_bo_unpin(chunk->bo);
                        nouveau_bo_ref(NULL, &chunk->bo);
                }
                list_del(&chunk->list);
                kfree(chunk);
        }

        mutex_unlock(&drm->dmem->mutex);
}

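/*
 * Issue a copy on the GPU copy engine (one of the classes selected in
 * nouveau_dmem_migrate_init() below): npages lines of PAGE_SIZE bytes are
 * copied between the source and destination apertures (VRAM or host
 * physical), non-pipelined, with a flush at the end.
 */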
static int
nvc0b5_migrate_copy(struct nouveau_drm *drm, u64 npages,
                    enum nouveau_aper dst_aper, u64 dst_addr,
                    enum nouveau_aper src_aper, u64 src_addr)
{
        struct nouveau_channel *chan = drm->dmem->migrate.chan;
        u32 launch_dma = (1 << 9) /* MULTI_LINE_ENABLE. */ |
                         (1 << 8) /* DST_MEMORY_LAYOUT_PITCH. */ |
                         (1 << 7) /* SRC_MEMORY_LAYOUT_PITCH. */ |
                         (1 << 2) /* FLUSH_ENABLE_TRUE. */ |
                         (2 << 0) /* DATA_TRANSFER_TYPE_NON_PIPELINED. */;
        int ret;

        ret = RING_SPACE(chan, 13);
        if (ret)
                return ret;

        if (src_aper != NOUVEAU_APER_VIRT) {
                switch (src_aper) {
                case NOUVEAU_APER_VRAM:
                        BEGIN_IMC0(chan, NvSubCopy, 0x0260, 0);
                        break;
                case NOUVEAU_APER_HOST:
                        BEGIN_IMC0(chan, NvSubCopy, 0x0260, 1);
                        break;
                default:
                        return -EINVAL;
                }
                launch_dma |= 0x00001000; /* SRC_TYPE_PHYSICAL. */
        }

        if (dst_aper != NOUVEAU_APER_VIRT) {
                switch (dst_aper) {
                case NOUVEAU_APER_VRAM:
                        BEGIN_IMC0(chan, NvSubCopy, 0x0264, 0);
                        break;
                case NOUVEAU_APER_HOST:
                        BEGIN_IMC0(chan, NvSubCopy, 0x0264, 1);
                        break;
                default:
                        return -EINVAL;
                }
                launch_dma |= 0x00002000; /* DST_TYPE_PHYSICAL. */
        }

        BEGIN_NVC0(chan, NvSubCopy, 0x0400, 8);
        OUT_RING  (chan, upper_32_bits(src_addr));
        OUT_RING  (chan, lower_32_bits(src_addr));
        OUT_RING  (chan, upper_32_bits(dst_addr));
        OUT_RING  (chan, lower_32_bits(dst_addr));
        OUT_RING  (chan, PAGE_SIZE);
        OUT_RING  (chan, PAGE_SIZE);
        OUT_RING  (chan, PAGE_SIZE);
        OUT_RING  (chan, npages);
        BEGIN_NVC0(chan, NvSubCopy, 0x0300, 1);
        OUT_RING  (chan, launch_dma);
        return 0;
}

static int
nouveau_dmem_migrate_init(struct nouveau_drm *drm)
{
        switch (drm->ttm.copy.oclass) {
        case PASCAL_DMA_COPY_A:
        case PASCAL_DMA_COPY_B:
        case  VOLTA_DMA_COPY_A:
        case TURING_DMA_COPY_A:
                drm->dmem->migrate.copy_func = nvc0b5_migrate_copy;
                drm->dmem->migrate.chan = drm->ttm.chan;
                return 0;
        default:
                break;
        }
        return -ENODEV;
}

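/*
 * Register the device's VRAM with HMM as device private memory and carve it
 * up into DMEM_CHUNK_SIZE chunks. Failure here is not fatal: drm->dmem is
 * left NULL and the rest of this file checks for that.
 */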
void
nouveau_dmem_init(struct nouveau_drm *drm)
{
        struct device *device = drm->dev->dev;
        unsigned long i, size;
        int ret;

        /* This only makes sense on PASCAL or newer */
        if (drm->client.device.info.family < NV_DEVICE_INFO_V0_PASCAL)
                return;

        if (!(drm->dmem = kzalloc(sizeof(*drm->dmem), GFP_KERNEL)))
                return;

        mutex_init(&drm->dmem->mutex);
        INIT_LIST_HEAD(&drm->dmem->chunk_free);
        INIT_LIST_HEAD(&drm->dmem->chunk_full);
        INIT_LIST_HEAD(&drm->dmem->chunk_empty);

        size = ALIGN(drm->client.device.info.ram_user, DMEM_CHUNK_SIZE);

        /* Initialize migration dma helpers before registering memory */
        ret = nouveau_dmem_migrate_init(drm);
        if (ret) {
                kfree(drm->dmem);
                drm->dmem = NULL;
                return;
        }

        /*
         * FIXME: we need some kind of policy to decide how much VRAM we
         * want to register with HMM. For now just register everything, and
         * later, if we want to do things like overcommit, we can revisit
         * this.
         */
        drm->dmem->devmem = hmm_devmem_add(&nouveau_dmem_devmem_ops,
                                           device, size);
        if (IS_ERR(drm->dmem->devmem)) {
                kfree(drm->dmem);
                drm->dmem = NULL;
                return;
        }

        for (i = 0; i < (size / DMEM_CHUNK_SIZE); ++i) {
                struct nouveau_dmem_chunk *chunk;
                struct page *page;
                unsigned long j;

                chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
                if (chunk == NULL) {
                        nouveau_dmem_fini(drm);
                        return;
                }

                chunk->drm = drm;
                chunk->pfn_first = drm->dmem->devmem->pfn_first;
                chunk->pfn_first += (i * DMEM_CHUNK_NPAGES);
                list_add_tail(&chunk->list, &drm->dmem->chunk_empty);

                page = pfn_to_page(chunk->pfn_first);
                for (j = 0; j < DMEM_CHUNK_NPAGES; ++j, ++page) {
                        hmm_devmem_page_set_drvdata(page, (long)chunk);
                }
        }

        NV_INFO(drm, "DMEM: registered %ldMB of device memory\n", size >> 20);
}

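/*
 * alloc_and_copy() callback for migration to the device: for each system
 * page being migrated, grab a free device private page, DMA-map the source
 * page and ask the copy engine to copy it into VRAM.
 */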
static void
nouveau_dmem_migrate_alloc_and_copy(struct vm_area_struct *vma,
                                    const unsigned long *src_pfns,
                                    unsigned long *dst_pfns,
                                    unsigned long start,
                                    unsigned long end,
                                    void *private)
{
        struct nouveau_migrate *migrate = private;
        struct nouveau_drm *drm = migrate->drm;
        struct device *dev = drm->dev->dev;
        unsigned long addr, i, npages = 0;
        nouveau_migrate_copy_t copy;
        int ret;

        /* First allocate new memory */
        for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) {
                struct page *dpage, *spage;

                dst_pfns[i] = 0;
                spage = migrate_pfn_to_page(src_pfns[i]);
                if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE))
                        continue;

                dpage = nouveau_dmem_page_alloc_locked(drm);
                if (!dpage)
                        continue;

                dst_pfns[i] = migrate_pfn(page_to_pfn(dpage)) |
                              MIGRATE_PFN_LOCKED |
                              MIGRATE_PFN_DEVICE;
                npages++;
        }

        if (!npages)
                return;

        /* Allocate storage for DMA addresses, so we can unmap later. */
        migrate->dma = kmalloc(sizeof(*migrate->dma) * npages, GFP_KERNEL);
        if (!migrate->dma)
                goto error;

        /* Copy things over */
        copy = drm->dmem->migrate.copy_func;
        for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) {
                struct nouveau_dmem_chunk *chunk;
                struct page *spage, *dpage;
                u64 src_addr, dst_addr;

                dpage = migrate_pfn_to_page(dst_pfns[i]);
                if (!dpage || dst_pfns[i] == MIGRATE_PFN_ERROR)
                        continue;

                chunk = (void *)hmm_devmem_page_get_drvdata(dpage);
                dst_addr = page_to_pfn(dpage) - chunk->pfn_first;
                dst_addr = (dst_addr << PAGE_SHIFT) + chunk->bo->bo.offset;

                spage = migrate_pfn_to_page(src_pfns[i]);
                if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) {
                        nouveau_dmem_page_free_locked(drm, dpage);
                        dst_pfns[i] = 0;
                        continue;
                }

                migrate->dma[migrate->dma_nr] =
                        dma_map_page_attrs(dev, spage, 0, PAGE_SIZE,
                                           PCI_DMA_BIDIRECTIONAL,
                                           DMA_ATTR_SKIP_CPU_SYNC);
                if (dma_mapping_error(dev, migrate->dma[migrate->dma_nr])) {
                        nouveau_dmem_page_free_locked(drm, dpage);
                        dst_pfns[i] = 0;
                        continue;
                }

                src_addr = migrate->dma[migrate->dma_nr++];

                ret = copy(drm, 1, NOUVEAU_APER_VRAM, dst_addr,
                                   NOUVEAU_APER_HOST, src_addr);
                if (ret) {
                        nouveau_dmem_page_free_locked(drm, dpage);
                        dst_pfns[i] = 0;
                        continue;
                }
        }

        nouveau_fence_new(drm->dmem->migrate.chan, false, &migrate->fence);

        return;

error:
        for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, ++i) {
                struct page *page;

                if (!dst_pfns[i] || dst_pfns[i] == MIGRATE_PFN_ERROR)
                        continue;

                page = migrate_pfn_to_page(dst_pfns[i]);
                dst_pfns[i] = MIGRATE_PFN_ERROR;
                if (page == NULL)
                        continue;

                __free_page(page);
        }
}

void nouveau_dmem_migrate_finalize_and_map(struct vm_area_struct *vma,
                                           const unsigned long *src_pfns,
                                           const unsigned long *dst_pfns,
                                           unsigned long start,
                                           unsigned long end,
                                           void *private)
{
        struct nouveau_migrate *migrate = private;
        struct nouveau_drm *drm = migrate->drm;

        if (migrate->fence) {
                nouveau_fence_wait(migrate->fence, true, false);
                nouveau_fence_unref(&migrate->fence);
        } else {
                /*
                 * FIXME: wait for the channel to be IDLE before finalizing
                 * the hmem object below (nouveau_migrate_hmem_fini())?
                 */
        }

        while (migrate->dma_nr--) {
                dma_unmap_page(drm->dev->dev, migrate->dma[migrate->dma_nr],
                               PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
        }
        kfree(migrate->dma);

        /*
         * FIXME optimization: update GPU page table to point to newly
         * migrated memory.
         */
}

static const struct migrate_vma_ops nouveau_dmem_migrate_ops = {
        .alloc_and_copy         = nouveau_dmem_migrate_alloc_and_copy,
        .finalize_and_map       = nouveau_dmem_migrate_finalize_and_map,
};

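/*
 * Migrate the range [start, end) of @vma to device memory, in batches of at
 * most SG_MAX_SINGLE_ALLOC pages. The caller (for instance the SVM bind
 * path) is expected to hold the mmap_sem and to pass a range that lies
 * entirely within @vma.
 */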
int
nouveau_dmem_migrate_vma(struct nouveau_drm *drm,
                         struct vm_area_struct *vma,
                         unsigned long start,
                         unsigned long end)
{
        unsigned long *src_pfns, *dst_pfns, npages;
        struct nouveau_migrate migrate = {0};
        unsigned long i, c, max;
        int ret = 0;

        npages = (end - start) >> PAGE_SHIFT;
        max = min(SG_MAX_SINGLE_ALLOC, npages);
        src_pfns = kzalloc(sizeof(long) * max, GFP_KERNEL);
        if (src_pfns == NULL)
                return -ENOMEM;
        dst_pfns = kzalloc(sizeof(long) * max, GFP_KERNEL);
        if (dst_pfns == NULL) {
                kfree(src_pfns);
                return -ENOMEM;
        }

        migrate.drm = drm;
        migrate.vma = vma;
        migrate.npages = npages;
        for (i = 0; i < npages; i += c) {
                unsigned long next;

                c = min(SG_MAX_SINGLE_ALLOC, npages - i);
                next = start + (c << PAGE_SHIFT);
                ret = migrate_vma(&nouveau_dmem_migrate_ops, vma, start,
                                  next, src_pfns, dst_pfns, &migrate);
                if (ret)
                        goto out;
                start = next;
        }

out:
        kfree(dst_pfns);
        kfree(src_pfns);
        return ret;
}

static inline bool
nouveau_dmem_page(struct nouveau_drm *drm, struct page *page)
{
        if (!is_device_private_page(page))
                return false;

        if (drm->dmem->devmem != page->pgmap->data)
                return false;

        return true;
}

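/*
 * Rewrite the HMM pfns of device private pages in @range so that they encode
 * the page's VRAM address instead, which is what the GPU page table code
 * expects for local memory mappings.
 */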
void
nouveau_dmem_convert_pfn(struct nouveau_drm *drm,
                         struct hmm_range *range)
{
        unsigned long i, npages;

        npages = (range->end - range->start) >> PAGE_SHIFT;
        for (i = 0; i < npages; ++i) {
                struct nouveau_dmem_chunk *chunk;
                struct page *page;
                uint64_t addr;

                page = hmm_pfn_to_page(range, range->pfns[i]);
                if (page == NULL)
                        continue;

                if (!(range->pfns[i] & range->flags[HMM_PFN_DEVICE_PRIVATE]))
                        continue;

                if (!nouveau_dmem_page(drm, page)) {
                        WARN(1, "Some unknown device memory!\n");
                        range->pfns[i] = 0;
                        continue;
                }

                chunk = (void *)hmm_devmem_page_get_drvdata(page);
                addr = page_to_pfn(page) - chunk->pfn_first;
                addr = (addr + chunk->bo->bo.mem.start) << PAGE_SHIFT;

                range->pfns[i] &= ((1UL << range->pfn_shift) - 1);
                range->pfns[i] |= (addr >> PAGE_SHIFT) << range->pfn_shift;
        }
}