linux/drivers/gpu/drm/nouveau/nouveau_dmem.c
/*
 * Copyright 2018 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include "nouveau_dmem.h"
#include "nouveau_drv.h"
#include "nouveau_chan.h"
#include "nouveau_dma.h"
#include "nouveau_mem.h"
#include "nouveau_bo.h"

#include <nvif/class.h>
#include <nvif/object.h>
#include <nvif/if500b.h>
#include <nvif/if900b.h>

#include <linux/sched/mm.h>
#include <linux/hmm.h>

/*
 * FIXME: this is ugly. Right now we are using TTM to allocate vram and we
 * pin it in vram while in use. We likely want to overhaul memory management
 * for nouveau to be more page-like (not necessarily with system page size
 * but a bigger page size) at the lowest level and have some shim layer on
 * top that would provide the same functionality as TTM.
 */
#define DMEM_CHUNK_SIZE (2UL << 20)
#define DMEM_CHUNK_NPAGES (DMEM_CHUNK_SIZE >> PAGE_SHIFT)

struct nouveau_migrate;

enum nouveau_aper {
        NOUVEAU_APER_VIRT,
        NOUVEAU_APER_VRAM,
        NOUVEAU_APER_HOST,
};

typedef int (*nouveau_migrate_copy_t)(struct nouveau_drm *drm, u64 npages,
                                      enum nouveau_aper, u64 dst_addr,
                                      enum nouveau_aper, u64 src_addr);

struct nouveau_dmem_chunk {
        struct list_head list;
        struct nouveau_bo *bo;
        struct nouveau_drm *drm;
        unsigned long pfn_first;
        unsigned long callocated;
        unsigned long bitmap[BITS_TO_LONGS(DMEM_CHUNK_NPAGES)];
        spinlock_t lock;
};

struct nouveau_dmem_migrate {
        nouveau_migrate_copy_t copy_func;
        struct nouveau_channel *chan;
};

struct nouveau_dmem {
        struct nouveau_drm *drm;
        struct dev_pagemap pagemap;
        struct nouveau_dmem_migrate migrate;
        struct list_head chunk_free;
        struct list_head chunk_full;
        struct list_head chunk_empty;
        struct mutex mutex;
};

static inline struct nouveau_dmem *page_to_dmem(struct page *page)
{
        return container_of(page->pgmap, struct nouveau_dmem, pagemap);
}

struct nouveau_dmem_fault {
        struct nouveau_drm *drm;
        struct nouveau_fence *fence;
        dma_addr_t *dma;
        unsigned long npages;
};

struct nouveau_migrate {
        struct vm_area_struct *vma;
        struct nouveau_drm *drm;
        struct nouveau_fence *fence;
        unsigned long npages;
        dma_addr_t *dma;
        unsigned long dma_nr;
};

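/*
 * ZONE_DEVICE page_free() callback: called when the last reference to a
 * device private page is dropped. Clear the page's bit in its chunk's
 * allocation bitmap and drop the chunk's allocation count so the backing
 * VRAM page can be handed out again.
 */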
static void nouveau_dmem_page_free(struct page *page)
{
        struct nouveau_dmem_chunk *chunk = page->zone_device_data;
        unsigned long idx = page_to_pfn(page) - chunk->pfn_first;

        /*
         * FIXME:
         *
         * This is really a bad example; we need to overhaul nouveau memory
         * management to be more page focused and to allow a lighter locking
         * scheme to be used in the process.
         */
        spin_lock(&chunk->lock);
        clear_bit(idx, chunk->bitmap);
        WARN_ON(!chunk->callocated);
        chunk->callocated--;
        /*
         * FIXME: when chunk->callocated reaches 0 we should add the chunk to
         * a reclaim list so that it can be freed in case of memory pressure.
         */
        spin_unlock(&chunk->lock);
}

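/*
 * alloc_and_copy() callback for the CPU fault path: for each device private
 * page being migrated back to system memory, allocate a destination page in
 * the faulting process, DMA-map it and copy the VRAM contents into it with
 * the copy engine. Pages that cannot be serviced are flagged with
 * MIGRATE_PFN_ERROR. A fence is queued so finalize_and_map() can wait for
 * the copies to land.
 */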
static void
nouveau_dmem_fault_alloc_and_copy(struct vm_area_struct *vma,
                                  const unsigned long *src_pfns,
                                  unsigned long *dst_pfns,
                                  unsigned long start,
                                  unsigned long end,
                                  void *private)
{
        struct nouveau_dmem_fault *fault = private;
        struct nouveau_drm *drm = fault->drm;
        struct device *dev = drm->dev->dev;
        unsigned long addr, i, npages = 0;
        nouveau_migrate_copy_t copy;
        int ret;

        /* First allocate new memory */
        for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) {
                struct page *dpage, *spage;

                dst_pfns[i] = 0;
                spage = migrate_pfn_to_page(src_pfns[i]);
                if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE))
                        continue;

                dpage = alloc_page_vma(GFP_HIGHUSER, vma, addr);
                if (!dpage) {
                        dst_pfns[i] = MIGRATE_PFN_ERROR;
                        continue;
                }
                lock_page(dpage);

                dst_pfns[i] = migrate_pfn(page_to_pfn(dpage)) |
                              MIGRATE_PFN_LOCKED;
                npages++;
        }

        /* Allocate storage for DMA addresses, so we can unmap later. */
        fault->dma = kmalloc(sizeof(*fault->dma) * npages, GFP_KERNEL);
        if (!fault->dma)
                goto error;

        /* Copy things over */
        copy = drm->dmem->migrate.copy_func;
        for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) {
                struct nouveau_dmem_chunk *chunk;
                struct page *spage, *dpage;
                u64 src_addr, dst_addr;

                dpage = migrate_pfn_to_page(dst_pfns[i]);
                if (!dpage || dst_pfns[i] == MIGRATE_PFN_ERROR)
                        continue;

                spage = migrate_pfn_to_page(src_pfns[i]);
                if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) {
                        dst_pfns[i] = MIGRATE_PFN_ERROR;
                        __free_page(dpage);
                        continue;
                }

                fault->dma[fault->npages] =
                        dma_map_page_attrs(dev, dpage, 0, PAGE_SIZE,
                                           PCI_DMA_BIDIRECTIONAL,
                                           DMA_ATTR_SKIP_CPU_SYNC);
                if (dma_mapping_error(dev, fault->dma[fault->npages])) {
                        dst_pfns[i] = MIGRATE_PFN_ERROR;
                        __free_page(dpage);
                        continue;
                }

                dst_addr = fault->dma[fault->npages++];

                chunk = spage->zone_device_data;
                src_addr = page_to_pfn(spage) - chunk->pfn_first;
                src_addr = (src_addr << PAGE_SHIFT) + chunk->bo->bo.offset;

                ret = copy(drm, 1, NOUVEAU_APER_HOST, dst_addr,
                                   NOUVEAU_APER_VRAM, src_addr);
                if (ret) {
                        dst_pfns[i] = MIGRATE_PFN_ERROR;
                        __free_page(dpage);
                        continue;
                }
        }

        nouveau_fence_new(drm->dmem->migrate.chan, false, &fault->fence);

        return;

error:
        for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, ++i) {
                struct page *page;

                if (!dst_pfns[i] || dst_pfns[i] == MIGRATE_PFN_ERROR)
                        continue;

                page = migrate_pfn_to_page(dst_pfns[i]);
                dst_pfns[i] = MIGRATE_PFN_ERROR;
                if (page == NULL)
                        continue;

                __free_page(page);
        }
}

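/*
 * finalize_and_map() callback for the CPU fault path: wait for the copy
 * fence so the destination pages hold valid data, then unmap the DMA
 * addresses and free the bookkeeping array.
 */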
void nouveau_dmem_fault_finalize_and_map(struct vm_area_struct *vma,
                                         const unsigned long *src_pfns,
                                         const unsigned long *dst_pfns,
                                         unsigned long start,
                                         unsigned long end,
                                         void *private)
{
        struct nouveau_dmem_fault *fault = private;
        struct nouveau_drm *drm = fault->drm;

        if (fault->fence) {
                nouveau_fence_wait(fault->fence, true, false);
                nouveau_fence_unref(&fault->fence);
        } else {
                /*
                 * FIXME: wait for the channel to be IDLE before finalizing
                 * the hmem object below (nouveau_migrate_hmem_fini()).
                 */
        }

        while (fault->npages--) {
                dma_unmap_page(drm->dev->dev, fault->dma[fault->npages],
                               PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
        }
        kfree(fault->dma);
}

static const struct migrate_vma_ops nouveau_dmem_fault_migrate_ops = {
        .alloc_and_copy         = nouveau_dmem_fault_alloc_and_copy,
        .finalize_and_map       = nouveau_dmem_fault_finalize_and_map,
};

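/*
 * migrate_to_ram() callback: invoked on a CPU fault against a device private
 * page. Migrate the single faulting page back to system memory through
 * migrate_vma() and the fault ops above; return VM_FAULT_SIGBUS if that
 * fails.
 */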
static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf)
{
        struct nouveau_dmem *dmem = page_to_dmem(vmf->page);
        unsigned long src[1] = {0}, dst[1] = {0};
        struct nouveau_dmem_fault fault = { .drm = dmem->drm };
        int ret;

        /*
         * FIXME: what we really want is to find some heuristic to migrate
         * more than just one page on CPU fault. When such a fault happens it
         * is very likely that more of the surrounding pages will CPU fault
         * too.
         */
        ret = migrate_vma(&nouveau_dmem_fault_migrate_ops, vmf->vma,
                        vmf->address, vmf->address + PAGE_SIZE,
                        src, dst, &fault);
        if (ret)
                return VM_FAULT_SIGBUS;

        if (dst[0] == MIGRATE_PFN_ERROR)
                return VM_FAULT_SIGBUS;

        return 0;
}

static const struct dev_pagemap_ops nouveau_dmem_pagemap_ops = {
        .page_free              = nouveau_dmem_page_free,
        .migrate_to_ram         = nouveau_dmem_migrate_to_ram,
};

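/*
 * Back one chunk with VRAM: take the first chunk off the empty list,
 * allocate and pin a DMEM_CHUNK_SIZE buffer object for it, then put the
 * chunk back on the empty list (at the head when backing succeeded, at the
 * tail otherwise).
 */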
static int
nouveau_dmem_chunk_alloc(struct nouveau_drm *drm)
{
        struct nouveau_dmem_chunk *chunk;
        int ret;

        if (drm->dmem == NULL)
                return -EINVAL;

        mutex_lock(&drm->dmem->mutex);
        chunk = list_first_entry_or_null(&drm->dmem->chunk_empty,
                                         struct nouveau_dmem_chunk,
                                         list);
        if (chunk == NULL) {
                mutex_unlock(&drm->dmem->mutex);
                return -ENOMEM;
        }

        list_del(&chunk->list);
        mutex_unlock(&drm->dmem->mutex);

        ret = nouveau_bo_new(&drm->client, DMEM_CHUNK_SIZE, 0,
                             TTM_PL_FLAG_VRAM, 0, 0, NULL, NULL,
                             &chunk->bo);
        if (ret)
                goto out;

        ret = nouveau_bo_pin(chunk->bo, TTM_PL_FLAG_VRAM, false);
        if (ret) {
                nouveau_bo_ref(NULL, &chunk->bo);
                goto out;
        }

        bitmap_zero(chunk->bitmap, DMEM_CHUNK_NPAGES);
        spin_lock_init(&chunk->lock);

out:
        mutex_lock(&drm->dmem->mutex);
        if (chunk->bo)
                list_add(&chunk->list, &drm->dmem->chunk_empty);
        else
                list_add_tail(&chunk->list, &drm->dmem->chunk_empty);
        mutex_unlock(&drm->dmem->mutex);

        return ret;
}

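/*
 * Find a chunk with space available: first try the free list, then fall
 * back to an already-backed chunk on the empty list. Caller must hold
 * dmem->mutex.
 */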
static struct nouveau_dmem_chunk *
nouveau_dmem_chunk_first_free_locked(struct nouveau_drm *drm)
{
        struct nouveau_dmem_chunk *chunk;

        chunk = list_first_entry_or_null(&drm->dmem->chunk_free,
                                         struct nouveau_dmem_chunk,
                                         list);
        if (chunk)
                return chunk;

        chunk = list_first_entry_or_null(&drm->dmem->chunk_empty,
                                         struct nouveau_dmem_chunk,
                                         list);
        if (chunk->bo)
                return chunk;

        return NULL;
}

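/*
 * Allocate @npages device private pfns into @pages, walking chunks with
 * free bits and backing new chunks as needed. Returns 0 even on a partial
 * allocation as long as at least one page was obtained; unallocated slots
 * keep the -1UL value from the initial memset.
 */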
static int
nouveau_dmem_pages_alloc(struct nouveau_drm *drm,
                         unsigned long npages,
                         unsigned long *pages)
{
        struct nouveau_dmem_chunk *chunk;
        unsigned long c;
        int ret;

        memset(pages, 0xff, npages * sizeof(*pages));

        mutex_lock(&drm->dmem->mutex);
        for (c = 0; c < npages;) {
                unsigned long i;

                chunk = nouveau_dmem_chunk_first_free_locked(drm);
                if (chunk == NULL) {
                        mutex_unlock(&drm->dmem->mutex);
                        ret = nouveau_dmem_chunk_alloc(drm);
                        if (ret) {
                                if (c)
                                        return 0;
                                return ret;
                        }
                        mutex_lock(&drm->dmem->mutex);
                        continue;
                }

                spin_lock(&chunk->lock);
                i = find_first_zero_bit(chunk->bitmap, DMEM_CHUNK_NPAGES);
                while (i < DMEM_CHUNK_NPAGES && c < npages) {
                        pages[c] = chunk->pfn_first + i;
                        set_bit(i, chunk->bitmap);
                        chunk->callocated++;
                        c++;

                        i = find_next_zero_bit(chunk->bitmap,
                                        DMEM_CHUNK_NPAGES, i);
                }
                spin_unlock(&chunk->lock);
        }
        mutex_unlock(&drm->dmem->mutex);

        return 0;
}

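/*
 * Allocate a single device private page and return it with a reference
 * held and the page locked, ready to be used as a migration destination.
 */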
static struct page *
nouveau_dmem_page_alloc_locked(struct nouveau_drm *drm)
{
        unsigned long pfns[1];
        struct page *page;
        int ret;

        /* FIXME stop all the mismatched API ... */
        ret = nouveau_dmem_pages_alloc(drm, 1, pfns);
        if (ret)
                return NULL;

        page = pfn_to_page(pfns[0]);
        get_page(page);
        lock_page(page);
        return page;
}

static void
nouveau_dmem_page_free_locked(struct nouveau_drm *drm, struct page *page)
{
        unlock_page(page);
        put_page(page);
}

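/*
 * Re-pin the VRAM buffer objects of chunks on the free and full lists when
 * coming out of suspend, mirroring nouveau_dmem_suspend().
 */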
void
nouveau_dmem_resume(struct nouveau_drm *drm)
{
        struct nouveau_dmem_chunk *chunk;
        int ret;

        if (drm->dmem == NULL)
                return;

        mutex_lock(&drm->dmem->mutex);
        list_for_each_entry (chunk, &drm->dmem->chunk_free, list) {
                ret = nouveau_bo_pin(chunk->bo, TTM_PL_FLAG_VRAM, false);
                /* FIXME handle pin failure */
                WARN_ON(ret);
        }
        list_for_each_entry (chunk, &drm->dmem->chunk_full, list) {
                ret = nouveau_bo_pin(chunk->bo, TTM_PL_FLAG_VRAM, false);
                /* FIXME handle pin failure */
                WARN_ON(ret);
        }
        mutex_unlock(&drm->dmem->mutex);
}

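/*
 * Unpin the VRAM buffer objects of chunks on the free and full lists so
 * their memory can be evicted while the device is suspended.
 */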
void
nouveau_dmem_suspend(struct nouveau_drm *drm)
{
        struct nouveau_dmem_chunk *chunk;

        if (drm->dmem == NULL)
                return;

        mutex_lock(&drm->dmem->mutex);
        list_for_each_entry (chunk, &drm->dmem->chunk_free, list) {
                nouveau_bo_unpin(chunk->bo);
        }
        list_for_each_entry (chunk, &drm->dmem->chunk_full, list) {
                nouveau_bo_unpin(chunk->bo);
        }
        mutex_unlock(&drm->dmem->mutex);
}

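/*
 * Tear down device memory management: drop the VRAM backing of all chunks
 * and free them. The free and full lists are expected to be empty by now,
 * hence the WARN_ON()s.
 */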
void
nouveau_dmem_fini(struct nouveau_drm *drm)
{
        struct nouveau_dmem_chunk *chunk, *tmp;

        if (drm->dmem == NULL)
                return;

        mutex_lock(&drm->dmem->mutex);

        WARN_ON(!list_empty(&drm->dmem->chunk_free));
        WARN_ON(!list_empty(&drm->dmem->chunk_full));

        list_for_each_entry_safe (chunk, tmp, &drm->dmem->chunk_empty, list) {
                if (chunk->bo) {
                        nouveau_bo_unpin(chunk->bo);
                        nouveau_bo_ref(NULL, &chunk->bo);
                }
                list_del(&chunk->list);
                kfree(chunk);
        }

        mutex_unlock(&drm->dmem->mutex);
}

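/*
 * Emit a copy on the GPU copy engine (Pascal and newer DMA copy classes):
 * select physical VRAM or host memory for the source and destination
 * apertures, then launch a non-pipelined, PAGE_SIZE-pitched transfer of
 * @npages lines with a flush on completion.
 */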
static int
nvc0b5_migrate_copy(struct nouveau_drm *drm, u64 npages,
                    enum nouveau_aper dst_aper, u64 dst_addr,
                    enum nouveau_aper src_aper, u64 src_addr)
{
        struct nouveau_channel *chan = drm->dmem->migrate.chan;
        u32 launch_dma = (1 << 9) /* MULTI_LINE_ENABLE. */ |
                         (1 << 8) /* DST_MEMORY_LAYOUT_PITCH. */ |
                         (1 << 7) /* SRC_MEMORY_LAYOUT_PITCH. */ |
                         (1 << 2) /* FLUSH_ENABLE_TRUE. */ |
                         (2 << 0) /* DATA_TRANSFER_TYPE_NON_PIPELINED. */;
        int ret;

        ret = RING_SPACE(chan, 13);
        if (ret)
                return ret;

        if (src_aper != NOUVEAU_APER_VIRT) {
                switch (src_aper) {
                case NOUVEAU_APER_VRAM:
                        BEGIN_IMC0(chan, NvSubCopy, 0x0260, 0);
                        break;
                case NOUVEAU_APER_HOST:
                        BEGIN_IMC0(chan, NvSubCopy, 0x0260, 1);
                        break;
                default:
                        return -EINVAL;
                }
                launch_dma |= 0x00001000; /* SRC_TYPE_PHYSICAL. */
        }

        if (dst_aper != NOUVEAU_APER_VIRT) {
                switch (dst_aper) {
                case NOUVEAU_APER_VRAM:
                        BEGIN_IMC0(chan, NvSubCopy, 0x0264, 0);
                        break;
                case NOUVEAU_APER_HOST:
                        BEGIN_IMC0(chan, NvSubCopy, 0x0264, 1);
                        break;
                default:
                        return -EINVAL;
                }
                launch_dma |= 0x00002000; /* DST_TYPE_PHYSICAL. */
        }

        BEGIN_NVC0(chan, NvSubCopy, 0x0400, 8);
        OUT_RING  (chan, upper_32_bits(src_addr));
        OUT_RING  (chan, lower_32_bits(src_addr));
        OUT_RING  (chan, upper_32_bits(dst_addr));
        OUT_RING  (chan, lower_32_bits(dst_addr));
        OUT_RING  (chan, PAGE_SIZE);
        OUT_RING  (chan, PAGE_SIZE);
        OUT_RING  (chan, PAGE_SIZE);
        OUT_RING  (chan, npages);
        BEGIN_NVC0(chan, NvSubCopy, 0x0300, 1);
        OUT_RING  (chan, launch_dma);
        return 0;
}

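/*
 * Pick a copy routine matching the copy engine class nouveau already uses
 * for TTM buffer moves; without a supported engine, device memory is not
 * enabled.
 */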
static int
nouveau_dmem_migrate_init(struct nouveau_drm *drm)
{
        switch (drm->ttm.copy.oclass) {
        case PASCAL_DMA_COPY_A:
        case PASCAL_DMA_COPY_B:
        case  VOLTA_DMA_COPY_A:
        case TURING_DMA_COPY_A:
                drm->dmem->migrate.copy_func = nvc0b5_migrate_copy;
                drm->dmem->migrate.chan = drm->ttm.chan;
                return 0;
        default:
                break;
        }
        return -ENODEV;
}

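/*
 * Set up device private memory for the GPU: register all of VRAM (rounded
 * up to the chunk size) as a MEMORY_DEVICE_PRIVATE pagemap and carve the
 * resulting pfn range into chunks, which get VRAM backing later, on demand.
 */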
void
nouveau_dmem_init(struct nouveau_drm *drm)
{
        struct device *device = drm->dev->dev;
        struct resource *res;
        unsigned long i, size, pfn_first;
        int ret;

        /* This only makes sense on PASCAL or newer */
        if (drm->client.device.info.family < NV_DEVICE_INFO_V0_PASCAL)
                return;

        if (!(drm->dmem = kzalloc(sizeof(*drm->dmem), GFP_KERNEL)))
                return;

        drm->dmem->drm = drm;
        mutex_init(&drm->dmem->mutex);
        INIT_LIST_HEAD(&drm->dmem->chunk_free);
        INIT_LIST_HEAD(&drm->dmem->chunk_full);
        INIT_LIST_HEAD(&drm->dmem->chunk_empty);

        size = ALIGN(drm->client.device.info.ram_user, DMEM_CHUNK_SIZE);

        /* Initialize migration dma helpers before registering memory */
        ret = nouveau_dmem_migrate_init(drm);
        if (ret)
                goto out_free;

        /*
         * FIXME: we need some kind of policy to decide how much VRAM we
         * want to register with HMM. For now just register everything;
         * later, if we want to do things like overcommit, we can revisit
         * this.
         */
        res = devm_request_free_mem_region(device, &iomem_resource, size);
        if (IS_ERR(res))
                goto out_free;
        drm->dmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
        drm->dmem->pagemap.res = *res;
        drm->dmem->pagemap.ops = &nouveau_dmem_pagemap_ops;
        if (IS_ERR(devm_memremap_pages(device, &drm->dmem->pagemap)))
                goto out_free;

        pfn_first = res->start >> PAGE_SHIFT;
        for (i = 0; i < (size / DMEM_CHUNK_SIZE); ++i) {
                struct nouveau_dmem_chunk *chunk;
                struct page *page;
                unsigned long j;

                chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
                if (chunk == NULL) {
                        nouveau_dmem_fini(drm);
                        return;
                }

                chunk->drm = drm;
                chunk->pfn_first = pfn_first + (i * DMEM_CHUNK_NPAGES);
                list_add_tail(&chunk->list, &drm->dmem->chunk_empty);

                page = pfn_to_page(chunk->pfn_first);
                for (j = 0; j < DMEM_CHUNK_NPAGES; ++j, ++page)
                        page->zone_device_data = chunk;
        }

        NV_INFO(drm, "DMEM: registered %ldMB of device memory\n", size >> 20);
        return;
out_free:
        kfree(drm->dmem);
        drm->dmem = NULL;
}

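/*
 * alloc_and_copy() callback for migration to the device: for every system
 * memory page being migrated, allocate a device private page, DMA-map the
 * source and copy it into VRAM with the copy engine. Pages that cannot be
 * migrated are simply skipped. A fence is queued so finalize_and_map() can
 * wait for the copies to land.
 */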
static void
nouveau_dmem_migrate_alloc_and_copy(struct vm_area_struct *vma,
                                    const unsigned long *src_pfns,
                                    unsigned long *dst_pfns,
                                    unsigned long start,
                                    unsigned long end,
                                    void *private)
{
        struct nouveau_migrate *migrate = private;
        struct nouveau_drm *drm = migrate->drm;
        struct device *dev = drm->dev->dev;
        unsigned long addr, i, npages = 0;
        nouveau_migrate_copy_t copy;
        int ret;

        /* First allocate new memory */
        for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) {
                struct page *dpage, *spage;

                dst_pfns[i] = 0;
                spage = migrate_pfn_to_page(src_pfns[i]);
                if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE))
                        continue;

                dpage = nouveau_dmem_page_alloc_locked(drm);
                if (!dpage)
                        continue;

                dst_pfns[i] = migrate_pfn(page_to_pfn(dpage)) |
                              MIGRATE_PFN_LOCKED |
                              MIGRATE_PFN_DEVICE;
                npages++;
        }

        if (!npages)
                return;

        /* Allocate storage for DMA addresses, so we can unmap later. */
        migrate->dma = kmalloc(sizeof(*migrate->dma) * npages, GFP_KERNEL);
        if (!migrate->dma)
                goto error;

        /* Copy things over */
        copy = drm->dmem->migrate.copy_func;
        for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) {
                struct nouveau_dmem_chunk *chunk;
                struct page *spage, *dpage;
                u64 src_addr, dst_addr;

                dpage = migrate_pfn_to_page(dst_pfns[i]);
                if (!dpage || dst_pfns[i] == MIGRATE_PFN_ERROR)
                        continue;

                chunk = dpage->zone_device_data;
                dst_addr = page_to_pfn(dpage) - chunk->pfn_first;
                dst_addr = (dst_addr << PAGE_SHIFT) + chunk->bo->bo.offset;

                spage = migrate_pfn_to_page(src_pfns[i]);
                if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) {
                        nouveau_dmem_page_free_locked(drm, dpage);
                        dst_pfns[i] = 0;
                        continue;
                }

                migrate->dma[migrate->dma_nr] =
                        dma_map_page_attrs(dev, spage, 0, PAGE_SIZE,
                                           PCI_DMA_BIDIRECTIONAL,
                                           DMA_ATTR_SKIP_CPU_SYNC);
                if (dma_mapping_error(dev, migrate->dma[migrate->dma_nr])) {
                        nouveau_dmem_page_free_locked(drm, dpage);
                        dst_pfns[i] = 0;
                        continue;
                }

                src_addr = migrate->dma[migrate->dma_nr++];

                ret = copy(drm, 1, NOUVEAU_APER_VRAM, dst_addr,
                                   NOUVEAU_APER_HOST, src_addr);
                if (ret) {
                        nouveau_dmem_page_free_locked(drm, dpage);
                        dst_pfns[i] = 0;
                        continue;
                }
        }

        nouveau_fence_new(drm->dmem->migrate.chan, false, &migrate->fence);

        return;

error:
        for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, ++i) {
                struct page *page;

                if (!dst_pfns[i] || dst_pfns[i] == MIGRATE_PFN_ERROR)
                        continue;

                page = migrate_pfn_to_page(dst_pfns[i]);
                dst_pfns[i] = MIGRATE_PFN_ERROR;
                if (page == NULL)
                        continue;

                __free_page(page);
        }
}

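/*
 * finalize_and_map() callback for migration to the device: wait for the
 * copy fence, then unmap the source DMA addresses and free the bookkeeping
 * array.
 */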
void nouveau_dmem_migrate_finalize_and_map(struct vm_area_struct *vma,
                                           const unsigned long *src_pfns,
                                           const unsigned long *dst_pfns,
                                           unsigned long start,
                                           unsigned long end,
                                           void *private)
{
        struct nouveau_migrate *migrate = private;
        struct nouveau_drm *drm = migrate->drm;

        if (migrate->fence) {
                nouveau_fence_wait(migrate->fence, true, false);
                nouveau_fence_unref(&migrate->fence);
        } else {
                /*
                 * FIXME: wait for the channel to be IDLE before finalizing
                 * the hmem object below (nouveau_migrate_hmem_fini())?
                 */
        }

        while (migrate->dma_nr--) {
                dma_unmap_page(drm->dev->dev, migrate->dma[migrate->dma_nr],
                               PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
        }
        kfree(migrate->dma);

        /*
         * FIXME optimization: update GPU page table to point to newly
         * migrated memory.
         */
}

static const struct migrate_vma_ops nouveau_dmem_migrate_ops = {
        .alloc_and_copy         = nouveau_dmem_migrate_alloc_and_copy,
        .finalize_and_map       = nouveau_dmem_migrate_finalize_and_map,
};

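/*
 * Migrate a range of a process address space into device private memory,
 * splitting the work into batches of at most SG_MAX_SINGLE_ALLOC pages per
 * migrate_vma() call.
 */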
int
nouveau_dmem_migrate_vma(struct nouveau_drm *drm,
                         struct vm_area_struct *vma,
                         unsigned long start,
                         unsigned long end)
{
        unsigned long *src_pfns, *dst_pfns, npages;
        struct nouveau_migrate migrate = {0};
        unsigned long i, c, max;
        int ret = 0;

        npages = (end - start) >> PAGE_SHIFT;
        max = min(SG_MAX_SINGLE_ALLOC, npages);
        src_pfns = kzalloc(sizeof(long) * max, GFP_KERNEL);
        if (src_pfns == NULL)
                return -ENOMEM;
        dst_pfns = kzalloc(sizeof(long) * max, GFP_KERNEL);
        if (dst_pfns == NULL) {
                kfree(src_pfns);
                return -ENOMEM;
        }

        migrate.drm = drm;
        migrate.vma = vma;
        migrate.npages = npages;
        for (i = 0; i < npages; i += c) {
                unsigned long next;

                c = min(SG_MAX_SINGLE_ALLOC, npages);
                next = start + (c << PAGE_SHIFT);
                ret = migrate_vma(&nouveau_dmem_migrate_ops, vma, start,
                                  next, src_pfns, dst_pfns, &migrate);
                if (ret)
                        goto out;
                start = next;
        }

out:
        kfree(dst_pfns);
        kfree(src_pfns);
        return ret;
}

static inline bool
nouveau_dmem_page(struct nouveau_drm *drm, struct page *page)
{
        return is_device_private_page(page) && drm->dmem == page_to_dmem(page);
}

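/*
 * Rewrite the pfns in an hmm_range so that device private pages resolve to
 * the physical VRAM address of their backing buffer object, which is what
 * the GPU page table code expects.
 */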
void
nouveau_dmem_convert_pfn(struct nouveau_drm *drm,
                         struct hmm_range *range)
{
        unsigned long i, npages;

        npages = (range->end - range->start) >> PAGE_SHIFT;
        for (i = 0; i < npages; ++i) {
                struct nouveau_dmem_chunk *chunk;
                struct page *page;
                uint64_t addr;

                page = hmm_pfn_to_page(range, range->pfns[i]);
                if (page == NULL)
                        continue;

                if (!(range->pfns[i] & range->flags[HMM_PFN_DEVICE_PRIVATE])) {
                        continue;
                }

                if (!nouveau_dmem_page(drm, page)) {
                        WARN(1, "Some unknown device memory !\n");
                        range->pfns[i] = 0;
                        continue;
                }

                chunk = page->zone_device_data;
                addr = page_to_pfn(page) - chunk->pfn_first;
                addr = (addr + chunk->bo->bo.mem.start) << PAGE_SHIFT;

                range->pfns[i] &= ((1UL << range->pfn_shift) - 1);
                range->pfns[i] |= (addr >> PAGE_SHIFT) << range->pfn_shift;
        }
}