linux/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
   1/*
   2 * Copyright 2017 Red Hat Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 */
  22#define NVKM_VMM_LEVELS_MAX 5
  23#include "vmm.h"
  24
  25#include <subdev/fb.h>
  26
  27static void
  28nvkm_vmm_pt_del(struct nvkm_vmm_pt **ppgt)
  29{
  30        struct nvkm_vmm_pt *pgt = *ppgt;
  31        if (pgt) {
  32                kvfree(pgt->pde);
  33                kfree(pgt);
  34                *ppgt = NULL;
  35        }
  36}
  37
  38
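/* Allocate the software tracking structure for a page table.  For the
 * dual-PT levels (SPT/LPT) it is followed by one state byte per large-page
 * PTE (pgt->pte[]), tracking how many small-page PTEs are valid beneath
 * each LPTE and whether it is VALID and/or SPARSE; a PGD instead gets an
 * array of child pointers (pgt->pde[]).
 */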
  39static struct nvkm_vmm_pt *
  40nvkm_vmm_pt_new(const struct nvkm_vmm_desc *desc, bool sparse,
  41                const struct nvkm_vmm_page *page)
  42{
  43        const u32 pten = 1 << desc->bits;
  44        struct nvkm_vmm_pt *pgt;
  45        u32 lpte = 0;
  46
  47        if (desc->type > PGT) {
  48                if (desc->type == SPT) {
  49                        const struct nvkm_vmm_desc *pair = page[-1].desc;
  50                        lpte = pten >> (desc->bits - pair->bits);
  51                } else {
  52                        lpte = pten;
  53                }
  54        }
  55
  56        if (!(pgt = kzalloc(sizeof(*pgt) + lpte, GFP_KERNEL)))
  57                return NULL;
  58        pgt->page = page ? page->shift : 0;
  59        pgt->sparse = sparse;
  60
  61        if (desc->type == PGD) {
  62                pgt->pde = kvcalloc(pten, sizeof(*pgt->pde), GFP_KERNEL);
  63                if (!pgt->pde) {
  64                        kfree(pgt);
  65                        return NULL;
  66                }
  67        }
  68
  69        return pgt;
  70}
  71
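/* State for a single walk of the page-table tree (see nvkm_vmm_iter()).
 *
 * lvl counts down from max (the root/PD level) to 0 (the level currently
 * being operated on); pte[lvl] is the current index at each level and
 * pt[lvl] the matching software PT.  flush records the level closest to
 * the root that was modified, as a depth below the PD, so nvkm_vmm_flush()
 * can tell the backend how much TLB state needs invalidating.
 */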
  72struct nvkm_vmm_iter {
  73        const struct nvkm_vmm_page *page;
  74        const struct nvkm_vmm_desc *desc;
  75        struct nvkm_vmm *vmm;
  76        u64 cnt;
  77        u16 max, lvl;
  78        u32 pte[NVKM_VMM_LEVELS_MAX];
  79        struct nvkm_vmm_pt *pt[NVKM_VMM_LEVELS_MAX];
  80        int flush;
  81};
  82
  83#ifdef CONFIG_NOUVEAU_DEBUG_MMU
  84static const char *
  85nvkm_vmm_desc_type(const struct nvkm_vmm_desc *desc)
  86{
  87        switch (desc->type) {
  88        case PGD: return "PGD";
  89        case PGT: return "PGT";
  90        case SPT: return "SPT";
  91        case LPT: return "LPT";
  92        default:
  93                return "UNKNOWN";
  94        }
  95}
  96
  97static void
  98nvkm_vmm_trace(struct nvkm_vmm_iter *it, char *buf)
  99{
 100        int lvl;
 101        for (lvl = it->max; lvl >= 0; lvl--) {
 102                if (lvl >= it->lvl)
 103                        buf += sprintf(buf,  "%05x:", it->pte[lvl]);
 104                else
 105                        buf += sprintf(buf, "xxxxx:");
 106        }
 107}
 108
 109#define TRA(i,f,a...) do {                                                     \
 110        char _buf[NVKM_VMM_LEVELS_MAX * 7];                                    \
 111        struct nvkm_vmm_iter *_it = (i);                                       \
 112        nvkm_vmm_trace(_it, _buf);                                             \
 113        VMM_TRACE(_it->vmm, "%s "f, _buf, ##a);                                \
 114} while(0)
 115#else
 116#define TRA(i,f,a...)
 117#endif
 118
 119static inline void
 120nvkm_vmm_flush_mark(struct nvkm_vmm_iter *it)
 121{
 122        it->flush = min(it->flush, it->max - it->lvl);
 123}
 124
 125static inline void
 126nvkm_vmm_flush(struct nvkm_vmm_iter *it)
 127{
 128        if (it->flush != NVKM_VMM_LEVELS_MAX) {
 129                if (it->vmm->func->flush) {
 130                        TRA(it, "flush: %d", it->flush);
 131                        it->vmm->func->flush(it->vmm, it->flush);
 132                }
 133                it->flush = NVKM_VMM_LEVELS_MAX;
 134        }
 135}
 136
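/* Called once the last reference to a PT has been dropped: clear the PDE
 * that pointed at it in the parent PD, recursing upwards if the parent
 * itself becomes empty, then free the PT's backing memory and, once both
 * halves of a dual PT are gone, its software tracking structure.
 */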
 137static void
 138nvkm_vmm_unref_pdes(struct nvkm_vmm_iter *it)
 139{
 140        const struct nvkm_vmm_desc *desc = it->desc;
 141        const int type = desc[it->lvl].type == SPT;
 142        struct nvkm_vmm_pt *pgd = it->pt[it->lvl + 1];
 143        struct nvkm_vmm_pt *pgt = it->pt[it->lvl];
 144        struct nvkm_mmu_pt *pt = pgt->pt[type];
 145        struct nvkm_vmm *vmm = it->vmm;
 146        u32 pdei = it->pte[it->lvl + 1];
 147
 148        /* Recurse up the tree, unreferencing/destroying unneeded PDs. */
 149        it->lvl++;
 150        if (--pgd->refs[0]) {
 151                const struct nvkm_vmm_desc_func *func = desc[it->lvl].func;
 152                /* PD has other valid PDEs, so we need a proper update. */
 153                TRA(it, "PDE unmap %s", nvkm_vmm_desc_type(&desc[it->lvl - 1]));
 154                pgt->pt[type] = NULL;
 155                if (!pgt->refs[!type]) {
 156                        /* PDE no longer required. */
 157                        if (pgd->pt[0]) {
 158                                if (pgt->sparse) {
 159                                        func->sparse(vmm, pgd->pt[0], pdei, 1);
 160                                        pgd->pde[pdei] = NVKM_VMM_PDE_SPARSE;
 161                                } else {
 162                                        func->unmap(vmm, pgd->pt[0], pdei, 1);
 163                                        pgd->pde[pdei] = NULL;
 164                                }
 165                        } else {
 166                                /* Special handling for Tesla-class GPUs,
 167                                 * where there's no central PD, but each
 168                                 * instance has its own embedded PD.
 169                                 */
 170                                func->pde(vmm, pgd, pdei);
 171                                pgd->pde[pdei] = NULL;
 172                        }
 173                } else {
 174                        /* PDE was pointing at dual-PTs and we're removing
 175                         * one of them, leaving the other in place.
 176                         */
 177                        func->pde(vmm, pgd, pdei);
 178                }
 179
 180                /* GPU may have cached the PTs; flush before freeing. */
 181                nvkm_vmm_flush_mark(it);
 182                nvkm_vmm_flush(it);
 183        } else {
 184                /* PD has no valid PDEs left, so we can just destroy it. */
 185                nvkm_vmm_unref_pdes(it);
 186        }
 187
 188        /* Destroy PD/PT. */
 189        TRA(it, "PDE free %s", nvkm_vmm_desc_type(&desc[it->lvl - 1]));
 190        nvkm_mmu_ptc_put(vmm->mmu, vmm->bootstrapped, &pt);
 191        if (!pgt->refs[!type])
 192                nvkm_vmm_pt_del(&pgt);
 193        it->lvl--;
 194}
 195
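/* An SPT and its paired LPT describe the same virtual address range at
 * different granularities.  pgt->pte[] holds, for each LPTE, a count of
 * the SPTEs currently valid beneath it plus VALID/SPARSE state flags;
 * this function drops those counts and hands control of any fully-emptied
 * ranges back to the LPTEs.
 */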
 196static void
 197nvkm_vmm_unref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
 198                     const struct nvkm_vmm_desc *desc, u32 ptei, u32 ptes)
 199{
 200        const struct nvkm_vmm_desc *pair = it->page[-1].desc;
 201        const u32 sptb = desc->bits - pair->bits;
 202        const u32 sptn = 1 << sptb;
 203        struct nvkm_vmm *vmm = it->vmm;
 204        u32 spti = ptei & (sptn - 1), lpti, pteb;
 205
 206        /* Determine how many SPTEs are being touched under each LPTE,
 207         * and drop reference counts.
 208         */
 209        for (lpti = ptei >> sptb; ptes; spti = 0, lpti++) {
 210                const u32 pten = min(sptn - spti, ptes);
 211                pgt->pte[lpti] -= pten;
 212                ptes -= pten;
 213        }
 214
 215        /* We're done here if there's no corresponding LPT. */
 216        if (!pgt->refs[0])
 217                return;
 218
 219        for (ptei = pteb = ptei >> sptb; ptei < lpti; pteb = ptei) {
 220                /* Skip over any LPTEs that still have valid SPTEs. */
 221                if (pgt->pte[pteb] & NVKM_VMM_PTE_SPTES) {
 222                        for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
 223                                if (!(pgt->pte[ptei] & NVKM_VMM_PTE_SPTES))
 224                                        break;
 225                        }
 226                        continue;
 227                }
 228
 229                /* As there are no more non-UNMAPPED SPTEs left in the range
 230                 * covered by a number of LPTEs, the LPTEs once again take
 231                 * control over their address range.
 232                 *
 233                 * Determine how many LPTEs need to transition state.
 234                 */
 235                pgt->pte[ptei] &= ~NVKM_VMM_PTE_VALID;
 236                for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
 237                        if (pgt->pte[ptei] & NVKM_VMM_PTE_SPTES)
 238                                break;
 239                        pgt->pte[ptei] &= ~NVKM_VMM_PTE_VALID;
 240                }
 241
 242                if (pgt->pte[pteb] & NVKM_VMM_PTE_SPARSE) {
 243                        TRA(it, "LPTE %05x: U -> S %d PTEs", pteb, ptes);
 244                        pair->func->sparse(vmm, pgt->pt[0], pteb, ptes);
 245                } else
 246                if (pair->func->invalid) {
 247                        /* If the MMU supports it, restore the LPTE to the
 248                         * INVALID state to tell the MMU there is no point
 249                         * trying to fetch the corresponding SPTEs.
 250                         */
 251                        TRA(it, "LPTE %05x: U -> I %d PTEs", pteb, ptes);
 252                        pair->func->invalid(vmm, pgt->pt[0], pteb, ptes);
 253                }
 254        }
 255}
 256
 257static bool
 258nvkm_vmm_unref_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes)
 259{
 260        const struct nvkm_vmm_desc *desc = it->desc;
 261        const int type = desc->type == SPT;
 262        struct nvkm_vmm_pt *pgt = it->pt[0];
 263
 264        /* Drop PTE references. */
 265        pgt->refs[type] -= ptes;
 266
 267        /* Dual-PTs need special handling, unless the PDE is becoming invalid. */
 268        if (desc->type == SPT && (pgt->refs[0] || pgt->refs[1]))
 269                nvkm_vmm_unref_sptes(it, pgt, desc, ptei, ptes);
 270
 271        /* PT no longer needed?  Destroy it. */
 272        if (!pgt->refs[type]) {
 273                it->lvl++;
 274                TRA(it, "%s empty", nvkm_vmm_desc_type(desc));
 275                it->lvl--;
 276                nvkm_vmm_unref_pdes(it);
 277                return false; /* PTE writes for unmap() not necessary. */
 278        }
 279
 280        return true;
 281}
 282
 283static void
 284nvkm_vmm_ref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
 285                   const struct nvkm_vmm_desc *desc, u32 ptei, u32 ptes)
 286{
 287        const struct nvkm_vmm_desc *pair = it->page[-1].desc;
 288        const u32 sptb = desc->bits - pair->bits;
 289        const u32 sptn = 1 << sptb;
 290        struct nvkm_vmm *vmm = it->vmm;
 291        u32 spti = ptei & (sptn - 1), lpti, pteb;
 292
 293        /* Determine how many SPTEs are being touched under each LPTE,
 294         * and increase reference counts.
 295         */
 296        for (lpti = ptei >> sptb; ptes; spti = 0, lpti++) {
 297                const u32 pten = min(sptn - spti, ptes);
 298                pgt->pte[lpti] += pten;
 299                ptes -= pten;
 300        }
 301
 302        /* We're done here if there's no corresponding LPT. */
 303        if (!pgt->refs[0])
 304                return;
 305
 306        for (ptei = pteb = ptei >> sptb; ptei < lpti; pteb = ptei) {
 307                /* Skip over any LPTEs that already have valid SPTEs. */
 308                if (pgt->pte[pteb] & NVKM_VMM_PTE_VALID) {
 309                        for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
 310                                if (!(pgt->pte[ptei] & NVKM_VMM_PTE_VALID))
 311                                        break;
 312                        }
 313                        continue;
 314                }
 315
 316                /* As there are now non-UNMAPPED SPTEs in the range covered
 317                 * by a number of LPTEs, we need to transfer control of the
 318                 * address range to the SPTEs.
 319                 *
 320                 * Determine how many LPTEs need to transition state.
 321                 */
 322                pgt->pte[ptei] |= NVKM_VMM_PTE_VALID;
 323                for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
 324                        if (pgt->pte[ptei] & NVKM_VMM_PTE_VALID)
 325                                break;
 326                        pgt->pte[ptei] |= NVKM_VMM_PTE_VALID;
 327                }
 328
 329                if (pgt->pte[pteb] & NVKM_VMM_PTE_SPARSE) {
 330                        const u32 spti = pteb * sptn;
 331                        const u32 sptc = ptes * sptn;
 332                        /* The entire LPTE is marked as sparse, so we need
 333                         * to make sure that the SPTEs are too.
 334                         */
 335                        TRA(it, "SPTE %05x: U -> S %d PTEs", spti, sptc);
 336                        desc->func->sparse(vmm, pgt->pt[1], spti, sptc);
 337                        /* Sparse LPTEs prevent SPTEs from being accessed. */
 338                        TRA(it, "LPTE %05x: S -> U %d PTEs", pteb, ptes);
 339                        pair->func->unmap(vmm, pgt->pt[0], pteb, ptes);
 340                } else
 341                if (pair->func->invalid) {
 342                        /* The MMU supports blocking SPTEs by marking an LPTE
 343                         * as INVALID.  We need to reverse that here.
 344                         */
 345                        TRA(it, "LPTE %05x: I -> U %d PTEs", pteb, ptes);
 346                        pair->func->unmap(vmm, pgt->pt[0], pteb, ptes);
 347                }
 348        }
 349}
 350
 351static bool
 352nvkm_vmm_ref_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes)
 353{
 354        const struct nvkm_vmm_desc *desc = it->desc;
 355        const int type = desc->type == SPT;
 356        struct nvkm_vmm_pt *pgt = it->pt[0];
 357
 358        /* Take PTE references. */
 359        pgt->refs[type] += ptes;
 360
 361        /* Dual-PTs need special handling. */
 362        if (desc->type == SPT)
 363                nvkm_vmm_ref_sptes(it, pgt, desc, ptei, ptes);
 364
 365        return true;
 366}
 367
 368static void
 369nvkm_vmm_sparse_ptes(const struct nvkm_vmm_desc *desc,
 370                     struct nvkm_vmm_pt *pgt, u32 ptei, u32 ptes)
 371{
 372        if (desc->type == PGD) {
 373                while (ptes--)
 374                        pgt->pde[ptei++] = NVKM_VMM_PDE_SPARSE;
 375        } else
 376        if (desc->type == LPT) {
 377                memset(&pgt->pte[ptei], NVKM_VMM_PTE_SPARSE, ptes);
 378        }
 379}
 380
 381static bool
 382nvkm_vmm_sparse_unref_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes)
 383{
 384        struct nvkm_vmm_pt *pt = it->pt[0];
 385        if (it->desc->type == PGD)
 386                memset(&pt->pde[ptei], 0x00, sizeof(pt->pde[0]) * ptes);
 387        else
 388        if (it->desc->type == LPT)
 389                memset(&pt->pte[ptei], 0x00, sizeof(pt->pte[0]) * ptes);
 390        return nvkm_vmm_unref_ptes(it, ptei, ptes);
 391}
 392
 393static bool
 394nvkm_vmm_sparse_ref_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes)
 395{
 396        nvkm_vmm_sparse_ptes(it->desc, it->pt[0], ptei, ptes);
 397        return nvkm_vmm_ref_ptes(it, ptei, ptes);
 398}
 399
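/* Allocate the GPU-side memory for a page table, initialise its entries
 * to the appropriate empty state (sparse, invalid or unmapped), and write
 * the PDE in the parent PD to point at it.
 */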
 400static bool
 401nvkm_vmm_ref_hwpt(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgd, u32 pdei)
 402{
 403        const struct nvkm_vmm_desc *desc = &it->desc[it->lvl - 1];
 404        const int type = desc->type == SPT;
 405        struct nvkm_vmm_pt *pgt = pgd->pde[pdei];
 406        const bool zero = !pgt->sparse && !desc->func->invalid;
 407        struct nvkm_vmm *vmm = it->vmm;
 408        struct nvkm_mmu *mmu = vmm->mmu;
 409        struct nvkm_mmu_pt *pt;
 410        u32 pten = 1 << desc->bits;
 411        u32 pteb, ptei, ptes;
 412        u32 size = desc->size * pten;
 413
 414        pgd->refs[0]++;
 415
 416        pgt->pt[type] = nvkm_mmu_ptc_get(mmu, size, desc->align, zero);
 417        if (!pgt->pt[type]) {
 418                it->lvl--;
 419                nvkm_vmm_unref_pdes(it);
 420                return false;
 421        }
 422
 423        if (zero)
 424                goto done;
 425
 426        pt = pgt->pt[type];
 427
 428        if (desc->type == LPT && pgt->refs[1]) {
 429                /* An SPT already exists covering the same range as this LPT,
 430                 * which means we need to be careful that any LPTEs which
 431                 * overlap valid SPTEs are written as UNMAPPED, as opposed to
 432                 * INVALID or SPARSE, which would prevent the MMU from looking
 433                 * at the SPTEs on some GPUs.
 434                 */
 435                for (ptei = pteb = 0; ptei < pten; pteb = ptei) {
 436                        bool spte = pgt->pte[ptei] & NVKM_VMM_PTE_SPTES;
 437                        for (ptes = 1, ptei++; ptei < pten; ptes++, ptei++) {
 438                                bool next = pgt->pte[ptei] & NVKM_VMM_PTE_SPTES;
 439                                if (spte != next)
 440                                        break;
 441                        }
 442
 443                        if (!spte) {
 444                                if (pgt->sparse)
 445                                        desc->func->sparse(vmm, pt, pteb, ptes);
 446                                else
 447                                        desc->func->invalid(vmm, pt, pteb, ptes);
 448                                memset(&pgt->pte[pteb], 0x00, ptes);
 449                        } else {
 450                                desc->func->unmap(vmm, pt, pteb, ptes);
 451                                while (ptes--)
 452                                        pgt->pte[pteb++] |= NVKM_VMM_PTE_VALID;
 453                        }
 454                }
 455        } else {
 456                if (pgt->sparse) {
 457                        nvkm_vmm_sparse_ptes(desc, pgt, 0, pten);
 458                        desc->func->sparse(vmm, pt, 0, pten);
 459                } else {
 460                        desc->func->invalid(vmm, pt, 0, pten);
 461                }
 462        }
 463
 464done:
 465        TRA(it, "PDE write %s", nvkm_vmm_desc_type(desc));
 466        it->desc[it->lvl].func->pde(it->vmm, pgd, pdei);
 467        nvkm_vmm_flush_mark(it);
 468        return true;
 469}
 470
 471static bool
 472nvkm_vmm_ref_swpt(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgd, u32 pdei)
 473{
 474        const struct nvkm_vmm_desc *desc = &it->desc[it->lvl - 1];
 475        struct nvkm_vmm_pt *pgt = pgd->pde[pdei];
 476
 477        pgt = nvkm_vmm_pt_new(desc, NVKM_VMM_PDE_SPARSED(pgt), it->page);
 478        if (!pgt) {
 479                if (!pgd->refs[0])
 480                        nvkm_vmm_unref_pdes(it);
 481                return false;
 482        }
 483
 484        pgd->pde[pdei] = pgt;
 485        return true;
 486}
 487
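/* Core page-tree walker used by all of the nvkm_vmm_ptes_*() helpers
 * below.  REF_PTES adjusts software reference counts, MAP_PTES/CLR_PTES
 * write or clear the hardware PTEs; any of them may be NULL.  Returns
 * ~0ULL on success, or the address at which the walk failed so the caller
 * can unwind a partially-completed operation.
 */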
 488static inline u64
 489nvkm_vmm_iter(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
 490              u64 addr, u64 size, const char *name, bool ref,
 491              bool (*REF_PTES)(struct nvkm_vmm_iter *, u32, u32),
 492              nvkm_vmm_pte_func MAP_PTES, struct nvkm_vmm_map *map,
 493              nvkm_vmm_pxe_func CLR_PTES)
 494{
 495        const struct nvkm_vmm_desc *desc = page->desc;
 496        struct nvkm_vmm_iter it;
 497        u64 bits = addr >> page->shift;
 498
 499        it.page = page;
 500        it.desc = desc;
 501        it.vmm = vmm;
 502        it.cnt = size >> page->shift;
 503        it.flush = NVKM_VMM_LEVELS_MAX;
 504
 505        /* Deconstruct address into PTE indices for each mapping level. */
 506        for (it.lvl = 0; desc[it.lvl].bits; it.lvl++) {
 507                it.pte[it.lvl] = bits & ((1 << desc[it.lvl].bits) - 1);
 508                bits >>= desc[it.lvl].bits;
 509        }
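        /* Illustrative example (hypothetical two-level layout: 10 PT index
         * bits below 8 PD index bits, 12-bit pages): for addr 0x0123f000,
         * bits = addr >> 12 = 0x123f, giving pte[0] = 0x123f & 0x3ff = 0x23f
         * and pte[1] = 0x123f >> 10 = 0x4.
         */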
 510        it.max = --it.lvl;
 511        it.pt[it.max] = vmm->pd;
 512
 513        it.lvl = 0;
 514        TRA(&it, "%s: %016llx %016llx %d %lld PTEs", name,
 515                 addr, size, page->shift, it.cnt);
 516        it.lvl = it.max;
 517
 518        /* Depth-first traversal of page tables. */
 519        while (it.cnt) {
 520                struct nvkm_vmm_pt *pgt = it.pt[it.lvl];
 521                const int type = desc->type == SPT;
 522                const u32 pten = 1 << desc->bits;
 523                const u32 ptei = it.pte[0];
 524                const u32 ptes = min_t(u64, it.cnt, pten - ptei);
 525
 526                /* Walk down the tree, finding page tables for each level. */
 527                for (; it.lvl; it.lvl--) {
 528                        const u32 pdei = it.pte[it.lvl];
 529                        struct nvkm_vmm_pt *pgd = pgt;
 530
 531                        /* Software PT. */
 532                        if (ref && NVKM_VMM_PDE_INVALID(pgd->pde[pdei])) {
 533                                if (!nvkm_vmm_ref_swpt(&it, pgd, pdei))
 534                                        goto fail;
 535                        }
 536                        it.pt[it.lvl - 1] = pgt = pgd->pde[pdei];
 537
 538                        /* Hardware PT.
 539                         *
 540                         * This is a separate step from above due to GF100 and
 541                         * newer having dual page tables at some levels, which
 542                         * are refcounted independently.
 543                         */
 544                        if (ref && !pgt->refs[desc[it.lvl - 1].type == SPT]) {
 545                                if (!nvkm_vmm_ref_hwpt(&it, pgd, pdei))
 546                                        goto fail;
 547                        }
 548                }
 549
 550                /* Handle PTE updates. */
 551                if (!REF_PTES || REF_PTES(&it, ptei, ptes)) {
 552                        struct nvkm_mmu_pt *pt = pgt->pt[type];
 553                        if (MAP_PTES || CLR_PTES) {
 554                                if (MAP_PTES)
 555                                        MAP_PTES(vmm, pt, ptei, ptes, map);
 556                                else
 557                                        CLR_PTES(vmm, pt, ptei, ptes);
 558                                nvkm_vmm_flush_mark(&it);
 559                        }
 560                }
 561
 562                /* Walk back up the tree to the next position. */
 563                it.pte[it.lvl] += ptes;
 564                it.cnt -= ptes;
 565                if (it.cnt) {
 566                        while (it.pte[it.lvl] == (1 << desc[it.lvl].bits)) {
 567                                it.pte[it.lvl++] = 0;
 568                                it.pte[it.lvl]++;
 569                        }
 570                }
 571        };
 572
 573        nvkm_vmm_flush(&it);
 574        return ~0ULL;
 575
 576fail:
 577        /* Reconstruct the failure address so the caller is able to
 578         * reverse any partially completed operations.
 579         */
 580        addr = it.pte[it.max--];
 581        do {
 582                addr  = addr << desc[it.max].bits;
 583                addr |= it.pte[it.max];
 584        } while (it.max--);
 585
 586        return addr << page->shift;
 587}
 588
 589static void
 590nvkm_vmm_ptes_sparse_put(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
 591                         u64 addr, u64 size)
 592{
 593        nvkm_vmm_iter(vmm, page, addr, size, "sparse unref", false,
 594                      nvkm_vmm_sparse_unref_ptes, NULL, NULL,
 595                      page->desc->func->invalid ?
 596                      page->desc->func->invalid : page->desc->func->unmap);
 597}
 598
 599static int
 600nvkm_vmm_ptes_sparse_get(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
 601                         u64 addr, u64 size)
 602{
 603        if ((page->type & NVKM_VMM_PAGE_SPARSE)) {
 604                u64 fail = nvkm_vmm_iter(vmm, page, addr, size, "sparse ref",
 605                                         true, nvkm_vmm_sparse_ref_ptes, NULL,
 606                                         NULL, page->desc->func->sparse);
 607                if (fail != ~0ULL) {
 608                        if ((size = fail - addr))
 609                                nvkm_vmm_ptes_sparse_put(vmm, page, addr, size);
 610                        return -ENOMEM;
 611                }
 612                return 0;
 613        }
 614        return -EINVAL;
 615}
 616
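/* Split [addr, addr + size) into naturally-aligned chunks, using the
 * largest page size that fits each chunk, and sparse-reference (ref=true)
 * or release (ref=false) the PTEs covering it.  On failure, the portion
 * already processed is rolled back.
 */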
 617static int
 618nvkm_vmm_ptes_sparse(struct nvkm_vmm *vmm, u64 addr, u64 size, bool ref)
 619{
 620        const struct nvkm_vmm_page *page = vmm->func->page;
 621        int m = 0, i;
 622        u64 start = addr;
 623        u64 block;
 624
 625        while (size) {
 626                /* Limit maximum page size based on remaining size. */
 627                while (size < (1ULL << page[m].shift))
 628                        m++;
 629                i = m;
 630
 631                /* Find largest page size suitable for alignment. */
 632                while (!IS_ALIGNED(addr, 1ULL << page[i].shift))
 633                        i++;
 634
 635                /* Determine number of PTEs at this page size. */
 636                if (i != m) {
 637                        /* Limited to alignment boundary of next page size. */
 638                        u64 next = 1ULL << page[i - 1].shift;
 639                        u64 part = ALIGN(addr, next) - addr;
 640                        if (size - part >= next)
 641                                block = (part >> page[i].shift) << page[i].shift;
 642                        else
 643                                block = (size >> page[i].shift) << page[i].shift;
 644                } else {
 645                        block = (size >> page[i].shift) << page[i].shift;
 646                }
 647
 648                /* Perform operation. */
 649                if (ref) {
 650                        int ret = nvkm_vmm_ptes_sparse_get(vmm, &page[i], addr, block);
 651                        if (ret) {
 652                                if ((size = addr - start))
 653                                        nvkm_vmm_ptes_sparse(vmm, start, size, false);
 654                                return ret;
 655                        }
 656                } else {
 657                        nvkm_vmm_ptes_sparse_put(vmm, &page[i], addr, block);
 658                }
 659
 660                size -= block;
 661                addr += block;
 662        }
 663
 664        return 0;
 665}
 666
 667static void
 668nvkm_vmm_ptes_unmap_put(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
 669                        u64 addr, u64 size, bool sparse)
 670{
 671        const struct nvkm_vmm_desc_func *func = page->desc->func;
 672        nvkm_vmm_iter(vmm, page, addr, size, "unmap + unref",
 673                      false, nvkm_vmm_unref_ptes, NULL, NULL,
 674                      sparse ? func->sparse : func->invalid ? func->invalid :
 675                                                              func->unmap);
 676}
 677
 678static int
 679nvkm_vmm_ptes_get_map(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
 680                      u64 addr, u64 size, struct nvkm_vmm_map *map,
 681                      nvkm_vmm_pte_func func)
 682{
 683        u64 fail = nvkm_vmm_iter(vmm, page, addr, size, "ref + map", true,
 684                                 nvkm_vmm_ref_ptes, func, map, NULL);
 685        if (fail != ~0ULL) {
 686                if ((size = fail - addr))
 687                        nvkm_vmm_ptes_unmap_put(vmm, page, addr, size, false);
 688                return -ENOMEM;
 689        }
 690        return 0;
 691}
 692
 693static void
 694nvkm_vmm_ptes_unmap(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
 695                    u64 addr, u64 size, bool sparse)
 696{
 697        const struct nvkm_vmm_desc_func *func = page->desc->func;
 698        nvkm_vmm_iter(vmm, page, addr, size, "unmap", false, NULL, NULL, NULL,
 699                      sparse ? func->sparse : func->invalid ? func->invalid :
 700                                                              func->unmap);
 701}
 702
 703static void
 704nvkm_vmm_ptes_map(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
 705                  u64 addr, u64 size, struct nvkm_vmm_map *map,
 706                  nvkm_vmm_pte_func func)
 707{
 708        nvkm_vmm_iter(vmm, page, addr, size, "map", false,
 709                      NULL, func, map, NULL);
 710}
 711
 712static void
 713nvkm_vmm_ptes_put(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
 714                  u64 addr, u64 size)
 715{
 716        nvkm_vmm_iter(vmm, page, addr, size, "unref", false,
 717                      nvkm_vmm_unref_ptes, NULL, NULL, NULL);
 718}
 719
 720static int
 721nvkm_vmm_ptes_get(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
 722                  u64 addr, u64 size)
 723{
 724        u64 fail = nvkm_vmm_iter(vmm, page, addr, size, "ref", true,
 725                                 nvkm_vmm_ref_ptes, NULL, NULL, NULL);
 726        if (fail != ~0ULL) {
 727                if (fail != addr)
 728                        nvkm_vmm_ptes_put(vmm, page, addr, fail - addr);
 729                return -ENOMEM;
 730        }
 731        return 0;
 732}
 733
 734static inline struct nvkm_vma *
 735nvkm_vma_new(u64 addr, u64 size)
 736{
 737        struct nvkm_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL);
 738        if (vma) {
 739                vma->addr = addr;
 740                vma->size = size;
 741                vma->page = NVKM_VMA_PAGE_NONE;
 742                vma->refd = NVKM_VMA_PAGE_NONE;
 743        }
 744        return vma;
 745}
 746
 747struct nvkm_vma *
 748nvkm_vma_tail(struct nvkm_vma *vma, u64 tail)
 749{
 750        struct nvkm_vma *new;
 751
 752        BUG_ON(vma->size == tail);
 753
 754        if (!(new = nvkm_vma_new(vma->addr + (vma->size - tail), tail)))
 755                return NULL;
 756        vma->size -= tail;
 757
 758        new->mapref = vma->mapref;
 759        new->sparse = vma->sparse;
 760        new->page = vma->page;
 761        new->refd = vma->refd;
 762        new->used = vma->used;
 763        new->part = vma->part;
 764        new->user = vma->user;
 765        new->busy = vma->busy;
 766        list_add(&new->head, &vma->head);
 767        return new;
 768}
 769
 770static inline void
 771nvkm_vmm_free_remove(struct nvkm_vmm *vmm, struct nvkm_vma *vma)
 772{
 773        rb_erase(&vma->tree, &vmm->free);
 774}
 775
 776static inline void
 777nvkm_vmm_free_delete(struct nvkm_vmm *vmm, struct nvkm_vma *vma)
 778{
 779        nvkm_vmm_free_remove(vmm, vma);
 780        list_del(&vma->head);
 781        kfree(vma);
 782}
 783
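/* The free tree is keyed by size, then address, so allocation can locate
 * the smallest free block able to satisfy a request; the root tree
 * (nvkm_vmm_node_insert) is keyed by address for lookup of in-use VMAs.
 */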
 784static void
 785nvkm_vmm_free_insert(struct nvkm_vmm *vmm, struct nvkm_vma *vma)
 786{
 787        struct rb_node **ptr = &vmm->free.rb_node;
 788        struct rb_node *parent = NULL;
 789
 790        while (*ptr) {
 791                struct nvkm_vma *this = rb_entry(*ptr, typeof(*this), tree);
 792                parent = *ptr;
 793                if (vma->size < this->size)
 794                        ptr = &parent->rb_left;
 795                else
 796                if (vma->size > this->size)
 797                        ptr = &parent->rb_right;
 798                else
 799                if (vma->addr < this->addr)
 800                        ptr = &parent->rb_left;
 801                else
 802                if (vma->addr > this->addr)
 803                        ptr = &parent->rb_right;
 804                else
 805                        BUG();
 806        }
 807
 808        rb_link_node(&vma->tree, parent, ptr);
 809        rb_insert_color(&vma->tree, &vmm->free);
 810}
 811
 812static inline void
 813nvkm_vmm_node_remove(struct nvkm_vmm *vmm, struct nvkm_vma *vma)
 814{
 815        rb_erase(&vma->tree, &vmm->root);
 816}
 817
 818static inline void
 819nvkm_vmm_node_delete(struct nvkm_vmm *vmm, struct nvkm_vma *vma)
 820{
 821        nvkm_vmm_node_remove(vmm, vma);
 822        list_del(&vma->head);
 823        kfree(vma);
 824}
 825
 826static void
 827nvkm_vmm_node_insert(struct nvkm_vmm *vmm, struct nvkm_vma *vma)
 828{
 829        struct rb_node **ptr = &vmm->root.rb_node;
 830        struct rb_node *parent = NULL;
 831
 832        while (*ptr) {
 833                struct nvkm_vma *this = rb_entry(*ptr, typeof(*this), tree);
 834                parent = *ptr;
 835                if (vma->addr < this->addr)
 836                        ptr = &parent->rb_left;
 837                else
 838                if (vma->addr > this->addr)
 839                        ptr = &parent->rb_right;
 840                else
 841                        BUG();
 842        }
 843
 844        rb_link_node(&vma->tree, parent, ptr);
 845        rb_insert_color(&vma->tree, &vmm->root);
 846}
 847
 848struct nvkm_vma *
 849nvkm_vmm_node_search(struct nvkm_vmm *vmm, u64 addr)
 850{
 851        struct rb_node *node = vmm->root.rb_node;
 852        while (node) {
 853                struct nvkm_vma *vma = rb_entry(node, typeof(*vma), tree);
 854                if (addr < vma->addr)
 855                        node = node->rb_left;
 856                else
 857                if (addr >= vma->addr + vma->size)
 858                        node = node->rb_right;
 859                else
 860                        return vma;
 861        }
 862        return NULL;
 863}
 864
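/* Walk the address-ordered list of VMAs: node(vma, prev)/node(vma, next)
 * return the neighbouring allocation, or NULL at either end of the list.
 * Note the macro relies on a 'vmm' variable being in scope.
 */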
 865#define node(root, dir) (((root)->head.dir == &vmm->list) ? NULL :             \
 866        list_entry((root)->head.dir, struct nvkm_vma, head))
 867
 868static struct nvkm_vma *
 869nvkm_vmm_node_merge(struct nvkm_vmm *vmm, struct nvkm_vma *prev,
 870                    struct nvkm_vma *vma, struct nvkm_vma *next, u64 size)
 871{
 872        if (next) {
 873                if (vma->size == size) {
 874                        vma->size += next->size;
 875                        nvkm_vmm_node_delete(vmm, next);
 876                        if (prev) {
 877                                prev->size += vma->size;
 878                                nvkm_vmm_node_delete(vmm, vma);
 879                                return prev;
 880                        }
 881                        return vma;
 882                }
 883                BUG_ON(prev);
 884
 885                nvkm_vmm_node_remove(vmm, next);
 886                vma->size -= size;
 887                next->addr -= size;
 888                next->size += size;
 889                nvkm_vmm_node_insert(vmm, next);
 890                return next;
 891        }
 892
 893        if (prev) {
 894                if (vma->size != size) {
 895                        nvkm_vmm_node_remove(vmm, vma);
 896                        prev->size += size;
 897                        vma->addr += size;
 898                        vma->size -= size;
 899                        nvkm_vmm_node_insert(vmm, vma);
 900                } else {
 901                        prev->size += vma->size;
 902                        nvkm_vmm_node_delete(vmm, vma);
 903                }
 904                return prev;
 905        }
 906
 907        return vma;
 908}
 909
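/* Split 'vma' so that one node covers exactly [addr, addr + size); the
 * newly split-off nodes are flagged as 'part'.  Returns the node for the
 * requested range, or NULL on allocation failure (after undoing any
 * partial split).
 */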
 910struct nvkm_vma *
 911nvkm_vmm_node_split(struct nvkm_vmm *vmm,
 912                    struct nvkm_vma *vma, u64 addr, u64 size)
 913{
 914        struct nvkm_vma *prev = NULL;
 915
 916        if (vma->addr != addr) {
 917                prev = vma;
 918                if (!(vma = nvkm_vma_tail(vma, vma->size + vma->addr - addr)))
 919                        return NULL;
 920                vma->part = true;
 921                nvkm_vmm_node_insert(vmm, vma);
 922        }
 923
 924        if (vma->size != size) {
 925                struct nvkm_vma *tmp;
 926                if (!(tmp = nvkm_vma_tail(vma, vma->size - size))) {
 927                        nvkm_vmm_node_merge(vmm, prev, vma, NULL, vma->size);
 928                        return NULL;
 929                }
 930                tmp->part = true;
 931                nvkm_vmm_node_insert(vmm, tmp);
 932        }
 933
 934        return vma;
 935}
 936
 937static void
 938nvkm_vmm_dtor(struct nvkm_vmm *vmm)
 939{
 940        struct nvkm_vma *vma;
 941        struct rb_node *node;
 942
 943        while ((node = rb_first(&vmm->root))) {
 944                struct nvkm_vma *vma = rb_entry(node, typeof(*vma), tree);
 945                nvkm_vmm_put(vmm, &vma);
 946        }
 947
 948        if (vmm->bootstrapped) {
 949                const struct nvkm_vmm_page *page = vmm->func->page;
 950                const u64 limit = vmm->limit - vmm->start;
 951
 952                while (page[1].shift)
 953                        page++;
 954
 955                nvkm_mmu_ptc_dump(vmm->mmu);
 956                nvkm_vmm_ptes_put(vmm, page, vmm->start, limit);
 957        }
 958
 959        vma = list_first_entry(&vmm->list, typeof(*vma), head);
 960        list_del(&vma->head);
 961        kfree(vma);
 962        WARN_ON(!list_empty(&vmm->list));
 963
 964        if (vmm->nullp) {
 965                dma_free_coherent(vmm->mmu->subdev.device->dev, 16 * 1024,
 966                                  vmm->nullp, vmm->null);
 967        }
 968
 969        if (vmm->pd) {
 970                nvkm_mmu_ptc_put(vmm->mmu, true, &vmm->pd->pt[0]);
 971                nvkm_vmm_pt_del(&vmm->pd);
 972        }
 973}
 974
 975int
 976nvkm_vmm_ctor(const struct nvkm_vmm_func *func, struct nvkm_mmu *mmu,
 977              u32 pd_header, u64 addr, u64 size, struct lock_class_key *key,
 978              const char *name, struct nvkm_vmm *vmm)
 979{
 980        static struct lock_class_key _key;
 981        const struct nvkm_vmm_page *page = func->page;
 982        const struct nvkm_vmm_desc *desc;
 983        struct nvkm_vma *vma;
 984        int levels, bits = 0;
 985
 986        vmm->func = func;
 987        vmm->mmu = mmu;
 988        vmm->name = name;
 989        vmm->debug = mmu->subdev.debug;
 990        kref_init(&vmm->kref);
 991
 992        __mutex_init(&vmm->mutex, "&vmm->mutex", key ? key : &_key);
 993
 994        /* Locate the smallest page size supported by the backend; it will
 995         * have the deepest nesting of page tables.
 996         */
 997        while (page[1].shift)
 998                page++;
 999
1000        /* Locate the structure that describes the layout of the top-level
1001         * page table, and determine the number of valid bits in a virtual
1002         * address.
1003         */
1004        for (levels = 0, desc = page->desc; desc->bits; desc++, levels++)
1005                bits += desc->bits;
1006        bits += page->shift;
1007        desc--;
1008
1009        if (WARN_ON(levels > NVKM_VMM_LEVELS_MAX))
1010                return -EINVAL;
1011
1012        vmm->start = addr;
1013        vmm->limit = size ? (addr + size) : (1ULL << bits);
1014        if (vmm->start > vmm->limit || vmm->limit > (1ULL << bits))
1015                return -EINVAL;
1016
1017        /* Allocate top-level page table. */
1018        vmm->pd = nvkm_vmm_pt_new(desc, false, NULL);
1019        if (!vmm->pd)
1020                return -ENOMEM;
1021        vmm->pd->refs[0] = 1;
1022        INIT_LIST_HEAD(&vmm->join);
1023
1024        /* ... and the GPU storage for it, except on Tesla-class GPUs that
1025         * have the PD embedded in the instance structure.
1026         */
1027        if (desc->size) {
1028                const u32 size = pd_header + desc->size * (1 << desc->bits);
1029                vmm->pd->pt[0] = nvkm_mmu_ptc_get(mmu, size, desc->align, true);
1030                if (!vmm->pd->pt[0])
1031                        return -ENOMEM;
1032        }
1033
1034        /* Initialise address-space MM. */
1035        INIT_LIST_HEAD(&vmm->list);
1036        vmm->free = RB_ROOT;
1037        vmm->root = RB_ROOT;
1038
1039        if (!(vma = nvkm_vma_new(vmm->start, vmm->limit - vmm->start)))
1040                return -ENOMEM;
1041
1042        nvkm_vmm_free_insert(vmm, vma);
1043        list_add(&vma->head, &vmm->list);
1044        return 0;
1045}
1046
1047int
1048nvkm_vmm_new_(const struct nvkm_vmm_func *func, struct nvkm_mmu *mmu,
1049              u32 hdr, u64 addr, u64 size, struct lock_class_key *key,
1050              const char *name, struct nvkm_vmm **pvmm)
1051{
1052        if (!(*pvmm = kzalloc(sizeof(**pvmm), GFP_KERNEL)))
1053                return -ENOMEM;
1054        return nvkm_vmm_ctor(func, mmu, hdr, addr, size, key, name, *pvmm);
1055}
1056
1057void
1058nvkm_vmm_unmap_region(struct nvkm_vmm *vmm, struct nvkm_vma *vma)
1059{
1060        struct nvkm_vma *next = node(vma, next);
1061        struct nvkm_vma *prev = NULL;
1062
1063        nvkm_memory_tags_put(vma->memory, vmm->mmu->subdev.device, &vma->tags);
1064        nvkm_memory_unref(&vma->memory);
1065
1066        if (!vma->part || ((prev = node(vma, prev)), prev->memory))
1067                prev = NULL;
1068        if (!next->part || next->memory)
1069                next = NULL;
1070        nvkm_vmm_node_merge(vmm, prev, vma, next, vma->size);
1071}
1072
1073void
1074nvkm_vmm_unmap_locked(struct nvkm_vmm *vmm, struct nvkm_vma *vma)
1075{
1076        const struct nvkm_vmm_page *page = &vmm->func->page[vma->refd];
1077
1078        if (vma->mapref) {
1079                nvkm_vmm_ptes_unmap_put(vmm, page, vma->addr, vma->size, vma->sparse);
1080                vma->refd = NVKM_VMA_PAGE_NONE;
1081        } else {
1082                nvkm_vmm_ptes_unmap(vmm, page, vma->addr, vma->size, vma->sparse);
1083        }
1084
1085        nvkm_vmm_unmap_region(vmm, vma);
1086}
1087
1088void
1089nvkm_vmm_unmap(struct nvkm_vmm *vmm, struct nvkm_vma *vma)
1090{
1091        if (vma->memory) {
1092                mutex_lock(&vmm->mutex);
1093                nvkm_vmm_unmap_locked(vmm, vma);
1094                mutex_unlock(&vmm->mutex);
1095        }
1096}
1097
1098static int
1099nvkm_vmm_map_valid(struct nvkm_vmm *vmm, struct nvkm_vma *vma,
1100                   void *argv, u32 argc, struct nvkm_vmm_map *map)
1101{
1102        switch (nvkm_memory_target(map->memory)) {
1103        case NVKM_MEM_TARGET_VRAM:
1104                if (!(map->page->type & NVKM_VMM_PAGE_VRAM)) {
1105                        VMM_DEBUG(vmm, "%d !VRAM", map->page->shift);
1106                        return -EINVAL;
1107                }
1108                break;
1109        case NVKM_MEM_TARGET_HOST:
1110        case NVKM_MEM_TARGET_NCOH:
1111                if (!(map->page->type & NVKM_VMM_PAGE_HOST)) {
1112                        VMM_DEBUG(vmm, "%d !HOST", map->page->shift);
1113                        return -EINVAL;
1114                }
1115                break;
1116        default:
1117                WARN_ON(1);
1118                return -ENOSYS;
1119        }
1120
1121        if (!IS_ALIGNED(     vma->addr, 1ULL << map->page->shift) ||
1122            !IS_ALIGNED((u64)vma->size, 1ULL << map->page->shift) ||
1123            !IS_ALIGNED(   map->offset, 1ULL << map->page->shift) ||
1124            nvkm_memory_page(map->memory) < map->page->shift) {
1125                VMM_DEBUG(vmm, "alignment %016llx %016llx %016llx %d %d",
1126                    vma->addr, (u64)vma->size, map->offset, map->page->shift,
1127                    nvkm_memory_page(map->memory));
1128                return -EINVAL;
1129        }
1130
1131        return vmm->func->valid(vmm, argv, argc, map);
1132}
1133
1134static int
1135nvkm_vmm_map_choose(struct nvkm_vmm *vmm, struct nvkm_vma *vma,
1136                    void *argv, u32 argc, struct nvkm_vmm_map *map)
1137{
1138        for (map->page = vmm->func->page; map->page->shift; map->page++) {
1139                VMM_DEBUG(vmm, "trying %d", map->page->shift);
1140                if (!nvkm_vmm_map_valid(vmm, vma, argv, argc, map))
1141                        return 0;
1142        }
1143        return -EINVAL;
1144}
1145
1146static int
1147nvkm_vmm_map_locked(struct nvkm_vmm *vmm, struct nvkm_vma *vma,
1148                    void *argv, u32 argc, struct nvkm_vmm_map *map)
1149{
1150        nvkm_vmm_pte_func func;
1151        int ret;
1152
1153        /* Make sure we won't overrun the end of the memory object. */
1154        if (unlikely(nvkm_memory_size(map->memory) < map->offset + vma->size)) {
1155                VMM_DEBUG(vmm, "overrun %016llx %016llx %016llx",
1156                          nvkm_memory_size(map->memory),
1157                          map->offset, (u64)vma->size);
1158                return -EINVAL;
1159        }
1160
1161        /* Check remaining arguments for validity. */
1162        if (vma->page == NVKM_VMA_PAGE_NONE &&
1163            vma->refd == NVKM_VMA_PAGE_NONE) {
1164                /* Find the largest page size we can perform the mapping at. */
1165                const u32 debug = vmm->debug;
1166                vmm->debug = 0;
1167                ret = nvkm_vmm_map_choose(vmm, vma, argv, argc, map);
1168                vmm->debug = debug;
1169                if (ret) {
1170                        VMM_DEBUG(vmm, "invalid at any page size");
1171                        nvkm_vmm_map_choose(vmm, vma, argv, argc, map);
1172                        return -EINVAL;
1173                }
1174        } else {
1175                /* Page size of the VMA is already pre-determined. */
1176                if (vma->refd != NVKM_VMA_PAGE_NONE)
1177                        map->page = &vmm->func->page[vma->refd];
1178                else
1179                        map->page = &vmm->func->page[vma->page];
1180
1181                ret = nvkm_vmm_map_valid(vmm, vma, argv, argc, map);
1182                if (ret) {
1183                        VMM_DEBUG(vmm, "invalid %d\n", ret);
1184                        return ret;
1185                }
1186        }
1187
1188        /* Deal with the 'offset' argument, and fetch the backend function. */
1189        map->off = map->offset;
1190        if (map->mem) {
1191                for (; map->off; map->mem = map->mem->next) {
1192                        u64 size = (u64)map->mem->length << NVKM_RAM_MM_SHIFT;
1193                        if (size > map->off)
1194                                break;
1195                        map->off -= size;
1196                }
1197                func = map->page->desc->func->mem;
1198        } else
1199        if (map->sgl) {
1200                for (; map->off; map->sgl = sg_next(map->sgl)) {
1201                        u64 size = sg_dma_len(map->sgl);
1202                        if (size > map->off)
1203                                break;
1204                        map->off -= size;
1205                }
1206                func = map->page->desc->func->sgl;
1207        } else {
1208                map->dma += map->offset >> PAGE_SHIFT;
1209                map->off  = map->offset & PAGE_MASK;
1210                func = map->page->desc->func->dma;
1211        }
1212
1213        /* Perform the map. */
1214        if (vma->refd == NVKM_VMA_PAGE_NONE) {
1215                ret = nvkm_vmm_ptes_get_map(vmm, map->page, vma->addr, vma->size, map, func);
1216                if (ret)
1217                        return ret;
1218
1219                vma->refd = map->page - vmm->func->page;
1220        } else {
1221                nvkm_vmm_ptes_map(vmm, map->page, vma->addr, vma->size, map, func);
1222        }
1223
1224        nvkm_memory_tags_put(vma->memory, vmm->mmu->subdev.device, &vma->tags);
1225        nvkm_memory_unref(&vma->memory);
1226        vma->memory = nvkm_memory_ref(map->memory);
1227        vma->tags = map->tags;
1228        return 0;
1229}
1230
1231int
1232nvkm_vmm_map(struct nvkm_vmm *vmm, struct nvkm_vma *vma, void *argv, u32 argc,
1233             struct nvkm_vmm_map *map)
1234{
1235        int ret;
1236        mutex_lock(&vmm->mutex);
1237        ret = nvkm_vmm_map_locked(vmm, vma, argv, argc, map);
1238        vma->busy = false;
1239        mutex_unlock(&vmm->mutex);
1240        return ret;
1241}
1242
1243static void
1244nvkm_vmm_put_region(struct nvkm_vmm *vmm, struct nvkm_vma *vma)
1245{
1246        struct nvkm_vma *prev, *next;
1247
1248        if ((prev = node(vma, prev)) && !prev->used) {
1249                vma->addr  = prev->addr;
1250                vma->size += prev->size;
1251                nvkm_vmm_free_delete(vmm, prev);
1252        }
1253
1254        if ((next = node(vma, next)) && !next->used) {
1255                vma->size += next->size;
1256                nvkm_vmm_free_delete(vmm, next);
1257        }
1258
1259        nvkm_vmm_free_insert(vmm, vma);
1260}
1261
1262void
1263nvkm_vmm_put_locked(struct nvkm_vmm *vmm, struct nvkm_vma *vma)
1264{
1265        const struct nvkm_vmm_page *page = vmm->func->page;
1266        struct nvkm_vma *next = vma;
1267
1268        BUG_ON(vma->part);
1269
1270        if (vma->mapref || !vma->sparse) {
1271                do {
1272                        const bool map = next->memory != NULL;
1273                        const u8  refd = next->refd;
1274                        const u64 addr = next->addr;
1275                        u64 size = next->size;
1276
1277                        /* Merge regions that are in the same state. */
1278                        while ((next = node(next, next)) && next->part &&
1279                               (next->memory != NULL) == map &&
1280                               (next->refd == refd))
1281                                size += next->size;
1282
1283                        if (map) {
1284                                /* Region(s) are mapped, merge the unmap
1285                                 * and dereference into a single walk of
1286                                 * the page tree.
1287                                 */
1288                                nvkm_vmm_ptes_unmap_put(vmm, &page[refd], addr,
1289                                                        size, vma->sparse);
1290                        } else
1291                        if (refd != NVKM_VMA_PAGE_NONE) {
1292                                /* Drop allocation-time PTE references. */
1293                                nvkm_vmm_ptes_put(vmm, &page[refd], addr, size);
1294                        }
1295                } while (next && next->part);
1296        }
1297
1298        /* Merge any mapped regions that were split from the initial
1299         * address-space allocation back into the allocated VMA, and
1300         * release memory/compression resources.
1301         */
1302        next = vma;
1303        do {
1304                if (next->memory)
1305                        nvkm_vmm_unmap_region(vmm, next);
1306        } while ((next = node(vma, next)) && next->part);
1307
1308        if (vma->sparse && !vma->mapref) {
1309                /* Sparse region that was allocated with a fixed page size,
1310                 * meaning all relevant PTEs were referenced once when the
1311                 * region was allocated, and remained that way, regardless
1312                 * of whether memory was mapped into it afterwards.
1313                 *
1314                 * The process of unmapping, unsparsing, and dereferencing
1315                 * PTEs can be done in a single page tree walk.
1316                 */
1317                nvkm_vmm_ptes_sparse_put(vmm, &page[vma->refd], vma->addr, vma->size);
1318        } else
1319        if (vma->sparse) {
1320                /* Sparse region that wasn't allocated with a fixed page size,
1321                 * PTE references were taken both at allocation time (to make
1322                 * the GPU see the region as sparse), and when mapping memory
1323                 * into the region.
1324                 *
1325                 * The latter was handled above, and the remaining references
1326                 * are dealt with here.
1327                 */
1328                nvkm_vmm_ptes_sparse(vmm, vma->addr, vma->size, false);
1329        }
1330
1331        /* Remove VMA from the list of allocated nodes. */
1332        nvkm_vmm_node_remove(vmm, vma);
1333
1334        /* Merge VMA back into the free list. */
1335        vma->page = NVKM_VMA_PAGE_NONE;
1336        vma->refd = NVKM_VMA_PAGE_NONE;
1337        vma->used = false;
1338        vma->user = false;
1339        nvkm_vmm_put_region(vmm, vma);
1340}
1341
1342void
1343nvkm_vmm_put(struct nvkm_vmm *vmm, struct nvkm_vma **pvma)
1344{
1345        struct nvkm_vma *vma = *pvma;
1346        if (vma) {
1347                mutex_lock(&vmm->mutex);
1348                nvkm_vmm_put_locked(vmm, vma);
1349                mutex_unlock(&vmm->mutex);
1350                *pvma = NULL;
1351        }
1352}
1353
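/* Allocate a region of virtual address space.
 *
 *   getref: reference PTEs for the entire region up front (fixed page size).
 *   mapref: PTE references are managed by map/unmap instead.
 *   sparse: fill the region with sparse mappings at allocation time.
 *   shift:  requested page-size shift (required with getref, or when the
 *           backend has page_block restrictions; 0 = chosen at map time).
 *   align:  minimum alignment, as a shift, for the returned address.
 */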
1354int
1355nvkm_vmm_get_locked(struct nvkm_vmm *vmm, bool getref, bool mapref, bool sparse,
1356                    u8 shift, u8 align, u64 size, struct nvkm_vma **pvma)
1357{
1358        const struct nvkm_vmm_page *page = &vmm->func->page[NVKM_VMA_PAGE_NONE];
1359        struct rb_node *node = NULL, *temp;
1360        struct nvkm_vma *vma = NULL, *tmp;
1361        u64 addr, tail;
1362        int ret;
1363
1364        VMM_TRACE(vmm, "getref %d mapref %d sparse %d "
1365                       "shift: %d align: %d size: %016llx",
1366                  getref, mapref, sparse, shift, align, size);
1367
1368        /* Zero-sized VMAs, or lazily-allocated sparse VMAs, make no sense. */
1369        if (unlikely(!size || (!getref && !mapref && sparse))) {
1370                VMM_DEBUG(vmm, "args %016llx %d %d %d",
1371                          size, getref, mapref, sparse);
1372                return -EINVAL;
1373        }
1374
1375        /* Tesla-class GPUs can only select page size per-PDE, which means
1376         * we're required to know the mapping granularity up-front to find
1377         * a suitable region of address-space.
1378         *
1379         * The same goes if we're requesting up-front allocation of PTEs.
1380         */
1381        if (unlikely((getref || vmm->func->page_block) && !shift)) {
1382                VMM_DEBUG(vmm, "page size required: %d %016llx",
1383                          getref, vmm->func->page_block);
1384                return -EINVAL;
1385        }
1386
1387        /* If a specific page size was requested, determine its index and
1388         * make sure the requested size is a multiple of the page size.
1389         */
1390        if (shift) {
1391                for (page = vmm->func->page; page->shift; page++) {
1392                        if (shift == page->shift)
1393                                break;
1394                }
1395
1396                if (!page->shift || !IS_ALIGNED(size, 1ULL << page->shift)) {
1397                        VMM_DEBUG(vmm, "page %d %016llx", shift, size);
1398                        return -EINVAL;
1399                }
1400                align = max_t(u8, align, shift);
1401        } else {
1402                align = max_t(u8, align, 12);
1403        }
1404
1405        /* Locate smallest block that can possibly satisfy the allocation. */
1406        temp = vmm->free.rb_node;
1407        while (temp) {
1408                struct nvkm_vma *this = rb_entry(temp, typeof(*this), tree);
1409                if (this->size < size) {
1410                        temp = temp->rb_right;
1411                } else {
1412                        node = temp;
1413                        temp = temp->rb_left;
1414                }
1415        }
1416
1417        if (unlikely(!node))
1418                return -ENOSPC;
1419
1420        /* Take into account alignment restrictions, trying larger blocks
1421         * in turn until we find a suitable free block.
1422         */
1423        do {
1424                struct nvkm_vma *this = rb_entry(node, typeof(*this), tree);
1425                struct nvkm_vma *prev = node(this, prev);
1426                struct nvkm_vma *next = node(this, next);
1427                const int p = page - vmm->func->page;
1428
1429                addr = this->addr;
1430                if (vmm->func->page_block && prev && prev->page != p)
1431                        addr = ALIGN(addr, vmm->func->page_block);
1432                addr = ALIGN(addr, 1ULL << align);
1433
1434                tail = this->addr + this->size;
1435                if (vmm->func->page_block && next && next->page != p)
1436                        tail = ALIGN_DOWN(tail, vmm->func->page_block);
1437
1438                if (addr <= tail && tail - addr >= size) {
1439                        nvkm_vmm_free_remove(vmm, this);
1440                        vma = this;
1441                        break;
1442                }
1443        } while ((node = rb_next(node)));
1444
1445        if (unlikely(!vma))
1446                return -ENOSPC;
1447
1448        /* If the VMA we found isn't already exactly the requested size,
1449         * it needs to be split, and the remaining free blocks returned.
1450         */
1451        if (addr != vma->addr) {
1452                if (!(tmp = nvkm_vma_tail(vma, vma->size + vma->addr - addr))) {
1453                        nvkm_vmm_put_region(vmm, vma);
1454                        return -ENOMEM;
1455                }
1456                nvkm_vmm_free_insert(vmm, vma);
1457                vma = tmp;
1458        }
1459
1460        if (size != vma->size) {
1461                if (!(tmp = nvkm_vma_tail(vma, vma->size - size))) {
1462                        nvkm_vmm_put_region(vmm, vma);
1463                        return -ENOMEM;
1464                }
1465                nvkm_vmm_free_insert(vmm, tmp);
1466        }
1467
1468        /* Pre-allocate page tables and/or set up sparse mappings. */
1469        if (sparse && getref)
1470                ret = nvkm_vmm_ptes_sparse_get(vmm, page, vma->addr, vma->size);
1471        else if (sparse)
1472                ret = nvkm_vmm_ptes_sparse(vmm, vma->addr, vma->size, true);
1473        else if (getref)
1474                ret = nvkm_vmm_ptes_get(vmm, page, vma->addr, vma->size);
1475        else
1476                ret = 0;
1477        if (ret) {
1478                nvkm_vmm_put_region(vmm, vma);
1479                return ret;
1480        }
1481
1482        vma->mapref = mapref && !getref;
1483        vma->sparse = sparse;
1484        vma->page = page - vmm->func->page;
1485        vma->refd = getref ? vma->page : NVKM_VMA_PAGE_NONE;
1486        vma->used = true;
1487        nvkm_vmm_node_insert(vmm, vma);
1488        *pvma = vma;
1489        return 0;
1490}
1491
1492int
1493nvkm_vmm_get(struct nvkm_vmm *vmm, u8 page, u64 size, struct nvkm_vma **pvma)
1494{
1495        int ret;
1496        mutex_lock(&vmm->mutex);
1497        ret = nvkm_vmm_get_locked(vmm, false, true, false, page, 0, size, pvma);
1498        mutex_unlock(&vmm->mutex);
1499        return ret;
1500}
1501
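/* A minimal usage sketch (illustrative only; the struct nvkm_vmm_map setup
 * and error handling are elided):
 *
 *      struct nvkm_vma *vma;
 *      int ret = nvkm_vmm_get(vmm, 12, 0x10000, &vma);  // 64KiB of VA, 4KiB pages
 *      if (ret == 0) {
 *              // describe the backing memory in a struct nvkm_vmm_map, then:
 *              // nvkm_vmm_map(vmm, vma, argv, argc, &map);
 *              nvkm_vmm_put(vmm, &vma);
 *      }
 */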
1502void
1503nvkm_vmm_part(struct nvkm_vmm *vmm, struct nvkm_memory *inst)
1504{
1505        if (inst && vmm->func->part) {
1506                mutex_lock(&vmm->mutex);
1507                vmm->func->part(vmm, inst);
1508                mutex_unlock(&vmm->mutex);
1509        }
1510}
1511
1512int
1513nvkm_vmm_join(struct nvkm_vmm *vmm, struct nvkm_memory *inst)
1514{
1515        int ret = 0;
1516        if (vmm->func->join) {
1517                mutex_lock(&vmm->mutex);
1518                ret = vmm->func->join(vmm, inst);
1519                mutex_unlock(&vmm->mutex);
1520        }
1521        return ret;
1522}
1523
1524static bool
1525nvkm_vmm_boot_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes)
1526{
1527        const struct nvkm_vmm_desc *desc = it->desc;
1528        const int type = desc->type == SPT;
1529        nvkm_memory_boot(it->pt[0]->pt[type]->memory, it->vmm);
1530        return false;
1531}
1532
1533int
1534nvkm_vmm_boot(struct nvkm_vmm *vmm)
1535{
1536        const struct nvkm_vmm_page *page = vmm->func->page;
1537        const u64 limit = vmm->limit - vmm->start;
1538        int ret;
1539
1540        while (page[1].shift)
1541                page++;
1542
1543        ret = nvkm_vmm_ptes_get(vmm, page, vmm->start, limit);
1544        if (ret)
1545                return ret;
1546
1547        nvkm_vmm_iter(vmm, page, vmm->start, limit, "bootstrap", false,
1548                      nvkm_vmm_boot_ptes, NULL, NULL, NULL);
1549        vmm->bootstrapped = true;
1550        return 0;
1551}
1552
1553static void
1554nvkm_vmm_del(struct kref *kref)
1555{
1556        struct nvkm_vmm *vmm = container_of(kref, typeof(*vmm), kref);
1557        nvkm_vmm_dtor(vmm);
1558        kfree(vmm);
1559}
1560
1561void
1562nvkm_vmm_unref(struct nvkm_vmm **pvmm)
1563{
1564        struct nvkm_vmm *vmm = *pvmm;
1565        if (vmm) {
1566                kref_put(&vmm->kref, nvkm_vmm_del);
1567                *pvmm = NULL;
1568        }
1569}
1570
1571struct nvkm_vmm *
1572nvkm_vmm_ref(struct nvkm_vmm *vmm)
1573{
1574        if (vmm)
1575                kref_get(&vmm->kref);
1576        return vmm;
1577}
1578
1579int
1580nvkm_vmm_new(struct nvkm_device *device, u64 addr, u64 size, void *argv,
1581             u32 argc, struct lock_class_key *key, const char *name,
1582             struct nvkm_vmm **pvmm)
1583{
1584        struct nvkm_mmu *mmu = device->mmu;
1585        struct nvkm_vmm *vmm = NULL;
1586        int ret;
1587        ret = mmu->func->vmm.ctor(mmu, addr, size, argv, argc, key, name, &vmm);
1588        if (ret)
1589                nvkm_vmm_unref(&vmm);
1590        *pvmm = vmm;
1591        return ret;
1592}
1593