LXR linux/drivers/misc/habanalabs/common/memory.c

   1// SPDX-License-Identifier: GPL-2.0
   2
   3/*
   4 * Copyright 2016-2019 HabanaLabs, Ltd.
   5 * All Rights Reserved.
   6 */
   7
   8#include <uapi/misc/habanalabs.h>
   9#include "habanalabs.h"
  10#include "../include/hw_ip/mmu/mmu_general.h"
  11
  12#include <linux/uaccess.h>
  13#include <linux/slab.h>
  14#include <linux/genalloc.h>
  15
  16#define HL_MMU_DEBUG    0
  17
  18/*
  19 * The va ranges in context object contain a list with the available chunks of
  20 * device virtual memory.
  21 * There is one range for host allocations and one for DRAM allocations.
  22 *
  23 * On initialization each range contains one chunk of all of its available
  24 * virtual range which is a half of the total device virtual range.
  25 *
  26 * On each mapping of physical pages, a suitable virtual range chunk (with a
  27 * minimum size) is selected from the list. If the chunk size equals the
  28 * requested size, the chunk is returned. Otherwise, the chunk is split into
  29 * two chunks - one to return as result and a remainder to stay in the list.
  30 *
  31 * On each unmapping of a virtual address, the relevant virtual chunk is
  32 * returned to the list. The chunk is added to the list and if its edges match
  33 * the edges of the adjacent chunks (means a contiguous chunk can be created),
  34 * the chunks are merged.
  35 *
  36 * On finish, the list is checked to have only one chunk of all the relevant
  37 * virtual range (which is a half of the device total virtual range).
  38 * If not (means not all mappings were unmapped), a warning is printed.
  39 */
  40
  41/*
  42 * alloc_device_memory - allocate device memory
  43 *
  44 * @ctx                 : current context
  45 * @args                : host parameters containing the requested size
  46 * @ret_handle          : result handle
  47 *
  48 * This function does the following:
  49 * - Allocate the requested size rounded up to 2MB pages
  50 * - Return unique handle
  51 */
  52static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
  53                                u32 *ret_handle)
  54{
  55        struct hl_device *hdev = ctx->hdev;
  56        struct hl_vm *vm = &hdev->vm;
  57        struct hl_vm_phys_pg_pack *phys_pg_pack;
  58        u64 paddr = 0, total_size, num_pgs, i;
  59        u32 num_curr_pgs, page_size, page_shift;
  60        int handle, rc;
  61        bool contiguous;
  62
  63        num_curr_pgs = 0;
  64        page_size = hdev->asic_prop.dram_page_size;
  65        page_shift = __ffs(page_size);
  66        num_pgs = (args->alloc.mem_size + (page_size - 1)) >> page_shift;
  67        total_size = num_pgs << page_shift;
  68
  69        if (!total_size) {
  70                dev_err(hdev->dev, "Cannot allocate 0 bytes\n");
  71                return -EINVAL;
  72        }
  73
  74        contiguous = args->flags & HL_MEM_CONTIGUOUS;
  75
  76        if (contiguous) {
  77                paddr = (u64) gen_pool_alloc(vm->dram_pg_pool, total_size);
  78                if (!paddr) {
  79                        dev_err(hdev->dev,
  80                                "failed to allocate %llu contiguous pages with total size of %llu\n",
  81                                num_pgs, total_size);
  82                        return -ENOMEM;
  83                }
  84        }
  85
  86        phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
  87        if (!phys_pg_pack) {
  88                rc = -ENOMEM;
  89                goto pages_pack_err;
  90        }
  91
  92        phys_pg_pack->vm_type = VM_TYPE_PHYS_PACK;
  93        phys_pg_pack->asid = ctx->asid;
  94        phys_pg_pack->npages = num_pgs;
  95        phys_pg_pack->page_size = page_size;
  96        phys_pg_pack->total_size = total_size;
  97        phys_pg_pack->flags = args->flags;
  98        phys_pg_pack->contiguous = contiguous;
  99
 100        phys_pg_pack->pages = kvmalloc_array(num_pgs, sizeof(u64), GFP_KERNEL);
 101        if (ZERO_OR_NULL_PTR(phys_pg_pack->pages)) {
 102                rc = -ENOMEM;
 103                goto pages_arr_err;
 104        }
 105
 106        if (phys_pg_pack->contiguous) {
 107                for (i = 0 ; i < num_pgs ; i++)
 108                        phys_pg_pack->pages[i] = paddr + i * page_size;
 109        } else {
 110                for (i = 0 ; i < num_pgs ; i++) {
 111                        phys_pg_pack->pages[i] = (u64) gen_pool_alloc(
 112                                                        vm->dram_pg_pool,
 113                                                        page_size);
 114                        if (!phys_pg_pack->pages[i]) {
 115                                dev_err(hdev->dev,
 116                                        "Failed to allocate device memory (out of memory)\n");
 117                                rc = -ENOMEM;
 118                                goto page_err;
 119                        }
 120
 121                        num_curr_pgs++;
 122                }
 123        }
 124
 125        spin_lock(&vm->idr_lock);
 126        handle = idr_alloc(&vm->phys_pg_pack_handles, phys_pg_pack, 1, 0,
 127                                GFP_ATOMIC);
 128        spin_unlock(&vm->idr_lock);
 129
 130        if (handle < 0) {
 131                dev_err(hdev->dev, "Failed to get handle for page\n");
 132                rc = -EFAULT;
 133                goto idr_err;
 134        }
 135
 136        for (i = 0 ; i < num_pgs ; i++)
 137                kref_get(&vm->dram_pg_pool_refcount);
 138
 139        phys_pg_pack->handle = handle;
 140
 141        atomic64_add(phys_pg_pack->total_size, &ctx->dram_phys_mem);
 142        atomic64_add(phys_pg_pack->total_size, &hdev->dram_used_mem);
 143
 144        *ret_handle = handle;
 145
 146        return 0;
 147
 148idr_err:
 149page_err:
 150        if (!phys_pg_pack->contiguous)
 151                for (i = 0 ; i < num_curr_pgs ; i++)
 152                        gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[i],
 153                                        page_size);
 154
 155        kvfree(phys_pg_pack->pages);
 156pages_arr_err:
 157        kfree(phys_pg_pack);
 158pages_pack_err:
 159        if (contiguous)
 160                gen_pool_free(vm->dram_pg_pool, paddr, total_size);
 161
 162        return rc;
 163}
 164
 165/*
 166 * dma_map_host_va - DMA mapping of the given host virtual address.
 167 * @hdev: habanalabs device structure
 168 * @addr: the host virtual address of the memory area
 169 * @size: the size of the memory area
 170 * @p_userptr: pointer to result userptr structure
 171 *
 172 * This function does the following:
 173 * - Allocate userptr structure
 174 * - Pin the given host memory using the userptr structure
 175 * - Perform DMA mapping to have the DMA addresses of the pages
 176 */
 177static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size,
 178                                struct hl_userptr **p_userptr)
 179{
 180        struct hl_userptr *userptr;
 181        int rc;
 182
 183        userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
 184        if (!userptr) {
 185                rc = -ENOMEM;
 186                goto userptr_err;
 187        }
 188
 189        rc = hl_pin_host_memory(hdev, addr, size, userptr);
 190        if (rc) {
 191                dev_err(hdev->dev, "Failed to pin host memory\n");
 192                goto pin_err;
 193        }
 194
 195        rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
 196                                        userptr->sgt->nents, DMA_BIDIRECTIONAL);
 197        if (rc) {
 198                dev_err(hdev->dev, "failed to map sgt with DMA region\n");
 199                goto dma_map_err;
 200        }
 201
 202        userptr->dma_mapped = true;
 203        userptr->dir = DMA_BIDIRECTIONAL;
 204        userptr->vm_type = VM_TYPE_USERPTR;
 205
 206        *p_userptr = userptr;
 207
 208        return 0;
 209
 210dma_map_err:
 211        hl_unpin_host_memory(hdev, userptr);
 212pin_err:
 213        kfree(userptr);
 214userptr_err:
 215
 216        return rc;
 217}
 218
 219/*
 220 * dma_unmap_host_va - DMA unmapping of the given host virtual address.
 221 * @hdev: habanalabs device structure
 222 * @userptr: userptr to free
 223 *
 224 * This function does the following:
 225 * - Unpins the physical pages
 226 * - Frees the userptr structure
 227 */
 228static void dma_unmap_host_va(struct hl_device *hdev,
 229                                struct hl_userptr *userptr)
 230{
 231        hl_unpin_host_memory(hdev, userptr);
 232        kfree(userptr);
 233}
 234
 235/*
 236 * dram_pg_pool_do_release - free DRAM pages pool
 237 *
 238 * @ref                 : pointer to reference object
 239 *
 240 * This function does the following:
 241 * - Frees the idr structure of physical pages handles
 242 * - Frees the generic pool of DRAM physical pages
 243 */
 244static void dram_pg_pool_do_release(struct kref *ref)
 245{
 246        struct hl_vm *vm = container_of(ref, struct hl_vm,
 247                        dram_pg_pool_refcount);
 248
 249        /*
 250         * free the idr here as only here we know for sure that there are no
 251         * allocated physical pages and hence there are no handles in use
 252         */
 253        idr_destroy(&vm->phys_pg_pack_handles);
 254        gen_pool_destroy(vm->dram_pg_pool);
 255}
 256
 257/*
 258 * free_phys_pg_pack - free physical page pack
 259 * @hdev: habanalabs device structure
 260 * @phys_pg_pack: physical page pack to free
 261 *
 262 * This function does the following:
 263 * - For DRAM memory only, iterate over the pack and free each physical block
 264 *   structure by returning it to the general pool
 265 * - Free the hl_vm_phys_pg_pack structure
 266 */
 267static void free_phys_pg_pack(struct hl_device *hdev,
 268                                struct hl_vm_phys_pg_pack *phys_pg_pack)
 269{
 270        struct hl_vm *vm = &hdev->vm;
 271        u64 i;
 272
 273        if (!phys_pg_pack->created_from_userptr) {
 274                if (phys_pg_pack->contiguous) {
 275                        gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[0],
 276                                        phys_pg_pack->total_size);
 277
 278                        for (i = 0; i < phys_pg_pack->npages ; i++)
 279                                kref_put(&vm->dram_pg_pool_refcount,
 280                                        dram_pg_pool_do_release);
 281                } else {
 282                        for (i = 0 ; i < phys_pg_pack->npages ; i++) {
 283                                gen_pool_free(vm->dram_pg_pool,
 284                                                phys_pg_pack->pages[i],
 285                                                phys_pg_pack->page_size);
 286                                kref_put(&vm->dram_pg_pool_refcount,
 287                                        dram_pg_pool_do_release);
 288                        }
 289                }
 290        }
 291
 292        kvfree(phys_pg_pack->pages);
 293        kfree(phys_pg_pack);
 294}
 295
 296/*
 297 * free_device_memory - free device memory
 298 *
 299 * @ctx                  : current context
 300 * @handle              : handle of the memory chunk to free
 301 *
 302 * This function does the following:
 303 * - Free the device memory related to the given handle
 304 */
 305static int free_device_memory(struct hl_ctx *ctx, u32 handle)
 306{
 307        struct hl_device *hdev = ctx->hdev;
 308        struct hl_vm *vm = &hdev->vm;
 309        struct hl_vm_phys_pg_pack *phys_pg_pack;
 310
 311        spin_lock(&vm->idr_lock);
 312        phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
 313        if (phys_pg_pack) {
 314                if (atomic_read(&phys_pg_pack->mapping_cnt) > 0) {
 315                        dev_err(hdev->dev, "handle %u is mapped, cannot free\n",
 316                                handle);
 317                        spin_unlock(&vm->idr_lock);
 318                        return -EINVAL;
 319                }
 320
 321                /*
 322                 * must remove from idr before the freeing of the physical
 323                 * pages as the refcount of the pool is also the trigger of the
 324                 * idr destroy
 325                 */
 326                idr_remove(&vm->phys_pg_pack_handles, handle);
 327                spin_unlock(&vm->idr_lock);
 328
 329                atomic64_sub(phys_pg_pack->total_size, &ctx->dram_phys_mem);
 330                atomic64_sub(phys_pg_pack->total_size, &hdev->dram_used_mem);
 331
 332                free_phys_pg_pack(hdev, phys_pg_pack);
 333        } else {
 334                spin_unlock(&vm->idr_lock);
 335                dev_err(hdev->dev,
 336                        "free device memory failed, no match for handle %u\n",
 337                        handle);
 338                return -EINVAL;
 339        }
 340
 341        return 0;
 342}
 343
 344/*
 345 * clear_va_list_locked - free virtual addresses list
 346 *
 347 * @hdev                : habanalabs device structure
 348 * @va_list             : list of virtual addresses to free
 349 *
 350 * This function does the following:
 351 * - Iterate over the list and free each virtual addresses block
 352 *
 353 * This function should be called only when va_list lock is taken
 354 */
 355static void clear_va_list_locked(struct hl_device *hdev,
 356                struct list_head *va_list)
 357{
 358        struct hl_vm_va_block *va_block, *tmp;
 359
 360        list_for_each_entry_safe(va_block, tmp, va_list, node) {
 361                list_del(&va_block->node);
 362                kfree(va_block);
 363        }
 364}
 365
 366/*
 367 * print_va_list_locked    - print virtual addresses list
 368 *
 369 * @hdev                : habanalabs device structure
 370 * @va_list             : list of virtual addresses to print
 371 *
 372 * This function does the following:
 373 * - Iterate over the list and print each virtual addresses block
 374 *
 375 * This function should be called only when va_list lock is taken
 376 */
 377static void print_va_list_locked(struct hl_device *hdev,
 378                struct list_head *va_list)
 379{
 380#if HL_MMU_DEBUG
 381        struct hl_vm_va_block *va_block;
 382
 383        dev_dbg(hdev->dev, "print va list:\n");
 384
 385        list_for_each_entry(va_block, va_list, node)
 386                dev_dbg(hdev->dev,
 387                        "va block, start: 0x%llx, end: 0x%llx, size: %llu\n",
 388                        va_block->start, va_block->end, va_block->size);
 389#endif
 390}
 391
 392/*
 393 * merge_va_blocks_locked - merge a virtual block if possible
 394 *
 395 * @hdev                : pointer to the habanalabs device structure
 396 * @va_list             : pointer to the virtual addresses block list
 397 * @va_block            : virtual block to merge with adjacent blocks
 398 *
 399 * This function does the following:
 400 * - Merge the given blocks with the adjacent blocks if their virtual ranges
 401 *   create a contiguous virtual range
 402 *
 403 * This Function should be called only when va_list lock is taken
 404 */
 405static void merge_va_blocks_locked(struct hl_device *hdev,
 406                struct list_head *va_list, struct hl_vm_va_block *va_block)
 407{
 408        struct hl_vm_va_block *prev, *next;
 409
 410        prev = list_prev_entry(va_block, node);
 411        if (&prev->node != va_list && prev->end + 1 == va_block->start) {
 412                prev->end = va_block->end;
 413                prev->size = prev->end - prev->start;
 414                list_del(&va_block->node);
 415                kfree(va_block);
 416                va_block = prev;
 417        }
 418
 419        next = list_next_entry(va_block, node);
 420        if (&next->node != va_list && va_block->end + 1 == next->start) {
 421                next->start = va_block->start;
 422                next->size = next->end - next->start;
 423                list_del(&va_block->node);
 424                kfree(va_block);
 425        }
 426}
 427
 428/*
 429 * add_va_block_locked - add a virtual block to the virtual addresses list
 430 *
 431 * @hdev                : pointer to the habanalabs device structure
 432 * @va_list             : pointer to the virtual addresses block list
 433 * @start               : start virtual address
 434 * @end                 : end virtual address
 435 *
 436 * This function does the following:
 437 * - Add the given block to the virtual blocks list and merge with other
 438 * blocks if a contiguous virtual block can be created
 439 *
 440 * This Function should be called only when va_list lock is taken
 441 */
 442static int add_va_block_locked(struct hl_device *hdev,
 443                struct list_head *va_list, u64 start, u64 end)
 444{
 445        struct hl_vm_va_block *va_block, *res = NULL;
 446        u64 size = end - start;
 447
 448        print_va_list_locked(hdev, va_list);
 449
 450        list_for_each_entry(va_block, va_list, node) {
 451                /* TODO: remove upon matureness */
 452                if (hl_mem_area_crosses_range(start, size, va_block->start,
 453                                va_block->end)) {
 454                        dev_err(hdev->dev,
 455                                "block crossing ranges at start 0x%llx, end 0x%llx\n",
 456                                va_block->start, va_block->end);
 457                        return -EINVAL;
 458                }
 459
 460                if (va_block->end < start)
 461                        res = va_block;
 462        }
 463
 464        va_block = kmalloc(sizeof(*va_block), GFP_KERNEL);
 465        if (!va_block)
 466                return -ENOMEM;
 467
 468        va_block->start = start;
 469        va_block->end = end;
 470        va_block->size = size;
 471
 472        if (!res)
 473                list_add(&va_block->node, va_list);
 474        else
 475                list_add(&va_block->node, &res->node);
 476
 477        merge_va_blocks_locked(hdev, va_list, va_block);
 478
 479        print_va_list_locked(hdev, va_list);
 480
 481        return 0;
 482}
 483
 484/*
 485 * add_va_block - wrapper for add_va_block_locked
 486 *
 487 * @hdev                : pointer to the habanalabs device structure
 488 * @va_list             : pointer to the virtual addresses block list
 489 * @start               : start virtual address
 490 * @end                 : end virtual address
 491 *
 492 * This function does the following:
 493 * - Takes the list lock and calls add_va_block_locked
 494 */
 495static inline int add_va_block(struct hl_device *hdev,
 496                struct hl_va_range *va_range, u64 start, u64 end)
 497{
 498        int rc;
 499
 500        mutex_lock(&va_range->lock);
 501        rc = add_va_block_locked(hdev, &va_range->list, start, end);
 502        mutex_unlock(&va_range->lock);
 503
 504        return rc;
 505}
 506
 507/*
 508 * get_va_block() - get a virtual block for the given size and alignment.
 509 * @hdev: pointer to the habanalabs device structure.
 510 * @va_range: pointer to the virtual addresses range.
 511 * @size: requested block size.
 512 * @hint_addr: hint for requested address by the user.
 513 * @va_block_align: required alignment of the virtual block start address.
 514 *
 515 * This function does the following:
 516 * - Iterate on the virtual block list to find a suitable virtual block for the
 517 *   given size and alignment.
 518 * - Reserve the requested block and update the list.
 519 * - Return the start address of the virtual block.
 520 */
 521static u64 get_va_block(struct hl_device *hdev, struct hl_va_range *va_range,
 522                        u64 size, u64 hint_addr, u32 va_block_align)
 523{
 524        struct hl_vm_va_block *va_block, *new_va_block = NULL;
 525        u64 valid_start, valid_size, prev_start, prev_end, align_mask,
 526                res_valid_start = 0, res_valid_size = 0;
 527        bool add_prev = false;
 528
 529        align_mask = ~((u64)va_block_align - 1);
 530
 531        /* check if hint_addr is aligned */
 532        if (hint_addr & (va_block_align - 1))
 533                hint_addr = 0;
 534
 535        mutex_lock(&va_range->lock);
 536
 537        print_va_list_locked(hdev, &va_range->list);
 538
 539        list_for_each_entry(va_block, &va_range->list, node) {
 540                /* calc the first possible aligned addr */
 541                valid_start = va_block->start;
 542
 543                if (valid_start & (va_block_align - 1)) {
 544                        valid_start &= align_mask;
 545                        valid_start += va_block_align;
 546                        if (valid_start > va_block->end)
 547                                continue;
 548                }
 549
 550                valid_size = va_block->end - valid_start;
 551
 552                if (valid_size >= size &&
 553                        (!new_va_block || valid_size < res_valid_size)) {
 554                        new_va_block = va_block;
 555                        res_valid_start = valid_start;
 556                        res_valid_size = valid_size;
 557                }
 558
 559                if (hint_addr && hint_addr >= valid_start &&
 560                                ((hint_addr + size) <= va_block->end)) {
 561                        new_va_block = va_block;
 562                        res_valid_start = hint_addr;
 563                        res_valid_size = valid_size;
 564                        break;
 565                }
 566        }
 567
 568        if (!new_va_block) {
 569                dev_err(hdev->dev, "no available va block for size %llu\n",
 570                                size);
 571                goto out;
 572        }
 573
 574        if (res_valid_start > new_va_block->start) {
 575                prev_start = new_va_block->start;
 576                prev_end = res_valid_start - 1;
 577
 578                new_va_block->start = res_valid_start;
 579                new_va_block->size = res_valid_size;
 580
 581                add_prev = true;
 582        }
 583
 584        if (new_va_block->size > size) {
 585                new_va_block->start += size;
 586                new_va_block->size = new_va_block->end - new_va_block->start;
 587        } else {
 588                list_del(&new_va_block->node);
 589                kfree(new_va_block);
 590        }
 591
 592        if (add_prev)
 593                add_va_block_locked(hdev, &va_range->list, prev_start,
 594                                prev_end);
 595
 596        print_va_list_locked(hdev, &va_range->list);
 597out:
 598        mutex_unlock(&va_range->lock);
 599
 600        return res_valid_start;
 601}
 602
 603/*
 604 * get_sg_info - get number of pages and the DMA address from SG list
 605 *
 606 * @sg                 : the SG list
 607 * @dma_addr           : pointer to DMA address to return
 608 *
 609 * Calculate the number of consecutive pages described by the SG list. Take the
 610 * offset of the address in the first page, add to it the length and round it up
 611 * to the number of needed pages.
 612 */
 613static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr)
 614{
 615        *dma_addr = sg_dma_address(sg);
 616
 617        return ((((*dma_addr) & (PAGE_SIZE - 1)) + sg_dma_len(sg)) +
 618                        (PAGE_SIZE - 1)) >> PAGE_SHIFT;
 619}
 620
 621/*
 622 * init_phys_pg_pack_from_userptr - initialize physical page pack from host
 623 *                                  memory
 624 * @ctx: current context
 625 * @userptr: userptr to initialize from
 626 * @pphys_pg_pack: result pointer
 627 *
 628 * This function does the following:
 629 * - Pin the physical pages related to the given virtual block
 630 * - Create a physical page pack from the physical pages related to the given
 631 *   virtual block
 632 */
 633static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
 634                                struct hl_userptr *userptr,
 635                                struct hl_vm_phys_pg_pack **pphys_pg_pack)
 636{
 637        struct hl_vm_phys_pg_pack *phys_pg_pack;
 638        struct scatterlist *sg;
 639        dma_addr_t dma_addr;
 640        u64 page_mask, total_npages;
 641        u32 npages, page_size = PAGE_SIZE,
 642                huge_page_size = ctx->hdev->asic_prop.pmmu_huge.page_size;
 643        bool first = true, is_huge_page_opt = true;
 644        int rc, i, j;
 645        u32 pgs_in_huge_page = huge_page_size >> __ffs(page_size);
 646
 647        phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
 648        if (!phys_pg_pack)
 649                return -ENOMEM;
 650
 651        phys_pg_pack->vm_type = userptr->vm_type;
 652        phys_pg_pack->created_from_userptr = true;
 653        phys_pg_pack->asid = ctx->asid;
 654        atomic_set(&phys_pg_pack->mapping_cnt, 1);
 655
 656        /* Only if all dma_addrs are aligned to 2MB and their
 657         * sizes is at least 2MB, we can use huge page mapping.
 658         * We limit the 2MB optimization to this condition,
 659         * since later on we acquire the related VA range as one
 660         * consecutive block.
 661         */
 662        total_npages = 0;
 663        for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) {
 664                npages = get_sg_info(sg, &dma_addr);
 665
 666                total_npages += npages;
 667
 668                if ((npages % pgs_in_huge_page) ||
 669                                        (dma_addr & (huge_page_size - 1)))
 670                        is_huge_page_opt = false;
 671        }
 672
 673        if (is_huge_page_opt) {
 674                page_size = huge_page_size;
 675                do_div(total_npages, pgs_in_huge_page);
 676        }
 677
 678        page_mask = ~(((u64) page_size) - 1);
 679
 680        phys_pg_pack->pages = kvmalloc_array(total_npages, sizeof(u64),
 681                                                GFP_KERNEL);
 682        if (ZERO_OR_NULL_PTR(phys_pg_pack->pages)) {
 683                rc = -ENOMEM;
 684                goto page_pack_arr_mem_err;
 685        }
 686
 687        phys_pg_pack->npages = total_npages;
 688        phys_pg_pack->page_size = page_size;
 689        phys_pg_pack->total_size = total_npages * page_size;
 690
 691        j = 0;
 692        for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) {
 693                npages = get_sg_info(sg, &dma_addr);
 694
 695                /* align down to physical page size and save the offset */
 696                if (first) {
 697                        first = false;
 698                        phys_pg_pack->offset = dma_addr & (page_size - 1);
 699                        dma_addr &= page_mask;
 700                }
 701
 702                while (npages) {
 703                        phys_pg_pack->pages[j++] = dma_addr;
 704                        dma_addr += page_size;
 705
 706                        if (is_huge_page_opt)
 707                                npages -= pgs_in_huge_page;
 708                        else
 709                                npages--;
 710                }
 711        }
 712
 713        *pphys_pg_pack = phys_pg_pack;
 714
 715        return 0;
 716
 717page_pack_arr_mem_err:
 718        kfree(phys_pg_pack);
 719
 720        return rc;
 721}
 722
 723/*
 724 * map_phys_pg_pack - maps the physical page pack.
 725 * @ctx: current context
 726 * @vaddr: start address of the virtual area to map from
 727 * @phys_pg_pack: the pack of physical pages to map to
 728 *
 729 * This function does the following:
 730 * - Maps each chunk of virtual memory to matching physical chunk
 731 * - Stores number of successful mappings in the given argument
 732 * - Returns 0 on success, error code otherwise
 733 */
 734static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
 735                                struct hl_vm_phys_pg_pack *phys_pg_pack)
 736{
 737        struct hl_device *hdev = ctx->hdev;
 738        u64 next_vaddr = vaddr, paddr, mapped_pg_cnt = 0, i;
 739        u32 page_size = phys_pg_pack->page_size;
 740        int rc = 0;
 741
 742        for (i = 0 ; i < phys_pg_pack->npages ; i++) {
 743                paddr = phys_pg_pack->pages[i];
 744
 745                rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size,
 746                                (i + 1) == phys_pg_pack->npages);
 747                if (rc) {
 748                        dev_err(hdev->dev,
 749                                "map failed for handle %u, npages: %llu, mapped: %llu",
 750                                phys_pg_pack->handle, phys_pg_pack->npages,
 751                                mapped_pg_cnt);
 752                        goto err;
 753                }
 754
 755                mapped_pg_cnt++;
 756                next_vaddr += page_size;
 757        }
 758
 759        return 0;
 760
 761err:
 762        next_vaddr = vaddr;
 763        for (i = 0 ; i < mapped_pg_cnt ; i++) {
 764                if (hl_mmu_unmap(ctx, next_vaddr, page_size,
 765                                        (i + 1) == mapped_pg_cnt))
 766                        dev_warn_ratelimited(hdev->dev,
 767                                "failed to unmap handle %u, va: 0x%llx, pa: 0x%llx, page size: %u\n",
 768                                        phys_pg_pack->handle, next_vaddr,
 769                                        phys_pg_pack->pages[i], page_size);
 770
 771                next_vaddr += page_size;
 772        }
 773
 774        return rc;
 775}
 776
 777/*
 778 * unmap_phys_pg_pack - unmaps the physical page pack
 779 * @ctx: current context
 780 * @vaddr: start address of the virtual area to unmap
 781 * @phys_pg_pack: the pack of physical pages to unmap
 782 */
 783static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
 784                                struct hl_vm_phys_pg_pack *phys_pg_pack)
 785{
 786        struct hl_device *hdev = ctx->hdev;
 787        u64 next_vaddr, i;
 788        u32 page_size;
 789
 790        page_size = phys_pg_pack->page_size;
 791        next_vaddr = vaddr;
 792
 793        for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) {
 794                if (hl_mmu_unmap(ctx, next_vaddr, page_size,
 795                                       (i + 1) == phys_pg_pack->npages))
 796                        dev_warn_ratelimited(hdev->dev,
 797                        "unmap failed for vaddr: 0x%llx\n", next_vaddr);
 798
 799                /*
 800                 * unmapping on Palladium can be really long, so avoid a CPU
 801                 * soft lockup bug by sleeping a little between unmapping pages
 802                 */
 803                if (hdev->pldm)
 804                        usleep_range(500, 1000);
 805        }
 806}
 807
 808static int get_paddr_from_handle(struct hl_ctx *ctx, struct hl_mem_in *args,
 809                                u64 *paddr)
 810{
 811        struct hl_device *hdev = ctx->hdev;
 812        struct hl_vm *vm = &hdev->vm;
 813        struct hl_vm_phys_pg_pack *phys_pg_pack;
 814        u32 handle;
 815
 816        handle = lower_32_bits(args->map_device.handle);
 817        spin_lock(&vm->idr_lock);
 818        phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
 819        if (!phys_pg_pack) {
 820                spin_unlock(&vm->idr_lock);
 821                dev_err(hdev->dev, "no match for handle %u\n", handle);
 822                return -EINVAL;
 823        }
 824
 825        *paddr = phys_pg_pack->pages[0];
 826
 827        spin_unlock(&vm->idr_lock);
 828
 829        return 0;
 830}
 831
 832/*
 833 * map_device_va - map the given memory
 834 *
 835 * @ctx          : current context
 836 * @args         : host parameters with handle/host virtual address
 837 * @device_addr  : pointer to result device virtual address
 838 *
 839 * This function does the following:
 840 * - If given a physical device memory handle, map to a device virtual block
 841 *   and return the start address of this block
 842 * - If given a host virtual address and size, find the related physical pages,
 843 *   map a device virtual block to this pages and return the start address of
 844 *   this block
 845 */
 846static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 847                u64 *device_addr)
 848{
 849        struct hl_device *hdev = ctx->hdev;
 850        struct hl_vm *vm = &hdev->vm;
 851        struct hl_vm_phys_pg_pack *phys_pg_pack;
 852        struct hl_userptr *userptr = NULL;
 853        struct hl_vm_hash_node *hnode;
 854        struct hl_va_range *va_range;
 855        enum vm_type_t *vm_type;
 856        u64 ret_vaddr, hint_addr;
 857        u32 handle = 0, va_block_align;
 858        int rc;
 859        bool is_userptr = args->flags & HL_MEM_USERPTR;
 860
 861        /* Assume failure */
 862        *device_addr = 0;
 863
 864        if (is_userptr) {
 865                u64 addr = args->map_host.host_virt_addr,
 866                        size = args->map_host.mem_size;
 867                u32 page_size = hdev->asic_prop.pmmu.page_size,
 868                        huge_page_size = hdev->asic_prop.pmmu_huge.page_size;
 869
 870                rc = dma_map_host_va(hdev, addr, size, &userptr);
 871                if (rc) {
 872                        dev_err(hdev->dev, "failed to get userptr from va\n");
 873                        return rc;
 874                }
 875
 876                rc = init_phys_pg_pack_from_userptr(ctx, userptr,
 877                                &phys_pg_pack);
 878                if (rc) {
 879                        dev_err(hdev->dev,
 880                                "unable to init page pack for vaddr 0x%llx\n",
 881                                addr);
 882                        goto init_page_pack_err;
 883                }
 884
 885                vm_type = (enum vm_type_t *) userptr;
 886                hint_addr = args->map_host.hint_addr;
 887                handle = phys_pg_pack->handle;
 888
 889                /* get required alignment */
 890                if (phys_pg_pack->page_size == page_size) {
 891                        va_range = ctx->host_va_range;
 892
 893                        /*
 894                         * huge page alignment may be needed in case of regular
 895                         * page mapping, depending on the host VA alignment
 896                         */
 897                        if (addr & (huge_page_size - 1))
 898                                va_block_align = page_size;
 899                        else
 900                                va_block_align = huge_page_size;
 901                } else {
 902                        /*
 903                         * huge page alignment is needed in case of huge page
 904                         * mapping
 905                         */
 906                        va_range = ctx->host_huge_va_range;
 907                        va_block_align = huge_page_size;
 908                }
 909        } else {
 910                handle = lower_32_bits(args->map_device.handle);
 911
 912                spin_lock(&vm->idr_lock);
 913                phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
 914                if (!phys_pg_pack) {
 915                        spin_unlock(&vm->idr_lock);
 916                        dev_err(hdev->dev,
 917                                "no match for handle %u\n", handle);
 918                        return -EINVAL;
 919                }
 920
 921                /* increment now to avoid freeing device memory while mapping */
 922                atomic_inc(&phys_pg_pack->mapping_cnt);
 923
 924                spin_unlock(&vm->idr_lock);
 925
 926                vm_type = (enum vm_type_t *) phys_pg_pack;
 927
 928                hint_addr = args->map_device.hint_addr;
 929
 930                /* DRAM VA alignment is the same as the DRAM page size */
 931                va_range = ctx->dram_va_range;
 932                va_block_align = hdev->asic_prop.dmmu.page_size;
 933        }
 934
 935        /*
 936         * relevant for mapping device physical memory only, as host memory is
 937         * implicitly shared
 938         */
 939        if (!is_userptr && !(phys_pg_pack->flags & HL_MEM_SHARED) &&
 940                        phys_pg_pack->asid != ctx->asid) {
 941                dev_err(hdev->dev,
 942                        "Failed to map memory, handle %u is not shared\n",
 943                        handle);
 944                rc = -EPERM;
 945                goto shared_err;
 946        }
 947
 948        hnode = kzalloc(sizeof(*hnode), GFP_KERNEL);
 949        if (!hnode) {
 950                rc = -ENOMEM;
 951                goto hnode_err;
 952        }
 953
 954        ret_vaddr = get_va_block(hdev, va_range, phys_pg_pack->total_size,
 955                                        hint_addr, va_block_align);
 956        if (!ret_vaddr) {
 957                dev_err(hdev->dev, "no available va block for handle %u\n",
 958                                handle);
 959                rc = -ENOMEM;
 960                goto va_block_err;
 961        }
 962
 963        mutex_lock(&ctx->mmu_lock);
 964
 965        rc = map_phys_pg_pack(ctx, ret_vaddr, phys_pg_pack);
 966        if (rc) {
 967                mutex_unlock(&ctx->mmu_lock);
 968                dev_err(hdev->dev, "mapping page pack failed for handle %u\n",
 969                                handle);
 970                goto map_err;
 971        }
 972
 973        rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, false, *vm_type);
 974
 975        mutex_unlock(&ctx->mmu_lock);
 976
 977        if (rc) {
 978                dev_err(hdev->dev,
 979                        "mapping handle %u failed due to MMU cache invalidation\n",
 980                        handle);
 981                goto map_err;
 982        }
 983
 984        ret_vaddr += phys_pg_pack->offset;
 985
 986        hnode->ptr = vm_type;
 987        hnode->vaddr = ret_vaddr;
 988
 989        mutex_lock(&ctx->mem_hash_lock);
 990        hash_add(ctx->mem_hash, &hnode->node, ret_vaddr);
 991        mutex_unlock(&ctx->mem_hash_lock);
 992
 993        *device_addr = ret_vaddr;
 994
 995        if (is_userptr)
 996                free_phys_pg_pack(hdev, phys_pg_pack);
 997
 998        return 0;
 999
1000map_err:

1001        if (add_va_block(hdev, va_range, ret_vaddr,
1002                                ret_vaddr + phys_pg_pack->total_size - 1))
1003                dev_warn(hdev->dev,
1004                        "release va block failed for handle 0x%x, vaddr: 0x%llx\n",
1005                                handle, ret_vaddr);
1006
1007va_block_err:
1008        kfree(hnode);
1009hnode_err:
1010shared_err:
1011        atomic_dec(&phys_pg_pack->mapping_cnt);
1012        if (is_userptr)
1013                free_phys_pg_pack(hdev, phys_pg_pack);
1014init_page_pack_err:
1015        if (is_userptr)
1016                dma_unmap_host_va(hdev, userptr);
1017
1018        return rc;
1019}
1020
1021/*
1022 * unmap_device_va      - unmap the given device virtual address
1023 *
1024 * @ctx                 : current context
1025 * @vaddr               : device virtual address to unmap
1026 * @ctx_free            : true if in context free flow, false otherwise.
1027 *
1028 * This function does the following:
1029 * - Unmap the physical pages related to the given virtual address
1030 * - return the device virtual block to the virtual block list
1031 */
1032static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free)
1033{
1034        struct hl_device *hdev = ctx->hdev;
1035        struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
1036        struct hl_vm_hash_node *hnode = NULL;
1037        struct hl_userptr *userptr = NULL;
1038        struct hl_va_range *va_range;
1039        enum vm_type_t *vm_type;
1040        bool is_userptr;
1041        int rc = 0;
1042
1043        /* protect from double entrance */
1044        mutex_lock(&ctx->mem_hash_lock);
1045        hash_for_each_possible(ctx->mem_hash, hnode, node, (unsigned long)vaddr)
1046                if (vaddr == hnode->vaddr)
1047                        break;
1048
1049        if (!hnode) {
1050                mutex_unlock(&ctx->mem_hash_lock);
1051                dev_err(hdev->dev,
1052                        "unmap failed, no mem hnode for vaddr 0x%llx\n",
1053                        vaddr);
1054                return -EINVAL;
1055        }
1056
1057        hash_del(&hnode->node);
1058        mutex_unlock(&ctx->mem_hash_lock);
1059
1060        vm_type = hnode->ptr;
1061
1062        if (*vm_type == VM_TYPE_USERPTR) {
1063                is_userptr = true;
1064                userptr = hnode->ptr;
1065                rc = init_phys_pg_pack_from_userptr(ctx, userptr,
1066                                                        &phys_pg_pack);
1067                if (rc) {
1068                        dev_err(hdev->dev,
1069                                "unable to init page pack for vaddr 0x%llx\n",
1070                                vaddr);
1071                        goto vm_type_err;
1072                }
1073
1074                if (phys_pg_pack->page_size ==
1075                                        hdev->asic_prop.pmmu.page_size)
1076                        va_range = ctx->host_va_range;
1077                else
1078                        va_range = ctx->host_huge_va_range;
1079        } else if (*vm_type == VM_TYPE_PHYS_PACK) {
1080                is_userptr = false;
1081                va_range = ctx->dram_va_range;
1082                phys_pg_pack = hnode->ptr;
1083        } else {
1084                dev_warn(hdev->dev,
1085                        "unmap failed, unknown vm desc for vaddr 0x%llx\n",
1086                                vaddr);
1087                rc = -EFAULT;
1088                goto vm_type_err;
1089        }
1090
1091        if (atomic_read(&phys_pg_pack->mapping_cnt) == 0) {
1092                dev_err(hdev->dev, "vaddr 0x%llx is not mapped\n", vaddr);
1093                rc = -EINVAL;
1094                goto mapping_cnt_err;
1095        }
1096
1097        vaddr &= ~(((u64) phys_pg_pack->page_size) - 1);
1098
1099        mutex_lock(&ctx->mmu_lock);
1100
1101        unmap_phys_pg_pack(ctx, vaddr, phys_pg_pack);
1102
1103        /*
1104         * During context free this function is called in a loop to clean all
1105         * the context mappings. Hence the cache invalidation can be called once
1106         * at the loop end rather than for each iteration
1107         */
1108        if (!ctx_free)
1109                rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, true,
1110                                                                *vm_type);
1111
1112        mutex_unlock(&ctx->mmu_lock);
1113
1114        /*
1115         * If the context is closing we don't need to check for the MMU cache
1116         * invalidation return code and update the VA free list as in this flow
1117         * we invalidate the MMU cache outside of this unmap function and the VA
1118         * free list will be freed anyway.
1119         */
1120        if (!ctx_free) {
1121                int tmp_rc;
1122
1123                if (rc)
1124                        dev_err(hdev->dev,
1125                                "unmapping vaddr 0x%llx failed due to MMU cache invalidation\n",
1126                                vaddr);
1127
1128                tmp_rc = add_va_block(hdev, va_range, vaddr,
1129                                        vaddr + phys_pg_pack->total_size - 1);
1130                if (tmp_rc) {
1131                        dev_warn(hdev->dev,
1132                                        "add va block failed for vaddr: 0x%llx\n",
1133                                        vaddr);
1134                        if (!rc)
1135                                rc = tmp_rc;
1136                }
1137        }
1138
1139        atomic_dec(&phys_pg_pack->mapping_cnt);
1140        kfree(hnode);
1141
1142        if (is_userptr) {
1143                free_phys_pg_pack(hdev, phys_pg_pack);
1144                dma_unmap_host_va(hdev, userptr);
1145        }
1146
1147        return rc;
1148
1149mapping_cnt_err:
1150        if (is_userptr)
1151                free_phys_pg_pack(hdev, phys_pg_pack);
1152vm_type_err:
1153        mutex_lock(&ctx->mem_hash_lock);
1154        hash_add(ctx->mem_hash, &hnode->node, vaddr);
1155        mutex_unlock(&ctx->mem_hash_lock);
1156
1157        return rc;
1158}
1159
1160static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
1161{
1162        struct hl_device *hdev = hpriv->hdev;
1163        struct hl_ctx *ctx = hpriv->ctx;
1164        u64 device_addr = 0;
1165        u32 handle = 0;
1166        int rc;
1167
1168        switch (args->in.op) {
1169        case HL_MEM_OP_ALLOC:
1170                if (args->in.alloc.mem_size == 0) {
1171                        dev_err(hdev->dev,
1172                                "alloc size must be larger than 0\n");
1173                        rc = -EINVAL;
1174                        goto out;
1175                }
1176
1177                /* Force contiguous as there are no real MMU
1178                 * translations to overcome physical memory gaps
1179                 */
1180                args->in.flags |= HL_MEM_CONTIGUOUS;
1181                rc = alloc_device_memory(ctx, &args->in, &handle);
1182
1183                memset(args, 0, sizeof(*args));
1184                args->out.handle = (__u64) handle;
1185                break;
1186
1187        case HL_MEM_OP_FREE:
1188                rc = free_device_memory(ctx, args->in.free.handle);
1189                break;
1190
1191        case HL_MEM_OP_MAP:
1192                if (args->in.flags & HL_MEM_USERPTR) {
1193                        device_addr = args->in.map_host.host_virt_addr;
1194                        rc = 0;
1195                } else {
1196                        rc = get_paddr_from_handle(ctx, &args->in,
1197                                        &device_addr);
1198                }
1199
1200                memset(args, 0, sizeof(*args));
1201                args->out.device_virt_addr = device_addr;
1202                break;
1203
1204        case HL_MEM_OP_UNMAP:
1205                rc = 0;
1206                break;
1207
1208        default:
1209                dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
1210                rc = -ENOTTY;
1211                break;
1212        }
1213
1214out:
1215        return rc;
1216}
1217
1218int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
1219{
1220        union hl_mem_args *args = data;
1221        struct hl_device *hdev = hpriv->hdev;
1222        struct hl_ctx *ctx = hpriv->ctx;
1223        u64 device_addr = 0;
1224        u32 handle = 0;
1225        int rc;
1226
1227        if (hl_device_disabled_or_in_reset(hdev)) {
1228                dev_warn_ratelimited(hdev->dev,
1229                        "Device is %s. Can't execute MEMORY IOCTL\n",
1230                        atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
1231                return -EBUSY;
1232        }
1233
1234        if (!hdev->mmu_enable)
1235                return mem_ioctl_no_mmu(hpriv, args);
1236
1237        switch (args->in.op) {
1238        case HL_MEM_OP_ALLOC:
1239                if (!hdev->dram_supports_virtual_memory) {
1240                        dev_err(hdev->dev, "DRAM alloc is not supported\n");
1241                        rc = -EINVAL;
1242                        goto out;
1243                }
1244
1245                if (args->in.alloc.mem_size == 0) {
1246                        dev_err(hdev->dev,
1247                                "alloc size must be larger than 0\n");
1248                        rc = -EINVAL;
1249                        goto out;
1250                }
1251                rc = alloc_device_memory(ctx, &args->in, &handle);
1252
1253                memset(args, 0, sizeof(*args));
1254                args->out.handle = (__u64) handle;
1255                break;
1256
1257        case HL_MEM_OP_FREE:
1258                rc = free_device_memory(ctx, args->in.free.handle);
1259                break;
1260
1261        case HL_MEM_OP_MAP:
1262                rc = map_device_va(ctx, &args->in, &device_addr);
1263
1264                memset(args, 0, sizeof(*args));
1265                args->out.device_virt_addr = device_addr;
1266                break;
1267
1268        case HL_MEM_OP_UNMAP:
1269                rc = unmap_device_va(ctx, args->in.unmap.device_virt_addr,
1270                                        false);
1271                break;
1272
1273        default:
1274                dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
1275                rc = -ENOTTY;
1276                break;
1277        }
1278
1279out:
1280        return rc;
1281}
1282
1283static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size,
1284                                u32 npages, u64 start, u32 offset,
1285                                struct hl_userptr *userptr)
1286{
1287        int rc;
1288
1289        if (!access_ok((void __user *) (uintptr_t) addr, size)) {
1290                dev_err(hdev->dev, "user pointer is invalid - 0x%llx\n", addr);
1291                return -EFAULT;
1292        }
1293
1294        userptr->vec = frame_vector_create(npages);
1295        if (!userptr->vec) {
1296                dev_err(hdev->dev, "Failed to create frame vector\n");
1297                return -ENOMEM;
1298        }
1299
1300        rc = get_vaddr_frames(start, npages, FOLL_FORCE | FOLL_WRITE,
1301                                userptr->vec);
1302
1303        if (rc != npages) {
1304                dev_err(hdev->dev,
1305                        "Failed to map host memory, user ptr probably wrong\n");
1306                if (rc < 0)
1307                        goto destroy_framevec;
1308                rc = -EFAULT;
1309                goto put_framevec;
1310        }
1311
1312        if (frame_vector_to_pages(userptr->vec) < 0) {
1313                dev_err(hdev->dev,
1314                        "Failed to translate frame vector to pages\n");
1315                rc = -EFAULT;
1316                goto put_framevec;
1317        }
1318
1319        rc = sg_alloc_table_from_pages(userptr->sgt,
1320                                        frame_vector_pages(userptr->vec),
1321                                        npages, offset, size, GFP_ATOMIC);
1322        if (rc < 0) {
1323                dev_err(hdev->dev, "failed to create SG table from pages\n");
1324                goto put_framevec;
1325        }
1326
1327        return 0;
1328
1329put_framevec:
1330        put_vaddr_frames(userptr->vec);
1331destroy_framevec:
1332        frame_vector_destroy(userptr->vec);
1333        return rc;
1334}
1335
1336/*
1337 * hl_pin_host_memory - pins a chunk of host memory.
1338 * @hdev: pointer to the habanalabs device structure
1339 * @addr: the host virtual address of the memory area
1340 * @size: the size of the memory area
1341 * @userptr: pointer to hl_userptr structure
1342 *
1343 * This function does the following:
1344 * - Pins the physical pages
1345 * - Create an SG list from those pages
1346 */
1347int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
1348                                        struct hl_userptr *userptr)
1349{
1350        u64 start, end;
1351        u32 npages, offset;
1352        int rc;
1353
1354        if (!size) {
1355                dev_err(hdev->dev, "size to pin is invalid - %llu\n", size);
1356                return -EINVAL;
1357        }
1358
1359        /*
1360         * If the combination of the address and size requested for this memory
1361         * region causes an integer overflow, return error.
1362         */
1363        if (((addr + size) < addr) ||
1364                        PAGE_ALIGN(addr + size) < (addr + size)) {
1365                dev_err(hdev->dev,
1366                        "user pointer 0x%llx + %llu causes integer overflow\n",
1367                        addr, size);
1368                return -EINVAL;
1369        }
1370
1371        /*
1372         * This function can be called also from data path, hence use atomic
1373         * always as it is not a big allocation.
1374         */
1375        userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_ATOMIC);
1376        if (!userptr->sgt)
1377                return -ENOMEM;
1378
1379        start = addr & PAGE_MASK;
1380        offset = addr & ~PAGE_MASK;
1381        end = PAGE_ALIGN(addr + size);
1382        npages = (end - start) >> PAGE_SHIFT;
1383
1384        userptr->size = size;
1385        userptr->addr = addr;
1386        userptr->dma_mapped = false;
1387        INIT_LIST_HEAD(&userptr->job_node);
1388
1389        rc = get_user_memory(hdev, addr, size, npages, start, offset,
1390                                userptr);
1391        if (rc) {
1392                dev_err(hdev->dev,
1393                        "failed to get user memory for address 0x%llx\n",
1394                        addr);
1395                goto free_sgt;
1396        }
1397
1398        hl_debugfs_add_userptr(hdev, userptr);
1399
1400        return 0;
1401
1402free_sgt:
1403        kfree(userptr->sgt);
1404        return rc;
1405}
1406
1407/*
1408 * hl_unpin_host_memory - unpins a chunk of host memory.
1409 * @hdev: pointer to the habanalabs device structure
1410 * @userptr: pointer to hl_userptr structure
1411 *
1412 * This function does the following:
1413 * - Unpins the physical pages related to the host memory
1414 * - Free the SG list
1415 */
1416void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
1417{
1418        struct page **pages;
1419
1420        hl_debugfs_remove_userptr(hdev, userptr);
1421
1422        if (userptr->dma_mapped)
1423                hdev->asic_funcs->hl_dma_unmap_sg(hdev, userptr->sgt->sgl,
1424                                                        userptr->sgt->nents,
1425                                                        userptr->dir);
1426
1427        pages = frame_vector_pages(userptr->vec);
1428        if (!IS_ERR(pages)) {
1429                int i;
1430
1431                for (i = 0; i < frame_vector_count(userptr->vec); i++)
1432                        set_page_dirty_lock(pages[i]);
1433        }
1434        put_vaddr_frames(userptr->vec);
1435        frame_vector_destroy(userptr->vec);
1436
1437        list_del(&userptr->job_node);
1438
1439        sg_free_table(userptr->sgt);
1440        kfree(userptr->sgt);
1441}
1442
1443/*
1444 * hl_userptr_delete_list - clear userptr list
1445 *
1446 * @hdev                : pointer to the habanalabs device structure
1447 * @userptr_list        : pointer to the list to clear
1448 *
1449 * This function does the following:
1450 * - Iterates over the list and unpins the host memory and frees the userptr
1451 *   structure.
1452 */
1453void hl_userptr_delete_list(struct hl_device *hdev,
1454                                struct list_head *userptr_list)
1455{
1456        struct hl_userptr *userptr, *tmp;
1457
1458        list_for_each_entry_safe(userptr, tmp, userptr_list, job_node) {
1459                hl_unpin_host_memory(hdev, userptr);
1460                kfree(userptr);
1461        }
1462
1463        INIT_LIST_HEAD(userptr_list);
1464}
1465
1466/*
1467 * hl_userptr_is_pinned - returns whether the given userptr is pinned
1468 *
1469 * @hdev                : pointer to the habanalabs device structure
1470 * @userptr_list        : pointer to the list to clear
1471 * @userptr             : pointer to userptr to check
1472 *
1473 * This function does the following:
1474 * - Iterates over the list and checks if the given userptr is in it, means is
1475 *   pinned. If so, returns true, otherwise returns false.
1476 */
1477bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr,
1478                                u32 size, struct list_head *userptr_list,
1479                                struct hl_userptr **userptr)
1480{
1481        list_for_each_entry((*userptr), userptr_list, job_node) {
1482                if ((addr == (*userptr)->addr) && (size == (*userptr)->size))
1483                        return true;
1484        }
1485
1486        return false;
1487}
1488
1489/*
1490 * va_range_init - initialize virtual addresses range
1491 * @hdev: pointer to the habanalabs device structure
1492 * @va_range: pointer to the range to initialize
1493 * @start: range start address
1494 * @end: range end address
1495 *
1496 * This function does the following:
1497 * - Initializes the virtual addresses list of the given range with the given
1498 *   addresses.
1499 */
1500static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range,
1501                                u64 start, u64 end)
1502{
1503        int rc;
1504
1505        INIT_LIST_HEAD(&va_range->list);
1506
1507        /* PAGE_SIZE alignment */
1508
1509        if (start & (PAGE_SIZE - 1)) {
1510                start &= PAGE_MASK;
1511                start += PAGE_SIZE;
1512        }
1513
1514        if (end & (PAGE_SIZE - 1))
1515                end &= PAGE_MASK;
1516
1517        if (start >= end) {
1518                dev_err(hdev->dev, "too small vm range for va list\n");
1519                return -EFAULT;
1520        }
1521
1522        rc = add_va_block(hdev, va_range, start, end);
1523
1524        if (rc) {
1525                dev_err(hdev->dev, "Failed to init host va list\n");
1526                return rc;
1527        }
1528
1529        va_range->start_addr = start;
1530        va_range->end_addr = end;
1531
1532        return 0;
1533}
1534
1535/*
1536 * va_range_fini() - clear a virtual addresses range
1537 * @hdev: pointer to the habanalabs structure
1538 * va_range: pointer to virtual addresses range
1539 *
1540 * This function does the following:
1541 * - Frees the virtual addresses block list and its lock
1542 */
1543static void va_range_fini(struct hl_device *hdev,
1544                struct hl_va_range *va_range)
1545{
1546        mutex_lock(&va_range->lock);
1547        clear_va_list_locked(hdev, &va_range->list);
1548        mutex_unlock(&va_range->lock);
1549
1550        mutex_destroy(&va_range->lock);
1551        kfree(va_range);
1552}
1553
1554/*
1555 * vm_ctx_init_with_ranges() - initialize virtual memory for context
1556 * @ctx: pointer to the habanalabs context structure
1557 * @host_range_start: host virtual addresses range start.
1558 * @host_range_end: host virtual addresses range end.
1559 * @host_huge_range_start: host virtual addresses range start for memory
1560 *                          allocated with huge pages.
1561 * @host_huge_range_end: host virtual addresses range end for memory allocated
1562 *                        with huge pages.
1563 * @dram_range_start: dram virtual addresses range start.
1564 * @dram_range_end: dram virtual addresses range end.
1565 *
1566 * This function initializes the following:
1567 * - MMU for context
1568 * - Virtual address to area descriptor hashtable
1569 * - Virtual block list of available virtual memory
1570 */
1571static int vm_ctx_init_with_ranges(struct hl_ctx *ctx,
1572                                        u64 host_range_start,
1573                                        u64 host_range_end,
1574                                        u64 host_huge_range_start,
1575                                        u64 host_huge_range_end,
1576                                        u64 dram_range_start,
1577                                        u64 dram_range_end)
1578{
1579        struct hl_device *hdev = ctx->hdev;
1580        int rc;
1581
1582        ctx->host_va_range = kzalloc(sizeof(*ctx->host_va_range), GFP_KERNEL);
1583        if (!ctx->host_va_range)
1584                return -ENOMEM;
1585
1586        ctx->host_huge_va_range = kzalloc(sizeof(*ctx->host_huge_va_range),
1587                                                GFP_KERNEL);
1588        if (!ctx->host_huge_va_range) {
1589                rc =  -ENOMEM;
1590                goto host_huge_va_range_err;
1591        }
1592
1593        ctx->dram_va_range = kzalloc(sizeof(*ctx->dram_va_range), GFP_KERNEL);
1594        if (!ctx->dram_va_range) {
1595                rc = -ENOMEM;
1596                goto dram_va_range_err;
1597        }
1598
1599        rc = hl_mmu_ctx_init(ctx);
1600        if (rc) {
1601                dev_err(hdev->dev, "failed to init context %d\n", ctx->asid);
1602                goto mmu_ctx_err;
1603        }
1604
1605        mutex_init(&ctx->mem_hash_lock);
1606        hash_init(ctx->mem_hash);
1607
1608        mutex_init(&ctx->host_va_range->lock);
1609
1610        rc = va_range_init(hdev, ctx->host_va_range, host_range_start,
1611                                host_range_end);
1612        if (rc) {
1613                dev_err(hdev->dev, "failed to init host vm range\n");
1614                goto host_page_range_err;
1615        }
1616
1617        if (hdev->pmmu_huge_range) {
1618                mutex_init(&ctx->host_huge_va_range->lock);
1619
1620                rc = va_range_init(hdev, ctx->host_huge_va_range,
1621                                        host_huge_range_start,
1622                                        host_huge_range_end);
1623                if (rc) {
1624                        dev_err(hdev->dev,
1625                                "failed to init host huge vm range\n");
1626                        goto host_hpage_range_err;
1627                }
1628        } else {
1629                kfree(ctx->host_huge_va_range);
1630                ctx->host_huge_va_range = ctx->host_va_range;
1631        }
1632
1633        mutex_init(&ctx->dram_va_range->lock);
1634
1635        rc = va_range_init(hdev, ctx->dram_va_range, dram_range_start,
1636                        dram_range_end);
1637        if (rc) {
1638                dev_err(hdev->dev, "failed to init dram vm range\n");
1639                goto dram_vm_err;
1640        }
1641
1642        hl_debugfs_add_ctx_mem_hash(hdev, ctx);
1643
1644        return 0;
1645
1646dram_vm_err:
1647        mutex_destroy(&ctx->dram_va_range->lock);
1648
1649        if (hdev->pmmu_huge_range) {
1650                mutex_lock(&ctx->host_huge_va_range->lock);
1651                clear_va_list_locked(hdev, &ctx->host_huge_va_range->list);
1652                mutex_unlock(&ctx->host_huge_va_range->lock);
1653        }
1654host_hpage_range_err:
1655        if (hdev->pmmu_huge_range)
1656                mutex_destroy(&ctx->host_huge_va_range->lock);
1657        mutex_lock(&ctx->host_va_range->lock);
1658        clear_va_list_locked(hdev, &ctx->host_va_range->list);
1659        mutex_unlock(&ctx->host_va_range->lock);
1660host_page_range_err:
1661        mutex_destroy(&ctx->host_va_range->lock);
1662        mutex_destroy(&ctx->mem_hash_lock);
1663        hl_mmu_ctx_fini(ctx);
1664mmu_ctx_err:
1665        kfree(ctx->dram_va_range);
1666dram_va_range_err:
1667        kfree(ctx->host_huge_va_range);
1668host_huge_va_range_err:
1669        kfree(ctx->host_va_range);
1670
1671        return rc;
1672}
1673
1674int hl_vm_ctx_init(struct hl_ctx *ctx)
1675{
1676        struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
1677        u64 host_range_start, host_range_end, host_huge_range_start,
1678                host_huge_range_end, dram_range_start, dram_range_end;
1679
1680        atomic64_set(&ctx->dram_phys_mem, 0);
1681
1682        /*
1683         * - If MMU is enabled, init the ranges as usual.
1684         * - If MMU is disabled, in case of host mapping, the returned address
1685         *   is the given one.
1686         *   In case of DRAM mapping, the returned address is the physical
1687         *   address of the memory related to the given handle.
1688         */
1689        if (ctx->hdev->mmu_enable) {
1690                dram_range_start = prop->dmmu.start_addr;
1691                dram_range_end = prop->dmmu.end_addr;
1692                host_range_start = prop->pmmu.start_addr;
1693                host_range_end = prop->pmmu.end_addr;
1694                host_huge_range_start = prop->pmmu_huge.start_addr;
1695                host_huge_range_end = prop->pmmu_huge.end_addr;
1696        } else {
1697                dram_range_start = prop->dram_user_base_address;
1698                dram_range_end = prop->dram_end_address;
1699                host_range_start = prop->dram_user_base_address;
1700                host_range_end = prop->dram_end_address;
1701                host_huge_range_start = prop->dram_user_base_address;
1702                host_huge_range_end = prop->dram_end_address;
1703        }
1704
1705        return vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end,
1706                                        host_huge_range_start,
1707                                        host_huge_range_end,
1708                                        dram_range_start,
1709                                        dram_range_end);
1710}
1711
1712/*
1713 * hl_vm_ctx_fini       - virtual memory teardown of context
1714 *
1715 * @ctx                 : pointer to the habanalabs context structure
1716 *
1717 * This function perform teardown the following:
1718 * - Virtual block list of available virtual memory
1719 * - Virtual address to area descriptor hashtable
1720 * - MMU for context
1721 *
1722 * In addition this function does the following:
1723 * - Unmaps the existing hashtable nodes if the hashtable is not empty. The
1724 *   hashtable should be empty as no valid mappings should exist at this
1725 *   point.
1726 * - Frees any existing physical page list from the idr which relates to the
1727 *   current context asid.
1728 * - This function checks the virtual block list for correctness. At this point
1729 *   the list should contain one element which describes the whole virtual
1730 *   memory range of the context. Otherwise, a warning is printed.
1731 */
1732void hl_vm_ctx_fini(struct hl_ctx *ctx)
1733{
1734        struct hl_device *hdev = ctx->hdev;
1735        struct hl_vm *vm = &hdev->vm;
1736        struct hl_vm_phys_pg_pack *phys_pg_list;
1737        struct hl_vm_hash_node *hnode;
1738        struct hlist_node *tmp_node;
1739        int i;
1740
1741        hl_debugfs_remove_ctx_mem_hash(hdev, ctx);
1742
1743        /*
1744         * Clearly something went wrong on hard reset so no point in printing
1745         * another side effect error
1746         */
1747        if (!hdev->hard_reset_pending && !hash_empty(ctx->mem_hash))
1748                dev_notice(hdev->dev,
1749                        "user released device without removing its memory mappings\n");
1750
1751        hash_for_each_safe(ctx->mem_hash, i, tmp_node, hnode, node) {
1752                dev_dbg(hdev->dev,
1753                        "hl_mem_hash_node of vaddr 0x%llx of asid %d is still alive\n",
1754                        hnode->vaddr, ctx->asid);
1755                unmap_device_va(ctx, hnode->vaddr, true);
1756        }
1757
1758        /* invalidate the cache once after the unmapping loop */
1759        hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
1760        hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_PHYS_PACK);
1761
1762        spin_lock(&vm->idr_lock);
1763        idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_list, i)
1764                if (phys_pg_list->asid == ctx->asid) {
1765                        dev_dbg(hdev->dev,
1766                                "page list 0x%px of asid %d is still alive\n",
1767                                phys_pg_list, ctx->asid);
1768                        atomic64_sub(phys_pg_list->total_size,
1769                                        &hdev->dram_used_mem);
1770                        free_phys_pg_pack(hdev, phys_pg_list);
1771                        idr_remove(&vm->phys_pg_pack_handles, i);
1772                }
1773        spin_unlock(&vm->idr_lock);
1774
1775        va_range_fini(hdev, ctx->dram_va_range);
1776        if (hdev->pmmu_huge_range)
1777                va_range_fini(hdev, ctx->host_huge_va_range);
1778        va_range_fini(hdev, ctx->host_va_range);
1779
1780        mutex_destroy(&ctx->mem_hash_lock);
1781        hl_mmu_ctx_fini(ctx);
1782}
1783
1784/*
1785 * hl_vm_init           - initialize virtual memory module
1786 *
1787 * @hdev                : pointer to the habanalabs device structure
1788 *
1789 * This function initializes the following:
1790 * - MMU module
1791 * - DRAM physical pages pool of 2MB
1792 * - Idr for device memory allocation handles
1793 */
1794int hl_vm_init(struct hl_device *hdev)
1795{
1796        struct asic_fixed_properties *prop = &hdev->asic_prop;
1797        struct hl_vm *vm = &hdev->vm;
1798        int rc;
1799
1800        vm->dram_pg_pool = gen_pool_create(__ffs(prop->dram_page_size), -1);
1801        if (!vm->dram_pg_pool) {
1802                dev_err(hdev->dev, "Failed to create dram page pool\n");
1803                return -ENOMEM;
1804        }
1805
1806        kref_init(&vm->dram_pg_pool_refcount);
1807
1808        rc = gen_pool_add(vm->dram_pg_pool, prop->dram_user_base_address,
1809                        prop->dram_end_address - prop->dram_user_base_address,
1810                        -1);
1811
1812        if (rc) {
1813                dev_err(hdev->dev,
1814                        "Failed to add memory to dram page pool %d\n", rc);
1815                goto pool_add_err;
1816        }
1817
1818        spin_lock_init(&vm->idr_lock);
1819        idr_init(&vm->phys_pg_pack_handles);
1820
1821        atomic64_set(&hdev->dram_used_mem, 0);
1822
1823        vm->init_done = true;
1824
1825        return 0;
1826
1827pool_add_err:
1828        gen_pool_destroy(vm->dram_pg_pool);
1829
1830        return rc;
1831}
1832
1833/*
1834 * hl_vm_fini           - virtual memory module teardown
1835 *
1836 * @hdev                : pointer to the habanalabs device structure
1837 *
1838 * This function perform teardown to the following:
1839 * - Idr for device memory allocation handles
1840 * - DRAM physical pages pool of 2MB
1841 * - MMU module
1842 */
1843void hl_vm_fini(struct hl_device *hdev)
1844{
1845        struct hl_vm *vm = &hdev->vm;
1846
1847        if (!vm->init_done)
1848                return;
1849
1850        /*
1851         * At this point all the contexts should be freed and hence no DRAM
1852         * memory should be in use. Hence the DRAM pool should be freed here.
1853         */
1854        if (kref_put(&vm->dram_pg_pool_refcount, dram_pg_pool_do_release) != 1)
1855                dev_warn(hdev->dev, "dram_pg_pool was not destroyed on %s\n",
1856                                __func__);
1857
1858        vm->init_done = false;
1859}
1860