linux/drivers/misc/habanalabs/mmu.c
// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "habanalabs.h"
#include "include/hw_ip/mmu/mmu_general.h"

#include <linux/genalloc.h>
#include <linux/slab.h>

static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr);

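/*
 * Every hop that is dynamically allocated by alloc_hop() is tracked by a
 * pgt_info entry, hashed in ctx->mmu_shadow_hash by the hop's shadow (host)
 * address. hop0 tables are pre-allocated per ASID and are not tracked here.
 */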
static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr)
{
	struct pgt_info *pgt_info = NULL;

	hash_for_each_possible(ctx->mmu_shadow_hash, pgt_info, node,
				(unsigned long) hop_addr)
		if (hop_addr == pgt_info->shadow_addr)
			break;

	return pgt_info;
}

static void free_hop(struct hl_ctx *ctx, u64 hop_addr)
{
	struct hl_device *hdev = ctx->hdev;
	struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);

	gen_pool_free(hdev->mmu_pgt_pool, pgt_info->phys_addr,
			hdev->asic_prop.mmu_hop_table_size);
	hash_del(&pgt_info->node);
	kfree((u64 *) (uintptr_t) pgt_info->shadow_addr);
	kfree(pgt_info);
}

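/*
 * alloc_hop() carves one physical hop table out of the device page-table
 * pool and pairs it with a zeroed shadow copy in host memory; both are
 * recorded in a pgt_info hashed on the shadow address. It returns the
 * shadow address, or ULLONG_MAX on failure.
 */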
static u64 alloc_hop(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pgt_info *pgt_info;
	u64 phys_addr, shadow_addr;

	pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL);
	if (!pgt_info)
		return ULLONG_MAX;

	phys_addr = (u64) gen_pool_alloc(hdev->mmu_pgt_pool,
					prop->mmu_hop_table_size);
	if (!phys_addr) {
		dev_err(hdev->dev, "failed to allocate page\n");
		goto pool_add_err;
	}

	shadow_addr = (u64) (uintptr_t) kzalloc(prop->mmu_hop_table_size,
						GFP_KERNEL);
	if (!shadow_addr)
		goto shadow_err;

	pgt_info->phys_addr = phys_addr;
	pgt_info->shadow_addr = shadow_addr;
	pgt_info->ctx = ctx;
	pgt_info->num_of_ptes = 0;
	hash_add(ctx->mmu_shadow_hash, &pgt_info->node, shadow_addr);

	return shadow_addr;

shadow_err:
	gen_pool_free(hdev->mmu_pgt_pool, phys_addr, prop->mmu_hop_table_size);
pool_add_err:
	kfree(pgt_info);

	return ULLONG_MAX;
}

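/*
 * hop0 tables are special: the physical hop0 of ASID n sits at a fixed
 * offset (n * mmu_hop_table_size) inside the device page-table area, and its
 * shadow sits at the same offset inside hdev->mmu_shadow_hop0. They are set
 * up once at init time and never go through alloc_hop()/free_hop().
 */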
static inline u64 get_phys_hop0_addr(struct hl_ctx *ctx)
{
	return ctx->hdev->asic_prop.mmu_pgt_addr +
			(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
}

static inline u64 get_hop0_addr(struct hl_ctx *ctx)
{
	return (u64) (uintptr_t) ctx->hdev->mmu_shadow_hop0 +
			(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
}

static inline void flush(struct hl_ctx *ctx)
{
	/* flush all writes from all cores to reach PCI */
	mb();
	ctx->hdev->asic_funcs->read_pte(ctx->hdev, get_phys_hop0_addr(ctx));
}

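/*
 * The driver keeps two copies of every hop: the real page tables in device
 * memory, written through asic_funcs->write_pte(), and a shadow copy in host
 * memory. Page walks during map/unmap read only the shadow copy (a plain
 * pointer dereference), while every PTE update is applied to both copies,
 * with get_phys_addr() translating a shadow address to its device address.
 */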
/* transform the value to physical address when writing to H/W */
static inline void write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val)
{
	/*
	 * The value to write is actually the address of the next shadow hop +
	 * flags at the 12 LSBs.
	 * Hence in order to get the value to write to the physical PTE, we
	 * clear the 12 LSBs and translate the shadow hop to its associated
	 * physical hop, and add back the original 12 LSBs.
	 */
	u64 phys_val = get_phys_addr(ctx, val & PTE_PHYS_ADDR_MASK) |
				(val & OFFSET_MASK);

	ctx->hdev->asic_funcs->write_pte(ctx->hdev,
					get_phys_addr(ctx, shadow_pte_addr),
					phys_val);

	*(u64 *) (uintptr_t) shadow_pte_addr = val;
}

/* do not transform the value to physical address when writing to H/W */
static inline void write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr,
					u64 val)
{
	ctx->hdev->asic_funcs->write_pte(ctx->hdev,
					get_phys_addr(ctx, shadow_pte_addr),
					val);
	*(u64 *) (uintptr_t) shadow_pte_addr = val;
}

/* clear the last and present bits */
static inline void clear_pte(struct hl_ctx *ctx, u64 pte_addr)
{
	/* no need to transform the value to physical address */
	write_final_pte(ctx, pte_addr, 0);
}

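/*
 * get_pte()/put_pte() reference-count how many PTEs of a hop are in use, so
 * that an intermediate hop can be freed once its last PTE is released.
 */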
static inline void get_pte(struct hl_ctx *ctx, u64 hop_addr)
{
	get_pgt_info(ctx, hop_addr)->num_of_ptes++;
}

/*
 * put_pte - decrement the num of ptes and free the hop if possible
 *
 * @ctx: pointer to the context structure
 * @hop_addr: addr of the hop
 *
 * This function returns the number of ptes left on this hop. If the number is
 * 0, it means the hop was freed.
 */
static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr)
{
	struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
	int num_of_ptes_left;

	pgt_info->num_of_ptes--;

	/*
	 * Need to save the number of ptes left because free_hop might free
	 * the pgt_info
	 */
	num_of_ptes_left = pgt_info->num_of_ptes;
	if (!num_of_ptes_left)
		free_hop(ctx, hop_addr);

	return num_of_ptes_left;
}

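/*
 * A virtual address is decomposed into one index per hop plus a page offset:
 * hop n's index is (virt_addr & HOPn_MASK) >> HOPn_SHIFT, and the PTE for
 * that index lives at hop_addr + index * mmu_pte_size. With, for example,
 * 512 PTEs per hop each index is 9 bits wide; a 2MB (huge) page terminates
 * the walk at hop3, while a 4KB page continues one level deeper to hop4.
 * The exact masks and shifts come from include/hw_ip/mmu/mmu_general.h.
 */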
static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
					u64 virt_addr, u64 mask, u64 shift)
{
	return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
			((virt_addr & mask) >> shift);
}

static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP0_MASK, HOP0_SHIFT);
}

static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP1_MASK, HOP1_SHIFT);
}

static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP2_MASK, HOP2_SHIFT);
}

static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP3_MASK, HOP3_SHIFT);
}

static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP4_MASK, HOP4_SHIFT);
}

static inline u64 get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte)
{
	if (curr_pte & PAGE_PRESENT_MASK)
		return curr_pte & PHYS_ADDR_MASK;
	else
		return ULLONG_MAX;
}

static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte,
						bool *is_new_hop)
{
	u64 hop_addr = get_next_hop_addr(ctx, curr_pte);

	if (hop_addr == ULLONG_MAX) {
		hop_addr = alloc_hop(ctx);
		*is_new_hop = (hop_addr != ULLONG_MAX);
	}

	return hop_addr;
}

/* translates shadow address inside hop to a physical address */
static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr)
{
	u64 page_mask = (ctx->hdev->asic_prop.mmu_hop_table_size - 1);
	u64 shadow_hop_addr = shadow_addr & ~page_mask;
	u64 pte_offset = shadow_addr & page_mask;
	u64 phys_hop_addr;

	if (shadow_hop_addr != get_hop0_addr(ctx))
		phys_hop_addr = get_pgt_info(ctx, shadow_hop_addr)->phys_addr;
	else
		phys_hop_addr = get_phys_hop0_addr(ctx);

	return phys_hop_addr + pte_offset;
}

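/*
 * The DRAM default page mapping maps the entire DRAM virtual range of a user
 * context to a single default physical page, so that accesses to DRAM
 * addresses which were never explicitly mapped hit that page instead of
 * faulting. This needs one hop1 and one hop2 table plus enough hop3 tables
 * to cover the range. As an illustration only (not the real ASIC numbers):
 * 16 GB of DRAM with 2 MB pages and 512 PTEs per hop needs 8192 hop3 PTEs,
 * i.e. 16 hop3 tables, so total_hops = 16 + 2 = 18.
 */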
static int dram_default_mapping_init(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
		hop2_pte_addr, hop3_pte_addr, pte_val;
	int rc, i, j, hop3_allocated = 0;

	if ((!hdev->dram_supports_virtual_memory) ||
			(!hdev->dram_default_page_mapping) ||
			(ctx->asid == HL_KERNEL_ASID_ID))
		return 0;

	num_of_hop3 = prop->dram_size_for_default_page_mapping;
	do_div(num_of_hop3, prop->dram_page_size);
	do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);

	/* add hop1 and hop2 */
	total_hops = num_of_hop3 + 2;

	ctx->dram_default_hops = kzalloc(HL_PTE_SIZE * total_hops, GFP_KERNEL);
	if (!ctx->dram_default_hops)
		return -ENOMEM;

	hop0_addr = get_hop0_addr(ctx);

	hop1_addr = alloc_hop(ctx);
	if (hop1_addr == ULLONG_MAX) {
		dev_err(hdev->dev, "failed to alloc hop 1\n");
		rc = -ENOMEM;
		goto hop1_err;
	}

	ctx->dram_default_hops[total_hops - 1] = hop1_addr;

	hop2_addr = alloc_hop(ctx);
	if (hop2_addr == ULLONG_MAX) {
		dev_err(hdev->dev, "failed to alloc hop 2\n");
		rc = -ENOMEM;
		goto hop2_err;
	}

	ctx->dram_default_hops[total_hops - 2] = hop2_addr;

	for (i = 0 ; i < num_of_hop3 ; i++) {
		ctx->dram_default_hops[i] = alloc_hop(ctx);
		if (ctx->dram_default_hops[i] == ULLONG_MAX) {
			dev_err(hdev->dev, "failed to alloc hop 3, i: %d\n", i);
			rc = -ENOMEM;
			goto hop3_err;
		}
		hop3_allocated++;
	}

	/* need only pte 0 in hops 0 and 1 */
	pte_val = (hop1_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
	write_pte(ctx, hop0_addr, pte_val);

	pte_val = (hop2_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
	write_pte(ctx, hop1_addr, pte_val);
	get_pte(ctx, hop1_addr);

	hop2_pte_addr = hop2_addr;
	for (i = 0 ; i < num_of_hop3 ; i++) {
		pte_val = (ctx->dram_default_hops[i] & PTE_PHYS_ADDR_MASK) |
				PAGE_PRESENT_MASK;
		write_pte(ctx, hop2_pte_addr, pte_val);
		get_pte(ctx, hop2_addr);
		hop2_pte_addr += HL_PTE_SIZE;
	}

	pte_val = (prop->mmu_dram_default_page_addr & PTE_PHYS_ADDR_MASK) |
			LAST_MASK | PAGE_PRESENT_MASK;

	for (i = 0 ; i < num_of_hop3 ; i++) {
		hop3_pte_addr = ctx->dram_default_hops[i];
		for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
			write_final_pte(ctx, hop3_pte_addr, pte_val);
			get_pte(ctx, ctx->dram_default_hops[i]);
			hop3_pte_addr += HL_PTE_SIZE;
		}
	}

	flush(ctx);

	return 0;

hop3_err:
	for (i = 0 ; i < hop3_allocated ; i++)
		free_hop(ctx, ctx->dram_default_hops[i]);

	free_hop(ctx, hop2_addr);
hop2_err:
	free_hop(ctx, hop1_addr);
hop1_err:
	kfree(ctx->dram_default_hops);

	return rc;
}

static void dram_default_mapping_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
		hop2_pte_addr, hop3_pte_addr;
	int i, j;

	if ((!hdev->dram_supports_virtual_memory) ||
			(!hdev->dram_default_page_mapping) ||
			(ctx->asid == HL_KERNEL_ASID_ID))
		return;

	num_of_hop3 = prop->dram_size_for_default_page_mapping;
	do_div(num_of_hop3, prop->dram_page_size);
	do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);

	hop0_addr = get_hop0_addr(ctx);
	/* add hop1 and hop2 */
	total_hops = num_of_hop3 + 2;
	hop1_addr = ctx->dram_default_hops[total_hops - 1];
	hop2_addr = ctx->dram_default_hops[total_hops - 2];

	for (i = 0 ; i < num_of_hop3 ; i++) {
		hop3_pte_addr = ctx->dram_default_hops[i];
		for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
			clear_pte(ctx, hop3_pte_addr);
			put_pte(ctx, ctx->dram_default_hops[i]);
			hop3_pte_addr += HL_PTE_SIZE;
		}
	}

	hop2_pte_addr = hop2_addr;
	for (i = 0 ; i < num_of_hop3 ; i++) {
		clear_pte(ctx, hop2_pte_addr);
		put_pte(ctx, hop2_addr);
		hop2_pte_addr += HL_PTE_SIZE;
	}

	clear_pte(ctx, hop1_addr);
	put_pte(ctx, hop1_addr);
	clear_pte(ctx, hop0_addr);

	kfree(ctx->dram_default_hops);

	flush(ctx);
}

/**
 * hl_mmu_init() - initialize the MMU module.
 * @hdev: habanalabs device structure.
 *
 * This function does the following:
 * - Create a pool of pages for pgt_infos.
 * - Create a shadow table for pgt.
 *
 * Return: 0 for success, non-zero for failure.
 */
int hl_mmu_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	if (!hdev->mmu_enable)
		return 0;

	/* MMU H/W init was already done in device hw_init() */

	hdev->mmu_pgt_pool =
			gen_pool_create(__ffs(prop->mmu_hop_table_size), -1);

	if (!hdev->mmu_pgt_pool) {
		dev_err(hdev->dev, "Failed to create page gen pool\n");
		return -ENOMEM;
	}

	rc = gen_pool_add(hdev->mmu_pgt_pool, prop->mmu_pgt_addr +
			prop->mmu_hop0_tables_total_size,
			prop->mmu_pgt_size - prop->mmu_hop0_tables_total_size,
			-1);
	if (rc) {
		dev_err(hdev->dev, "Failed to add memory to page gen pool\n");
		goto err_pool_add;
	}

	hdev->mmu_shadow_hop0 = kvmalloc_array(prop->max_asid,
					prop->mmu_hop_table_size,
					GFP_KERNEL | __GFP_ZERO);
	if (!hdev->mmu_shadow_hop0) {
		rc = -ENOMEM;
		goto err_pool_add;
	}

	return 0;

err_pool_add:
	gen_pool_destroy(hdev->mmu_pgt_pool);

	return rc;
}

/**
 * hl_mmu_fini() - release the MMU module.
 * @hdev: habanalabs device structure.
 *
 * This function does the following:
 * - Disable MMU in H/W.
 * - Free the pgt_infos pool.
 *
 * All contexts should be freed before calling this function.
 */
void hl_mmu_fini(struct hl_device *hdev)
{
	if (!hdev->mmu_enable)
		return;

	kvfree(hdev->mmu_shadow_hop0);
	gen_pool_destroy(hdev->mmu_pgt_pool);

	/* MMU H/W fini will be done in device hw_fini() */
}

/**
 * hl_mmu_ctx_init() - initialize a context for using the MMU module.
 * @ctx: pointer to the context structure to initialize.
 *
 * Initialize a mutex to protect the concurrent mapping flow and a hash to
 * hold all the page table hops related to this context.
 * Return: 0 on success, non-zero otherwise.
 */
int hl_mmu_ctx_init(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;

	if (!hdev->mmu_enable)
		return 0;

	mutex_init(&ctx->mmu_lock);
	hash_init(ctx->mmu_phys_hash);
	hash_init(ctx->mmu_shadow_hash);

	return dram_default_mapping_init(ctx);
}

/*
 * hl_mmu_ctx_fini - disable a ctx from using the mmu module
 *
 * @ctx: pointer to the context structure
 *
 * This function does the following:
 * - Free any pgts which were not freed yet
 * - Free the mutex
 * - Free DRAM default page mapping hops
 */
void hl_mmu_ctx_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct pgt_info *pgt_info;
	struct hlist_node *tmp;
	int i;

	if (!hdev->mmu_enable)
		return;

	dram_default_mapping_fini(ctx);

	if (!hash_empty(ctx->mmu_shadow_hash))
		dev_err(hdev->dev, "ctx is freed while it has pgts in use\n");

	hash_for_each_safe(ctx->mmu_shadow_hash, i, tmp, pgt_info, node) {
		dev_err(hdev->dev,
			"pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
			pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
		free_hop(ctx, pgt_info->shadow_addr);
	}

	mutex_destroy(&ctx->mmu_lock);
}

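/*
 * Unmap a single page: walk hop0..hop3 (and hop4 for a 4KB page) through the
 * shadow tables, clear the leaf PTE and release every intermediate hop whose
 * PTE count drops to zero. For DRAM addresses under the default page mapping
 * the leaf PTE is rewritten to point at the default page instead of being
 * cleared.
 */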
static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 hop0_addr = 0, hop0_pte_addr = 0,
		hop1_addr = 0, hop1_pte_addr = 0,
		hop2_addr = 0, hop2_pte_addr = 0,
		hop3_addr = 0, hop3_pte_addr = 0,
		hop4_addr = 0, hop4_pte_addr = 0,
		curr_pte;
	bool is_dram_addr, is_huge, clear_hop3 = true;

	is_dram_addr = hl_mem_area_inside_range(virt_addr, PAGE_SIZE_2MB,
				prop->va_space_dram_start_address,
				prop->va_space_dram_end_address);

	hop0_addr = get_hop0_addr(ctx);
	hop0_pte_addr = get_hop0_pte_addr(ctx, hop0_addr, virt_addr);

	curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;

	hop1_addr = get_next_hop_addr(ctx, curr_pte);

	if (hop1_addr == ULLONG_MAX)
		goto not_mapped;

	hop1_pte_addr = get_hop1_pte_addr(ctx, hop1_addr, virt_addr);

	curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;

	hop2_addr = get_next_hop_addr(ctx, curr_pte);

	if (hop2_addr == ULLONG_MAX)
		goto not_mapped;

	hop2_pte_addr = get_hop2_pte_addr(ctx, hop2_addr, virt_addr);

	curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;

	hop3_addr = get_next_hop_addr(ctx, curr_pte);

	if (hop3_addr == ULLONG_MAX)
		goto not_mapped;

	hop3_pte_addr = get_hop3_pte_addr(ctx, hop3_addr, virt_addr);

	curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;

	is_huge = curr_pte & LAST_MASK;

	if (is_dram_addr && !is_huge) {
		dev_err(hdev->dev,
				"DRAM unmapping should use huge pages only\n");
		return -EFAULT;
	}

	if (!is_huge) {
		hop4_addr = get_next_hop_addr(ctx, curr_pte);

		if (hop4_addr == ULLONG_MAX)
			goto not_mapped;

		hop4_pte_addr = get_hop4_pte_addr(ctx, hop4_addr, virt_addr);

		curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;

		clear_hop3 = false;
	}

	if (hdev->dram_default_page_mapping && is_dram_addr) {
		u64 default_pte = (prop->mmu_dram_default_page_addr &
				PTE_PHYS_ADDR_MASK) | LAST_MASK |
					PAGE_PRESENT_MASK;
		if (curr_pte == default_pte) {
			dev_err(hdev->dev,
				"DRAM: hop3 PTE points to zero page, can't unmap, va: 0x%llx\n",
					virt_addr);
			goto not_mapped;
		}

		if (!(curr_pte & PAGE_PRESENT_MASK)) {
			dev_err(hdev->dev,
				"DRAM: hop3 PTE is cleared! can't unmap, va: 0x%llx\n",
					virt_addr);
			goto not_mapped;
		}

		write_final_pte(ctx, hop3_pte_addr, default_pte);
		put_pte(ctx, hop3_addr);
	} else {
		if (!(curr_pte & PAGE_PRESENT_MASK))
			goto not_mapped;

		if (hop4_addr)
			clear_pte(ctx, hop4_pte_addr);
		else
			clear_pte(ctx, hop3_pte_addr);

		if (hop4_addr && !put_pte(ctx, hop4_addr))
			clear_hop3 = true;

		if (!clear_hop3)
			goto flush;

		clear_pte(ctx, hop3_pte_addr);

		if (put_pte(ctx, hop3_addr))
			goto flush;

		clear_pte(ctx, hop2_pte_addr);

		if (put_pte(ctx, hop2_addr))
			goto flush;

		clear_pte(ctx, hop1_pte_addr);

		if (put_pte(ctx, hop1_addr))
			goto flush;

		clear_pte(ctx, hop0_pte_addr);
	}

flush:
	flush(ctx);

	return 0;

not_mapped:
	dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
		virt_addr);

	return -EINVAL;
}

/*
 * hl_mmu_unmap - unmaps a virtual addr
 *
 * @ctx: pointer to the context structure
 * @virt_addr: virt addr to unmap
 * @page_size: size of the page to unmap
 *
 * This function does the following:
 * - Check that the virt addr is mapped
 * - Unmap the virt addr and free pgts if possible
 * - Returns 0 on success, -EINVAL if the given addr is not mapped
 *
 * Because this function changes the page tables in the device and because it
 * changes the MMU hash, it must be protected by a lock.
 * However, because it unmaps only a single page, the lock should be
 * implemented at a higher level in order to protect the entire unmapping of
 * the memory area.
 */
int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size)
{
	struct hl_device *hdev = ctx->hdev;
	u64 real_virt_addr;
	u32 real_page_size, npages;
	int i, rc;

	if (!hdev->mmu_enable)
		return 0;

	/*
	 * The H/W handles mapping of 4KB/2MB pages. Hence if the host page
	 * size is bigger, we break it into sub-pages and unmap them separately.
	 */
	if ((page_size % PAGE_SIZE_2MB) == 0) {
		real_page_size = PAGE_SIZE_2MB;
	} else if ((page_size % PAGE_SIZE_4KB) == 0) {
		real_page_size = PAGE_SIZE_4KB;
	} else {
		dev_err(hdev->dev,
			"page size of %u is not 4KB nor 2MB aligned, can't unmap\n",
				page_size);

		return -EFAULT;
	}

	npages = page_size / real_page_size;
	real_virt_addr = virt_addr;

	for (i = 0 ; i < npages ; i++) {
		rc = _hl_mmu_unmap(ctx, real_virt_addr);
		if (rc)
			return rc;

		real_virt_addr += real_page_size;
	}

	return 0;
}

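/*
 * Map a single 4KB/2MB page: walk the hops, allocating any missing
 * intermediate hop on the way, write the leaf PTE, and only then link the
 * newly allocated hops into their parents and take the PTE references. A
 * failure is unwound by freeing just the hops allocated here, so existing
 * translations are left untouched.
 */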
static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
		u32 page_size)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 hop0_addr = 0, hop0_pte_addr = 0,
		hop1_addr = 0, hop1_pte_addr = 0,
		hop2_addr = 0, hop2_pte_addr = 0,
		hop3_addr = 0, hop3_pte_addr = 0,
		hop4_addr = 0, hop4_pte_addr = 0,
		curr_pte = 0;
	bool hop1_new = false, hop2_new = false, hop3_new = false,
		hop4_new = false, is_huge, is_dram_addr;
	int rc = -ENOMEM;

	/*
	 * This mapping function can map a 4KB/2MB page. For a 2MB page there
	 * are only 3 hops rather than 4. Currently the DRAM allocation uses
	 * 2MB pages only but user memory could have been allocated with one of
	 * the two page sizes. Since this is common code for all three cases,
	 * we need this huge page check.
	 */
	is_huge = page_size == PAGE_SIZE_2MB;

	is_dram_addr = hl_mem_area_inside_range(virt_addr, page_size,
				prop->va_space_dram_start_address,
				prop->va_space_dram_end_address);

	if (is_dram_addr && !is_huge) {
		dev_err(hdev->dev, "DRAM mapping should use huge pages only\n");
		return -EFAULT;
	}

	hop0_addr = get_hop0_addr(ctx);
	hop0_pte_addr = get_hop0_pte_addr(ctx, hop0_addr, virt_addr);
	curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;

	hop1_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop1_new);
	if (hop1_addr == ULLONG_MAX)
		goto err;

	hop1_pte_addr = get_hop1_pte_addr(ctx, hop1_addr, virt_addr);
	curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;

	hop2_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop2_new);
	if (hop2_addr == ULLONG_MAX)
		goto err;

	hop2_pte_addr = get_hop2_pte_addr(ctx, hop2_addr, virt_addr);
	curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;

	hop3_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop3_new);
	if (hop3_addr == ULLONG_MAX)
		goto err;

	hop3_pte_addr = get_hop3_pte_addr(ctx, hop3_addr, virt_addr);
	curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;

	if (!is_huge) {
		hop4_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop4_new);
		if (hop4_addr == ULLONG_MAX)
			goto err;

		hop4_pte_addr = get_hop4_pte_addr(ctx, hop4_addr, virt_addr);
		curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;
	}

	if (hdev->dram_default_page_mapping && is_dram_addr) {
		u64 default_pte = (prop->mmu_dram_default_page_addr &
					PTE_PHYS_ADDR_MASK) | LAST_MASK |
						PAGE_PRESENT_MASK;

		if (curr_pte != default_pte) {
			dev_err(hdev->dev,
				"DRAM: mapping already exists for virt_addr 0x%llx\n",
					virt_addr);
			rc = -EINVAL;
			goto err;
		}

		if (hop1_new || hop2_new || hop3_new || hop4_new) {
			dev_err(hdev->dev,
				"DRAM mapping should not allocate more hops\n");
			rc = -EFAULT;
			goto err;
		}
	} else if (curr_pte & PAGE_PRESENT_MASK) {
		dev_err(hdev->dev,
			"mapping already exists for virt_addr 0x%llx\n",
				virt_addr);

		dev_dbg(hdev->dev, "hop0 pte: 0x%llx (0x%llx)\n",
			*(u64 *) (uintptr_t) hop0_pte_addr, hop0_pte_addr);
		dev_dbg(hdev->dev, "hop1 pte: 0x%llx (0x%llx)\n",
			*(u64 *) (uintptr_t) hop1_pte_addr, hop1_pte_addr);
		dev_dbg(hdev->dev, "hop2 pte: 0x%llx (0x%llx)\n",
			*(u64 *) (uintptr_t) hop2_pte_addr, hop2_pte_addr);
		dev_dbg(hdev->dev, "hop3 pte: 0x%llx (0x%llx)\n",
			*(u64 *) (uintptr_t) hop3_pte_addr, hop3_pte_addr);

		if (!is_huge)
			dev_dbg(hdev->dev, "hop4 pte: 0x%llx (0x%llx)\n",
				*(u64 *) (uintptr_t) hop4_pte_addr,
				hop4_pte_addr);

		rc = -EINVAL;
		goto err;
	}

	curr_pte = (phys_addr & PTE_PHYS_ADDR_MASK) | LAST_MASK
			| PAGE_PRESENT_MASK;

	if (is_huge)
		write_final_pte(ctx, hop3_pte_addr, curr_pte);
	else
		write_final_pte(ctx, hop4_pte_addr, curr_pte);

	if (hop1_new) {
		curr_pte =
			(hop1_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
		write_pte(ctx, hop0_pte_addr, curr_pte);
	}
	if (hop2_new) {
		curr_pte =
			(hop2_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
		write_pte(ctx, hop1_pte_addr, curr_pte);
		get_pte(ctx, hop1_addr);
	}
	if (hop3_new) {
		curr_pte =
			(hop3_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
		write_pte(ctx, hop2_pte_addr, curr_pte);
		get_pte(ctx, hop2_addr);
	}

	if (!is_huge) {
		if (hop4_new) {
			curr_pte = (hop4_addr & PTE_PHYS_ADDR_MASK) |
					PAGE_PRESENT_MASK;
			write_pte(ctx, hop3_pte_addr, curr_pte);
			get_pte(ctx, hop3_addr);
		}

		get_pte(ctx, hop4_addr);
	} else {
		get_pte(ctx, hop3_addr);
	}

	flush(ctx);

	return 0;

err:
	if (hop4_new)
		free_hop(ctx, hop4_addr);
	if (hop3_new)
		free_hop(ctx, hop3_addr);
	if (hop2_new)
		free_hop(ctx, hop2_addr);
	if (hop1_new)
		free_hop(ctx, hop1_addr);

	return rc;
}

/*
 * hl_mmu_map - maps a virtual addr to physical addr
 *
 * @ctx: pointer to the context structure
 * @virt_addr: virt addr to map from
 * @phys_addr: phys addr to map to
 * @page_size: physical page size
 *
 * This function does the following:
 * - Check that the virt addr is not mapped
 * - Allocate pgts as necessary in order to map the virt addr to the phys
 * - Returns 0 on success, -EINVAL if addr is already mapped, or -ENOMEM.
 *
 * Because this function changes the page tables in the device and because it
 * changes the MMU hash, it must be protected by a lock.
 * However, because it maps only a single page, the lock should be implemented
 * at a higher level in order to protect the entire mapping of the memory area.
 */
int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
{
	struct hl_device *hdev = ctx->hdev;
	u64 real_virt_addr, real_phys_addr;
	u32 real_page_size, npages;
	int i, rc, mapped_cnt = 0;

	if (!hdev->mmu_enable)
		return 0;

	/*
	 * The H/W handles mapping of 4KB/2MB pages. Hence if the host page
	 * size is bigger, we break it into sub-pages and map them separately.
	 */
	if ((page_size % PAGE_SIZE_2MB) == 0) {
		real_page_size = PAGE_SIZE_2MB;
	} else if ((page_size % PAGE_SIZE_4KB) == 0) {
		real_page_size = PAGE_SIZE_4KB;
	} else {
		dev_err(hdev->dev,
			"page size of %u is not 4KB nor 2MB aligned, can't map\n",
				page_size);

		return -EFAULT;
	}

	WARN_ONCE((phys_addr & (real_page_size - 1)),
		"Mapping 0x%llx with page size of 0x%x is erroneous! Address must be divisible by page size",
		phys_addr, real_page_size);

	npages = page_size / real_page_size;
	real_virt_addr = virt_addr;
	real_phys_addr = phys_addr;

	for (i = 0 ; i < npages ; i++) {
		rc = _hl_mmu_map(ctx, real_virt_addr, real_phys_addr,
				real_page_size);
		if (rc)
			goto err;

		real_virt_addr += real_page_size;
		real_phys_addr += real_page_size;
		mapped_cnt++;
	}

	return 0;

err:
	real_virt_addr = virt_addr;
	for (i = 0 ; i < mapped_cnt ; i++) {
		if (_hl_mmu_unmap(ctx, real_virt_addr))
			dev_warn_ratelimited(hdev->dev,
				"failed to unmap va: 0x%llx\n", real_virt_addr);

		real_virt_addr += real_page_size;
	}

	return rc;
}

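/*
 * Illustrative usage sketch only (the real callers live in the memory
 * management code and may differ): per-page calls are expected to be
 * serialized with ctx->mmu_lock, e.g.:
 *
 *	mutex_lock(&ctx->mmu_lock);
 *	rc = hl_mmu_map(ctx, virt_addr, phys_addr, page_size);
 *	mutex_unlock(&ctx->mmu_lock);
 */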
/*
 * hl_mmu_swap_out - marks all mappings of the given ctx as swapped out
 *
 * @ctx: pointer to the context structure
 *
 */
void hl_mmu_swap_out(struct hl_ctx *ctx)
{

}

/*
 * hl_mmu_swap_in - marks all mappings of the given ctx as swapped in
 *
 * @ctx: pointer to the context structure
 *
 */
void hl_mmu_swap_in(struct hl_ctx *ctx)
{

}