linux/arch/x86/mm/ioremap.c
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Re-map IO memory to kernel address space so that we can access it.
   4 * This is needed for high PCI addresses that aren't mapped in the
   5 * 640k-1MB IO memory area on PC's
   6 *
   7 * (C) Copyright 1995 1996 Linus Torvalds
   8 */
   9
  10#include <linux/memblock.h>
  11#include <linux/init.h>
  12#include <linux/io.h>
  13#include <linux/ioport.h>
  14#include <linux/slab.h>
  15#include <linux/vmalloc.h>
  16#include <linux/mmiotrace.h>
  17#include <linux/mem_encrypt.h>
  18#include <linux/efi.h>
  19#include <linux/pgtable.h>
  20
  21#include <asm/set_memory.h>
  22#include <asm/e820/api.h>
  23#include <asm/efi.h>
  24#include <asm/fixmap.h>
  25#include <asm/tlbflush.h>
  26#include <asm/pgalloc.h>
  27#include <asm/memtype.h>
  28#include <asm/setup.h>
  29
  30#include "physaddr.h"
  31
  32/*
  33 * Descriptor controlling ioremap() behavior.
  34 */
  35struct ioremap_desc {
  36        unsigned int flags;
  37};
  38
  39/*
  40 * Fix up the linear direct mapping of the kernel to avoid cache attribute
  41 * conflicts.
  42 */
  43int ioremap_change_attr(unsigned long vaddr, unsigned long size,
  44                        enum page_cache_mode pcm)
  45{
  46        unsigned long nrpages = size >> PAGE_SHIFT;
  47        int err;
  48
  49        switch (pcm) {
  50        case _PAGE_CACHE_MODE_UC:
  51        default:
  52                err = _set_memory_uc(vaddr, nrpages);
  53                break;
  54        case _PAGE_CACHE_MODE_WC:
  55                err = _set_memory_wc(vaddr, nrpages);
  56                break;
  57        case _PAGE_CACHE_MODE_WT:
  58                err = _set_memory_wt(vaddr, nrpages);
  59                break;
  60        case _PAGE_CACHE_MODE_WB:
  61                err = _set_memory_wb(vaddr, nrpages);
  62                break;
  63        }
  64
  65        return err;
  66}
  67
  68/* Does the range (or a subset of it) contain normal RAM? */
  69static unsigned int __ioremap_check_ram(struct resource *res)
  70{
  71        unsigned long start_pfn, stop_pfn;
  72        unsigned long i;
  73
  74        if ((res->flags & IORESOURCE_SYSTEM_RAM) != IORESOURCE_SYSTEM_RAM)
  75                return 0;
  76
  77        start_pfn = (res->start + PAGE_SIZE - 1) >> PAGE_SHIFT;
  78        stop_pfn = (res->end + 1) >> PAGE_SHIFT;
  79        if (stop_pfn > start_pfn) {
  80                for (i = 0; i < (stop_pfn - start_pfn); ++i)
  81                        if (pfn_valid(start_pfn + i) &&
  82                            !PageReserved(pfn_to_page(start_pfn + i)))
  83                                return IORES_MAP_SYSTEM_RAM;
  84        }
  85
  86        return 0;
  87}
  88
  89/*
  90 * In a SEV guest, NONE and RESERVED should not be mapped encrypted because
  91 * in such a guest the whole memory is already encrypted.
  92 */
  93static unsigned int __ioremap_check_encrypted(struct resource *res)
  94{
  95        if (!sev_active())
  96                return 0;
  97
  98        switch (res->desc) {
  99        case IORES_DESC_NONE:
 100        case IORES_DESC_RESERVED:
 101                break;
 102        default:
 103                return IORES_MAP_ENCRYPTED;
 104        }
 105
 106        return 0;
 107}
 108
 109/*
 110 * The EFI runtime services data area is not covered by walk_mem_res(), but must
 111 * be mapped encrypted when SEV is active.
 112 */
 113static void __ioremap_check_other(resource_size_t addr, struct ioremap_desc *desc)
 114{
 115        if (!sev_active())
 116                return;
 117
 118        if (!IS_ENABLED(CONFIG_EFI))
 119                return;
 120
 121        if (efi_mem_type(addr) == EFI_RUNTIME_SERVICES_DATA ||
 122            (efi_mem_type(addr) == EFI_BOOT_SERVICES_DATA &&
 123             efi_mem_attributes(addr) & EFI_MEMORY_RUNTIME))
 124                desc->flags |= IORES_MAP_ENCRYPTED;
 125}
 126
 127static int __ioremap_collect_map_flags(struct resource *res, void *arg)
 128{
 129        struct ioremap_desc *desc = arg;
 130
 131        if (!(desc->flags & IORES_MAP_SYSTEM_RAM))
 132                desc->flags |= __ioremap_check_ram(res);
 133
 134        if (!(desc->flags & IORES_MAP_ENCRYPTED))
 135                desc->flags |= __ioremap_check_encrypted(res);
 136
 137        return ((desc->flags & (IORES_MAP_SYSTEM_RAM | IORES_MAP_ENCRYPTED)) ==
 138                               (IORES_MAP_SYSTEM_RAM | IORES_MAP_ENCRYPTED));
 139}
 140
 141/*
 142 * To avoid multiple resource walks, this function walks resources marked as
 143 * IORESOURCE_MEM and IORESOURCE_BUSY, looking for system RAM and/or a
 144 * resource not described as IORES_DESC_NONE (e.g. IORES_DESC_ACPI_TABLES).
 145 *
 146 * After that, deal with misc other ranges in __ioremap_check_other() which do
 147 * not fall into the above category.
 148 */
 149static void __ioremap_check_mem(resource_size_t addr, unsigned long size,
 150                                struct ioremap_desc *desc)
 151{
 152        u64 start, end;
 153
 154        start = (u64)addr;
 155        end = start + size - 1;
 156        memset(desc, 0, sizeof(struct ioremap_desc));
 157
 158        walk_mem_res(start, end, desc, __ioremap_collect_map_flags);
 159
 160        __ioremap_check_other(addr, desc);
 161}
 162
 163/*
 164 * Remap an arbitrary physical address space into the kernel virtual
 165 * address space. It transparently creates kernel huge I/O mappings when
 166 * the physical address is aligned to a huge page size (1GB or 2MB) and
 167 * the requested size is at least the huge page size.
 168 *
 169 * NOTE: MTRRs can override PAT memory types with a 4KB granularity.
 170 * Therefore, the mapping code falls back to smaller pages, down to 4KB,
 171 * when a mapping range is covered by non-WB MTRRs.
 172 *
 173 * NOTE! We need to allow non-page-aligned mappings too: we will obviously
 174 * have to convert them into an offset in a page-aligned mapping, but the
 175 * caller shouldn't need to know that small detail.
 176 */
 177static void __iomem *
 178__ioremap_caller(resource_size_t phys_addr, unsigned long size,
 179                 enum page_cache_mode pcm, void *caller, bool encrypted)
 180{
 181        unsigned long offset, vaddr;
 182        resource_size_t last_addr;
 183        const resource_size_t unaligned_phys_addr = phys_addr;
 184        const unsigned long unaligned_size = size;
 185        struct ioremap_desc io_desc;
 186        struct vm_struct *area;
 187        enum page_cache_mode new_pcm;
 188        pgprot_t prot;
 189        int retval;
 190        void __iomem *ret_addr;
 191
 192        /* Don't allow wraparound or zero size */
 193        last_addr = phys_addr + size - 1;
 194        if (!size || last_addr < phys_addr)
 195                return NULL;
 196
 197        if (!phys_addr_valid(phys_addr)) {
 198                printk(KERN_WARNING "ioremap: invalid physical address %llx\n",
 199                       (unsigned long long)phys_addr);
 200                WARN_ON_ONCE(1);
 201                return NULL;
 202        }
 203
 204        __ioremap_check_mem(phys_addr, size, &io_desc);
 205
 206        /*
 207         * Don't allow anybody to remap normal RAM that we're using..
 208         */
 209        if (io_desc.flags & IORES_MAP_SYSTEM_RAM) {
 210                WARN_ONCE(1, "ioremap on RAM at %pa - %pa\n",
 211                          &phys_addr, &last_addr);
 212                return NULL;
 213        }
 214
 215        /*
 216         * Mappings have to be page-aligned
 217         */
 218        offset = phys_addr & ~PAGE_MASK;
 219        phys_addr &= PHYSICAL_PAGE_MASK;
 220        size = PAGE_ALIGN(last_addr+1) - phys_addr;
 221
 222        retval = memtype_reserve(phys_addr, (u64)phys_addr + size,
 223                                                pcm, &new_pcm);
 224        if (retval) {
 225                printk(KERN_ERR "ioremap memtype_reserve failed %d\n", retval);
 226                return NULL;
 227        }
 228
 229        if (pcm != new_pcm) {
 230                if (!is_new_memtype_allowed(phys_addr, size, pcm, new_pcm)) {
 231                        printk(KERN_ERR
 232                "ioremap error for 0x%llx-0x%llx, requested 0x%x, got 0x%x\n",
 233                                (unsigned long long)phys_addr,
 234                                (unsigned long long)(phys_addr + size),
 235                                pcm, new_pcm);
 236                        goto err_free_memtype;
 237                }
 238                pcm = new_pcm;
 239        }
 240
 241        /*
 242         * If the page being mapped is in memory and SEV is active then
 243         * make sure the memory encryption attribute is enabled in the
 244         * resulting mapping.
 245         */
 246        prot = PAGE_KERNEL_IO;
 247        if ((io_desc.flags & IORES_MAP_ENCRYPTED) || encrypted)
 248                prot = pgprot_encrypted(prot);
 249
 250        switch (pcm) {
 251        case _PAGE_CACHE_MODE_UC:
 252        default:
 253                prot = __pgprot(pgprot_val(prot) |
 254                                cachemode2protval(_PAGE_CACHE_MODE_UC));
 255                break;
 256        case _PAGE_CACHE_MODE_UC_MINUS:
 257                prot = __pgprot(pgprot_val(prot) |
 258                                cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS));
 259                break;
 260        case _PAGE_CACHE_MODE_WC:
 261                prot = __pgprot(pgprot_val(prot) |
 262                                cachemode2protval(_PAGE_CACHE_MODE_WC));
 263                break;
 264        case _PAGE_CACHE_MODE_WT:
 265                prot = __pgprot(pgprot_val(prot) |
 266                                cachemode2protval(_PAGE_CACHE_MODE_WT));
 267                break;
 268        case _PAGE_CACHE_MODE_WB:
 269                break;
 270        }
 271
 272        /*
 273         * Ok, go for it..
 274         */
 275        area = get_vm_area_caller(size, VM_IOREMAP, caller);
 276        if (!area)
 277                goto err_free_memtype;
 278        area->phys_addr = phys_addr;
 279        vaddr = (unsigned long) area->addr;
 280
 281        if (memtype_kernel_map_sync(phys_addr, size, pcm))
 282                goto err_free_area;
 283
 284        if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot))
 285                goto err_free_area;
 286
 287        ret_addr = (void __iomem *) (vaddr + offset);
 288        mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr);
 289
 290        /*
 291         * Check if the request spans more than any BAR in the iomem resource
 292         * tree.
 293         */
 294        if (iomem_map_sanity_check(unaligned_phys_addr, unaligned_size))
 295                pr_warn("caller %pS mapping multiple BARs\n", caller);
 296
 297        return ret_addr;
 298err_free_area:
 299        free_vm_area(area);
 300err_free_memtype:
 301        memtype_free(phys_addr, phys_addr + size);
 302        return NULL;
 303}
 304
 305/**
 306 * ioremap     -   map bus memory into CPU space
 307 * @phys_addr:    bus address of the memory
 308 * @size:      size of the resource to map
 309 *
 310 * ioremap performs a platform specific sequence of operations to
 311 * make bus memory CPU accessible via the readb/readw/readl/writeb/
 312 * writew/writel functions and the other mmio helpers. The returned
 313 * address is not guaranteed to be usable directly as a virtual
 314 * address.
 315 *
 316 * This version of ioremap ensures that the memory is marked uncacheable
 317 * on the CPU as well as honouring existing caching rules from things like
 318 * the PCI bus. Note that there are other caches and buffers on many
 319 * busses. In particular driver authors should read up on PCI writes.
 320 *
 321 * It's useful if some control registers are in such an area and
 322 * write combining or read caching is not desirable.
 323 *
 324 * Must be freed with iounmap.
 325 */
 326void __iomem *ioremap(resource_size_t phys_addr, unsigned long size)
 327{
 328        /*
 329         * Ideally, this should be:
 330         *      pat_enabled() ? _PAGE_CACHE_MODE_UC : _PAGE_CACHE_MODE_UC_MINUS;
 331         *
 332         * Till we fix all X drivers to use ioremap_wc(), we will use
 333         * UC MINUS. Drivers that are certain they need or can already
 334         * be converted over to strong UC can use ioremap_uc().
 335         */
 336        enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC_MINUS;
 337
 338        return __ioremap_caller(phys_addr, size, pcm,
 339                                __builtin_return_address(0), false);
 340}
 341EXPORT_SYMBOL(ioremap);
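/*
 * Illustrative sketch (not compiled): how a driver typically pairs ioremap()
 * with the MMIO accessors and iounmap(). The device, BAR layout and register
 * offsets below are hypothetical; it assumes <linux/pci.h> for the
 * pci_resource_*() helpers.
 */
#if 0
static int example_probe_regs(struct pci_dev *pdev)
{
        void __iomem *regs;
        u32 status;

        /* Map BAR 0, assumed to hold the device's MMIO register block */
        regs = ioremap(pci_resource_start(pdev, 0), pci_resource_len(pdev, 0));
        if (!regs)
                return -ENOMEM;

        status = readl(regs + 0x04);            /* hypothetical STATUS register */
        writel(status | 0x1, regs + 0x00);      /* hypothetical CTRL register */

        iounmap(regs);                          /* every ioremap() needs an iounmap() */
        return 0;
}
#endif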
 342
 343/**
 344 * ioremap_uc     -   map bus memory into CPU space as strongly uncachable
 345 * @phys_addr:    bus address of the memory
 346 * @size:      size of the resource to map
 347 *
 348 * ioremap_uc performs a platform specific sequence of operations to
 349 * make bus memory CPU accessible via the readb/readw/readl/writeb/
 350 * writew/writel functions and the other mmio helpers. The returned
 351 * address is not guaranteed to be usable directly as a virtual
 352 * address.
 353 *
 354 * This version of ioremap ensures that the memory is marked with a strong
 355 * preference as completely uncacheable on the CPU when possible. For non-PAT
 356 * systems this ends up setting page-attribute flags PCD=1, PWT=1. For PAT
 357 * systems this will set the PAT entry for the pages as strong UC.  This call
 358 * will honor existing caching rules from things like the PCI bus. Note that
 359 * there are other caches and buffers on many busses. In particular driver
 360 * authors should read up on PCI writes.
 361 *
 362 * It's useful if some control registers are in such an area and
 363 * write combining or read caching is not desirable.
 364 *
 365 * Must be freed with iounmap.
 366 */
 367void __iomem *ioremap_uc(resource_size_t phys_addr, unsigned long size)
 368{
 369        enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC;
 370
 371        return __ioremap_caller(phys_addr, size, pcm,
 372                                __builtin_return_address(0), false);
 373}
 374EXPORT_SYMBOL_GPL(ioremap_uc);
 375
 376/**
 377 * ioremap_wc   -       map memory into CPU space write combined
 378 * @phys_addr:  bus address of the memory
 379 * @size:       size of the resource to map
 380 *
 381 * This version of ioremap ensures that the memory is marked write combining.
 382 * Write combining allows faster writes to some hardware devices.
 383 *
 384 * Must be freed with iounmap.
 385 */
 386void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size)
 387{
 388        return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WC,
 389                                        __builtin_return_address(0), false);
 390}
 391EXPORT_SYMBOL(ioremap_wc);
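/*
 * Illustrative sketch (not compiled): write-combined mappings are typically
 * used for large apertures such as framebuffers, where streaming writes
 * benefit from WC. The aperture below is hypothetical.
 */
#if 0
static void example_fill_fb(resource_size_t fb_base, unsigned long fb_len,
                            const void *src)
{
        void __iomem *fb = ioremap_wc(fb_base, fb_len);

        if (!fb)
                return;

        memcpy_toio(fb, src, fb_len);   /* streaming copy into the WC aperture */
        iounmap(fb);
}
#endif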
 392
 393/**
 394 * ioremap_wt   -       map memory into CPU space write through
 395 * @phys_addr:  bus address of the memory
 396 * @size:       size of the resource to map
 397 *
 398 * This version of ioremap ensures that the memory is marked write through.
 399 * Write through stores data into memory while keeping the cache up-to-date.
 400 *
 401 * Must be freed with iounmap.
 402 */
 403void __iomem *ioremap_wt(resource_size_t phys_addr, unsigned long size)
 404{
 405        return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WT,
 406                                        __builtin_return_address(0), false);
 407}
 408EXPORT_SYMBOL(ioremap_wt);
 409
 410void __iomem *ioremap_encrypted(resource_size_t phys_addr, unsigned long size)
 411{
 412        return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WB,
 413                                __builtin_return_address(0), true);
 414}
 415EXPORT_SYMBOL(ioremap_encrypted);
 416
 417void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
 418{
 419        return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WB,
 420                                __builtin_return_address(0), false);
 421}
 422EXPORT_SYMBOL(ioremap_cache);
 423
 424void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
 425                                unsigned long prot_val)
 426{
 427        return __ioremap_caller(phys_addr, size,
 428                                pgprot2cachemode(__pgprot(prot_val)),
 429                                __builtin_return_address(0), false);
 430}
 431EXPORT_SYMBOL(ioremap_prot);
 432
 433/**
 434 * iounmap - Free an IO remapping
 435 * @addr: virtual address from ioremap_*
 436 *
 437 * Caller must ensure there is only one unmapping for the same pointer.
 438 */
 439void iounmap(volatile void __iomem *addr)
 440{
 441        struct vm_struct *p, *o;
 442
 443        if ((void __force *)addr <= high_memory)
 444                return;
 445
 446        /*
 447         * The PCI/ISA range special-casing was removed from __ioremap()
 448         * so this check, in theory, can be removed. However, there are
 449         * cases where iounmap() is called for addresses not obtained via
 450         * ioremap() (vga16fb for example). Add a warning so that these
 451         * cases can be caught and fixed.
 452         */
 453        if ((void __force *)addr >= phys_to_virt(ISA_START_ADDRESS) &&
 454            (void __force *)addr < phys_to_virt(ISA_END_ADDRESS)) {
 455                WARN(1, "iounmap() called for ISA range not obtained using ioremap()\n");
 456                return;
 457        }
 458
 459        mmiotrace_iounmap(addr);
 460
 461        addr = (volatile void __iomem *)
 462                (PAGE_MASK & (unsigned long __force)addr);
 463
 464        /* Use the vm area unlocked, assuming the caller
 465           ensures there isn't another iounmap for the same address
 466           in parallel. Reuse of the virtual address is prevented by
 467           leaving it in the global lists until we're done with it.
 468           cpa takes care of the direct mappings. */
 469        p = find_vm_area((void __force *)addr);
 470
 471        if (!p) {
 472                printk(KERN_ERR "iounmap: bad address %p\n", addr);
 473                dump_stack();
 474                return;
 475        }
 476
 477        memtype_free(p->phys_addr, p->phys_addr + get_vm_area_size(p));
 478
 479        /* Finally remove it */
 480        o = remove_vm_area((void __force *)addr);
 481        BUG_ON(p != o || o == NULL);
 482        kfree(p);
 483}
 484EXPORT_SYMBOL(iounmap);
 485
 486/*
 487 * Convert a physical pointer to a virtual kernel pointer for /dev/mem
 488 * access
 489 */
 490void *xlate_dev_mem_ptr(phys_addr_t phys)
 491{
 492        unsigned long start  = phys &  PAGE_MASK;
 493        unsigned long offset = phys & ~PAGE_MASK;
 494        void *vaddr;
 495
 496        /* memremap() maps if RAM, otherwise falls back to ioremap() */
 497        vaddr = memremap(start, PAGE_SIZE, MEMREMAP_WB);
 498
 499        /* Only add the offset on success and return NULL if memremap() failed */
 500        if (vaddr)
 501                vaddr += offset;
 502
 503        return vaddr;
 504}
 505
 506void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
 507{
 508        memunmap((void *)((unsigned long)addr & PAGE_MASK));
 509}
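/*
 * Illustrative sketch (not compiled): the two helpers above are used in
 * pairs by the /dev/mem access paths. A simplified read of one word at a
 * physical address, with the usual map/unmap bracketing, looks like this.
 */
#if 0
static int example_peek_phys(phys_addr_t phys, u32 *val)
{
        void *ptr = xlate_dev_mem_ptr(phys);

        if (!ptr)
                return -EFAULT;

        *val = *(u32 *)ptr;             /* read a single word at 'phys' */
        unxlate_dev_mem_ptr(phys, ptr);
        return 0;
}
#endif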
 510
 511/*
 512 * Examine the physical address to determine if it is an area of memory
 513 * that should be mapped decrypted. If the memory is not part of the
 514 * kernel usable area, it was accessed and created decrypted, so these
 515 * areas should be mapped decrypted. And since the encryption key can
 516 * change across reboots, persistent memory should also be mapped
 517 * decrypted.
 518 *
 519 * If SEV is active, that implies that BIOS/UEFI also ran encrypted so
 520 * only persistent memory should be mapped decrypted.
 521 */
 522static bool memremap_should_map_decrypted(resource_size_t phys_addr,
 523                                          unsigned long size)
 524{
 525        int is_pmem;
 526
 527        /*
 528         * Check if the address is part of a persistent memory region.
 529         * This check covers areas added by E820, EFI and ACPI.
 530         */
 531        is_pmem = region_intersects(phys_addr, size, IORESOURCE_MEM,
 532                                    IORES_DESC_PERSISTENT_MEMORY);
 533        if (is_pmem != REGION_DISJOINT)
 534                return true;
 535
 536        /*
 537         * Check if the non-volatile attribute is set for an EFI
 538         * reserved area.
 539         */
 540        if (efi_enabled(EFI_BOOT)) {
 541                switch (efi_mem_type(phys_addr)) {
 542                case EFI_RESERVED_TYPE:
 543                        if (efi_mem_attributes(phys_addr) & EFI_MEMORY_NV)
 544                                return true;
 545                        break;
 546                default:
 547                        break;
 548                }
 549        }
 550
 551        /* Check if the address is outside kernel usable area */
 552        switch (e820__get_entry_type(phys_addr, phys_addr + size - 1)) {
 553        case E820_TYPE_RESERVED:
 554        case E820_TYPE_ACPI:
 555        case E820_TYPE_NVS:
 556        case E820_TYPE_UNUSABLE:
 557                /* For SEV, these areas are encrypted */
 558                if (sev_active())
 559                        break;
 560                fallthrough;
 561
 562        case E820_TYPE_PRAM:
 563                return true;
 564        default:
 565                break;
 566        }
 567
 568        return false;
 569}
 570
 571/*
 572 * Examine the physical address to determine if it is EFI data. Check
 573 * it against the boot params structure and EFI tables and memory types.
 574 */
 575static bool memremap_is_efi_data(resource_size_t phys_addr,
 576                                 unsigned long size)
 577{
 578        u64 paddr;
 579
 580        /* Check if the address is part of EFI boot/runtime data */
 581        if (!efi_enabled(EFI_BOOT))
 582                return false;
 583
 584        paddr = boot_params.efi_info.efi_memmap_hi;
 585        paddr <<= 32;
 586        paddr |= boot_params.efi_info.efi_memmap;
 587        if (phys_addr == paddr)
 588                return true;
 589
 590        paddr = boot_params.efi_info.efi_systab_hi;
 591        paddr <<= 32;
 592        paddr |= boot_params.efi_info.efi_systab;
 593        if (phys_addr == paddr)
 594                return true;
 595
 596        if (efi_is_table_address(phys_addr))
 597                return true;
 598
 599        switch (efi_mem_type(phys_addr)) {
 600        case EFI_BOOT_SERVICES_DATA:
 601        case EFI_RUNTIME_SERVICES_DATA:
 602                return true;
 603        default:
 604                break;
 605        }
 606
 607        return false;
 608}
 609
 610/*
 611 * Examine the physical address to determine if it is boot data by checking
 612 * it against the boot params setup_data chain.
 613 */
 614static bool memremap_is_setup_data(resource_size_t phys_addr,
 615                                   unsigned long size)
 616{
 617        struct setup_data *data;
 618        u64 paddr, paddr_next;
 619
 620        paddr = boot_params.hdr.setup_data;
 621        while (paddr) {
 622                unsigned int len;
 623
 624                if (phys_addr == paddr)
 625                        return true;
 626
 627                data = memremap(paddr, sizeof(*data),
 628                                MEMREMAP_WB | MEMREMAP_DEC);
 629
 630                paddr_next = data->next;
 631                len = data->len;
 632
 633                if ((phys_addr > paddr) && (phys_addr < (paddr + len))) {
 634                        memunmap(data);
 635                        return true;
 636                }
 637
 638                if (data->type == SETUP_INDIRECT &&
 639                    ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) {
 640                        paddr = ((struct setup_indirect *)data->data)->addr;
 641                        len = ((struct setup_indirect *)data->data)->len;
 642                }
 643
 644                memunmap(data);
 645
 646                if ((phys_addr > paddr) && (phys_addr < (paddr + len)))
 647                        return true;
 648
 649                paddr = paddr_next;
 650        }
 651
 652        return false;
 653}
 654
 655/*
 656 * Examine the physical address to determine if it is boot data by checking
 657 * it against the boot params setup_data chain (early boot version).
 658 */
 659static bool __init early_memremap_is_setup_data(resource_size_t phys_addr,
 660                                                unsigned long size)
 661{
 662        struct setup_data *data;
 663        u64 paddr, paddr_next;
 664
 665        paddr = boot_params.hdr.setup_data;
 666        while (paddr) {
 667                unsigned int len;
 668
 669                if (phys_addr == paddr)
 670                        return true;
 671
 672                data = early_memremap_decrypted(paddr, sizeof(*data));
 673
 674                paddr_next = data->next;
 675                len = data->len;
 676
 677                early_memunmap(data, sizeof(*data));
 678
 679                if ((phys_addr > paddr) && (phys_addr < (paddr + len)))
 680                        return true;
 681
 682                paddr = paddr_next;
 683        }
 684
 685        return false;
 686}
 687
 688/*
 689 * Architecture function to determine if RAM remap is allowed. By default, a
 690 * RAM remap will map the data as encrypted. Determine if a RAM remap should
 691 * not be done so that the data will be mapped decrypted.
 692 */
 693bool arch_memremap_can_ram_remap(resource_size_t phys_addr, unsigned long size,
 694                                 unsigned long flags)
 695{
 696        if (!mem_encrypt_active())
 697                return true;
 698
 699        if (flags & MEMREMAP_ENC)
 700                return true;
 701
 702        if (flags & MEMREMAP_DEC)
 703                return false;
 704
 705        if (sme_active()) {
 706                if (memremap_is_setup_data(phys_addr, size) ||
 707                    memremap_is_efi_data(phys_addr, size))
 708                        return false;
 709        }
 710
 711        return !memremap_should_map_decrypted(phys_addr, size);
 712}
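/*
 * Illustrative sketch (not compiled): callers steer the decision above via
 * the memremap() flags. A hypothetical caller that needs a decrypted view of
 * a RAM range passes MEMREMAP_DEC explicitly, as the setup_data walk earlier
 * in this file does.
 */
#if 0
static void *example_map_decrypted(resource_size_t phys, unsigned long len)
{
        /* MEMREMAP_DEC overrides the default encrypted RAM mapping */
        return memremap(phys, len, MEMREMAP_WB | MEMREMAP_DEC);
}
#endif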
 713
 714/*
 715 * Architecture override of __weak function to adjust the protection attributes
 716 * used when remapping memory. By default, early_memremap() will map the data
 717 * as encrypted. Determine if an encrypted mapping should not be done and set
 718 * the appropriate protection attributes.
 719 */
 720pgprot_t __init early_memremap_pgprot_adjust(resource_size_t phys_addr,
 721                                             unsigned long size,
 722                                             pgprot_t prot)
 723{
 724        bool encrypted_prot;
 725
 726        if (!mem_encrypt_active())
 727                return prot;
 728
 729        encrypted_prot = true;
 730
 731        if (sme_active()) {
 732                if (early_memremap_is_setup_data(phys_addr, size) ||
 733                    memremap_is_efi_data(phys_addr, size))
 734                        encrypted_prot = false;
 735        }
 736
 737        if (encrypted_prot && memremap_should_map_decrypted(phys_addr, size))
 738                encrypted_prot = false;
 739
 740        return encrypted_prot ? pgprot_encrypted(prot)
 741                              : pgprot_decrypted(prot);
 742}
 743
 744bool phys_mem_access_encrypted(unsigned long phys_addr, unsigned long size)
 745{
 746        return arch_memremap_can_ram_remap(phys_addr, size, 0);
 747}
 748
 749#ifdef CONFIG_AMD_MEM_ENCRYPT
 750/* Remap memory with encryption */
 751void __init *early_memremap_encrypted(resource_size_t phys_addr,
 752                                      unsigned long size)
 753{
 754        return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_ENC);
 755}
 756
 757/*
 758 * Remap memory with encryption and write-protection - cannot be called
 759 * before pat_init() is called
 760 */
 761void __init *early_memremap_encrypted_wp(resource_size_t phys_addr,
 762                                         unsigned long size)
 763{
 764        if (!x86_has_pat_wp())
 765                return NULL;
 766        return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_ENC_WP);
 767}
 768
 769/* Remap memory without encryption */
 770void __init *early_memremap_decrypted(resource_size_t phys_addr,
 771                                      unsigned long size)
 772{
 773        return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_NOENC);
 774}
 775
 776/*
 777 * Remap memory without encryption but with write-protection - cannot be called
 778 * before pat_init() is called
 779 */
 780void __init *early_memremap_decrypted_wp(resource_size_t phys_addr,
 781                                         unsigned long size)
 782{
 783        if (!x86_has_pat_wp())
 784                return NULL;
 785        return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_NOENC_WP);
 786}
 787#endif  /* CONFIG_AMD_MEM_ENCRYPT */
 788
 789static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss;
 790
 791static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
 792{
 793        /* Don't assume we're using swapper_pg_dir at this point */
 794        pgd_t *base = __va(read_cr3_pa());
 795        pgd_t *pgd = &base[pgd_index(addr)];
 796        p4d_t *p4d = p4d_offset(pgd, addr);
 797        pud_t *pud = pud_offset(p4d, addr);
 798        pmd_t *pmd = pmd_offset(pud, addr);
 799
 800        return pmd;
 801}
 802
 803static inline pte_t * __init early_ioremap_pte(unsigned long addr)
 804{
 805        return &bm_pte[pte_index(addr)];
 806}
 807
 808bool __init is_early_ioremap_ptep(pte_t *ptep)
 809{
 810        return ptep >= &bm_pte[0] && ptep < &bm_pte[PAGE_SIZE/sizeof(pte_t)];
 811}
 812
 813void __init early_ioremap_init(void)
 814{
 815        pmd_t *pmd;
 816
 817#ifdef CONFIG_X86_64
 818        BUILD_BUG_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
 819#else
 820        WARN_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
 821#endif
 822
 823        early_ioremap_setup();
 824
 825        pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
 826        memset(bm_pte, 0, sizeof(bm_pte));
 827        pmd_populate_kernel(&init_mm, pmd, bm_pte);
 828
 829        /*
 830         * The boot-ioremap range spans multiple pmds, for which
 831         * we are not prepared:
 832         */
 833#define __FIXADDR_TOP (-PAGE_SIZE)
 834        BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
 835                     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
 836#undef __FIXADDR_TOP
 837        if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
 838                WARN_ON(1);
 839                printk(KERN_WARNING "pmd %p != %p\n",
 840                       pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END)));
 841                printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
 842                        fix_to_virt(FIX_BTMAP_BEGIN));
 843                printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END):   %08lx\n",
 844                        fix_to_virt(FIX_BTMAP_END));
 845
 846                printk(KERN_WARNING "FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
 847                printk(KERN_WARNING "FIX_BTMAP_BEGIN:     %d\n",
 848                       FIX_BTMAP_BEGIN);
 849        }
 850}
 851
 852void __init __early_set_fixmap(enum fixed_addresses idx,
 853                               phys_addr_t phys, pgprot_t flags)
 854{
 855        unsigned long addr = __fix_to_virt(idx);
 856        pte_t *pte;
 857
 858        if (idx >= __end_of_fixed_addresses) {
 859                BUG();
 860                return;
 861        }
 862        pte = early_ioremap_pte(addr);
 863
 864        /* Sanitize 'flags' against any unsupported bits: */
 865        pgprot_val(flags) &= __supported_pte_mask;
 866
 867        if (pgprot_val(flags))
 868                set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
 869        else
 870                pte_clear(&init_mm, addr, pte);
 871        flush_tlb_one_kernel(addr);
 872}
 873