// SPDX-License-Identifier: GPL-2.0-only
/*
 * Re-map IO memory to kernel address space so that we can access it.
 * This is needed for high PCI addresses that aren't mapped in the
 * 640k-1MB IO memory area on PCs.
 *
 * (C) Copyright 1995 1996 Linus Torvalds
 */

#include <linux/memblock.h>
#include <linux/init.h>
#include <linux/io.h>
#include <linux/ioport.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mmiotrace.h>
#include <linux/mem_encrypt.h>
#include <linux/efi.h>
#include <linux/pgtable.h>

#include <asm/set_memory.h>
#include <asm/e820/api.h>
#include <asm/efi.h>
#include <asm/fixmap.h>
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>
#include <asm/memtype.h>
#include <asm/setup.h>

#include "physaddr.h"

/*
 * Descriptor controlling ioremap() behavior.
 */
struct ioremap_desc {
	unsigned int flags;
};

/*
 * Fix up the linear direct mapping of the kernel to avoid cache attribute
 * conflicts.
 */
int ioremap_change_attr(unsigned long vaddr, unsigned long size,
			enum page_cache_mode pcm)
{
	unsigned long nrpages = size >> PAGE_SHIFT;
	int err;

	switch (pcm) {
	case _PAGE_CACHE_MODE_UC:
	default:
		err = _set_memory_uc(vaddr, nrpages);
		break;
	case _PAGE_CACHE_MODE_WC:
		err = _set_memory_wc(vaddr, nrpages);
		break;
	case _PAGE_CACHE_MODE_WT:
		err = _set_memory_wt(vaddr, nrpages);
		break;
	case _PAGE_CACHE_MODE_WB:
		err = _set_memory_wb(vaddr, nrpages);
		break;
	}

	return err;
}

/* Does the range (or a subset of it) contain normal RAM? */
static unsigned int __ioremap_check_ram(struct resource *res)
{
	unsigned long start_pfn, stop_pfn;
	unsigned long i;

	if ((res->flags & IORESOURCE_SYSTEM_RAM) != IORESOURCE_SYSTEM_RAM)
		return 0;

	start_pfn = (res->start + PAGE_SIZE - 1) >> PAGE_SHIFT;
	stop_pfn = (res->end + 1) >> PAGE_SHIFT;
	if (stop_pfn > start_pfn) {
		for (i = 0; i < (stop_pfn - start_pfn); ++i)
			if (pfn_valid(start_pfn + i) &&
			    !PageReserved(pfn_to_page(start_pfn + i)))
				return IORES_MAP_SYSTEM_RAM;
	}

	return 0;
}

/*
 * In a SEV guest, NONE and RESERVED should not be mapped encrypted because
 * the whole guest memory is already encrypted.
 */
static unsigned int __ioremap_check_encrypted(struct resource *res)
{
	if (!sev_active())
		return 0;

	switch (res->desc) {
	case IORES_DESC_NONE:
	case IORES_DESC_RESERVED:
		break;
	default:
		return IORES_MAP_ENCRYPTED;
	}

	return 0;
}

/*
 * The EFI runtime services data area is not covered by walk_mem_res(), but must
 * be mapped encrypted when SEV is active.
 */
static void __ioremap_check_other(resource_size_t addr, struct ioremap_desc *desc)
{
	if (!sev_active())
		return;

	if (!IS_ENABLED(CONFIG_EFI))
		return;

	if (efi_mem_type(addr) == EFI_RUNTIME_SERVICES_DATA)
		desc->flags |= IORES_MAP_ENCRYPTED;
}

static int __ioremap_collect_map_flags(struct resource *res, void *arg)
{
	struct ioremap_desc *desc = arg;

	if (!(desc->flags & IORES_MAP_SYSTEM_RAM))
		desc->flags |= __ioremap_check_ram(res);

	if (!(desc->flags & IORES_MAP_ENCRYPTED))
		desc->flags |= __ioremap_check_encrypted(res);

	return ((desc->flags & (IORES_MAP_SYSTEM_RAM | IORES_MAP_ENCRYPTED)) ==
			       (IORES_MAP_SYSTEM_RAM | IORES_MAP_ENCRYPTED));
}

/*
 * To avoid multiple resource walks, this function walks resources marked as
 * IORESOURCE_MEM and IORESOURCE_BUSY in a single pass, looking for system RAM
 * and/or a resource not described as IORES_DESC_NONE (e.g.
 * IORES_DESC_ACPI_TABLES).
 *
 * After that, __ioremap_check_other() deals with miscellaneous ranges that do
 * not fall into the above categories.
 */
static void __ioremap_check_mem(resource_size_t addr, unsigned long size,
				struct ioremap_desc *desc)
{
	u64 start, end;

	start = (u64)addr;
	end = start + size - 1;
	memset(desc, 0, sizeof(struct ioremap_desc));

	walk_mem_res(start, end, desc, __ioremap_collect_map_flags);

	__ioremap_check_other(addr, desc);
}

/*
 * Remap an arbitrary physical address space into the kernel virtual
 * address space. It transparently creates kernel huge I/O mappings when
 * the physical address is aligned to a huge page size (1GB or 2MB) and
 * the requested size is at least the huge page size.
 *
 * NOTE: MTRRs can override PAT memory types with 4KB granularity.
 * Therefore, the mapping code falls back to smaller pages (down to 4KB)
 * when a mapping range is covered by a non-WB type of MTRR.
 *
 * NOTE! We need to allow non-page-aligned mappings too: we will obviously
 * have to convert them into an offset in a page-aligned mapping, but the
 * caller shouldn't need to know that small detail.
 */
static void __iomem *
__ioremap_caller(resource_size_t phys_addr, unsigned long size,
		 enum page_cache_mode pcm, void *caller, bool encrypted)
{
	unsigned long offset, vaddr;
	resource_size_t last_addr;
	const resource_size_t unaligned_phys_addr = phys_addr;
	const unsigned long unaligned_size = size;
	struct ioremap_desc io_desc;
	struct vm_struct *area;
	enum page_cache_mode new_pcm;
	pgprot_t prot;
	int retval;
	void __iomem *ret_addr;

	/* Don't allow wraparound or zero size */
	last_addr = phys_addr + size - 1;
	if (!size || last_addr < phys_addr)
		return NULL;

	if (!phys_addr_valid(phys_addr)) {
		printk(KERN_WARNING "ioremap: invalid physical address %llx\n",
		       (unsigned long long)phys_addr);
		WARN_ON_ONCE(1);
		return NULL;
	}

	__ioremap_check_mem(phys_addr, size, &io_desc);

	/*
	 * Don't allow anybody to remap normal RAM that we're using..
	 */
	if (io_desc.flags & IORES_MAP_SYSTEM_RAM) {
		WARN_ONCE(1, "ioremap on RAM at %pa - %pa\n",
			  &phys_addr, &last_addr);
		return NULL;
	}

	/*
	 * Mappings have to be page-aligned
	 */
	offset = phys_addr & ~PAGE_MASK;
	phys_addr &= PHYSICAL_PAGE_MASK;
	size = PAGE_ALIGN(last_addr+1) - phys_addr;

	retval = memtype_reserve(phys_addr, (u64)phys_addr + size,
						pcm, &new_pcm);
	if (retval) {
		printk(KERN_ERR "ioremap memtype_reserve failed %d\n", retval);
		return NULL;
	}

	if (pcm != new_pcm) {
		if (!is_new_memtype_allowed(phys_addr, size, pcm, new_pcm)) {
			printk(KERN_ERR
		"ioremap error for 0x%llx-0x%llx, requested 0x%x, got 0x%x\n",
				(unsigned long long)phys_addr,
				(unsigned long long)(phys_addr + size),
				pcm, new_pcm);
			goto err_free_memtype;
		}
		pcm = new_pcm;
	}

	/*
	 * If the page being mapped is in memory and SEV is active then
	 * make sure the memory encryption attribute is enabled in the
	 * resulting mapping.
	 */
	prot = PAGE_KERNEL_IO;
	if ((io_desc.flags & IORES_MAP_ENCRYPTED) || encrypted)
		prot = pgprot_encrypted(prot);

	switch (pcm) {
	case _PAGE_CACHE_MODE_UC:
	default:
		prot = __pgprot(pgprot_val(prot) |
				cachemode2protval(_PAGE_CACHE_MODE_UC));
		break;
	case _PAGE_CACHE_MODE_UC_MINUS:
		prot = __pgprot(pgprot_val(prot) |
				cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS));
		break;
	case _PAGE_CACHE_MODE_WC:
		prot = __pgprot(pgprot_val(prot) |
				cachemode2protval(_PAGE_CACHE_MODE_WC));
		break;
	case _PAGE_CACHE_MODE_WT:
		prot = __pgprot(pgprot_val(prot) |
				cachemode2protval(_PAGE_CACHE_MODE_WT));
		break;
	case _PAGE_CACHE_MODE_WB:
		break;
	}

	/*
	 * Ok, go for it..
	 */
	area = get_vm_area_caller(size, VM_IOREMAP, caller);
	if (!area)
		goto err_free_memtype;
	area->phys_addr = phys_addr;
	vaddr = (unsigned long) area->addr;

	if (memtype_kernel_map_sync(phys_addr, size, pcm))
		goto err_free_area;

	if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot))
		goto err_free_area;

	ret_addr = (void __iomem *) (vaddr + offset);
	mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr);

	/*
	 * Check if the request spans more than any BAR in the iomem resource
	 * tree.
	 */
	if (iomem_map_sanity_check(unaligned_phys_addr, unaligned_size))
		pr_warn("caller %pS mapping multiple BARs\n", caller);

	return ret_addr;
err_free_area:
	free_vm_area(area);
err_free_memtype:
	memtype_free(phys_addr, phys_addr + size);
	return NULL;
}

/**
 * ioremap     -   map bus memory into CPU space
 * @phys_addr:    bus address of the memory
 * @size:      size of the resource to map
 *
 * ioremap performs a platform specific sequence of operations to
 * make bus memory CPU accessible via the readb/readw/readl/writeb/
 * writew/writel functions and the other mmio helpers. The returned
 * address is not guaranteed to be usable directly as a virtual
 * address.
 *
 * This version of ioremap ensures that the memory is marked uncachable
 * on the CPU as well as honouring existing caching rules from things like
 * the PCI bus. Note that there are other caches and buffers on many
 * busses. In particular driver authors should read up on PCI writes.
 *
 * It's useful if some control registers are in such an area and
 * write combining or read caching is not desirable.
 *
 * Must be freed with iounmap.
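 *
 * Example (illustrative sketch only; "pdev" and the BAR index are
 * assumptions, not taken from this file):
 *
 *	void __iomem *regs = ioremap(pci_resource_start(pdev, 0),
 *				     pci_resource_len(pdev, 0));
 *	if (regs) {
 *		pr_info("id reg: %#x\n", readl(regs));
 *		iounmap(regs);
 *	}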
 */
void __iomem *ioremap(resource_size_t phys_addr, unsigned long size)
{
	/*
	 * Ideally, this should be:
	 *	pat_enabled() ? _PAGE_CACHE_MODE_UC : _PAGE_CACHE_MODE_UC_MINUS;
	 *
	 * Till we fix all X drivers to use ioremap_wc(), we will use
	 * UC MINUS. Drivers that are certain they need or can already
	 * be converted over to strong UC can use ioremap_uc().
	 */
	enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC_MINUS;

	return __ioremap_caller(phys_addr, size, pcm,
				__builtin_return_address(0), false);
}
EXPORT_SYMBOL(ioremap);

/**
 * ioremap_uc     -   map bus memory into CPU space as strongly uncachable
 * @phys_addr:    bus address of the memory
 * @size:      size of the resource to map
 *
 * ioremap_uc performs a platform specific sequence of operations to
 * make bus memory CPU accessible via the readb/readw/readl/writeb/
 * writew/writel functions and the other mmio helpers. The returned
 * address is not guaranteed to be usable directly as a virtual
 * address.
 *
 * This version of ioremap ensures that the memory is marked with a strong
 * preference as completely uncachable on the CPU when possible. For non-PAT
 * systems this ends up setting page-attribute flags PCD=1, PWT=1. For PAT
 * systems this will set the PAT entry for the pages as strong UC.  This call
 * will honor existing caching rules from things like the PCI bus. Note that
 * there are other caches and buffers on many busses. In particular driver
 * authors should read up on PCI writes.
 *
 * It's useful if some control registers are in such an area and
 * write combining or read caching is not desirable.
 *
 * Must be freed with iounmap.
 */
void __iomem *ioremap_uc(resource_size_t phys_addr, unsigned long size)
{
	enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC;

	return __ioremap_caller(phys_addr, size, pcm,
				__builtin_return_address(0), false);
}
EXPORT_SYMBOL_GPL(ioremap_uc);

/**
 * ioremap_wc   -       map memory into CPU space write combined
 * @phys_addr:  bus address of the memory
 * @size:       size of the resource to map
 *
 * This version of ioremap ensures that the memory is marked write combining.
 * Write combining allows faster writes to some hardware devices.
 *
 * Must be freed with iounmap.
 */
void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size)
{
	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WC,
					__builtin_return_address(0), false);
}
EXPORT_SYMBOL(ioremap_wc);

/**
 * ioremap_wt   -       map memory into CPU space write through
 * @phys_addr:  bus address of the memory
 * @size:       size of the resource to map
 *
 * This version of ioremap ensures that the memory is marked write through.
 * Write through stores data into memory while keeping the cache up-to-date.
 *
 * Must be freed with iounmap.
 */
void __iomem *ioremap_wt(resource_size_t phys_addr, unsigned long size)
{
	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WT,
					__builtin_return_address(0), false);
}
EXPORT_SYMBOL(ioremap_wt);

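/**
 * ioremap_encrypted   -   map bus memory into CPU space as encrypted
 * @phys_addr:  bus address of the memory
 * @size:       size of the resource to map
 *
 * The range is mapped write-back cached and with the memory encryption
 * attribute set in the page protections, regardless of what the resource
 * walk reports for the range.
 *
 * Must be freed with iounmap.
 */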
void __iomem *ioremap_encrypted(resource_size_t phys_addr, unsigned long size)
{
	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WB,
				__builtin_return_address(0), true);
}
EXPORT_SYMBOL(ioremap_encrypted);

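/**
 * ioremap_cache   -   map bus memory into CPU space as write-back cacheable
 * @phys_addr:  bus address of the memory
 * @size:       size of the resource to map
 *
 * Same as ioremap(), but the mapping is created with the write-back (WB)
 * cache mode instead of UC-.
 *
 * Must be freed with iounmap.
 */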
void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
{
	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WB,
				__builtin_return_address(0), false);
}
EXPORT_SYMBOL(ioremap_cache);

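/**
 * ioremap_prot   -   map bus memory into CPU space with a caller-chosen cache mode
 * @phys_addr:  bus address of the memory
 * @size:       size of the resource to map
 * @prot_val:   page protection value; only its cache-mode bits are used, as
 *              they are converted with pgprot2cachemode() before mapping
 *
 * Must be freed with iounmap.
 */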
void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
				unsigned long prot_val)
{
	return __ioremap_caller(phys_addr, size,
				pgprot2cachemode(__pgprot(prot_val)),
				__builtin_return_address(0), false);
}
EXPORT_SYMBOL(ioremap_prot);

/**
 * iounmap - Free an IO remapping
 * @addr: virtual address from ioremap_*
 *
 * Caller must ensure there is only one unmapping for the same pointer.
 */
void iounmap(volatile void __iomem *addr)
{
	struct vm_struct *p, *o;

	if ((void __force *)addr <= high_memory)
		return;

	/*
	 * The PCI/ISA range special-casing was removed from __ioremap()
	 * so this check, in theory, can be removed. However, there are
	 * cases where iounmap() is called for addresses not obtained via
	 * ioremap() (vga16fb for example). Add a warning so that these
	 * cases can be caught and fixed.
	 */
	if ((void __force *)addr >= phys_to_virt(ISA_START_ADDRESS) &&
	    (void __force *)addr < phys_to_virt(ISA_END_ADDRESS)) {
		WARN(1, "iounmap() called for ISA range not obtained using ioremap()\n");
		return;
	}

	mmiotrace_iounmap(addr);

	addr = (volatile void __iomem *)
		(PAGE_MASK & (unsigned long __force)addr);

	/*
	 * Use the vm area unlocked, assuming the caller ensures there isn't
	 * another iounmap for the same address in parallel. Reuse of the
	 * virtual address is prevented by leaving it in the global lists
	 * until we're done with it. cpa takes care of the direct mappings.
	 */
	p = find_vm_area((void __force *)addr);

	if (!p) {
		printk(KERN_ERR "iounmap: bad address %p\n", addr);
		dump_stack();
		return;
	}

	memtype_free(p->phys_addr, p->phys_addr + get_vm_area_size(p));

	/* Finally remove it */
	o = remove_vm_area((void __force *)addr);
	BUG_ON(p != o || o == NULL);
	kfree(p);
}
EXPORT_SYMBOL(iounmap);

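/*
 * Huge-page ioremap support: report which page-table levels ioremap() may
 * use for huge I/O mappings. P4D-level mappings are never used, PUD-level
 * (1GB) mappings require the gbpages feature and are only possible on
 * 64-bit, and PMD-level mappings require PSE.
 */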
int __init arch_ioremap_p4d_supported(void)
{
	return 0;
}

int __init arch_ioremap_pud_supported(void)
{
#ifdef CONFIG_X86_64
	return boot_cpu_has(X86_FEATURE_GBPAGES);
#else
	return 0;
#endif
}

int __init arch_ioremap_pmd_supported(void)
{
	return boot_cpu_has(X86_FEATURE_PSE);
}

/*
 * Convert a physical pointer to a virtual kernel pointer for /dev/mem
 * access
 */
void *xlate_dev_mem_ptr(phys_addr_t phys)
{
	unsigned long start  = phys &  PAGE_MASK;
	unsigned long offset = phys & ~PAGE_MASK;
	void *vaddr;

	/* memremap() maps if RAM, otherwise falls back to ioremap() */
	vaddr = memremap(start, PAGE_SIZE, MEMREMAP_WB);

	/* Only add the offset on success and return NULL if memremap() failed */
	if (vaddr)
		vaddr += offset;

	return vaddr;
}

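/*
 * Undo xlate_dev_mem_ptr(): unmap the page-sized mapping that backs the
 * translated pointer.
 */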
void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
{
	memunmap((void *)((unsigned long)addr & PAGE_MASK));
}

/*
 * Examine the physical address to determine if it is an area of memory
 * that should be mapped decrypted.  If the memory is not part of the
 * kernel usable area it was created and accessed decrypted, so these
 * areas should be mapped decrypted. And since the encryption key can
 * change across reboots, persistent memory should also be mapped
 * decrypted.
 *
 * If SEV is active, that implies that BIOS/UEFI also ran encrypted so
 * only persistent memory should be mapped decrypted.
 */
static bool memremap_should_map_decrypted(resource_size_t phys_addr,
					  unsigned long size)
{
	int is_pmem;

	/*
	 * Check if the address is part of a persistent memory region.
	 * This check covers areas added by E820, EFI and ACPI.
	 */
	is_pmem = region_intersects(phys_addr, size, IORESOURCE_MEM,
				    IORES_DESC_PERSISTENT_MEMORY);
	if (is_pmem != REGION_DISJOINT)
		return true;

	/*
	 * Check if the non-volatile attribute is set for an EFI
	 * reserved area.
	 */
	if (efi_enabled(EFI_BOOT)) {
		switch (efi_mem_type(phys_addr)) {
		case EFI_RESERVED_TYPE:
			if (efi_mem_attributes(phys_addr) & EFI_MEMORY_NV)
				return true;
			break;
		default:
			break;
		}
	}

	/* Check if the address is outside kernel usable area */
	switch (e820__get_entry_type(phys_addr, phys_addr + size - 1)) {
	case E820_TYPE_RESERVED:
	case E820_TYPE_ACPI:
	case E820_TYPE_NVS:
	case E820_TYPE_UNUSABLE:
		/* For SEV, these areas are encrypted */
		if (sev_active())
			break;
		fallthrough;

	case E820_TYPE_PRAM:
		return true;
	default:
		break;
	}

	return false;
}

/*
 * Examine the physical address to determine if it is EFI data. Check
 * it against the boot params structure and EFI tables and memory types.
 */
static bool memremap_is_efi_data(resource_size_t phys_addr,
				 unsigned long size)
{
	u64 paddr;

	/* Check if the address is part of EFI boot/runtime data */
	if (!efi_enabled(EFI_BOOT))
		return false;

	paddr = boot_params.efi_info.efi_memmap_hi;
	paddr <<= 32;
	paddr |= boot_params.efi_info.efi_memmap;
	if (phys_addr == paddr)
		return true;

	paddr = boot_params.efi_info.efi_systab_hi;
	paddr <<= 32;
	paddr |= boot_params.efi_info.efi_systab;
	if (phys_addr == paddr)
		return true;

	if (efi_is_table_address(phys_addr))
		return true;

	switch (efi_mem_type(phys_addr)) {
	case EFI_BOOT_SERVICES_DATA:
	case EFI_RUNTIME_SERVICES_DATA:
		return true;
	default:
		break;
	}

	return false;
}

/*
 * Examine the physical address to determine if it is boot data by checking
 * it against the boot params setup_data chain.
 */
static bool memremap_is_setup_data(resource_size_t phys_addr,
				   unsigned long size)
{
	struct setup_indirect *indirect;
	struct setup_data *data;
	u64 paddr, paddr_next;

	paddr = boot_params.hdr.setup_data;
	while (paddr) {
		unsigned int len;

		if (phys_addr == paddr)
			return true;

		data = memremap(paddr, sizeof(*data),
				MEMREMAP_WB | MEMREMAP_DEC);
		if (!data) {
			pr_warn("failed to memremap setup_data entry\n");
			return false;
		}

		paddr_next = data->next;
		len = data->len;

		if ((phys_addr > paddr) && (phys_addr < (paddr + len))) {
			memunmap(data);
			return true;
		}

		/*
		 * The setup_indirect payload lives past the setup_data
		 * header, so remap the entry with its full length before
		 * dereferencing it.
		 */
		if (data->type == SETUP_INDIRECT) {
			memunmap(data);
			data = memremap(paddr, sizeof(*data) + len,
					MEMREMAP_WB | MEMREMAP_DEC);
			if (!data) {
				pr_warn("failed to memremap indirect setup_data\n");
				return false;
			}

			indirect = (struct setup_indirect *)data->data;

			if (indirect->type != SETUP_INDIRECT) {
				paddr = indirect->addr;
				len = indirect->len;
			}
		}

		memunmap(data);

		if ((phys_addr > paddr) && (phys_addr < (paddr + len)))
			return true;

		paddr = paddr_next;
	}

	return false;
}

/*
 * Examine the physical address to determine if it is boot data by checking
 * it against the boot params setup_data chain (early boot version).
 */
static bool __init early_memremap_is_setup_data(resource_size_t phys_addr,
						unsigned long size)
{
	struct setup_data *data;
	u64 paddr, paddr_next;

	paddr = boot_params.hdr.setup_data;
	while (paddr) {
		unsigned int len;

		if (phys_addr == paddr)
			return true;

		data = early_memremap_decrypted(paddr, sizeof(*data));

		paddr_next = data->next;
		len = data->len;

		early_memunmap(data, sizeof(*data));

		if ((phys_addr > paddr) && (phys_addr < (paddr + len)))
			return true;

		paddr = paddr_next;
	}

	return false;
}

/*
 * Architecture function to determine if RAM remap is allowed. By default, a
 * RAM remap will map the data as encrypted. Determine if a RAM remap should
 * not be done so that the data will be mapped decrypted.
 */
bool arch_memremap_can_ram_remap(resource_size_t phys_addr, unsigned long size,
				 unsigned long flags)
{
	if (!mem_encrypt_active())
		return true;

	if (flags & MEMREMAP_ENC)
		return true;

	if (flags & MEMREMAP_DEC)
		return false;

	if (sme_active()) {
		if (memremap_is_setup_data(phys_addr, size) ||
		    memremap_is_efi_data(phys_addr, size))
			return false;
	}

	return !memremap_should_map_decrypted(phys_addr, size);
}

/*
 * Architecture override of __weak function to adjust the protection attributes
 * used when remapping memory. By default, early_memremap() will map the data
 * as encrypted. Determine if an encrypted mapping should not be done and set
 * the appropriate protection attributes.
 */
pgprot_t __init early_memremap_pgprot_adjust(resource_size_t phys_addr,
					     unsigned long size,
					     pgprot_t prot)
{
	bool encrypted_prot;

	if (!mem_encrypt_active())
		return prot;

	encrypted_prot = true;

	if (sme_active()) {
		if (early_memremap_is_setup_data(phys_addr, size) ||
		    memremap_is_efi_data(phys_addr, size))
			encrypted_prot = false;
	}

	if (encrypted_prot && memremap_should_map_decrypted(phys_addr, size))
		encrypted_prot = false;

	return encrypted_prot ? pgprot_encrypted(prot)
			      : pgprot_decrypted(prot);
}

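/*
 * Report whether the given physical range would be mapped encrypted by a
 * default (flag-less) remap; simply delegates to
 * arch_memremap_can_ram_remap() above.
 */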
bool phys_mem_access_encrypted(unsigned long phys_addr, unsigned long size)
{
	return arch_memremap_can_ram_remap(phys_addr, size, 0);
}

#ifdef CONFIG_AMD_MEM_ENCRYPT
/* Remap memory with encryption */
void __init *early_memremap_encrypted(resource_size_t phys_addr,
				      unsigned long size)
{
	return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_ENC);
}

/*
 * Remap memory encrypted and write-protected - cannot be called before
 * pat_init() has run.
 */
void __init *early_memremap_encrypted_wp(resource_size_t phys_addr,
					 unsigned long size)
{
	if (!x86_has_pat_wp())
		return NULL;
	return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_ENC_WP);
}

/* Remap memory without encryption */
void __init *early_memremap_decrypted(resource_size_t phys_addr,
				      unsigned long size)
{
	return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_NOENC);
}

/*
 * Remap memory decrypted and write-protected - cannot be called before
 * pat_init() has run.
 */
void __init *early_memremap_decrypted_wp(resource_size_t phys_addr,
					 unsigned long size)
{
	if (!x86_has_pat_wp())
		return NULL;
	return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_NOENC_WP);
}
#endif	/* CONFIG_AMD_MEM_ENCRYPT */

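/*
 * Statically allocated page table backing the boot-time fixmap
 * (FIX_BTMAP_*) slots used by early_ioremap(). The helpers below locate
 * the PMD that covers a fixmap address and the PTE slot for it inside
 * bm_pte.
 */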
static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss;

static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
{
	/* Don't assume we're using swapper_pg_dir at this point */
	pgd_t *base = __va(read_cr3_pa());
	pgd_t *pgd = &base[pgd_index(addr)];
	p4d_t *p4d = p4d_offset(pgd, addr);
	pud_t *pud = pud_offset(p4d, addr);
	pmd_t *pmd = pmd_offset(pud, addr);

	return pmd;
}

static inline pte_t * __init early_ioremap_pte(unsigned long addr)
{
	return &bm_pte[pte_index(addr)];
}

bool __init is_early_ioremap_ptep(pte_t *ptep)
{
	return ptep >= &bm_pte[0] && ptep < &bm_pte[PAGE_SIZE/sizeof(pte_t)];
}

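/*
 * Wire bm_pte into the live page tables so that the FIX_BTMAP_* slots used
 * by early_ioremap() are usable, and sanity-check that the whole boot-time
 * ioremap range fits within a single PMD.
 */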
void __init early_ioremap_init(void)
{
	pmd_t *pmd;

#ifdef CONFIG_X86_64
	BUILD_BUG_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
#else
	WARN_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
#endif

	early_ioremap_setup();

	pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
	memset(bm_pte, 0, sizeof(bm_pte));
	pmd_populate_kernel(&init_mm, pmd, bm_pte);

	/*
	 * The boot-ioremap range spans multiple pmds, for which
	 * we are not prepared:
	 */
#define __FIXADDR_TOP (-PAGE_SIZE)
	BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
		     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
#undef __FIXADDR_TOP
	if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
		WARN_ON(1);
		printk(KERN_WARNING "pmd %p != %p\n",
		       pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END)));
		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
			fix_to_virt(FIX_BTMAP_BEGIN));
		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END):   %08lx\n",
			fix_to_virt(FIX_BTMAP_END));

		printk(KERN_WARNING "FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
		printk(KERN_WARNING "FIX_BTMAP_BEGIN:     %d\n",
		       FIX_BTMAP_BEGIN);
	}
}

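/*
 * Set (or, when the pgprot is empty, clear) a single early fixmap PTE in
 * bm_pte and flush the TLB entry for that address.
 */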
void __init __early_set_fixmap(enum fixed_addresses idx,
			       phys_addr_t phys, pgprot_t flags)
{
	unsigned long addr = __fix_to_virt(idx);
	pte_t *pte;

	if (idx >= __end_of_fixed_addresses) {
		BUG();
		return;
	}
	pte = early_ioremap_pte(addr);

	/* Sanitize 'prot' against any unsupported bits: */
	pgprot_val(flags) &= __supported_pte_mask;

	if (pgprot_val(flags))
		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
	else
		pte_clear(&init_mm, addr, pte);
	flush_tlb_one_kernel(addr);
}