linux/arch/x86/mm/ioremap.c
/*
 * Re-map IO memory to kernel address space so that we can access it.
 * This is needed for high PCI addresses that aren't mapped in the
 * 640k-1MB IO memory area on PCs.
 *
 * (C) Copyright 1995 1996 Linus Torvalds
 */

#include <linux/bootmem.h>
#include <linux/init.h>
#include <linux/io.h>
#include <linux/ioport.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mmiotrace.h>
#include <linux/mem_encrypt.h>
#include <linux/efi.h>

#include <asm/set_memory.h>
#include <asm/e820/api.h>
#include <asm/fixmap.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>
#include <asm/pat.h>
#include <asm/setup.h>

#include "physaddr.h"

/*
 * Fix up the linear direct mapping of the kernel to avoid cache attribute
 * conflicts.
 */
int ioremap_change_attr(unsigned long vaddr, unsigned long size,
                        enum page_cache_mode pcm)
{
        unsigned long nrpages = size >> PAGE_SHIFT;
        int err;

        switch (pcm) {
        case _PAGE_CACHE_MODE_UC:
        default:
                err = _set_memory_uc(vaddr, nrpages);
                break;
        case _PAGE_CACHE_MODE_WC:
                err = _set_memory_wc(vaddr, nrpages);
                break;
        case _PAGE_CACHE_MODE_WT:
                err = _set_memory_wt(vaddr, nrpages);
                break;
        case _PAGE_CACHE_MODE_WB:
                err = _set_memory_wb(vaddr, nrpages);
                break;
        }

        return err;
}

static int __ioremap_check_ram(unsigned long start_pfn, unsigned long nr_pages,
                               void *arg)
{
        unsigned long i;

        for (i = 0; i < nr_pages; ++i)
                if (pfn_valid(start_pfn + i) &&
                    !PageReserved(pfn_to_page(start_pfn + i)))
                        return 1;

        return 0;
}

/*
 * Remap an arbitrary physical address space into the kernel virtual
 * address space. It transparently creates kernel huge I/O mappings when
 * the physical address is aligned to a huge page size (1GB or 2MB) and
 * the requested size is at least the huge page size.
 *
 * NOTE: MTRRs can override PAT memory types with a 4KB granularity.
 * Therefore, the mapping code falls back to smaller pages (down to 4KB)
 * when a mapping range is covered by non-WB MTRRs.
 *
 * NOTE! We need to allow non-page-aligned mappings too: we will obviously
 * have to convert them into an offset in a page-aligned mapping, but the
 * caller shouldn't need to know that small detail.
 */
static void __iomem *__ioremap_caller(resource_size_t phys_addr,
                unsigned long size, enum page_cache_mode pcm, void *caller)
{
        unsigned long offset, vaddr;
        resource_size_t pfn, last_pfn, last_addr;
        const resource_size_t unaligned_phys_addr = phys_addr;
        const unsigned long unaligned_size = size;
        struct vm_struct *area;
        enum page_cache_mode new_pcm;
        pgprot_t prot;
        int retval;
        void __iomem *ret_addr;

        /* Don't allow wraparound or zero size */
        last_addr = phys_addr + size - 1;
        if (!size || last_addr < phys_addr)
                return NULL;

        if (!phys_addr_valid(phys_addr)) {
                printk(KERN_WARNING "ioremap: invalid physical address %llx\n",
                       (unsigned long long)phys_addr);
                WARN_ON_ONCE(1);
                return NULL;
        }

        /*
         * Don't allow anybody to remap normal RAM that we're using..
         */
        pfn      = phys_addr >> PAGE_SHIFT;
        last_pfn = last_addr >> PAGE_SHIFT;
        if (walk_system_ram_range(pfn, last_pfn - pfn + 1, NULL,
                                          __ioremap_check_ram) == 1) {
                WARN_ONCE(1, "ioremap on RAM at %pa - %pa\n",
                          &phys_addr, &last_addr);
                return NULL;
        }

        /*
         * Mappings have to be page-aligned
         */
        offset = phys_addr & ~PAGE_MASK;
        phys_addr &= PHYSICAL_PAGE_MASK;
        size = PAGE_ALIGN(last_addr+1) - phys_addr;

        retval = reserve_memtype(phys_addr, (u64)phys_addr + size,
                                                pcm, &new_pcm);
        if (retval) {
                printk(KERN_ERR "ioremap reserve_memtype failed %d\n", retval);
                return NULL;
        }

        if (pcm != new_pcm) {
                if (!is_new_memtype_allowed(phys_addr, size, pcm, new_pcm)) {
                        printk(KERN_ERR
                "ioremap error for 0x%llx-0x%llx, requested 0x%x, got 0x%x\n",
                                (unsigned long long)phys_addr,
                                (unsigned long long)(phys_addr + size),
                                pcm, new_pcm);
                        goto err_free_memtype;
                }
                pcm = new_pcm;
        }

        prot = PAGE_KERNEL_IO;
        switch (pcm) {
        case _PAGE_CACHE_MODE_UC:
        default:
                prot = __pgprot(pgprot_val(prot) |
                                cachemode2protval(_PAGE_CACHE_MODE_UC));
                break;
        case _PAGE_CACHE_MODE_UC_MINUS:
                prot = __pgprot(pgprot_val(prot) |
                                cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS));
                break;
        case _PAGE_CACHE_MODE_WC:
                prot = __pgprot(pgprot_val(prot) |
                                cachemode2protval(_PAGE_CACHE_MODE_WC));
                break;
        case _PAGE_CACHE_MODE_WT:
                prot = __pgprot(pgprot_val(prot) |
                                cachemode2protval(_PAGE_CACHE_MODE_WT));
                break;
        case _PAGE_CACHE_MODE_WB:
                break;
        }

        /*
         * Ok, go for it..
         */
        area = get_vm_area_caller(size, VM_IOREMAP, caller);
        if (!area)
                goto err_free_memtype;
        area->phys_addr = phys_addr;
        vaddr = (unsigned long) area->addr;

        if (kernel_map_sync_memtype(phys_addr, size, pcm))
                goto err_free_area;

        if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot))
                goto err_free_area;

        ret_addr = (void __iomem *) (vaddr + offset);
        mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr);

        /*
         * Check if the request spans more than any BAR in the iomem resource
         * tree.
         */
        if (iomem_map_sanity_check(unaligned_phys_addr, unaligned_size))
                pr_warn("caller %pS mapping multiple BARs\n", caller);

        return ret_addr;
err_free_area:
        free_vm_area(area);
err_free_memtype:
        free_memtype(phys_addr, phys_addr + size);
        return NULL;
}
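
/*
 * Worked example (hypothetical numbers, illustrative only) of the
 * non-page-aligned handling above.  A request for phys_addr = 0xfedc1004
 * and size = 8 yields:
 *
 *      offset    = 0xfedc1004 & ~PAGE_MASK                 = 0x004
 *      phys_addr = 0xfedc1004 & PHYSICAL_PAGE_MASK         = 0xfedc1000
 *      size      = PAGE_ALIGN(0xfedc100b + 1) - 0xfedc1000 = 0x1000
 *
 * so a single 4KB page is mapped and the caller gets back vaddr + 0x004,
 * i.e. a pointer to the exact byte that was requested.
 */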

/**
 * ioremap_nocache     -   map bus memory into CPU space
 * @phys_addr:    bus address of the memory
 * @size:      size of the resource to map
 *
 * ioremap_nocache performs a platform specific sequence of operations to
 * make bus memory CPU accessible via the readb/readw/readl/writeb/
 * writew/writel functions and the other mmio helpers. The returned
 * address is not guaranteed to be usable directly as a virtual
 * address.
 *
 * This version of ioremap ensures that the memory is marked uncachable
 * on the CPU as well as honouring existing caching rules from things like
 * the PCI bus. Note that there are other caches and buffers on many
 * busses. In particular driver authors should read up on PCI writes.
 *
 * It's useful if some control registers are in such an area and
 * write combining or read caching is not desirable.
 *
 * Must be freed with iounmap.
 */
void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size)
{
        /*
         * Ideally, this should be:
         *      pat_enabled() ? _PAGE_CACHE_MODE_UC : _PAGE_CACHE_MODE_UC_MINUS;
         *
         * Till we fix all X drivers to use ioremap_wc(), we will use
         * UC MINUS. Drivers that are certain they need or can already
         * be converted over to strong UC can use ioremap_uc().
         */
        enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC_MINUS;

        return __ioremap_caller(phys_addr, size, pcm,
                                __builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap_nocache);
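
/*
 * A minimal usage sketch (illustrative only, not part of this file's API):
 * map a small MMIO window uncached, read one register and unmap again.
 * The function name, physical address and register offset are made-up
 * placeholders; a real driver would take them from its bus resources
 * (e.g. a PCI BAR).
 */
static void __maybe_unused ioremap_nocache_example(void)
{
        void __iomem *regs;
        u32 ver;

        regs = ioremap_nocache(0xfed40000, 0x1000);     /* placeholder BAR */
        if (!regs)
                return;

        ver = readl(regs + 0x08);       /* hypothetical version register */
        pr_info("ioremap example: version %#x\n", ver);

        iounmap(regs);
}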

/**
 * ioremap_uc     -   map bus memory into CPU space as strongly uncachable
 * @phys_addr:    bus address of the memory
 * @size:      size of the resource to map
 *
 * ioremap_uc performs a platform specific sequence of operations to
 * make bus memory CPU accessible via the readb/readw/readl/writeb/
 * writew/writel functions and the other mmio helpers. The returned
 * address is not guaranteed to be usable directly as a virtual
 * address.
 *
 * This version of ioremap ensures that the memory is marked with a strong
 * preference as completely uncachable on the CPU when possible. For non-PAT
 * systems this ends up setting page-attribute flags PCD=1, PWT=1. For PAT
 * systems this will set the PAT entry for the pages as strong UC.  This call
 * will honor existing caching rules from things like the PCI bus. Note that
 * there are other caches and buffers on many busses. In particular driver
 * authors should read up on PCI writes.
 *
 * It's useful if some control registers are in such an area and
 * write combining or read caching is not desirable.
 *
 * Must be freed with iounmap.
 */
void __iomem *ioremap_uc(resource_size_t phys_addr, unsigned long size)
{
        enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC;

        return __ioremap_caller(phys_addr, size, pcm,
                                __builtin_return_address(0));
}
EXPORT_SYMBOL_GPL(ioremap_uc);

/**
 * ioremap_wc   -       map memory into CPU space write combined
 * @phys_addr:  bus address of the memory
 * @size:       size of the resource to map
 *
 * This version of ioremap ensures that the memory is marked write combining.
 * Write combining allows faster writes to some hardware devices.
 *
 * Must be freed with iounmap.
 */
void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size)
{
        return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WC,
                                        __builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap_wc);
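
/*
 * A minimal usage sketch (illustrative only): write-combined mappings are
 * typically used for frame buffers or other large, write-mostly apertures.
 * The function name, address and size below are placeholders standing in
 * for a graphics BAR.
 */
static void __maybe_unused ioremap_wc_example(void)
{
        void __iomem *fb;

        fb = ioremap_wc(0xd0000000, 0x400000);  /* placeholder aperture */
        if (!fb)
                return;

        memset_io(fb, 0, 0x400000);     /* clear the hypothetical frame buffer */
        iounmap(fb);
}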

/**
 * ioremap_wt   -       map memory into CPU space write through
 * @phys_addr:  bus address of the memory
 * @size:       size of the resource to map
 *
 * This version of ioremap ensures that the memory is marked write through.
 * Write through stores data into memory while keeping the cache up-to-date.
 *
 * Must be freed with iounmap.
 */
void __iomem *ioremap_wt(resource_size_t phys_addr, unsigned long size)
{
        return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WT,
                                        __builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap_wt);

void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
{
        return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WB,
                                __builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap_cache);

void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
                                unsigned long prot_val)
{
        return __ioremap_caller(phys_addr, size,
                                pgprot2cachemode(__pgprot(prot_val)),
                                __builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap_prot);

/**
 * iounmap - Free an I/O remapping
 * @addr: virtual address from ioremap_*
 *
 * Caller must ensure there is only one unmapping for the same pointer.
 */
void iounmap(volatile void __iomem *addr)
{
        struct vm_struct *p, *o;

        if ((void __force *)addr <= high_memory)
                return;

        /*
         * The PCI/ISA range special-casing was removed from __ioremap()
         * so this check, in theory, can be removed. However, there are
         * cases where iounmap() is called for addresses not obtained via
         * ioremap() (vga16fb for example). Add a warning so that these
         * cases can be caught and fixed.
         */
        if ((void __force *)addr >= phys_to_virt(ISA_START_ADDRESS) &&
            (void __force *)addr < phys_to_virt(ISA_END_ADDRESS)) {
                WARN(1, "iounmap() called for ISA range not obtained using ioremap()\n");
                return;
        }

        addr = (volatile void __iomem *)
                (PAGE_MASK & (unsigned long __force)addr);

        mmiotrace_iounmap(addr);

        /* Use the vm area unlocked, assuming the caller
           ensures there isn't another iounmap for the same address
           in parallel. Reuse of the virtual address is prevented by
           leaving it in the global lists until we're done with it.
           cpa takes care of the direct mappings. */
        p = find_vm_area((void __force *)addr);

        if (!p) {
                printk(KERN_ERR "iounmap: bad address %p\n", addr);
                dump_stack();
                return;
        }

        free_memtype(p->phys_addr, p->phys_addr + get_vm_area_size(p));

        /* Finally remove it */
        o = remove_vm_area((void __force *)addr);
        BUG_ON(p != o || o == NULL);
        kfree(p);
}
EXPORT_SYMBOL(iounmap);

int __init arch_ioremap_pud_supported(void)
{
#ifdef CONFIG_X86_64
        return boot_cpu_has(X86_FEATURE_GBPAGES);
#else
        return 0;
#endif
}

int __init arch_ioremap_pmd_supported(void)
{
        return boot_cpu_has(X86_FEATURE_PSE);
}

/*
 * Convert a physical pointer to a virtual kernel pointer for /dev/mem
 * access
 */
void *xlate_dev_mem_ptr(phys_addr_t phys)
{
        unsigned long start  = phys &  PAGE_MASK;
        unsigned long offset = phys & ~PAGE_MASK;
        void *vaddr;

        /* memremap() maps if RAM, otherwise falls back to ioremap() */
        vaddr = memremap(start, PAGE_SIZE, MEMREMAP_WB);

        /* Only add the offset on success and return NULL if memremap() failed */
        if (vaddr)
                vaddr += offset;

        return vaddr;
}

void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
{
        memunmap((void *)((unsigned long)addr & PAGE_MASK));
}
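
/*
 * A minimal usage sketch (illustrative only): how a /dev/mem-style reader
 * pairs xlate_dev_mem_ptr() with unxlate_dev_mem_ptr() to copy one byte
 * out of an arbitrary physical page.  The helper name is hypothetical.
 */
static bool __maybe_unused xlate_dev_mem_read_byte(phys_addr_t phys, u8 *val)
{
        void *ptr = xlate_dev_mem_ptr(phys);

        if (!ptr)
                return false;

        *val = *(u8 *)ptr;
        unxlate_dev_mem_ptr(phys, ptr);

        return true;
}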

/*
 * Examine the physical address to determine if it is an area of memory
 * that should be mapped decrypted.  If the memory is not part of the
 * kernel's usable area, it was accessed and created decrypted, so these
 * areas should be mapped decrypted.  And since the encryption key can
 * change across reboots, persistent memory should also be mapped
 * decrypted.
 */
static bool memremap_should_map_decrypted(resource_size_t phys_addr,
                                          unsigned long size)
{
        int is_pmem;

        /*
         * Check if the address is part of a persistent memory region.
         * This check covers areas added by E820, EFI and ACPI.
         */
        is_pmem = region_intersects(phys_addr, size, IORESOURCE_MEM,
                                    IORES_DESC_PERSISTENT_MEMORY);
        if (is_pmem != REGION_DISJOINT)
                return true;

        /*
         * Check if the non-volatile attribute is set for an EFI
         * reserved area.
         */
        if (efi_enabled(EFI_BOOT)) {
                switch (efi_mem_type(phys_addr)) {
                case EFI_RESERVED_TYPE:
                        if (efi_mem_attributes(phys_addr) & EFI_MEMORY_NV)
                                return true;
                        break;
                default:
                        break;
                }
        }

        /* Check if the address is outside kernel usable area */
        switch (e820__get_entry_type(phys_addr, phys_addr + size - 1)) {
        case E820_TYPE_RESERVED:
        case E820_TYPE_ACPI:
        case E820_TYPE_NVS:
        case E820_TYPE_UNUSABLE:
        case E820_TYPE_PRAM:
                return true;
        default:
                break;
        }

        return false;
}

/*
 * Examine the physical address to determine if it is EFI data. Check
 * it against the boot params structure and EFI tables and memory types.
 */
static bool memremap_is_efi_data(resource_size_t phys_addr,
                                 unsigned long size)
{
        u64 paddr;

        /* Check if the address is part of EFI boot/runtime data */
        if (!efi_enabled(EFI_BOOT))
                return false;

        paddr = boot_params.efi_info.efi_memmap_hi;
        paddr <<= 32;
        paddr |= boot_params.efi_info.efi_memmap;
        if (phys_addr == paddr)
                return true;

        paddr = boot_params.efi_info.efi_systab_hi;
        paddr <<= 32;
        paddr |= boot_params.efi_info.efi_systab;
        if (phys_addr == paddr)
                return true;

        if (efi_is_table_address(phys_addr))
                return true;

        switch (efi_mem_type(phys_addr)) {
        case EFI_BOOT_SERVICES_DATA:
        case EFI_RUNTIME_SERVICES_DATA:
                return true;
        default:
                break;
        }

        return false;
}

/*
 * Examine the physical address to determine if it is boot data by checking
 * it against the boot params setup_data chain.
 */
static bool memremap_is_setup_data(resource_size_t phys_addr,
                                   unsigned long size)
{
        struct setup_data *data;
        u64 paddr, paddr_next;

        paddr = boot_params.hdr.setup_data;
        while (paddr) {
                unsigned int len;

                if (phys_addr == paddr)
                        return true;

                data = memremap(paddr, sizeof(*data),
                                MEMREMAP_WB | MEMREMAP_DEC);

                paddr_next = data->next;
                len = data->len;

                memunmap(data);

                if ((phys_addr > paddr) && (phys_addr < (paddr + len)))
                        return true;

                paddr = paddr_next;
        }

        return false;
}

/*
 * Examine the physical address to determine if it is boot data by checking
 * it against the boot params setup_data chain (early boot version).
 */
static bool __init early_memremap_is_setup_data(resource_size_t phys_addr,
                                                unsigned long size)
{
        struct setup_data *data;
        u64 paddr, paddr_next;

        paddr = boot_params.hdr.setup_data;
        while (paddr) {
                unsigned int len;

                if (phys_addr == paddr)
                        return true;

                data = early_memremap_decrypted(paddr, sizeof(*data));

                paddr_next = data->next;
                len = data->len;

                early_memunmap(data, sizeof(*data));

                if ((phys_addr > paddr) && (phys_addr < (paddr + len)))
                        return true;

                paddr = paddr_next;
        }

        return false;
}

/*
 * Architecture function to determine if RAM remap is allowed. By default, a
 * RAM remap will map the data as encrypted. Determine if a RAM remap should
 * not be done so that the data will be mapped decrypted.
 */
bool arch_memremap_can_ram_remap(resource_size_t phys_addr, unsigned long size,
                                 unsigned long flags)
{
        if (!sme_active())
                return true;

        if (flags & MEMREMAP_ENC)
                return true;

        if (flags & MEMREMAP_DEC)
                return false;

        if (memremap_is_setup_data(phys_addr, size) ||
            memremap_is_efi_data(phys_addr, size) ||
            memremap_should_map_decrypted(phys_addr, size))
                return false;

        return true;
}
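
/*
 * A minimal usage sketch (illustrative only): a caller that knows its
 * buffer was written unencrypted (for example by firmware) can skip the
 * heuristics above and request a decrypted view explicitly by passing
 * MEMREMAP_DEC.  The function name is hypothetical and the address/size
 * come from the caller.
 */
static void __maybe_unused memremap_decrypted_example(resource_size_t phys,
                                                      size_t size)
{
        void *virt = memremap(phys, size, MEMREMAP_WB | MEMREMAP_DEC);

        if (!virt)
                return;

        /* ... consume the firmware-provided data through virt ... */

        memunmap(virt);
}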

/*
 * Architecture override of __weak function to adjust the protection attributes
 * used when remapping memory. By default, early_memremap() will map the data
 * as encrypted. Determine if an encrypted mapping should not be done and set
 * the appropriate protection attributes.
 */
pgprot_t __init early_memremap_pgprot_adjust(resource_size_t phys_addr,
                                             unsigned long size,
                                             pgprot_t prot)
{
        if (!sme_active())
                return prot;

        if (early_memremap_is_setup_data(phys_addr, size) ||
            memremap_is_efi_data(phys_addr, size) ||
            memremap_should_map_decrypted(phys_addr, size))
                prot = pgprot_decrypted(prot);
        else
                prot = pgprot_encrypted(prot);

        return prot;
}

bool phys_mem_access_encrypted(unsigned long phys_addr, unsigned long size)
{
        return arch_memremap_can_ram_remap(phys_addr, size, 0);
}

#ifdef CONFIG_ARCH_USE_MEMREMAP_PROT
/* Remap memory with encryption */
void __init *early_memremap_encrypted(resource_size_t phys_addr,
                                      unsigned long size)
{
        return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_ENC);
}

/*
 * Remap memory with encryption and write-protected - cannot be called
 * before pat_init() is called
 */
void __init *early_memremap_encrypted_wp(resource_size_t phys_addr,
                                         unsigned long size)
{
        /* Be sure the write-protect PAT entry is set for write-protect */
        if (__pte2cachemode_tbl[_PAGE_CACHE_MODE_WP] != _PAGE_CACHE_MODE_WP)
                return NULL;

        return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_ENC_WP);
}

/* Remap memory without encryption */
void __init *early_memremap_decrypted(resource_size_t phys_addr,
                                      unsigned long size)
{
        return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_NOENC);
}

/*
 * Remap memory without encryption and write-protected - cannot be called
 * before pat_init() is called
 */
void __init *early_memremap_decrypted_wp(resource_size_t phys_addr,
                                         unsigned long size)
{
        /* Be sure the write-protect PAT entry is set for write-protect */
        if (__pte2cachemode_tbl[_PAGE_CACHE_MODE_WP] != _PAGE_CACHE_MODE_WP)
                return NULL;

        return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_NOENC_WP);
}
#endif  /* CONFIG_ARCH_USE_MEMREMAP_PROT */

static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss;

static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
{
        /* Don't assume we're using swapper_pg_dir at this point */
        pgd_t *base = __va(read_cr3_pa());
        pgd_t *pgd = &base[pgd_index(addr)];
        p4d_t *p4d = p4d_offset(pgd, addr);
        pud_t *pud = pud_offset(p4d, addr);
        pmd_t *pmd = pmd_offset(pud, addr);

        return pmd;
}

static inline pte_t * __init early_ioremap_pte(unsigned long addr)
{
        return &bm_pte[pte_index(addr)];
}

bool __init is_early_ioremap_ptep(pte_t *ptep)
{
        return ptep >= &bm_pte[0] && ptep < &bm_pte[PAGE_SIZE/sizeof(pte_t)];
}

void __init early_ioremap_init(void)
{
        pmd_t *pmd;

#ifdef CONFIG_X86_64
        BUILD_BUG_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
#else
        WARN_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
#endif

        early_ioremap_setup();

        pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
        memset(bm_pte, 0, sizeof(bm_pte));
        pmd_populate_kernel(&init_mm, pmd, bm_pte);

        /*
         * The boot-ioremap range spans multiple pmds, for which
         * we are not prepared:
         */
#define __FIXADDR_TOP (-PAGE_SIZE)
        BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
                     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
#undef __FIXADDR_TOP
        if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
                WARN_ON(1);
                printk(KERN_WARNING "pmd %p != %p\n",
                       pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END)));
                printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
                        fix_to_virt(FIX_BTMAP_BEGIN));
                printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END):   %08lx\n",
                        fix_to_virt(FIX_BTMAP_END));

                printk(KERN_WARNING "FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
                printk(KERN_WARNING "FIX_BTMAP_BEGIN:     %d\n",
                       FIX_BTMAP_BEGIN);
        }
}

void __init __early_set_fixmap(enum fixed_addresses idx,
                               phys_addr_t phys, pgprot_t flags)
{
        unsigned long addr = __fix_to_virt(idx);
        pte_t *pte;

        if (idx >= __end_of_fixed_addresses) {
                BUG();
                return;
        }
        pte = early_ioremap_pte(addr);

        if (pgprot_val(flags))
                set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
        else
                pte_clear(&init_mm, addr, pte);
        __flush_tlb_one(addr);
}
 754