linux/arch/x86/mm/cpu_entry_area.c
// SPDX-License-Identifier: GPL-2.0

#include <linux/spinlock.h>
#include <linux/percpu.h>
#include <linux/kallsyms.h>
#include <linux/kcore.h>
#include <linux/pgtable.h>

#include <asm/cpu_entry_area.h>
#include <asm/fixmap.h>
#include <asm/desc.h>

static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);

#ifdef CONFIG_X86_64
static DEFINE_PER_CPU_PAGE_ALIGNED(struct exception_stacks, exception_stacks);
DEFINE_PER_CPU(struct cea_exception_stacks*, cea_exception_stacks);
#endif

#ifdef CONFIG_X86_32
DECLARE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack);
#endif

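/*
 * Return the fixed virtual address of the cpu_entry_area of @cpu.  The
 * per CPU areas are laid out back to back above CPU_ENTRY_AREA_PER_CPU,
 * so this is a pure address calculation and touches no mappings.
 */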
struct cpu_entry_area *get_cpu_entry_area(int cpu)
{
        unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE;
        BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);

        return (struct cpu_entry_area *) va;
}
EXPORT_SYMBOL(get_cpu_entry_area);

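/*
 * Install a single PTE which maps the cpu_entry_area page at @cea_vaddr
 * to the physical address @pa with protections @flags.
 */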
void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags)
{
        unsigned long va = (unsigned long) cea_vaddr;
        pte_t pte = pfn_pte(pa >> PAGE_SHIFT, flags);

        /*
         * The cpu_entry_area is shared between the user and kernel
         * page tables.  All of its ptes can safely be global.
         * _PAGE_GLOBAL gets reused to help indicate PROT_NONE for
         * non-present PTEs, so be careful not to set it in that
         * case to avoid confusion.
         */
        if (boot_cpu_has(X86_FEATURE_PGE) &&
            (pgprot_val(flags) & _PAGE_PRESENT))
                pte = pte_set_flags(pte, _PAGE_GLOBAL);

        set_pte_vaddr(va, pte);
}

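/*
 * Map @pages pages of the per CPU object @ptr into the cpu_entry_area,
 * starting at @cea_vaddr, one PTE at a time.
 */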
static void __init
cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
{
        for ( ; pages; pages--, cea_vaddr += PAGE_SIZE, ptr += PAGE_SIZE)
                cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
}

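/*
 * Map the per CPU debug store (PEBS/BTS) of @cpu into the cpu_entry_area.
 * The debug store is Intel only; other vendors return early.
 */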
static void __init percpu_setup_debug_store(unsigned int cpu)
{
#ifdef CONFIG_CPU_SUP_INTEL
        unsigned int npages;
        void *cea;

        if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
                return;

        cea = &get_cpu_entry_area(cpu)->cpu_debug_store;
        npages = sizeof(struct debug_store) / PAGE_SIZE;
        BUILD_BUG_ON(sizeof(struct debug_store) % PAGE_SIZE != 0);
        cea_map_percpu_pages(cea, &per_cpu(cpu_debug_store, cpu), npages,
                             PAGE_KERNEL);

        cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers;
        /*
         * Force the population of PMDs for not yet allocated per cpu
         * memory like debug store buffers.
         */
        npages = sizeof(struct debug_store_buffers) / PAGE_SIZE;
        for (; npages; npages--, cea += PAGE_SIZE)
                cea_set_pte(cea, 0, PAGE_NONE);
#endif
}

#ifdef CONFIG_X86_64

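/*
 * Map one named exception stack of 'estacks' into its slot in the
 * cpu_entry_area.  Relies on the local variables 'estacks', 'cea' and
 * 'npages' of the enclosing function.
 */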
#define cea_map_stack(name) do {                                        \
        npages = sizeof(estacks->name## _stack) / PAGE_SIZE;            \
        cea_map_percpu_pages(cea->estacks.name## _stack,                \
                        estacks->name## _stack, npages, PAGE_KERNEL);   \
        } while (0)

static void __init percpu_setup_exception_stacks(unsigned int cpu)
{
        struct exception_stacks *estacks = per_cpu_ptr(&exception_stacks, cpu);
        struct cpu_entry_area *cea = get_cpu_entry_area(cpu);
        unsigned int npages;

        BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);

        per_cpu(cea_exception_stacks, cpu) = &cea->estacks;

        /*
         * The exception stack mappings in the per cpu area are protected
         * by guard pages, so each stack must be mapped separately. DB2 is
         * not mapped; it just exists to catch triple nesting of #DB.
         */
        cea_map_stack(DF);
        cea_map_stack(NMI);
        cea_map_stack(DB);
        cea_map_stack(MCE);
}
#else
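/*
 * 32-bit has no IST exception stacks.  Only the separate stack used by
 * the double fault task gate needs to be mapped into the cpu_entry_area.
 */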
static inline void percpu_setup_exception_stacks(unsigned int cpu)
{
        struct cpu_entry_area *cea = get_cpu_entry_area(cpu);

        cea_map_percpu_pages(&cea->doublefault_stack,
                             &per_cpu(doublefault_stack, cpu), 1, PAGE_KERNEL);
}
#endif

/* Set up the cpu_entry_area mappings only once per CPU */
static void __init setup_cpu_entry_area(unsigned int cpu)
{
        struct cpu_entry_area *cea = get_cpu_entry_area(cpu);
#ifdef CONFIG_X86_64
        /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
        pgprot_t gdt_prot = PAGE_KERNEL_RO;
        pgprot_t tss_prot = PAGE_KERNEL_RO;
#else
        /*
         * On native 32-bit systems, the GDT cannot be read-only because
         * our double fault handler uses a task gate, and entering through
         * a task gate needs to change an available TSS to busy.  If the
         * GDT is read-only, that will triple fault.  The TSS cannot be
         * read-only because the CPU writes to it on task switches.
         *
         * On Xen PV, the GDT must be read-only because the hypervisor
         * requires it.
         */
        pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
                PAGE_KERNEL_RO : PAGE_KERNEL;
        pgprot_t tss_prot = PAGE_KERNEL;
#endif

        cea_set_pte(&cea->gdt, get_cpu_gdt_paddr(cpu), gdt_prot);

        cea_map_percpu_pages(&cea->entry_stack_page,
                             per_cpu_ptr(&entry_stack_storage, cpu), 1,
                             PAGE_KERNEL);

        /*
         * The Intel SDM says (Volume 3, 7.2.1):
         *
         *  Avoid placing a page boundary in the part of the TSS that the
         *  processor reads during a task switch (the first 104 bytes). The
         *  processor may not correctly perform address translations if a
         *  boundary occurs in this area. During a task switch, the processor
         *  reads and writes into the first 104 bytes of each TSS (using
         *  contiguous physical addresses beginning with the physical address
         *  of the first byte of the TSS). So, after TSS access begins, if
         *  part of the 104 bytes is not physically contiguous, the processor
         *  will access incorrect information without generating a page-fault
         *  exception.
         *
         * There are also a lot of errata involving the TSS spanning a page
         * boundary.  Assert that we're not doing that.
         */
        BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
                      offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
        BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
        /*
         * VMX changes the host TR limit to 0x67 after a VM exit. This is
         * okay, since 0x67 covers the size of struct x86_hw_tss. Make sure
         * that this is correct.
         */
        BUILD_BUG_ON(offsetof(struct tss_struct, x86_tss) != 0);
        BUILD_BUG_ON(sizeof(struct x86_hw_tss) != 0x68);

        cea_map_percpu_pages(&cea->tss, &per_cpu(cpu_tss_rw, cpu),
                             sizeof(struct tss_struct) / PAGE_SIZE, tss_prot);

#ifdef CONFIG_X86_32
        per_cpu(cpu_entry_area, cpu) = cea;
#endif

        percpu_setup_exception_stacks(cpu);

        percpu_setup_debug_store(cpu);
}

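/*
 * On 32-bit, populate page table entries for the whole cpu_entry_area
 * range, one PMD-sized step at a time.  On 64-bit the body is empty.
 */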
static __init void setup_cpu_entry_area_ptes(void)
{
#ifdef CONFIG_X86_32
        unsigned long start, end;

        /* The +1 is for the readonly IDT: */
        BUILD_BUG_ON((CPU_ENTRY_AREA_PAGES+1)*PAGE_SIZE != CPU_ENTRY_AREA_MAP_SIZE);
        BUILD_BUG_ON(CPU_ENTRY_AREA_TOTAL_SIZE != CPU_ENTRY_AREA_MAP_SIZE);
        BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK);

        start = CPU_ENTRY_AREA_BASE;
        end = start + CPU_ENTRY_AREA_MAP_SIZE;

        /* Careful here: start + PMD_SIZE might wrap around */
        for (; start < end && start >= CPU_ENTRY_AREA_BASE; start += PMD_SIZE)
                populate_extra_pte(start);
#endif
}

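/* Build the cpu_entry_area of every possible CPU early during boot. */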
void __init setup_cpu_entry_areas(void)
{
        unsigned int cpu;

        setup_cpu_entry_area_ptes();

        for_each_possible_cpu(cpu)
                setup_cpu_entry_area(cpu);

        /*
         * This is the last essential update to swapper_pg_dir which needs
         * to be synchronized to initial_page_table on 32-bit.
         */
        sync_initial_page_table();
}