linux/arch/x86/mm/kaslr.c
// SPDX-License-Identifier: GPL-2.0
/*
 * This file implements KASLR memory randomization for x86_64. It randomizes
 * the virtual address space of kernel memory regions (physical memory
 * mapping, vmalloc & vmemmap) for x86_64. This security feature mitigates
 * exploits relying on predictable kernel addresses.
 *
 * Entropy is generated using the KASLR early boot functions now shared in
 * the lib directory (originally written by Kees Cook). Randomization is
 * done at the PGD and P4D/PUD page table levels to increase the number of
 * possible addresses. The physical memory mapping code was adapted to
 * support P4D/PUD level virtual addresses. On the best configuration this
 * implementation provides on average 30,000 possible virtual addresses for
 * each memory region. An additional low memory page is used to ensure each
 * CPU can start with a PGD aligned virtual address (for realmode).
 *
 * The order of the memory regions is not changed. The feature looks at
 * the available space for the regions based on different configuration
 * options and randomizes the base and the space between each region. The
 * size of the physical memory mapping is the available physical memory.
 */

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/random.h>
#include <linux/memblock.h>
#include <linux/pgtable.h>

#include <asm/setup.h>
#include <asm/kaslr.h>

#include "mm_internal.h"

#define TB_SHIFT 40

/*
 * The end address could depend on more configuration options to make the
 * highest amount of space for randomization available, but that's too hard
 * to keep straight and caused issues already.
 */
static const unsigned long vaddr_end = CPU_ENTRY_AREA_BASE;

/*
 * Memory regions randomized by KASLR (except modules, which use separate
 * logic earlier during boot). The list is ordered based on virtual
 * addresses. This order is kept after randomization.
 */
static __initdata struct kaslr_memory_region {
        unsigned long *base;
        unsigned long size_tb;
} kaslr_regions[] = {
        { &page_offset_base, 0 },
        { &vmalloc_base, 0 },
        { &vmemmap_base, 0 },
};
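/*
 * Note: the size_tb fields are computed at boot by kernel_randomize_memory();
 * the zero initializers above are only placeholders.
 */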

/* Get size in bytes used by the memory region */
static inline unsigned long get_padding(struct kaslr_memory_region *region)
{
        return (region->size_tb << TB_SHIFT);
}

/* Initialize base and padding for each memory region randomized with KASLR */
void __init kernel_randomize_memory(void)
{
        size_t i;
        unsigned long vaddr_start, vaddr;
        unsigned long rand, memory_tb;
        struct rnd_state rand_state;
        unsigned long remain_entropy;
        unsigned long vmemmap_size;

        vaddr_start = pgtable_l5_enabled() ? __PAGE_OFFSET_BASE_L5 : __PAGE_OFFSET_BASE_L4;
        vaddr = vaddr_start;

        /*
         * These BUILD_BUG_ON checks ensure the memory layout is consistent
         * with the vaddr_start/vaddr_end variables. These checks are very
         * limited....
         */
        BUILD_BUG_ON(vaddr_start >= vaddr_end);
        BUILD_BUG_ON(vaddr_end != CPU_ENTRY_AREA_BASE);
        BUILD_BUG_ON(vaddr_end > __START_KERNEL_map);

        if (!kaslr_memory_enabled())
                return;

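        /*
         * Size the regions before placing them. As a rough illustration
         * (actual values depend on configuration): with 4-level paging
         * MAX_PHYSMEM_BITS is typically 46, so region 0 starts out as
         * 1 << (46 - 40) = 64TB; region 1 gets the fixed vmalloc area size.
         */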
        kaslr_regions[0].size_tb = 1 << (MAX_PHYSMEM_BITS - TB_SHIFT);
        kaslr_regions[1].size_tb = VMALLOC_SIZE_TB;

        /*
         * Limit the physical memory mapping region to the available
         * memory, adding padding if needed (especially for memory hotplug
         * support).
         */
        BUG_ON(kaslr_regions[0].base != &page_offset_base);
        memory_tb = DIV_ROUND_UP(max_pfn << PAGE_SHIFT, 1UL << TB_SHIFT) +
                CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING;
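        /*
         * Purely illustrative numbers: a machine with 16GB of RAM rounds up
         * to 1TB here, and the hotplug padding (10TB by default when memory
         * hotplug is enabled) would then give memory_tb = 11TB.
         */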

        /* Adapt physical memory region size based on available memory */
        if (memory_tb < kaslr_regions[0].size_tb)
                kaslr_regions[0].size_tb = memory_tb;

        /*
         * Calculate the vmemmap region size in TBs, aligned to a TB
         * boundary.
         */
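        /*
         * Rough illustration, assuming 4KB pages and sizeof(struct page) ==
         * 64: every TB of direct mapping needs 2^28 struct pages, i.e. 16GB
         * of vmemmap, so a 64TB direct map rounds up to a 1TB vmemmap region.
         */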
        vmemmap_size = (kaslr_regions[0].size_tb << (TB_SHIFT - PAGE_SHIFT)) *
                        sizeof(struct page);
        kaslr_regions[2].size_tb = DIV_ROUND_UP(vmemmap_size, 1UL << TB_SHIFT);

        /* Calculate entropy available between regions */
        remain_entropy = vaddr_end - vaddr_start;
        for (i = 0; i < ARRAY_SIZE(kaslr_regions); i++)
                remain_entropy -= get_padding(&kaslr_regions[i]);
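        /*
         * remain_entropy is now the slack left over once all regions are
         * packed back to back in the [vaddr_start, vaddr_end) window; it is
         * handed out below as random gaps between the regions.
         */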

        prandom_seed_state(&rand_state, kaslr_get_random_long("Memory"));

        for (i = 0; i < ARRAY_SIZE(kaslr_regions); i++) {
                unsigned long entropy;

                /*
                 * Select a random virtual address using the extra entropy
                 * available.
                 */
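                /*
                 * Each region may consume at most an even share of the slack
                 * that is still unassigned; the chosen offset is truncated to
                 * a PUD (1GB) boundary. Whatever this region does not consume
                 * stays in remain_entropy for the regions after it.
                 */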
                entropy = remain_entropy / (ARRAY_SIZE(kaslr_regions) - i);
                prandom_bytes_state(&rand_state, &rand, sizeof(rand));
                entropy = (rand % (entropy + 1)) & PUD_MASK;
                vaddr += entropy;
                *kaslr_regions[i].base = vaddr;

                /*
                 * Jump the region and add a minimum padding based on
                 * randomization alignment.
                 */
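                /*
                 * Since the bases and region sizes are PUD aligned, the
                 * round_up() below leaves one full unused PUD (1GB) after the
                 * region and keeps the next base PUD aligned.
                 */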
                vaddr += get_padding(&kaslr_regions[i]);
                vaddr = round_up(vaddr + 1, PUD_SIZE);
                remain_entropy -= entropy;
        }
}

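/*
 * Set up the page tables the real-mode trampoline uses to identity-map the
 * low 1MB. Without KASLR the trampoline can simply reuse the kernel's
 * direct-mapping PGD entry, because __PAGE_OFFSET is then PGD aligned and
 * that entry maps physical address 0 at its very start. With randomization
 * the direct map may start at any PUD boundary, so a dedicated top-level
 * entry is built here, reusing the kernel's existing lower-level tables.
 */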
void __meminit init_trampoline_kaslr(void)
{
        pud_t *pud_page_tramp, *pud, *pud_tramp;
        p4d_t *p4d_page_tramp, *p4d, *p4d_tramp;
        unsigned long paddr, vaddr;
        pgd_t *pgd;

        pud_page_tramp = alloc_low_page();

        /*
         * There are two mappings for the low 1MB area, the direct mapping
         * and the 1:1 mapping for the real mode trampoline:
         *
         * Direct mapping: virt_addr = phys_addr + PAGE_OFFSET
         * 1:1 mapping:    virt_addr = phys_addr
         */
        paddr = 0;
        vaddr = (unsigned long)__va(paddr);
        pgd = pgd_offset_k(vaddr);

        p4d = p4d_offset(pgd, vaddr);
        pud = pud_offset(p4d, vaddr);

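        /*
         * Copy the kernel's live PUD entry for the low 1MB into the freshly
         * allocated trampoline PUD page: the lower-level page tables are
         * shared with the direct mapping, only the PUD/P4D pages and the PGD
         * entry set below are private to the trampoline.
         */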
        pud_tramp = pud_page_tramp + pud_index(paddr);
        *pud_tramp = *pud;

        if (pgtable_l5_enabled()) {
                p4d_page_tramp = alloc_low_page();

                p4d_tramp = p4d_page_tramp + p4d_index(paddr);

                set_p4d(p4d_tramp,
                        __p4d(_KERNPG_TABLE | __pa(pud_page_tramp)));

                set_pgd(&trampoline_pgd_entry,
                        __pgd(_KERNPG_TABLE | __pa(p4d_page_tramp)));
        } else {
                set_pgd(&trampoline_pgd_entry,
                        __pgd(_KERNPG_TABLE | __pa(pud_page_tramp)));
        }
}