linux/arch/x86/kernel/head_64.S
/*
 *  linux/arch/x86_64/kernel/head.S -- start in 32bit and switch to 64bit
 *
 *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
 *  Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
 *  Copyright (C) 2000 Karsten Keil <kkeil@suse.de>
 *  Copyright (C) 2001,2002 Andi Kleen <ak@suse.de>
 *  Copyright (C) 2005 Eric Biederman <ebiederm@xmission.com>
 */


#include <linux/linkage.h>
#include <linux/threads.h>
#include <linux/init.h>
#include <asm/segment.h>
#include <asm/pgtable.h>
#include <asm/page.h>
#include <asm/msr.h>
#include <asm/cache.h>
#include <asm/processor-flags.h>
#include <asm/percpu.h>

#ifdef CONFIG_PARAVIRT
#include <asm/asm-offsets.h>
#include <asm/paravirt.h>
#else
#define GET_CR2_INTO_RCX movq %cr2, %rcx
#endif

/*
 * We are not able to switch in one step to the final KERNEL ADDRESS SPACE
 * because we need identity-mapped pages.
 */

#define pud_index(x)	(((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))

L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET)
L3_PAGE_OFFSET = pud_index(__PAGE_OFFSET)
L4_START_KERNEL = pgd_index(__START_KERNEL_map)
L3_START_KERNEL = pud_index(__START_KERNEL_map)
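
/*
 * For reference (assuming the usual layout of __PAGE_OFFSET =
 * 0xffff880000000000 and __START_KERNEL_map = 0xffffffff80000000, with
 * 512-entry tables): L4_PAGE_OFFSET = 272, L3_PAGE_OFFSET = 0,
 * L4_START_KERNEL = 511 and L3_START_KERNEL = 510, matching the .org
 * directives and the "= 511" / "= 510" notes in the page tables below.
 */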

	.text
	__HEAD
	.code64
	.globl startup_64
startup_64:

	/*
	 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
	 * and someone has loaded an identity mapped page table
	 * for us.  These identity mapped page tables map all of the
	 * kernel pages and possibly all of memory.
	 *
	 * %esi holds a physical pointer to real_mode_data.
	 *
	 * We come here either directly from a 64bit bootloader, or from
	 * arch/x86_64/boot/compressed/head.S.
	 *
	 * We only come here initially at boot, nothing else comes here.
	 *
	 * Since we may be loaded at an address different from what we were
	 * compiled to run at, we first fix up the physical addresses in our
	 * page tables and then reload them.
	 */

	/* Compute the delta between the address I am compiled to run at and the
	 * address I am actually running at.
	 */
	leaq	_text(%rip), %rbp
	subq	$_text - __START_KERNEL_map, %rbp
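	/*
	 * %rbp now holds the load delta: the RIP-relative leaq yields the
	 * physical address _text is actually running at, and subtracting
	 * (_text - __START_KERNEL_map), the physical address it was linked
	 * to run at, leaves the difference used by the fixups below (zero
	 * when the kernel has not been relocated).
	 */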

	/* Is the address not 2M aligned? */
	movq	%rbp, %rax
	andl	$~PMD_PAGE_MASK, %eax
	testl	%eax, %eax
	jnz	bad_address

	/* Is the address too large? */
	leaq	_text(%rip), %rdx
	movq	$PGDIR_SIZE, %rax
	cmpq	%rax, %rdx
	jae	bad_address

	/* Fixup the physical addresses in the page table */
	addq	%rbp, init_level4_pgt + 0(%rip)
	addq	%rbp, init_level4_pgt + (L4_PAGE_OFFSET*8)(%rip)
	addq	%rbp, init_level4_pgt + (L4_START_KERNEL*8)(%rip)

	addq	%rbp, level3_ident_pgt + 0(%rip)

	addq	%rbp, level3_kernel_pgt + (510*8)(%rip)
	addq	%rbp, level3_kernel_pgt + (511*8)(%rip)

	addq	%rbp, level2_fixmap_pgt + (506*8)(%rip)

	/* Add an Identity mapping if I am above 1G */
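	/*
	 * level3_ident_pgt only has its first entry filled, covering the
	 * 0-1G range via level2_ident_pgt.  If the (2M-aligned) _text sits
	 * above 1G, plug level2_spare_pgt into the matching level-3 slot
	 * and install a single 2M large-page entry covering _text in it.
	 */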
	leaq	_text(%rip), %rdi
	andq	$PMD_PAGE_MASK, %rdi

	movq	%rdi, %rax
	shrq	$PUD_SHIFT, %rax
	andq	$(PTRS_PER_PUD - 1), %rax
	jz	ident_complete

	leaq	(level2_spare_pgt - __START_KERNEL_map + _KERNPG_TABLE)(%rbp), %rdx
	leaq	level3_ident_pgt(%rip), %rbx
	movq	%rdx, 0(%rbx, %rax, 8)

	movq	%rdi, %rax
	shrq	$PMD_SHIFT, %rax
	andq	$(PTRS_PER_PMD - 1), %rax
	leaq	__PAGE_KERNEL_IDENT_LARGE_EXEC(%rdi), %rdx
	leaq	level2_spare_pgt(%rip), %rbx
	movq	%rdx, 0(%rbx, %rax, 8)
ident_complete:

	/*
	 * Fixup the kernel text+data virtual addresses. Note that
	 * we might write invalid pmds when the kernel is relocated;
	 * cleanup_highmap() fixes this up along with the mappings
	 * beyond _end.
	 */

	leaq	level2_kernel_pgt(%rip), %rdi
	leaq	4096(%rdi), %r8
	/* See if it is a valid page table entry */
1:	testq	$1, 0(%rdi)
	jz	2f
	addq	%rbp, 0(%rdi)
	/* Go to the next page */
2:	addq	$8, %rdi
	cmp	%r8, %rdi
	jne	1b

	/* Fixup phys_base */
	addq	%rbp, phys_base(%rip)

#ifdef CONFIG_X86_TRAMPOLINE
	addq	%rbp, trampoline_level4_pgt + 0(%rip)
	addq	%rbp, trampoline_level4_pgt + (511*8)(%rip)
#endif

	/* Due to ENTRY(), sometimes the empty space gets filled with
	 * zeros. Better to take a jmp than to rely on the empty space
	 * being filled with 0x90 (nop).
	 */
	jmp secondary_startup_64
ENTRY(secondary_startup_64)
	/*
	 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
	 * and someone has loaded a mapped page table.
	 *
	 * %esi holds a physical pointer to real_mode_data.
	 *
	 * We come here either from startup_64 (using physical addresses)
	 * or from trampoline.S (using virtual addresses).
	 *
	 * Using virtual addresses from trampoline.S removes the need
	 * to have any identity mapped pages in the kernel page table
	 * after the boot processor executes this code.
	 */

	/* Enable PAE mode and PGE */
	movl	$(X86_CR4_PAE | X86_CR4_PGE), %eax
	movq	%rax, %cr4

	/* Setup early boot stage 4 level pagetables. */
	movq	$(init_level4_pgt - __START_KERNEL_map), %rax
	addq	phys_base(%rip), %rax
	movq	%rax, %cr3

	/* Ensure I am executing from virtual addresses */
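	/*
	 * $1f is the link-time (kernel virtual) address of the label below;
	 * the indirect jump therefore moves %rip from the identity mapping
	 * onto the __START_KERNEL_map mapping when we come here from
	 * startup_64 (from the trampoline we already run at virtual
	 * addresses).
	 */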
	movq	$1f, %rax
	jmp	*%rax
1:

	/* Check if nx is implemented */
	movl	$0x80000001, %eax
	cpuid
	movl	%edx,%edi

	/* Setup EFER (Extended Feature Enable Register) */
	movl	$MSR_EFER, %ecx
	rdmsr
	btsl	$_EFER_SCE, %eax	/* Enable System Call */
	btl	$20,%edi		/* No Execute supported? */
	jnc	1f
	btsl	$_EFER_NX, %eax
1:	wrmsr				/* Make changes effective */

	/* Setup cr0 */
#define CR0_STATE	(X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
			 X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
			 X86_CR0_PG)
	movl	$CR0_STATE, %eax
	/* Make changes effective */
	movq	%rax, %cr0

	/* Setup a boot time stack */
	movq stack_start(%rip),%rsp

	/* zero EFLAGS after setting rsp */
	pushq $0
	popfq

	/*
	 * We must switch to a new descriptor in kernel space for the GDT
	 * because soon the kernel won't have access anymore to the userspace
	 * addresses we are currently running on. We have to do that here
	 * because in 32bit we couldn't load a 64bit linear address.
	 */
	lgdt	early_gdt_descr(%rip)

	/* set up data segments. actually 0 would do too */
	movl $__KERNEL_DS,%eax
	movl %eax,%ds
	movl %eax,%ss
	movl %eax,%es

	/*
	 * We don't really need to load %fs or %gs, but load them anyway
	 * to kill any stale realmode selectors.  This allows execution
	 * under VT hardware.
	 */
	movl %eax,%fs
	movl %eax,%gs

	/* Set up %gs.
	 *
	 * The base of %gs always points to the bottom of the irqstack
	 * union.  If the stack protector canary is enabled, it is
	 * located at %gs:40.  Note that, on SMP, the boot cpu uses
	 * init data section till per cpu areas are set up.
	 */
	movl	$MSR_GS_BASE,%ecx
	movq	initial_gs(%rip),%rax
	movq	%rax,%rdx
	shrq	$32,%rdx
	wrmsr
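	/*
	 * wrmsr takes the value in %edx:%eax, hence the 64-bit initial_gs
	 * value is split into its high and low halves above.
	 */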

	/* esi is a pointer to the real mode structure with interesting info;
	   pass it to C */
	movl	%esi, %edi

	/* Finally jump to run C code and to be on a real kernel address.
	 * Since we are running on identity-mapped space we have to jump
	 * to the full 64bit address; this is only possible as an indirect
	 * jump.  In addition we need to ensure %cs is set, so we make this
	 * a far return.
	 */
	movq	initial_code(%rip),%rax
	pushq	$0		# fake return address to stop unwinder
	pushq	$__KERNEL_CS	# set correct cs
	pushq	%rax		# target address in negative space
	lretq

	/* SMP bootup changes these two */
	__REFDATA
	.align	8
	ENTRY(initial_code)
	.quad	x86_64_start_kernel
	ENTRY(initial_gs)
	.quad	INIT_PER_CPU_VAR(irq_stack_union)
	__FINITDATA

	ENTRY(stack_start)
	.quad  init_thread_union+THREAD_SIZE-8
	.word  0

bad_address:
	jmp bad_address

	.section ".init.text","ax"
#ifdef CONFIG_EARLY_PRINTK
	.globl early_idt_handlers
early_idt_handlers:
	i = 0
	.rept NUM_EXCEPTION_VECTORS
	movl $i, %esi
	jmp early_idt_handler
	i = i + 1
	.endr
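	/*
	 * Each generated stub is just "movl $vector, %esi; jmp
	 * early_idt_handler", so the common handler below knows which of
	 * the NUM_EXCEPTION_VECTORS exceptions fired.
	 */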
#endif

ENTRY(early_idt_handler)
#ifdef CONFIG_EARLY_PRINTK
	cmpl $2,early_recursion_flag(%rip)
	jz  1f
	incl early_recursion_flag(%rip)
	GET_CR2_INTO_RCX
	movq %rcx,%r9
	xorl %r8d,%r8d		# zero for error code
	movl %esi,%ecx		# get vector number
	# Test %ecx against mask of vectors that push error code.
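	# (Vectors 8, 10-14 and 17 push an error code on the stack;
	#  1<<8 | 1<<10 | 1<<11 | 1<<12 | 1<<13 | 1<<14 | 1<<17 == 0x27d00.)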
	cmpl $31,%ecx
	ja 0f
	movl $1,%eax
	salq %cl,%rax
	testl $0x27d00,%eax
	je 0f
	popq %r8		# get error code
0:	movq 0(%rsp),%rcx	# get ip
	movq 8(%rsp),%rdx	# get cs
	xorl %eax,%eax
	leaq early_idt_msg(%rip),%rdi
	call early_printk
	cmpl $2,early_recursion_flag(%rip)
	jz  1f
	call dump_stack
#ifdef CONFIG_KALLSYMS
	leaq early_idt_ripmsg(%rip),%rdi
	movq 0(%rsp),%rsi	# get rip again
	call __print_symbol
#endif
#endif /* EARLY_PRINTK */
1:	hlt
	jmp 1b

#ifdef CONFIG_EARLY_PRINTK
early_recursion_flag:
	.long 0

early_idt_msg:
	.asciz "PANIC: early exception %02lx rip %lx:%lx error %lx cr2 %lx\n"
early_idt_ripmsg:
	.asciz "RIP %s\n"
#endif /* CONFIG_EARLY_PRINTK */
	.previous

#define NEXT_PAGE(name) \
	.balign	PAGE_SIZE; \
ENTRY(name)

/* Automate the creation of 1 to 1 mapping pmd entries */
#define PMDS(START, PERM, COUNT)			\
	i = 0 ;						\
	.rept (COUNT) ;					\
	.quad	(START) + (i << PMD_SHIFT) + (PERM) ;	\
	i = i + 1 ;					\
	.endr
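/*
 * For example, PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, 3) expands to three
 * pmd entries covering 0-6MB with 2MB large pages (PMD_SHIFT is 21),
 * writing perm for the permission bits:
 *	.quad 0 + (0 << 21) + perm
 *	.quad 0 + (1 << 21) + perm
 *	.quad 0 + (2 << 21) + perm
 */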

	/*
	 * This default setting generates an ident mapping at address 0x100000
	 * and a mapping for the kernel that precisely maps virtual address
	 * 0xffffffff80000000 to physical address 0x000000. (always using
	 * 2Mbyte large pages provided by PAE mode)
	 */
NEXT_PAGE(init_level4_pgt)
	.quad	level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
	.org	init_level4_pgt + L4_PAGE_OFFSET*8, 0
	.quad	level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
	.org	init_level4_pgt + L4_START_KERNEL*8, 0
	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
	.quad	level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE

NEXT_PAGE(level3_ident_pgt)
	.quad	level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
	.fill	511,8,0

NEXT_PAGE(level3_kernel_pgt)
	.fill	L3_START_KERNEL,8,0
	/* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
	.quad	level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE
	.quad	level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE

NEXT_PAGE(level2_fixmap_pgt)
	.fill	506,8,0
	.quad	level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE
	/* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */
	.fill	5,8,0

NEXT_PAGE(level1_fixmap_pgt)
	.fill	512,8,0

NEXT_PAGE(level2_ident_pgt)
	/* Since I easily can, map the first 1G.
	 * Don't set NX because code runs from these pages.
	 */
	PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)

NEXT_PAGE(level2_kernel_pgt)
	/*
	 * 512 MB kernel mapping. We spend a full page on this pagetable
	 * anyway.
	 *
	 * The kernel code+data+bss must not be bigger than that.
	 *
	 * (NOTE: at +512MB starts the module area, see MODULES_VADDR.
	 *  If you want to increase this then increase MODULES_VADDR
	 *  too.)
	 */
	PMDS(0, __PAGE_KERNEL_LARGE_EXEC,
		KERNEL_IMAGE_SIZE/PMD_SIZE)
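	/*
	 * With the 512MB KERNEL_IMAGE_SIZE noted above and 2MB pmds, this
	 * expands to 256 entries, i.e. half of the 512-entry pmd page.
	 */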

NEXT_PAGE(level2_spare_pgt)
	.fill	512, 8, 0

#undef PMDS
#undef NEXT_PAGE

	.data
	.align 16
	.globl early_gdt_descr
early_gdt_descr:
	.word	GDT_ENTRIES*8-1
early_gdt_descr_base:
	.quad	INIT_PER_CPU_VAR(gdt_page)
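	/*
	 * lgdt expects a 16-bit limit (size of the GDT in bytes minus one)
	 * followed by a 64-bit linear base address, which is why
	 * early_gdt_descr is laid out as a .word plus a .quad.
	 */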

ENTRY(phys_base)
	/* This must match the first entry in level2_kernel_pgt */
	.quad   0x0000000000000000

#include "../../x86/xen/xen-head.S"

	.section .bss, "aw", @nobits
	.align L1_CACHE_BYTES
ENTRY(idt_table)
	.skip IDT_ENTRIES * 16

	__PAGE_ALIGNED_BSS
	.align PAGE_SIZE
ENTRY(empty_zero_page)
	.skip PAGE_SIZE