linux/arch/arm64/kernel/head.S
/*
 * Low-level CPU initialisation
 * Based on arch/arm/kernel/head.S
 *
 * Copyright (C) 1994-2002 Russell King
 * Copyright (C) 2003-2012 ARM Ltd.
 * Authors:     Catalin Marinas <catalin.marinas@arm.com>
 *              Will Deacon <will.deacon@arm.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/linkage.h>
#include <linux/init.h>
#include <linux/irqchip/arm-gic-v3.h>

#include <asm/assembler.h>
#include <asm/boot.h>
#include <asm/ptrace.h>
#include <asm/asm-offsets.h>
#include <asm/cache.h>
#include <asm/cputype.h>
#include <asm/elf.h>
#include <asm/image.h>
#include <asm/kernel-pgtable.h>
#include <asm/kvm_arm.h>
#include <asm/memory.h>
#include <asm/pgtable-hwdef.h>
#include <asm/pgtable.h>
#include <asm/page.h>
#include <asm/smp.h>
#include <asm/sysreg.h>
#include <asm/thread_info.h>
#include <asm/virt.h>

#include "efi-header.S"

#define __PHYS_OFFSET   (KERNEL_START - TEXT_OFFSET)

#if (TEXT_OFFSET & 0xfff) != 0
#error TEXT_OFFSET must be at least 4KB aligned
#elif (PAGE_OFFSET & 0x1fffff) != 0
#error PAGE_OFFSET must be at least 2MB aligned
#elif TEXT_OFFSET > 0x1fffff
#error TEXT_OFFSET must be less than 2MB
#endif

/*
 * Kernel startup entry point.
 * ---------------------------
 *
 * The requirements are:
 *   MMU = off, D-cache = off, I-cache = on or off,
 *   x0 = physical address of the FDT blob.
 *
 * This code is mostly position independent, so you call this at
 * __pa(KIMAGE_VADDR + TEXT_OFFSET).
 *
 * Note that the callee-saved registers are used for storing variables
 * that are useful before the MMU is enabled. The allocations are described
 * in the entry routines.
 */
        __HEAD
_head:
        /*
         * DO NOT MODIFY. Image header expected by Linux boot-loaders.
         */
#ifdef CONFIG_EFI
        /*
         * This add instruction has no meaningful effect except that
         * its opcode forms the magic "MZ" signature required by UEFI.
         */
        add     x13, x18, #0x16
        b       stext
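        /*
         * For reference: "add x13, x18, #0x16" assembles to 0x91005a4d,
         * stored little-endian as 4d 5a 00 91, so the first two bytes of
         * the image read as 'M', 'Z' - the PE/COFF signature UEFI firmware
         * looks for. Nothing ever depends on the result of the add itself.
         */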
#else
        b       stext                           // branch to kernel start, magic
        .long   0                               // reserved
#endif
        le64sym _kernel_offset_le               // Image load offset from start of RAM, little-endian
        le64sym _kernel_size_le                 // Effective size of kernel image, little-endian
        le64sym _kernel_flags_le                // Informative flags, little-endian
        .quad   0                               // reserved
        .quad   0                               // reserved
        .quad   0                               // reserved
        .ascii  ARM64_IMAGE_MAGIC               // Magic number
#ifdef CONFIG_EFI
        .long   pe_header - _head               // Offset to the PE header.

pe_header:
        __EFI_PE_HEADER
#else
        .long   0                               // reserved
#endif

        __INIT

        /*
         * The following callee-saved general purpose registers are used on the
         * primary low-level boot path:
         *
         *  Register   Scope                      Purpose
         *  x21        stext() .. start_kernel()  FDT pointer passed at boot in x0
         *  x23        stext() .. start_kernel()  physical misalignment/KASLR offset
         *  x28        __create_page_tables()     callee-preserved temp register
         *  x19/x20    __primary_switch()         callee-preserved temp registers
         */
ENTRY(stext)
        bl      preserve_boot_args
        bl      el2_setup                       // Drop to EL1, w0=cpu_boot_mode
        adrp    x23, __PHYS_OFFSET
        and     x23, x23, MIN_KIMG_ALIGN - 1    // KASLR offset, defaults to 0
        bl      set_cpu_boot_mode_flag
        bl      __create_page_tables
        /*
         * The following calls CPU setup code, see arch/arm64/mm/proc.S for
         * details.
         * On return, the CPU will be ready for the MMU to be turned on and
         * the TCR will have been set.
         */
        bl      __cpu_setup                     // initialise processor
        b       __primary_switch
ENDPROC(stext)

/*
 * Preserve the arguments passed by the bootloader in x0 .. x3
 */
preserve_boot_args:
        mov     x21, x0                         // x21=FDT

        adr_l   x0, boot_args                   // record the contents of
        stp     x21, x1, [x0]                   // x0 .. x3 at kernel entry
        stp     x2, x3, [x0, #16]

        dmb     sy                              // needed before dc ivac with
                                                // MMU off

        mov     x1, #0x20                       // 4 x 8 bytes
        b       __inval_dcache_area             // tail call
ENDPROC(preserve_boot_args)
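
/*
 * boot_args is inspected again later, with the MMU and caches on: the boot
 * protocol expects x1..x3 to be zero, and setup_arch() warns if the values
 * recorded here say otherwise (see arch/arm64/kernel/setup.c).
 */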

/*
 * Macro to create a table entry to the next page.
 *
 *      tbl:    page table address
 *      virt:   virtual address
 *      shift:  #imm page table shift
 *      ptrs:   #imm pointers per table page
 *
 * Preserves:   virt
 * Corrupts:    ptrs, tmp1, tmp2
 * Returns:     tbl -> next level table page address
 */
        .macro  create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2
        add     \tmp1, \tbl, #PAGE_SIZE
        phys_to_pte \tmp2, \tmp1
        orr     \tmp2, \tmp2, #PMD_TYPE_TABLE   // address of next table and entry type
        lsr     \tmp1, \virt, #\shift
        sub     \ptrs, \ptrs, #1
        and     \tmp1, \tmp1, \ptrs             // table index
        str     \tmp2, [\tbl, \tmp1, lsl #3]
        add     \tbl, \tbl, #PAGE_SIZE          // next level table page
        .endm
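
/*
 * Illustration only; in rough C-like pseudocode the macro does:
 *
 *      idx      = (virt >> shift) & (ptrs - 1);
 *      tbl[idx] = phys_to_pte(tbl + PAGE_SIZE) | PMD_TYPE_TABLE;
 *      tbl     += PAGE_SIZE;           // descend to the next level
 *
 * i.e. it writes one table descriptor linking this level to a table
 * allocated in the page that immediately follows it.
 */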

/*
 * Macro to populate page table entries. These entries can be pointers to the
 * next level, or last level entries pointing to physical memory.
 *
 *      tbl:    page table address
 *      rtbl:   pointer to page table or physical memory
 *      index:  start index to write
 *      eindex: end index to write - [index, eindex] written to
 *      flags:  flags for pagetable entry to or in
 *      inc:    increment to rtbl between each entry
 *      tmp1:   temporary variable
 *
 * Preserves:   tbl, eindex, flags, inc
 * Corrupts:    index, tmp1
 * Returns:     rtbl
 */
        .macro populate_entries, tbl, rtbl, index, eindex, flags, inc, tmp1
.Lpe\@: phys_to_pte \tmp1, \rtbl
        orr     \tmp1, \tmp1, \flags    // tmp1 = table entry
        str     \tmp1, [\tbl, \index, lsl #3]
        add     \rtbl, \rtbl, \inc      // rtbl = pa next level
        add     \index, \index, #1
        cmp     \index, \eindex
        b.ls    .Lpe\@
        .endm
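
/*
 * For example, with index = 1, eindex = 3 and inc = PAGE_SIZE this writes
 * three descriptors:
 *
 *      tbl[1] = phys_to_pte(rtbl)                 | flags
 *      tbl[2] = phys_to_pte(rtbl +     PAGE_SIZE) | flags
 *      tbl[3] = phys_to_pte(rtbl + 2 * PAGE_SIZE) | flags
 *
 * and hands back rtbl advanced by 3 * PAGE_SIZE.
 */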

/*
 * Compute indices of table entries from a virtual address range. If multiple
 * entries were needed in the previous page table level then the next page
 * table level is assumed to be composed of multiple pages. (This effectively
 * scales the end index).
 *
 *      vstart: virtual address of start of range
 *      vend:   virtual address of end of range
 *      shift:  shift used to transform virtual address into index
 *      ptrs:   number of entries in page table
 *      istart: index in table corresponding to vstart
 *      iend:   index in table corresponding to vend
 *      count:  On entry: how many extra entries were required in previous level, scales
 *                        our end index.
 *              On exit: returns how many extra entries required for next page table level
 *
 * Preserves:   vstart, vend, shift, ptrs
 * Returns:     istart, iend, count
 */
        .macro compute_indices, vstart, vend, shift, ptrs, istart, iend, count
        lsr     \iend, \vend, \shift
        mov     \istart, \ptrs
        sub     \istart, \istart, #1
        and     \iend, \iend, \istart   // iend = (vend >> shift) & (ptrs - 1)
        mov     \istart, \ptrs
        mul     \istart, \istart, \count
        add     \iend, \iend, \istart   // iend += count * ptrs
                                        // our entries span multiple tables

        lsr     \istart, \vstart, \shift
        mov     \count, \ptrs
        sub     \count, \count, #1
        and     \istart, \istart, \count

        sub     \count, \iend, \istart
        .endm
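
/*
 * Worked example (shift = 21, ptrs = 512, count = 0 on entry): for
 * vstart = 0x40200000 and vend = 0x40a00000,
 *
 *      iend   = (0x40a00000 >> 21) & 511 = 5
 *      istart = (0x40200000 >> 21) & 511 = 1
 *      count  = 5 - 1 = 4
 *
 * i.e. entries 1..5 of this table are used, and the next level needs four
 * extra table pages on top of the first one.
 */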

/*
 * Map memory for the specified virtual address range. Each page table level
 * that is needed may hold multiple entries. If a level requires n entries, the
 * next page table level is assumed to be formed from n pages.
 *
 *      tbl:    location of page table
 *      rtbl:   address to be used for first level page table entry (typically tbl + PAGE_SIZE)
 *      vstart: start address to map
 *      vend:   end address to map - we map [vstart, vend]
 *      flags:  flags to use to map last level entries
 *      phys:   physical address corresponding to vstart - physical memory is contiguous
 *      pgds:   the number of pgd entries
 *
 * Temporaries: istart, iend, tmp, count, sv - these need to be different registers
 * Preserves:   vstart, vend, flags
 * Corrupts:    tbl, rtbl, istart, iend, tmp, count, sv
 */
        .macro map_memory, tbl, rtbl, vstart, vend, flags, phys, pgds, istart, iend, tmp, count, sv
        add \rtbl, \tbl, #PAGE_SIZE
        mov \sv, \rtbl
        mov \count, #0
        compute_indices \vstart, \vend, #PGDIR_SHIFT, \pgds, \istart, \iend, \count
        populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
        mov \tbl, \sv
        mov \sv, \rtbl

#if SWAPPER_PGTABLE_LEVELS > 3
        compute_indices \vstart, \vend, #PUD_SHIFT, #PTRS_PER_PUD, \istart, \iend, \count
        populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
        mov \tbl, \sv
        mov \sv, \rtbl
#endif

#if SWAPPER_PGTABLE_LEVELS > 2
        compute_indices \vstart, \vend, #SWAPPER_TABLE_SHIFT, #PTRS_PER_PMD, \istart, \iend, \count
        populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
        mov \tbl, \sv
#endif

        compute_indices \vstart, \vend, #SWAPPER_BLOCK_SHIFT, #PTRS_PER_PTE, \istart, \iend, \count
        bic \count, \phys, #SWAPPER_BLOCK_SIZE - 1
        populate_entries \tbl, \count, \istart, \iend, \flags, #SWAPPER_BLOCK_SIZE, \tmp
        .endm
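
/*
 * Rough sketch of what an invocation does: starting at the PGD in 'tbl',
 * each enabled level runs compute_indices to find the index range covering
 * [vstart, vend] and populate_entries to fill that range with table
 * descriptors, carving the next level's tables out of the pages following
 * 'tbl'. The final level is filled with block entries mapping 'phys' with
 * 'flags'.
 */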

/*
 * Set up the initial page tables. We only set up the barest amount which is
 * required to get the kernel running. The following sections are required:
 *   - identity mapping to enable the MMU (low address, TTBR0)
 *   - first few MB of the kernel linear mapping to jump to once the MMU has
 *     been enabled
 */
__create_page_tables:
        mov     x28, lr

        /*
         * Invalidate the init page tables to avoid potential dirty cache lines
         * being evicted. Other page tables are allocated in rodata as part of
         * the kernel image, and thus are clean to the PoC per the boot
         * protocol.
         */
        adrp    x0, init_pg_dir
        adrp    x1, init_pg_end
        sub     x1, x1, x0
        bl      __inval_dcache_area

        /*
         * Clear the init page tables.
         */
        adrp    x0, init_pg_dir
        adrp    x1, init_pg_end
        sub     x1, x1, x0
1:      stp     xzr, xzr, [x0], #16
        stp     xzr, xzr, [x0], #16
        stp     xzr, xzr, [x0], #16
        stp     xzr, xzr, [x0], #16
        subs    x1, x1, #64
        b.ne    1b

        mov     x7, SWAPPER_MM_MMUFLAGS

        /*
         * Create the identity mapping.
         */
        adrp    x0, idmap_pg_dir
        adrp    x3, __idmap_text_start          // __pa(__idmap_text_start)

#ifdef CONFIG_ARM64_USER_VA_BITS_52
        mrs_s   x6, SYS_ID_AA64MMFR2_EL1
        and     x6, x6, #(0xf << ID_AA64MMFR2_LVA_SHIFT)
        mov     x5, #52
        cbnz    x6, 1f
#endif
        mov     x5, #VA_BITS
1:
        adr_l   x6, vabits_user
        str     x5, [x6]
        dmb     sy
        dc      ivac, x6                // Invalidate potentially stale cache line

        /*
         * VA_BITS may be too small to allow for an ID mapping to be created
         * that covers system RAM if that is located sufficiently high in the
         * physical address space. So for the ID map, use an extended virtual
         * range in that case, and configure an additional translation level
         * if needed.
         *
         * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the
         * entire ID map region can be mapped. As T0SZ == (64 - #bits used),
         * this number conveniently equals the number of leading zeroes in
         * the physical address of __idmap_text_end.
         */
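        /*
         * For example, if __idmap_text_end sits at physical address
         * 0x8000000000 (bit 39 set), clz gives 24: with VA_BITS == 48
         * (default T0SZ 16) that is fine, but with VA_BITS == 39
         * (default T0SZ 25) the extended VA range below is needed.
         */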
        adrp    x5, __idmap_text_end
        clz     x5, x5
        cmp     x5, TCR_T0SZ(VA_BITS)   // default T0SZ small enough?
        b.ge    1f                      // .. then skip VA range extension

        adr_l   x6, idmap_t0sz
        str     x5, [x6]
        dmb     sy
        dc      ivac, x6                // Invalidate potentially stale cache line

#if (VA_BITS < 48)
#define EXTRA_SHIFT     (PGDIR_SHIFT + PAGE_SHIFT - 3)
#define EXTRA_PTRS      (1 << (PHYS_MASK_SHIFT - EXTRA_SHIFT))

        /*
         * If VA_BITS < 48, we have to configure an additional table level.
         * First, we have to verify our assumption that the current value of
         * VA_BITS was chosen such that all translation levels are fully
         * utilised, and that lowering T0SZ will always result in an additional
         * translation level to be configured.
         */
#if VA_BITS != EXTRA_SHIFT
#error "Mismatch between VA_BITS and page size/number of translation levels"
#endif

        mov     x4, EXTRA_PTRS
        create_table_entry x0, x3, EXTRA_SHIFT, x4, x5, x6
#else
        /*
         * If VA_BITS == 48, we don't have to configure an additional
         * translation level, but the top-level table has more entries.
         */
        mov     x4, #1 << (PHYS_MASK_SHIFT - PGDIR_SHIFT)
        str_l   x4, idmap_ptrs_per_pgd, x5
#endif
1:
        ldr_l   x4, idmap_ptrs_per_pgd
        mov     x5, x3                          // __pa(__idmap_text_start)
        adr_l   x6, __idmap_text_end            // __pa(__idmap_text_end)

        map_memory x0, x1, x3, x6, x7, x3, x4, x10, x11, x12, x13, x14
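        /*
         * Note that x3 is passed as both 'vstart' and 'phys' above: for the
         * identity map, the virtual and physical addresses of the
         * [__idmap_text_start, __idmap_text_end] range are the same.
         */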

        /*
         * Map the kernel image (at its runtime virtual address).
         */
        adrp    x0, init_pg_dir
        mov_q   x5, KIMAGE_VADDR + TEXT_OFFSET  // compile time __va(_text)
        add     x5, x5, x23                     // add KASLR displacement
        mov     x4, PTRS_PER_PGD
        adrp    x6, _end                        // runtime __pa(_end)
        adrp    x3, _text                       // runtime __pa(_text)
        sub     x6, x6, x3                      // _end - _text
        add     x6, x6, x5                      // runtime __va(_end)

        map_memory x0, x1, x5, x6, x7, x3, x4, x10, x11, x12, x13, x14

        /*
         * Since the page tables have been populated with non-cacheable
         * accesses (MMU disabled), invalidate the idmap and swapper page
         * tables again to remove any speculatively loaded cache lines.
         */
        adrp    x0, idmap_pg_dir
        adrp    x1, init_pg_end
        sub     x1, x1, x0
        dmb     sy
        bl      __inval_dcache_area

        ret     x28
ENDPROC(__create_page_tables)
        .ltorg

/*
 * The following fragment of code is executed with the MMU enabled.
 *
 *   x0 = __PHYS_OFFSET
 */
__primary_switched:
        adrp    x4, init_thread_union
        add     sp, x4, #THREAD_SIZE
        adr_l   x5, init_task
        msr     sp_el0, x5                      // Save thread_info

        adr_l   x8, vectors                     // load VBAR_EL1 with virtual
        msr     vbar_el1, x8                    // vector table address
        isb

        stp     xzr, x30, [sp, #-16]!
        mov     x29, sp

        str_l   x21, __fdt_pointer, x5          // Save FDT pointer

        ldr_l   x4, kimage_vaddr                // Save the offset between
        sub     x4, x4, x0                      // the kernel virtual and
        str_l   x4, kimage_voffset, x5          // physical mappings

        // Clear BSS
        adr_l   x0, __bss_start
        mov     x1, xzr
        adr_l   x2, __bss_stop
        sub     x2, x2, x0
        bl      __pi_memset
        dsb     ishst                           // Make zero page visible to PTW

#ifdef CONFIG_KASAN
        bl      kasan_early_init
#endif
#ifdef CONFIG_RANDOMIZE_BASE
        tst     x23, ~(MIN_KIMG_ALIGN - 1)      // already running randomized?
        b.ne    0f
        mov     x0, x21                         // pass FDT address in x0
        bl      kaslr_early_init                // parse FDT for KASLR options
        cbz     x0, 0f                          // KASLR disabled? just proceed
        orr     x23, x23, x0                    // record KASLR offset
        ldp     x29, x30, [sp], #16             // we must enable KASLR, return
        ret                                     // to __primary_switch()
0:
#endif
        add     sp, sp, #16
        mov     x29, #0
        mov     x30, #0
        b       start_kernel
ENDPROC(__primary_switched)

/*
 * end early head section, begin head code that is also used for
 * hotplug and needs to have the same protections as the text region
 */
        .section ".idmap.text","awx"

ENTRY(kimage_vaddr)
        .quad           _text - TEXT_OFFSET
EXPORT_SYMBOL(kimage_vaddr)

/*
 * If we're fortunate enough to boot at EL2, ensure that the world is
 * sane before dropping to EL1.
 *
 * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in w0 if
 * booted in EL1 or EL2 respectively.
 */
ENTRY(el2_setup)
        msr     SPsel, #1                       // We want to use SP_EL{1,2}
        mrs     x0, CurrentEL
        cmp     x0, #CurrentEL_EL2
        b.eq    1f
        mov_q   x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1)
        msr     sctlr_el1, x0
        mov     w0, #BOOT_CPU_MODE_EL1          // This cpu booted in EL1
        isb
        ret

1:      mov_q   x0, (SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
        msr     sctlr_el2, x0

#ifdef CONFIG_ARM64_VHE
        /*
         * Check for VHE being present. For the rest of the EL2 setup,
         * x2 being non-zero indicates that we do have VHE, and that the
         * kernel is intended to run at EL2.
         */
        mrs     x2, id_aa64mmfr1_el1
        ubfx    x2, x2, #8, #4
#else
        mov     x2, xzr
#endif

        /* Hyp configuration. */
        mov_q   x0, HCR_HOST_NVHE_FLAGS
        cbz     x2, set_hcr
        mov_q   x0, HCR_HOST_VHE_FLAGS
set_hcr:
        msr     hcr_el2, x0
        isb

        /*
         * Allow Non-secure EL1 and EL0 to access physical timer and counter.
         * This is not necessary for VHE, since the host kernel runs in EL2,
         * and EL0 accesses are configured in the later stage of boot process.
         * Note that when HCR_EL2.E2H == 1, CNTHCTL_EL2 has the same bit layout
         * as CNTKCTL_EL1, and CNTKCTL_EL1 accessing instructions are redefined
         * to access CNTHCTL_EL2. This allows the kernel designed to run at EL1
         * to transparently mess with the EL0 bits via CNTKCTL_EL1 access in
         * EL2.
         */
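        /*
         * With HCR_EL2.E2H == 0, CNTHCTL_EL2 bit 0 is EL1PCTEN and bit 1 is
         * EL1PCEN, so the #3 below stops EL2 from trapping EL1/EL0 accesses
         * to both the physical counter and the physical timer.
         */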
        cbnz    x2, 1f
        mrs     x0, cnthctl_el2
        orr     x0, x0, #3                      // Enable EL1 physical timers
        msr     cnthctl_el2, x0
1:
        msr     cntvoff_el2, xzr                // Clear virtual offset

#ifdef CONFIG_ARM_GIC_V3
        /* GICv3 system register access */
        mrs     x0, id_aa64pfr0_el1
        ubfx    x0, x0, #24, #4
        cmp     x0, #1
        b.ne    3f

        mrs_s   x0, SYS_ICC_SRE_EL2
        orr     x0, x0, #ICC_SRE_EL2_SRE        // Set ICC_SRE_EL2.SRE==1
        orr     x0, x0, #ICC_SRE_EL2_ENABLE     // Set ICC_SRE_EL2.Enable==1
        msr_s   SYS_ICC_SRE_EL2, x0
        isb                                     // Make sure SRE is now set
        mrs_s   x0, SYS_ICC_SRE_EL2             // Read SRE back,
        tbz     x0, #0, 3f                      // and check that it sticks
        msr_s   SYS_ICH_HCR_EL2, xzr            // Reset ICH_HCR_EL2 to defaults

3:
#endif

        /* Populate ID registers. */
        mrs     x0, midr_el1
        mrs     x1, mpidr_el1
        msr     vpidr_el2, x0
        msr     vmpidr_el2, x1

#ifdef CONFIG_COMPAT
        msr     hstr_el2, xzr                   // Disable CP15 traps to EL2
#endif

        /* EL2 debug */
        mrs     x1, id_aa64dfr0_el1             // Check ID_AA64DFR0_EL1 PMUVer
        sbfx    x0, x1, #8, #4
        cmp     x0, #1
        b.lt    4f                              // Skip if no PMU present
        mrs     x0, pmcr_el0                    // Disable debug access traps
        ubfx    x0, x0, #11, #5                 // to EL2 and allow access to
4:
        csel    x3, xzr, x0, lt                 // all PMU counters from EL1

        /* Statistical profiling */
        ubfx    x0, x1, #32, #4                 // Check ID_AA64DFR0_EL1 PMSVer
        cbz     x0, 7f                          // Skip if SPE not present
        cbnz    x2, 6f                          // VHE?
        mrs_s   x4, SYS_PMBIDR_EL1              // If SPE available at EL2,
        and     x4, x4, #(1 << SYS_PMBIDR_EL1_P_SHIFT)
        cbnz    x4, 5f                          // then permit sampling of physical
        mov     x4, #(1 << SYS_PMSCR_EL2_PCT_SHIFT | \
                      1 << SYS_PMSCR_EL2_PA_SHIFT)
        msr_s   SYS_PMSCR_EL2, x4               // addresses and physical counter
5:
        mov     x1, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT)
        orr     x3, x3, x1                      // If we don't have VHE, then
        b       7f                              // use EL1&0 translation.
6:                                              // For VHE, use EL2 translation
        orr     x3, x3, #MDCR_EL2_TPMS          // and disable access from EL1
7:
        msr     mdcr_el2, x3                    // Configure debug traps

        /* LORegions */
        mrs     x1, id_aa64mmfr1_el1
        ubfx    x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4
        cbz     x0, 1f
        msr_s   SYS_LORC_EL1, xzr
1:

        /* Stage-2 translation */
        msr     vttbr_el2, xzr

        cbz     x2, install_el2_stub

        mov     w0, #BOOT_CPU_MODE_EL2          // This CPU booted in EL2
        isb
        ret

install_el2_stub:
        /*
         * When VHE is not in use, early init of EL2 and EL1 needs to be
         * done here.
         * When VHE _is_ in use, EL1 will not be used in the host and
         * requires no configuration, and all non-hyp-specific EL2 setup
         * will be done via the _EL1 system register aliases in __cpu_setup.
         */
        mov_q   x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1)
        msr     sctlr_el1, x0

        /* Coprocessor traps. */
        mov     x0, #0x33ff
        msr     cptr_el2, x0                    // Disable copro. traps to EL2

        /* SVE register access */
        mrs     x1, id_aa64pfr0_el1
        ubfx    x1, x1, #ID_AA64PFR0_SVE_SHIFT, #4
        cbz     x1, 7f

        bic     x0, x0, #CPTR_EL2_TZ            // Also disable SVE traps
        msr     cptr_el2, x0                    // Disable copro. traps to EL2
        isb
        mov     x1, #ZCR_ELx_LEN_MASK           // SVE: Enable full vector
        msr_s   SYS_ZCR_EL2, x1                 // length for EL1.

        /* Hypervisor stub */
7:      adr_l   x0, __hyp_stub_vectors
        msr     vbar_el2, x0

        /* spsr */
        mov     x0, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
                      PSR_MODE_EL1h)
        msr     spsr_el2, x0
        msr     elr_el2, lr
        mov     w0, #BOOT_CPU_MODE_EL2          // This CPU booted in EL2
        eret
ENDPROC(el2_setup)

/*
 * Sets the __boot_cpu_mode flag depending on the CPU boot mode passed
 * in w0. See arch/arm64/include/asm/virt.h for more info.
 */
set_cpu_boot_mode_flag:
        adr_l   x1, __boot_cpu_mode
        cmp     w0, #BOOT_CPU_MODE_EL2
        b.ne    1f
        add     x1, x1, #4
1:      str     w0, [x1]                        // Record the CPU boot mode
        dmb     sy
        dc      ivac, x1                        // Invalidate potentially stale cache line
        ret
ENDPROC(set_cpu_boot_mode_flag)

/*
 * These values are written with the MMU off, but read with the MMU on.
 * Writers will invalidate the corresponding address, discarding up to a
 * 'Cache Writeback Granule' (CWG) worth of data. The linker script ensures
 * sufficient alignment that the CWG doesn't overlap another section.
 */
        .pushsection ".mmuoff.data.write", "aw"
/*
 * We need to find out the CPU boot mode long after boot, so we need to
 * store it in a writable variable.
 *
 * This is not in .bss, because we set it sufficiently early that the boot-time
 * zeroing of .bss would clobber it.
 */
ENTRY(__boot_cpu_mode)
        .long   BOOT_CPU_MODE_EL2
        .long   BOOT_CPU_MODE_EL1
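/*
 * The two slots start out different (EL2, EL1). A CPU that entered at EL2
 * updates the second slot and a CPU that entered at EL1 updates the first
 * (see set_cpu_boot_mode_flag above), so both slots read BOOT_CPU_MODE_EL2
 * only if every CPU booted at EL2; asm/virt.h uses this to tell whether hyp
 * mode is available or the boot modes were mismatched.
 */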
/*
 * The booting CPU updates the failed status @__early_cpu_boot_status,
 * with MMU turned off.
 */
ENTRY(__early_cpu_boot_status)
        .long   0

        .popsection

        /*
         * This provides a "holding pen" where all secondary cores are held
         * until we're ready for them to initialise.
         */
ENTRY(secondary_holding_pen)
        bl      el2_setup                       // Drop to EL1, w0=cpu_boot_mode
        bl      set_cpu_boot_mode_flag
        mrs     x0, mpidr_el1
        mov_q   x1, MPIDR_HWID_BITMASK
        and     x0, x0, x1
        adr_l   x3, secondary_holding_pen_release
pen:    ldr     x4, [x3]
        cmp     x4, x0
        b.eq    secondary_startup
        wfe
        b       pen
ENDPROC(secondary_holding_pen)

        /*
         * Secondary entry point that jumps straight into the kernel. Only to
         * be used where CPUs are brought online dynamically by the kernel.
         */
ENTRY(secondary_entry)
        bl      el2_setup                       // Drop to EL1
        bl      set_cpu_boot_mode_flag
        b       secondary_startup
ENDPROC(secondary_entry)

secondary_startup:
        /*
         * Common entry point for secondary CPUs.
         */
        bl      __cpu_secondary_check52bitva
        bl      __cpu_setup                     // initialise processor
        adrp    x1, swapper_pg_dir
        bl      __enable_mmu
        ldr     x8, =__secondary_switched
        br      x8
ENDPROC(secondary_startup)

__secondary_switched:
        adr_l   x5, vectors
        msr     vbar_el1, x5
        isb

        adr_l   x0, secondary_data
        ldr     x1, [x0, #CPU_BOOT_STACK]       // get secondary_data.stack
        mov     sp, x1
        ldr     x2, [x0, #CPU_BOOT_TASK]
        msr     sp_el0, x2
        mov     x29, #0
        mov     x30, #0
        b       secondary_start_kernel
ENDPROC(__secondary_switched)

/*
 * The booting CPU updates the failed status @__early_cpu_boot_status,
 * with MMU turned off.
 *
 * update_early_cpu_boot_status status, tmp1, tmp2
 *  - Corrupts tmp1, tmp2
 *  - Writes 'status' to __early_cpu_boot_status and makes sure
 *    it is committed to memory.
 */

        .macro  update_early_cpu_boot_status status, tmp1, tmp2
        mov     \tmp2, #\status
        adr_l   \tmp1, __early_cpu_boot_status
        str     \tmp2, [\tmp1]
        dmb     sy
        dc      ivac, \tmp1                     // Invalidate potentially stale cache line
        .endm

/*
 * Enable the MMU.
 *
 *  x0  = SCTLR_EL1 value for turning on the MMU.
 *  x1  = TTBR1_EL1 value
 *
 * Returns to the caller via x30/lr. This requires the caller to be covered
 * by the .idmap.text section.
 *
 * Checks if the selected granule size is supported by the CPU.
 * If it isn't, park the CPU.
 */
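/*
 * ID_AA64MMFR0_TGRAN_SHIFT is expected to resolve (see asm/sysreg.h) to the
 * TGRAN4, TGRAN16 or TGRAN64 field matching the configured page size, so the
 * check below parks any CPU whose MMU does not implement the granule this
 * kernel was built for.
 */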
ENTRY(__enable_mmu)
        mrs     x2, ID_AA64MMFR0_EL1
        ubfx    x2, x2, #ID_AA64MMFR0_TGRAN_SHIFT, 4
        cmp     x2, #ID_AA64MMFR0_TGRAN_SUPPORTED
        b.ne    __no_granule_support
        update_early_cpu_boot_status 0, x2, x3
        adrp    x2, idmap_pg_dir
        phys_to_ttbr x1, x1
        phys_to_ttbr x2, x2
        msr     ttbr0_el1, x2                   // load TTBR0
        offset_ttbr1 x1
        msr     ttbr1_el1, x1                   // load TTBR1
        isb
        msr     sctlr_el1, x0
        isb
        /*
         * Invalidate the local I-cache so that any instructions fetched
         * speculatively from the PoC are discarded, since they may have
         * been dynamically patched at the PoU.
         */
        ic      iallu
        dsb     nsh
        isb
        ret
ENDPROC(__enable_mmu)

ENTRY(__cpu_secondary_check52bitva)
#ifdef CONFIG_ARM64_USER_VA_BITS_52
        ldr_l   x0, vabits_user
        cmp     x0, #52
        b.ne    2f

        mrs_s   x0, SYS_ID_AA64MMFR2_EL1
        and     x0, x0, #(0xf << ID_AA64MMFR2_LVA_SHIFT)
        cbnz    x0, 2f

        adr_l   x0, va52mismatch
        mov     w1, #1
        strb    w1, [x0]
        dmb     sy
        dc      ivac, x0        // Invalidate potentially stale cache line

        update_early_cpu_boot_status CPU_STUCK_IN_KERNEL, x0, x1
1:      wfe
        wfi
        b       1b

#endif
2:      ret
ENDPROC(__cpu_secondary_check52bitva)

__no_granule_support:
        /* Indicate that this CPU can't boot and is stuck in the kernel */
        update_early_cpu_boot_status CPU_STUCK_IN_KERNEL, x1, x2
1:
        wfe
        wfi
        b       1b
ENDPROC(__no_granule_support)

#ifdef CONFIG_RELOCATABLE
__relocate_kernel:
        /*
         * Iterate over each entry in the relocation table, and apply the
         * relocations in place.
         */
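        /*
         * Each record is a 24-byte Elf64_Rela, roughly:
         *
         *      struct { u64 r_offset; u64 r_info; s64 r_addend; };
         *
         * hence the ldp/ldr pattern below. For R_AARCH64_RELATIVE entries
         * the fixed-up value is simply addend + KASLR offset, written to
         * r_offset + KASLR offset.
         */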
        ldr     w9, =__rela_offset              // offset to reloc table
        ldr     w10, =__rela_size               // size of reloc table

        mov_q   x11, KIMAGE_VADDR               // default virtual offset
        add     x11, x11, x23                   // actual virtual offset
        add     x9, x9, x11                     // __va(.rela)
        add     x10, x9, x10                    // __va(.rela) + sizeof(.rela)

0:      cmp     x9, x10
        b.hs    1f
        ldp     x11, x12, [x9], #24
        ldr     x13, [x9, #-8]
        cmp     w12, #R_AARCH64_RELATIVE
        b.ne    0b
        add     x13, x13, x23                   // relocate
        str     x13, [x11, x23]
        b       0b
1:      ret
ENDPROC(__relocate_kernel)
#endif

__primary_switch:
#ifdef CONFIG_RANDOMIZE_BASE
        mov     x19, x0                         // preserve new SCTLR_EL1 value
        mrs     x20, sctlr_el1                  // preserve old SCTLR_EL1 value
#endif

        adrp    x1, init_pg_dir
        bl      __enable_mmu
#ifdef CONFIG_RELOCATABLE
        bl      __relocate_kernel
#ifdef CONFIG_RANDOMIZE_BASE
        ldr     x8, =__primary_switched
        adrp    x0, __PHYS_OFFSET
        blr     x8

        /*
         * If we return here, we have a KASLR displacement in x23 which we need
         * to take into account by discarding the current kernel mapping and
         * creating a new one.
         */
        pre_disable_mmu_workaround
        msr     sctlr_el1, x20                  // disable the MMU
        isb
        bl      __create_page_tables            // recreate kernel mapping

        tlbi    vmalle1                         // Remove any stale TLB entries
        dsb     nsh

        msr     sctlr_el1, x19                  // re-enable the MMU
        isb
        ic      iallu                           // flush instructions fetched
        dsb     nsh                             // via old mapping
        isb

        bl      __relocate_kernel
#endif
#endif
        ldr     x8, =__primary_switched
        adrp    x0, __PHYS_OFFSET
        br      x8
ENDPROC(__primary_switch)