   1/* SPDX-License-Identifier: GPL-2.0-only */
   2/*
   3 *  linux/arch/arm/boot/compressed/head.S
   4 *
   5 *  Copyright (C) 1996-2002 Russell King
   6 *  Copyright (C) 2004 Hyok S. Choi (MPU support)
   7 */
   8#include <linux/linkage.h>
   9#include <asm/assembler.h>
  10#include <asm/v7m.h>
  11
  12#include "efi-header.S"
  13
  14 AR_CLASS(      .arch   armv7-a )
  15 M_CLASS(       .arch   armv7-m )
  16
  17/*
  18 * Debugging stuff
  19 *
  20 * Note that these macros must not contain any code which is not
  21 * 100% relocatable.  Any attempt to do so will result in a crash.
  22 * Please select one of the following when turning on debugging.
  23 */
  24#ifdef DEBUG
  25
  26#if defined(CONFIG_DEBUG_ICEDCC)
  27
  28#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7)
  29                .macro  loadsp, rb, tmp1, tmp2
  30                .endm
  31                .macro  writeb, ch, rb
  32                mcr     p14, 0, \ch, c0, c5, 0
  33                .endm
  34#elif defined(CONFIG_CPU_XSCALE)
  35                .macro  loadsp, rb, tmp1, tmp2
  36                .endm
  37                .macro  writeb, ch, rb
  38                mcr     p14, 0, \ch, c8, c0, 0
  39                .endm
  40#else
  41                .macro  loadsp, rb, tmp1, tmp2
  42                .endm
  43                .macro  writeb, ch, rb
  44                mcr     p14, 0, \ch, c1, c0, 0
  45                .endm
  46#endif
  47
  48#else
  49
  50#include CONFIG_DEBUG_LL_INCLUDE
  51
  52                .macro  writeb, ch, rb
  53                senduart \ch, \rb
  54                .endm
  55
  56#if defined(CONFIG_ARCH_SA1100)
  57                .macro  loadsp, rb, tmp1, tmp2
  58                mov     \rb, #0x80000000        @ physical base address
  59#ifdef CONFIG_DEBUG_LL_SER3
  60                add     \rb, \rb, #0x00050000   @ Ser3
  61#else
  62                add     \rb, \rb, #0x00010000   @ Ser1
  63#endif
  64                .endm
  65#else
  66                .macro  loadsp, rb, tmp1, tmp2
  67                addruart \rb, \tmp1, \tmp2
  68                .endm
  69#endif
  70#endif
  71#endif
  72
  73                .macro  kputc,val
  74                mov     r0, \val
  75                bl      putc
  76                .endm
  77
  78                .macro  kphex,val,len
  79                mov     r0, \val
  80                mov     r1, #\len
  81                bl      phex
  82                .endm
  83
  84                .macro  debug_reloc_start
  85#ifdef DEBUG
  86                kputc   #'\n'
  87                kphex   r6, 8           /* processor id */
  88                kputc   #':'
  89                kphex   r7, 8           /* architecture id */
  90#ifdef CONFIG_CPU_CP15
  91                kputc   #':'
  92                mrc     p15, 0, r0, c1, c0
  93                kphex   r0, 8           /* control reg */
  94#endif
  95                kputc   #'\n'
  96                kphex   r5, 8           /* decompressed kernel start */
  97                kputc   #'-'
  98                kphex   r9, 8           /* decompressed kernel end  */
  99                kputc   #'>'
 100                kphex   r4, 8           /* kernel execution address */
 101                kputc   #'\n'
 102#endif
 103                .endm
 104
 105                .macro  debug_reloc_end
 106#ifdef DEBUG
 107                kphex   r5, 8           /* end of kernel */
 108                kputc   #'\n'
 109                mov     r0, r4
 110                bl      memdump         /* dump 256 bytes at start of kernel */
 111#endif
 112                .endm
 113
 114                /*
 115                 * Debug kernel copy by printing the memory addresses involved
 116                 */
 117                .macro dbgkc, begin, end, cbegin, cend
 118#ifdef DEBUG
 119                kputc   #'\n'
 120                kputc   #'C'
 121                kputc   #':'
 122                kputc   #'0'
 123                kputc   #'x'
 124                kphex   \begin, 8       /* Start of compressed kernel */
 125                kputc   #'-'
 126                kputc   #'0'
 127                kputc   #'x'
 128                kphex   \end, 8         /* End of compressed kernel */
 129                kputc   #'-'
 130                kputc   #'>'
 131                kputc   #'0'
 132                kputc   #'x'
 133                kphex   \cbegin, 8      /* Start of kernel copy */
 134                kputc   #'-'
 135                kputc   #'0'
 136                kputc   #'x'
 137                kphex   \cend, 8        /* End of kernel copy */
 138                kputc   #'\n'
 139                kputc   #'\r'
 140#endif
 141                .endm
 142
 143                .section ".start", #alloc, #execinstr
 144/*
 145 * sort out different calling conventions
 146 */
 147                .align
 148                /*
 149                 * Always enter in ARM state for CPUs that support the ARM ISA.
 150                 * As of today (2014) that's exactly the members of the A and R
 151                 * classes.
 152                 */
 153 AR_CLASS(      .arm    )
 154start:
 155                .type   start,#function
 156                .rept   7
 157                __nop
 158                .endr
 159#ifndef CONFIG_THUMB2_KERNEL
 160                mov     r0, r0
 161#else
 162 AR_CLASS(      sub     pc, pc, #3      )       @ A/R: switch to Thumb2 mode
 163  M_CLASS(      nop.w                   )       @ M: already in Thumb2 mode
 164                .thumb
 165#endif
 166                W(b)    1f
 167
 168                .word   _magic_sig      @ Magic numbers to help the loader
 169                .word   _magic_start    @ absolute load/run zImage address
 170                .word   _magic_end      @ zImage end address
 171                .word   0x04030201      @ endianness flag
 172                .word   0x45454545      @ another magic number to indicate
 173                .word   _magic_table    @ additional data table
 174
 175                __EFI_HEADER
 1761:
 177 ARM_BE8(       setend  be              )       @ go BE8 if compiled for BE8
 178 AR_CLASS(      mrs     r9, cpsr        )
 179#ifdef CONFIG_ARM_VIRT_EXT
 180                bl      __hyp_stub_install      @ get into SVC mode, reversibly
 181#endif
 182                mov     r7, r1                  @ save architecture ID
 183                mov     r8, r2                  @ save atags pointer
 184
 185#ifndef CONFIG_CPU_V7M
 186                /*
 187                 * Booting from Angel - need to enter SVC mode and disable
 188                 * FIQs/IRQs (numeric definitions from angel arm.h source).
 189                 * We only do this if we were in user mode on entry.
 190                 */
 191                mrs     r2, cpsr                @ get current mode
 192                tst     r2, #3                  @ not user?
 193                bne     not_angel
 194                mov     r0, #0x17               @ angel_SWIreason_EnterSVC
 195 ARM(           swi     0x123456        )       @ angel_SWI_ARM
 196 THUMB(         svc     0xab            )       @ angel_SWI_THUMB
 197not_angel:
 198                safe_svcmode_maskall r0
 199                msr     spsr_cxsf, r9           @ Save the CPU boot mode in
 200                                                @ SPSR
 201#endif
 202                /*
 203                 * Note that some cache flushing and other stuff may
 204                 * be needed here - is there an Angel SWI call for this?
 205                 */
 206
 207                /*
 208                 * some architecture specific code can be inserted
 209                 * by the linker here, but it should preserve r7, r8, and r9.
 210                 */
 211
 212                .text
 213
 214#ifdef CONFIG_AUTO_ZRELADDR
 215                /*
 216                 * Find the start of physical memory.  As we are executing
 217                 * without the MMU on, we are in the physical address space.
 218                 * We just need to get rid of any offset by aligning the
 219                 * address.
 220                 *
 221                 * This alignment is a balance between the requirements of
 222                 * different platforms - we have chosen 128MB to allow
 223                 * platforms which align the start of their physical memory
 224                 * to 128MB to use this feature, while allowing the zImage
 225                 * to be placed within the first 128MB of memory on other
 226                 * platforms.  Increasing the alignment means we place
 227                 * stricter alignment requirements on the start of physical
 228                 * memory, but relaxing it means that we break people who
  229 * are already placing their zImage in (e.g.) the top 64MB
 230                 * of this range.
 231                 */
 232                mov     r4, pc
 233                and     r4, r4, #0xf8000000
 234                /* Determine final kernel image address. */
 235                add     r4, r4, #TEXT_OFFSET
 236#else
 237                ldr     r4, =zreladdr
 238#endif
 239
 240                /*
  241                 * Set up a page table only if it won't overwrite ourselves.
  242                 * That means r4 < pc || r4 - 16k page directory > &_end.
  243                 * Given that r4 > &_end is rather infrequent, we add roughly an
  244                 * additional 1MB of room for a possible appended DTB.
 245                 */
 246                mov     r0, pc
 247                cmp     r0, r4
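                     @ LC0+32 holds "_end - restart + 16384 + 1024*1024" (see LC0
                     @ below): our own footprint plus the 16k page directory and
                     @ the ~1MB of appended-DTB slack mentioned above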
 248                ldrcc   r0, LC0+32
 249                addcc   r0, r0, pc
 250                cmpcc   r4, r0
 251                orrcc   r4, r4, #1              @ remember we skipped cache_on
 252                blcs    cache_on
 253
 254restart:        adr     r0, LC0
 255                ldmia   r0, {r1, r2, r3, r6, r10, r11, r12}
 256                ldr     sp, [r0, #28]
 257
 258                /*
 259                 * We might be running at a different address.  We need
 260                 * to fix up various pointers.
 261                 */
 262                sub     r0, r0, r1              @ calculate the delta offset
 263                add     r6, r6, r0              @ _edata
 264                add     r10, r10, r0            @ inflated kernel size location
 265
 266                /*
 267                 * The kernel build system appends the size of the
 268                 * decompressed kernel at the end of the compressed data
 269                 * in little-endian form.
 270                 */
 271                ldrb    r9, [r10, #0]
 272                ldrb    lr, [r10, #1]
 273                orr     r9, r9, lr, lsl #8
 274                ldrb    lr, [r10, #2]
 275                ldrb    r10, [r10, #3]
 276                orr     r9, r9, lr, lsl #16
 277                orr     r9, r9, r10, lsl #24
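                     @ (byte loads keep this independent of CPU endianness and
                     @  of the alignment of the size word)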
 278
 279#ifndef CONFIG_ZBOOT_ROM
 280                /* malloc space is above the relocated stack (64k max) */
 281                add     sp, sp, r0
 282                add     r10, sp, #0x10000
 283#else
 284                /*
  285                 * With ZBOOT_ROM the bss/stack is non-relocatable,
 286                 * but someone could still run this code from RAM,
 287                 * in which case our reference is _edata.
 288                 */
 289                mov     r10, r6
 290#endif
 291
 292                mov     r5, #0                  @ init dtb size to 0
 293#ifdef CONFIG_ARM_APPENDED_DTB
 294/*
 295 *   r0  = delta
 296 *   r2  = BSS start
 297 *   r3  = BSS end
 298 *   r4  = final kernel address (possibly with LSB set)
 299 *   r5  = appended dtb size (still unknown)
 300 *   r6  = _edata
 301 *   r7  = architecture ID
 302 *   r8  = atags/device tree pointer
 303 *   r9  = size of decompressed image
  304 *   r10 = end of this image, including bss/stack/malloc space if non-XIP
 305 *   r11 = GOT start
 306 *   r12 = GOT end
 307 *   sp  = stack pointer
 308 *
 309 * if there are device trees (dtb) appended to zImage, advance r10 so that the
 310 * dtb data will get relocated along with the kernel if necessary.
 311 */
 312
 313                ldr     lr, [r6, #0]
 314#ifndef __ARMEB__
 315                ldr     r1, =0xedfe0dd0         @ sig is 0xd00dfeed big endian
 316#else
 317                ldr     r1, =0xd00dfeed
 318#endif
 319                cmp     lr, r1
 320                bne     dtb_check_done          @ not found
 321
 322#ifdef CONFIG_ARM_ATAG_DTB_COMPAT
 323                /*
 324                 * OK... Let's do some funky business here.
 325                 * If we do have a DTB appended to zImage, and we do have
  326                 * an ATAG list around, we want the latter to be translated
 327                 * and folded into the former here. No GOT fixup has occurred
 328                 * yet, but none of the code we're about to call uses any
 329                 * global variable.
 330                */
 331
 332                /* Get the initial DTB size */
 333                ldr     r5, [r6, #4]
 334#ifndef __ARMEB__
 335                /* convert to little endian */
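                     /* (classic 4-instruction byte swap, equivalent to a rev) */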
 336                eor     r1, r5, r5, ror #16
 337                bic     r1, r1, #0x00ff0000
 338                mov     r5, r5, ror #8
 339                eor     r5, r5, r1, lsr #8
 340#endif
 341                /* 50% DTB growth should be good enough */
 342                add     r5, r5, r5, lsr #1
 343                /* preserve 64-bit alignment */
 344                add     r5, r5, #7
 345                bic     r5, r5, #7
 346                /* clamp to 32KB min and 1MB max */
 347                cmp     r5, #(1 << 15)
 348                movlo   r5, #(1 << 15)
 349                cmp     r5, #(1 << 20)
 350                movhi   r5, #(1 << 20)
 351                /* temporarily relocate the stack past the DTB work space */
 352                add     sp, sp, r5
 353
 354                stmfd   sp!, {r0-r3, ip, lr}
 355                mov     r0, r8
 356                mov     r1, r6
 357                mov     r2, r5
 358                bl      atags_to_fdt
 359
 360                /*
  361                 * If the returned value is 1, there is no ATAG at the location
  362                 * pointed to by r8.  Try the typical 0x100 offset from start
 363                 * of RAM and hope for the best.
 364                 */
 365                cmp     r0, #1
 366                sub     r0, r4, #TEXT_OFFSET
 367                bic     r0, r0, #1
 368                add     r0, r0, #0x100
 369                mov     r1, r6
 370                mov     r2, r5
 371                bleq    atags_to_fdt
 372
 373                ldmfd   sp!, {r0-r3, ip, lr}
 374                sub     sp, sp, r5
 375#endif
 376
 377                mov     r8, r6                  @ use the appended device tree
 378
 379                /*
 380                 * Make sure that the DTB doesn't end up in the final
 381                 * kernel's .bss area. To do so, we adjust the decompressed
 382                 * kernel size to compensate if that .bss size is larger
 383                 * than the relocated code.
 384                 */
 385                ldr     r5, =_kernel_bss_size
 386                adr     r1, wont_overwrite
 387                sub     r1, r6, r1
 388                subs    r1, r5, r1
 389                addhi   r9, r9, r1
 390
 391                /* Get the current DTB size */
 392                ldr     r5, [r6, #4]
 393#ifndef __ARMEB__
 394                /* convert r5 (dtb size) to little endian */
 395                eor     r1, r5, r5, ror #16
 396                bic     r1, r1, #0x00ff0000
 397                mov     r5, r5, ror #8
 398                eor     r5, r5, r1, lsr #8
 399#endif
 400
 401                /* preserve 64-bit alignment */
 402                add     r5, r5, #7
 403                bic     r5, r5, #7
 404
 405                /* relocate some pointers past the appended dtb */
 406                add     r6, r6, r5
 407                add     r10, r10, r5
 408                add     sp, sp, r5
 409dtb_check_done:
 410#endif
 411
 412/*
 413 * Check to see if we will overwrite ourselves.
 414 *   r4  = final kernel address (possibly with LSB set)
 415 *   r9  = size of decompressed image
  416 *   r10 = end of this image, including bss/stack/malloc space if non-XIP
 417 * We basically want:
 418 *   r4 - 16k page directory >= r10 -> OK
 419 *   r4 + image length <= address of wont_overwrite -> OK
 420 * Note: the possible LSB in r4 is harmless here.
 421 */
 422                add     r10, r10, #16384
 423                cmp     r4, r10
 424                bhs     wont_overwrite
 425                add     r10, r4, r9
 426                adr     r9, wont_overwrite
 427                cmp     r10, r9
 428                bls     wont_overwrite
 429
 430/*
 431 * Relocate ourselves past the end of the decompressed kernel.
 432 *   r6  = _edata
 433 *   r10 = end of the decompressed kernel
 434 * Because we always copy ahead, we need to do it from the end and go
 435 * backward in case the source and destination overlap.
 436 */
 437                /*
 438                 * Bump to the next 256-byte boundary with the size of
 439                 * the relocation code added. This avoids overwriting
  440                 * ourselves when the offset is small.
 441                 */
 442                add     r10, r10, #((reloc_code_end - restart + 256) & ~255)
 443                bic     r10, r10, #255
 444
 445                /* Get start of code we want to copy and align it down. */
 446                adr     r5, restart
 447                bic     r5, r5, #31
 448
 449/* Relocate the hyp vector base if necessary */
 450#ifdef CONFIG_ARM_VIRT_EXT
 451                mrs     r0, spsr
 452                and     r0, r0, #MODE_MASK
 453                cmp     r0, #HYP_MODE
 454                bne     1f
 455
 456                /*
 457                 * Compute the address of the hyp vectors after relocation.
 458                 * This requires some arithmetic since we cannot directly
 459                 * reference __hyp_stub_vectors in a PC-relative way.
 460                 * Call __hyp_set_vectors with the new address so that we
 461                 * can HVC again after the copy.
 462                 */
 4630:              adr     r0, 0b
 464                movw    r1, #:lower16:__hyp_stub_vectors - 0b
 465                movt    r1, #:upper16:__hyp_stub_vectors - 0b
 466                add     r0, r0, r1
 467                sub     r0, r0, r5
 468                add     r0, r0, r10
 469                bl      __hyp_set_vectors
 4701:
 471#endif
 472
 473                sub     r9, r6, r5              @ size to copy
 474                add     r9, r9, #31             @ rounded up to a multiple
 475                bic     r9, r9, #31             @ ... of 32 bytes
  476                add     r6, r9, r5              @ r6 = rounded-up source end
  477                add     r9, r9, r10             @ r9 = destination end
 478
 479#ifdef DEBUG
 480                sub     r10, r6, r5
 481                sub     r10, r9, r10
 482                /*
 483                 * We are about to copy the kernel to a new memory area.
 484                 * The boundaries of the new memory area can be found in
 485                 * r10 and r9, whilst r5 and r6 contain the boundaries
 486                 * of the memory we are going to copy.
 487                 * Calling dbgkc will help with the printing of this
 488                 * information.
 489                 */
 490                dbgkc   r5, r6, r10, r9
 491#endif
 492
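                     @ copy 32 bytes (8 registers) per iteration, working back
                     @ from the end so an overlapping destination is safe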
 4931:              ldmdb   r6!, {r0 - r3, r10 - r12, lr}
 494                cmp     r6, r5
 495                stmdb   r9!, {r0 - r3, r10 - r12, lr}
 496                bhi     1b
 497
 498                /* Preserve offset to relocated code. */
 499                sub     r6, r9, r6
 500
 501#ifndef CONFIG_ZBOOT_ROM
 502                /* cache_clean_flush may use the stack, so relocate it */
 503                add     sp, sp, r6
 504#endif
 505
 506                bl      cache_clean_flush
 507
 508                badr    r0, restart
 509                add     r0, r0, r6
 510                mov     pc, r0
 511
 512wont_overwrite:
 513/*
 514 * If delta is zero, we are running at the address we were linked at.
 515 *   r0  = delta
 516 *   r2  = BSS start
 517 *   r3  = BSS end
 518 *   r4  = kernel execution address (possibly with LSB set)
 519 *   r5  = appended dtb size (0 if not present)
 520 *   r7  = architecture ID
 521 *   r8  = atags pointer
 522 *   r11 = GOT start
 523 *   r12 = GOT end
 524 *   sp  = stack pointer
 525 */
  526                orrs    r1, r0, r5              @ delta and dtb size both zero?
 527                beq     not_relocated
 528
 529                add     r11, r11, r0
 530                add     r12, r12, r0
 531
 532#ifndef CONFIG_ZBOOT_ROM
 533                /*
  534                 * If we're running fully PIC (i.e. CONFIG_ZBOOT_ROM is not set),
 535                 * we need to fix up pointers into the BSS region.
 536                 * Note that the stack pointer has already been fixed up.
 537                 */
 538                add     r2, r2, r0
 539                add     r3, r3, r0
 540
 541                /*
 542                 * Relocate all entries in the GOT table.
 543                 * Bump bss entries to _edata + dtb size
 544                 */
 5451:              ldr     r1, [r11, #0]           @ relocate entries in the GOT
 546                add     r1, r1, r0              @ This fixes up C references
 547                cmp     r1, r2                  @ if entry >= bss_start &&
 548                cmphs   r3, r1                  @       bss_end > entry
 549                addhi   r1, r1, r5              @    entry += dtb size
 550                str     r1, [r11], #4           @ next entry
 551                cmp     r11, r12
 552                blo     1b
 553
 554                /* bump our bss pointers too */
 555                add     r2, r2, r5
 556                add     r3, r3, r5
 557
 558#else
 559
 560                /*
 561                 * Relocate entries in the GOT table.  We only relocate
 562                 * the entries that are outside the (relocated) BSS region.
 563                 */
 5641:              ldr     r1, [r11, #0]           @ relocate entries in the GOT
 565                cmp     r1, r2                  @ entry < bss_start ||
 566                cmphs   r3, r1                  @ _end < entry
 567                addlo   r1, r1, r0              @ table.  This fixes up the
 568                str     r1, [r11], #4           @ C references.
 569                cmp     r11, r12
 570                blo     1b
 571#endif
 572
 573not_relocated:  mov     r0, #0
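                     @ zero the BSS 16 bytes (4 words) per iteration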
 5741:              str     r0, [r2], #4            @ clear bss
 575                str     r0, [r2], #4
 576                str     r0, [r2], #4
 577                str     r0, [r2], #4
 578                cmp     r2, r3
 579                blo     1b
 580
 581                /*
 582                 * Did we skip the cache setup earlier?
 583                 * That is indicated by the LSB in r4.
 584                 * Do it now if so.
 585                 */
 586                tst     r4, #1
 587                bic     r4, r4, #1
 588                blne    cache_on
 589
 590/*
  591 * The C runtime environment should now be set up sufficiently.
 592 * Set up some pointers, and start decompressing.
 593 *   r4  = kernel execution address
 594 *   r7  = architecture ID
 595 *   r8  = atags pointer
 596 */
 597                mov     r0, r4
 598                mov     r1, sp                  @ malloc space above stack
 599                add     r2, sp, #0x10000        @ 64k max
 600                mov     r3, r7
 601                bl      decompress_kernel
 602                bl      cache_clean_flush
 603                bl      cache_off
 604
 605#ifdef CONFIG_ARM_VIRT_EXT
 606                mrs     r0, spsr                @ Get saved CPU boot mode
 607                and     r0, r0, #MODE_MASK
 608                cmp     r0, #HYP_MODE           @ if not booted in HYP mode...
 609                bne     __enter_kernel          @ boot kernel directly
 610
 611                adr     r12, .L__hyp_reentry_vectors_offset
 612                ldr     r0, [r12]
 613                add     r0, r0, r12
 614
 615                bl      __hyp_set_vectors
 616                __HVC(0)                        @ otherwise bounce to hyp mode
 617
 618                b       .                       @ should never be reached
 619
 620                .align  2
 621.L__hyp_reentry_vectors_offset: .long   __hyp_reentry_vectors - .
 622#else
 623                b       __enter_kernel
 624#endif
 625
 626                .align  2
 627                .type   LC0, #object
 628LC0:            .word   LC0                     @ r1
 629                .word   __bss_start             @ r2
 630                .word   _end                    @ r3
 631                .word   _edata                  @ r6
 632                .word   input_data_end - 4      @ r10 (inflated size location)
 633                .word   _got_start              @ r11
 634                .word   _got_end                @ ip
 635                .word   .L_user_stack_end       @ sp
 636                .word   _end - restart + 16384 + 1024*1024
 637                .size   LC0, . - LC0
 638
 639#ifdef CONFIG_ARCH_RPC
 640                .globl  params
 641params:         ldr     r0, =0x10000100         @ params_phys for RPC
 642                mov     pc, lr
 643                .ltorg
 644                .align
 645#endif
 646
 647/*
  648 * Turn on the cache.  We need to set up some page tables so that we
 649 * can have both the I and D caches on.
 650 *
 651 * We place the page tables 16k down from the kernel execution address,
 652 * and we hope that nothing else is using it.  If we're using it, we
 653 * will go pop!
 654 *
 655 * On entry,
 656 *  r4 = kernel execution address
 657 *  r7 = architecture number
 658 *  r8 = atags pointer
 659 * On exit,
 660 *  r0, r1, r2, r3, r9, r10, r12 corrupted
 661 * This routine must preserve:
 662 *  r4, r7, r8
 663 */
 664                .align  5
 665cache_on:       mov     r3, #8                  @ cache_on function
 666                b       call_cache_fn
 667
 668/*
  669 * Initialize the highest priority protection region, PR7,
  670 * to cover the whole 32-bit address space as cacheable and bufferable.
 671 */
 672__armv4_mpu_cache_on:
 673                mov     r0, #0x3f               @ 4G, the whole
 674                mcr     p15, 0, r0, c6, c7, 0   @ PR7 Area Setting
 675                mcr     p15, 0, r0, c6, c7, 1
 676
 677                mov     r0, #0x80               @ PR7
 678                mcr     p15, 0, r0, c2, c0, 0   @ D-cache on
 679                mcr     p15, 0, r0, c2, c0, 1   @ I-cache on
 680                mcr     p15, 0, r0, c3, c0, 0   @ write-buffer on
 681
 682                mov     r0, #0xc000
 683                mcr     p15, 0, r0, c5, c0, 1   @ I-access permission
 684                mcr     p15, 0, r0, c5, c0, 0   @ D-access permission
 685
 686                mov     r0, #0
 687                mcr     p15, 0, r0, c7, c10, 4  @ drain write buffer
 688                mcr     p15, 0, r0, c7, c5, 0   @ flush(inval) I-Cache
 689                mcr     p15, 0, r0, c7, c6, 0   @ flush(inval) D-Cache
 690                mrc     p15, 0, r0, c1, c0, 0   @ read control reg
 691                                                @ ...I .... ..D. WC.M
 692                orr     r0, r0, #0x002d         @ .... .... ..1. 11.1
 693                orr     r0, r0, #0x1000         @ ...1 .... .... ....
 694
 695                mcr     p15, 0, r0, c1, c0, 0   @ write control reg
 696
 697                mov     r0, #0
 698                mcr     p15, 0, r0, c7, c5, 0   @ flush(inval) I-Cache
 699                mcr     p15, 0, r0, c7, c6, 0   @ flush(inval) D-Cache
 700                mov     pc, lr
 701
 702__armv3_mpu_cache_on:
 703                mov     r0, #0x3f               @ 4G, the whole
 704                mcr     p15, 0, r0, c6, c7, 0   @ PR7 Area Setting
 705
 706                mov     r0, #0x80               @ PR7
 707                mcr     p15, 0, r0, c2, c0, 0   @ cache on
 708                mcr     p15, 0, r0, c3, c0, 0   @ write-buffer on
 709
 710                mov     r0, #0xc000
 711                mcr     p15, 0, r0, c5, c0, 0   @ access permission
 712
 713                mov     r0, #0
 714                mcr     p15, 0, r0, c7, c0, 0   @ invalidate whole cache v3
 715                /*
 716                 * ?? ARMv3 MMU does not allow reading the control register,
 717                 * does this really work on ARMv3 MPU?
 718                 */
 719                mrc     p15, 0, r0, c1, c0, 0   @ read control reg
 720                                                @ .... .... .... WC.M
 721                orr     r0, r0, #0x000d         @ .... .... .... 11.1
 722                /* ?? this overwrites the value constructed above? */
 723                mov     r0, #0
 724                mcr     p15, 0, r0, c1, c0, 0   @ write control reg
 725
 726                /* ?? invalidate for the second time? */
 727                mcr     p15, 0, r0, c7, c0, 0   @ invalidate whole cache v3
 728                mov     pc, lr
 729
 730#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
 731#define CB_BITS 0x08
 732#else
 733#define CB_BITS 0x0c
 734#endif
 735
 736__setup_mmu:    sub     r3, r4, #16384          @ Page directory size
 737                bic     r3, r3, #0xff           @ Align the pointer
 738                bic     r3, r3, #0x3f00
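                     @ (the two bics above align the page directory base to 16k)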
 739/*
 740 * Initialise the page tables, turning on the cacheable and bufferable
 741 * bits for the RAM area only.
 742 */
 743                mov     r0, r3
 744                mov     r9, r0, lsr #18
  745                mov     r9, r9, lsl #18         @ start of RAM, rounded down to 256KB
 746                add     r10, r9, #0x10000000    @ a reasonable RAM size
 747                mov     r1, #0x12               @ XN|U + section mapping
 748                orr     r1, r1, #3 << 10        @ AP=11
 749                add     r2, r3, #16384
 7501:              cmp     r1, r9                  @ if virt > start of RAM
 751                cmphs   r10, r1                 @   && end of RAM > virt
 752                bic     r1, r1, #0x1c           @ clear XN|U + C + B
 753                orrlo   r1, r1, #0x10           @ Set XN|U for non-RAM
 754                orrhs   r1, r1, r6              @ set RAM section settings
 755                str     r1, [r0], #4            @ 1:1 mapping
 756                add     r1, r1, #1048576
 757                teq     r0, r2
 758                bne     1b
 759/*
  760 * If we are ever running from Flash, then we surely want the cache
  761 * to be enabled for our execution instance as well.  We map 2MB of it
  762 * so there is no map overlap problem for a compressed kernel of up to 1MB.
  763 * If we are executing from RAM, this merely duplicates the mapping above.
 764 */
 765                orr     r1, r6, #0x04           @ ensure B is set for this
 766                orr     r1, r1, #3 << 10
 767                mov     r2, pc
 768                mov     r2, r2, lsr #20
 769                orr     r1, r1, r2, lsl #20
 770                add     r0, r3, r2, lsl #2
 771                str     r1, [r0], #4
 772                add     r1, r1, #1048576
 773                str     r1, [r0]
 774                mov     pc, lr
 775ENDPROC(__setup_mmu)
 776
 777@ Enable unaligned access on v6, to allow better code generation
 778@ for the decompressor C code:
 779__armv6_mmu_cache_on:
 780                mrc     p15, 0, r0, c1, c0, 0   @ read SCTLR
 781                bic     r0, r0, #2              @ A (no unaligned access fault)
 782                orr     r0, r0, #1 << 22        @ U (v6 unaligned access model)
 783                mcr     p15, 0, r0, c1, c0, 0   @ write SCTLR
 784                b       __armv4_mmu_cache_on
 785
 786__arm926ejs_mmu_cache_on:
 787#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
 788                mov     r0, #4                  @ put dcache in WT mode
 789                mcr     p15, 7, r0, c15, c0, 0
 790#endif
 791
 792__armv4_mmu_cache_on:
 793                mov     r12, lr
 794#ifdef CONFIG_MMU
 795                mov     r6, #CB_BITS | 0x12     @ U
 796                bl      __setup_mmu
 797                mov     r0, #0
 798                mcr     p15, 0, r0, c7, c10, 4  @ drain write buffer
 799                mcr     p15, 0, r0, c8, c7, 0   @ flush I,D TLBs
 800                mrc     p15, 0, r0, c1, c0, 0   @ read control reg
 801                orr     r0, r0, #0x5000         @ I-cache enable, RR cache replacement
 802                orr     r0, r0, #0x0030
 803 ARM_BE8(       orr     r0, r0, #1 << 25 )      @ big-endian page tables
 804                bl      __common_mmu_cache_on
 805                mov     r0, #0
 806                mcr     p15, 0, r0, c8, c7, 0   @ flush I,D TLBs
 807#endif
 808                mov     pc, r12
 809
 810__armv7_mmu_cache_on:
 811                mov     r12, lr
 812#ifdef CONFIG_MMU
 813                mrc     p15, 0, r11, c0, c1, 4  @ read ID_MMFR0
 814                tst     r11, #0xf               @ VMSA
 815                movne   r6, #CB_BITS | 0x02     @ !XN
 816                blne    __setup_mmu
 817                mov     r0, #0
 818                mcr     p15, 0, r0, c7, c10, 4  @ drain write buffer
 819                tst     r11, #0xf               @ VMSA
 820                mcrne   p15, 0, r0, c8, c7, 0   @ flush I,D TLBs
 821#endif
 822                mrc     p15, 0, r0, c1, c0, 0   @ read control reg
 823                bic     r0, r0, #1 << 28        @ clear SCTLR.TRE
 824                orr     r0, r0, #0x5000         @ I-cache enable, RR cache replacement
 825                orr     r0, r0, #0x003c         @ write buffer
 826                bic     r0, r0, #2              @ A (no unaligned access fault)
 827                orr     r0, r0, #1 << 22        @ U (v6 unaligned access model)
 828                                                @ (needed for ARM1176)
 829#ifdef CONFIG_MMU
 830 ARM_BE8(       orr     r0, r0, #1 << 25 )      @ big-endian page tables
 831                mrcne   p15, 0, r6, c2, c0, 2   @ read ttb control reg
 832                orrne   r0, r0, #1              @ MMU enabled
 833                movne   r1, #0xfffffffd         @ domain 0 = client
 834                bic     r6, r6, #1 << 31        @ 32-bit translation system
 835                bic     r6, r6, #(7 << 0) | (1 << 4)    @ use only ttbr0
 836                mcrne   p15, 0, r3, c2, c0, 0   @ load page table pointer
 837                mcrne   p15, 0, r1, c3, c0, 0   @ load domain access control
 838                mcrne   p15, 0, r6, c2, c0, 2   @ load ttb control
 839#endif
 840                mcr     p15, 0, r0, c7, c5, 4   @ ISB
 841                mcr     p15, 0, r0, c1, c0, 0   @ load control register
 842                mrc     p15, 0, r0, c1, c0, 0   @ and read it back
 843                mov     r0, #0
 844                mcr     p15, 0, r0, c7, c5, 4   @ ISB
 845                mov     pc, r12
 846
 847__fa526_cache_on:
 848                mov     r12, lr
 849                mov     r6, #CB_BITS | 0x12     @ U
 850                bl      __setup_mmu
 851                mov     r0, #0
 852                mcr     p15, 0, r0, c7, c7, 0   @ Invalidate whole cache
 853                mcr     p15, 0, r0, c7, c10, 4  @ drain write buffer
 854                mcr     p15, 0, r0, c8, c7, 0   @ flush UTLB
 855                mrc     p15, 0, r0, c1, c0, 0   @ read control reg
 856                orr     r0, r0, #0x1000         @ I-cache enable
 857                bl      __common_mmu_cache_on
 858                mov     r0, #0
 859                mcr     p15, 0, r0, c8, c7, 0   @ flush UTLB
 860                mov     pc, r12
 861
 862__common_mmu_cache_on:
 863#ifndef CONFIG_THUMB2_KERNEL
 864#ifndef DEBUG
 865                orr     r0, r0, #0x000d         @ Write buffer, mmu
 866#endif
  867                mov     r1, #-1                 @ all domains: manager access
 868                mcr     p15, 0, r3, c2, c0, 0   @ load page table pointer
 869                mcr     p15, 0, r1, c3, c0, 0   @ load domain access control
 870                b       1f
 871                .align  5                       @ cache line aligned
 8721:              mcr     p15, 0, r0, c1, c0, 0   @ load control register
 873                mrc     p15, 0, r0, c1, c0, 0   @ and read it back to
 874                sub     pc, lr, r0, lsr #32     @ properly flush pipeline
 875#endif
 876
 877#define PROC_ENTRY_SIZE (4*5)
 878
 879/*
 880 * Here follow the relocatable cache support functions for the
 881 * various processors.  This is a generic hook for locating an
 882 * entry and jumping to an instruction at the specified offset
 883 * from the start of the block.  Please note this is all position
 884 * independent code.
 885 *
 886 *  r1  = corrupted
 887 *  r2  = corrupted
 888 *  r3  = block offset
 889 *  r9  = corrupted
 890 *  r12 = corrupted
 891 */
 892
 893call_cache_fn:  adr     r12, proc_types
 894#ifdef CONFIG_CPU_CP15
 895                mrc     p15, 0, r9, c0, c0      @ get processor ID
 896#elif defined(CONFIG_CPU_V7M)
 897                /*
 898                 * On v7-M the processor id is located in the V7M_SCB_CPUID
 899                 * register, but as cache handling is IMPLEMENTATION DEFINED on
  900                 * v7-M (if existent at all) we just return early here.
 901                 * If V7M_SCB_CPUID were used the cpu ID functions (i.e.
 902                 * __armv7_mmu_cache_{on,off,flush}) would be selected which
 903                 * use cp15 registers that are not implemented on v7-M.
 904                 */
 905                bx      lr
 906#else
 907                ldr     r9, =CONFIG_PROCESSOR_ID
 908#endif
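                     @ each proc_types entry is PROC_ENTRY_SIZE (20) bytes: the ID
                     @ match word, the ID mask word, then the 'on', 'off' and
                     @ 'flush' branches; r3 is the byte offset of the method to call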
 9091:              ldr     r1, [r12, #0]           @ get value
 910                ldr     r2, [r12, #4]           @ get mask
 911                eor     r1, r1, r9              @ (real ^ match)
 912                tst     r1, r2                  @       & mask
 913 ARM(           addeq   pc, r12, r3             ) @ call cache function
 914 THUMB(         addeq   r12, r3                 )
 915 THUMB(         moveq   pc, r12                 ) @ call cache function
 916                add     r12, r12, #PROC_ENTRY_SIZE
 917                b       1b
 918
 919/*
 920 * Table for cache operations.  This is basically:
 921 *   - CPU ID match
 922 *   - CPU ID mask
 923 *   - 'cache on' method instruction
 924 *   - 'cache off' method instruction
 925 *   - 'cache flush' method instruction
 926 *
 927 * We match an entry using: ((real_id ^ match) & mask) == 0
 928 *
 929 * Writethrough caches generally only need 'on' and 'off'
 930 * methods.  Writeback caches _must_ have the flush method
 931 * defined.
 932 */
 933                .align  2
 934                .type   proc_types,#object
 935proc_types:
 936                .word   0x41000000              @ old ARM ID
 937                .word   0xff00f000
 938                mov     pc, lr
 939 THUMB(         nop                             )
 940                mov     pc, lr
 941 THUMB(         nop                             )
 942                mov     pc, lr
 943 THUMB(         nop                             )
 944
 945                .word   0x41007000              @ ARM7/710
 946                .word   0xfff8fe00
 947                mov     pc, lr
 948 THUMB(         nop                             )
 949                mov     pc, lr
 950 THUMB(         nop                             )
 951                mov     pc, lr
 952 THUMB(         nop                             )
 953
 954                .word   0x41807200              @ ARM720T (writethrough)
 955                .word   0xffffff00
 956                W(b)    __armv4_mmu_cache_on
 957                W(b)    __armv4_mmu_cache_off
 958                mov     pc, lr
 959 THUMB(         nop                             )
 960
 961                .word   0x41007400              @ ARM74x
 962                .word   0xff00ff00
 963                W(b)    __armv3_mpu_cache_on
 964                W(b)    __armv3_mpu_cache_off
 965                W(b)    __armv3_mpu_cache_flush
 966                
 967                .word   0x41009400              @ ARM94x
 968                .word   0xff00ff00
 969                W(b)    __armv4_mpu_cache_on
 970                W(b)    __armv4_mpu_cache_off
 971                W(b)    __armv4_mpu_cache_flush
 972
 973                .word   0x41069260              @ ARM926EJ-S (v5TEJ)
 974                .word   0xff0ffff0
 975                W(b)    __arm926ejs_mmu_cache_on
 976                W(b)    __armv4_mmu_cache_off
 977                W(b)    __armv5tej_mmu_cache_flush
 978
 979                .word   0x00007000              @ ARM7 IDs
 980                .word   0x0000f000
 981                mov     pc, lr
 982 THUMB(         nop                             )
 983                mov     pc, lr
 984 THUMB(         nop                             )
 985                mov     pc, lr
 986 THUMB(         nop                             )
 987
 988                @ Everything from here on will be the new ID system.
 989
 990                .word   0x4401a100              @ sa110 / sa1100
 991                .word   0xffffffe0
 992                W(b)    __armv4_mmu_cache_on
 993                W(b)    __armv4_mmu_cache_off
 994                W(b)    __armv4_mmu_cache_flush
 995
 996                .word   0x6901b110              @ sa1110
 997                .word   0xfffffff0
 998                W(b)    __armv4_mmu_cache_on
 999                W(b)    __armv4_mmu_cache_off
1000                W(b)    __armv4_mmu_cache_flush
1001
1002                .word   0x56056900
1003                .word   0xffffff00              @ PXA9xx
1004                W(b)    __armv4_mmu_cache_on
1005                W(b)    __armv4_mmu_cache_off
1006                W(b)    __armv4_mmu_cache_flush
1007
1008                .word   0x56158000              @ PXA168
1009                .word   0xfffff000
1010                W(b)    __armv4_mmu_cache_on
1011                W(b)    __armv4_mmu_cache_off
1012                W(b)    __armv5tej_mmu_cache_flush
1013
1014                .word   0x56050000              @ Feroceon
1015                .word   0xff0f0000
1016                W(b)    __armv4_mmu_cache_on
1017                W(b)    __armv4_mmu_cache_off
1018                W(b)    __armv5tej_mmu_cache_flush
1019
1020#ifdef CONFIG_CPU_FEROCEON_OLD_ID
1021                /* this conflicts with the standard ARMv5TE entry */
1022                .long   0x41009260              @ Old Feroceon
1023                .long   0xff00fff0
1024                b       __armv4_mmu_cache_on
1025                b       __armv4_mmu_cache_off
1026                b       __armv5tej_mmu_cache_flush
1027#endif
1028
1029                .word   0x66015261              @ FA526
1030                .word   0xff01fff1
1031                W(b)    __fa526_cache_on
1032                W(b)    __armv4_mmu_cache_off
1033                W(b)    __fa526_cache_flush
1034
1035                @ These match on the architecture ID
1036
1037                .word   0x00020000              @ ARMv4T
1038                .word   0x000f0000
1039                W(b)    __armv4_mmu_cache_on
1040                W(b)    __armv4_mmu_cache_off
1041                W(b)    __armv4_mmu_cache_flush
1042
1043                .word   0x00050000              @ ARMv5TE
1044                .word   0x000f0000
1045                W(b)    __armv4_mmu_cache_on
1046                W(b)    __armv4_mmu_cache_off
1047                W(b)    __armv4_mmu_cache_flush
1048
1049                .word   0x00060000              @ ARMv5TEJ
1050                .word   0x000f0000
1051                W(b)    __armv4_mmu_cache_on
1052                W(b)    __armv4_mmu_cache_off
1053                W(b)    __armv5tej_mmu_cache_flush
1054
1055                .word   0x0007b000              @ ARMv6
1056                .word   0x000ff000
1057                W(b)    __armv6_mmu_cache_on
1058                W(b)    __armv4_mmu_cache_off
1059                W(b)    __armv6_mmu_cache_flush
1060
1061                .word   0x000f0000              @ new CPU Id
1062                .word   0x000f0000
1063                W(b)    __armv7_mmu_cache_on
1064                W(b)    __armv7_mmu_cache_off
1065                W(b)    __armv7_mmu_cache_flush
1066
1067                .word   0                       @ unrecognised type
1068                .word   0
1069                mov     pc, lr
1070 THUMB(         nop                             )
1071                mov     pc, lr
1072 THUMB(         nop                             )
1073                mov     pc, lr
1074 THUMB(         nop                             )
1075
1076                .size   proc_types, . - proc_types
1077
1078                /*
1079                 * If you get a "non-constant expression in ".if" statement"
1080                 * error from the assembler on this line, check that you have
1081                 * not accidentally written a "b" instruction where you should
1082                 * have written W(b).
1083                 */
1084                .if (. - proc_types) % PROC_ENTRY_SIZE != 0
1085                .error "The size of one or more proc_types entries is wrong."
1086                .endif
1087
1088/*
1089 * Turn off the Cache and MMU.  ARMv3 does not support
1090 * reading the control register, but ARMv4 does.
1091 *
1092 * On exit,
1093 *  r0, r1, r2, r3, r9, r12 corrupted
1094 * This routine must preserve:
1095 *  r4, r7, r8
1096 */
1097                .align  5
1098cache_off:      mov     r3, #12                 @ cache_off function
1099                b       call_cache_fn
1100
1101__armv4_mpu_cache_off:
1102                mrc     p15, 0, r0, c1, c0
1103                bic     r0, r0, #0x000d
1104                mcr     p15, 0, r0, c1, c0      @ turn MPU and cache off
1105                mov     r0, #0
1106                mcr     p15, 0, r0, c7, c10, 4  @ drain write buffer
1107                mcr     p15, 0, r0, c7, c6, 0   @ flush D-Cache
1108                mcr     p15, 0, r0, c7, c5, 0   @ flush I-Cache
1109                mov     pc, lr
1110
1111__armv3_mpu_cache_off:
1112                mrc     p15, 0, r0, c1, c0
1113                bic     r0, r0, #0x000d
1114                mcr     p15, 0, r0, c1, c0, 0   @ turn MPU and cache off
1115                mov     r0, #0
1116                mcr     p15, 0, r0, c7, c0, 0   @ invalidate whole cache v3
1117                mov     pc, lr
1118
1119__armv4_mmu_cache_off:
1120#ifdef CONFIG_MMU
1121                mrc     p15, 0, r0, c1, c0
1122                bic     r0, r0, #0x000d
1123                mcr     p15, 0, r0, c1, c0      @ turn MMU and cache off
1124                mov     r0, #0
1125                mcr     p15, 0, r0, c7, c7      @ invalidate whole cache v4
1126                mcr     p15, 0, r0, c8, c7      @ invalidate whole TLB v4
1127#endif
1128                mov     pc, lr
1129
1130__armv7_mmu_cache_off:
1131                mrc     p15, 0, r0, c1, c0
1132#ifdef CONFIG_MMU
1133                bic     r0, r0, #0x000d
1134#else
1135                bic     r0, r0, #0x000c
1136#endif
1137                mcr     p15, 0, r0, c1, c0      @ turn MMU and cache off
1138                mov     r12, lr
1139                bl      __armv7_mmu_cache_flush
1140                mov     r0, #0
1141#ifdef CONFIG_MMU
1142                mcr     p15, 0, r0, c8, c7, 0   @ invalidate whole TLB
1143#endif
1144                mcr     p15, 0, r0, c7, c5, 6   @ invalidate BTC
1145                mcr     p15, 0, r0, c7, c10, 4  @ DSB
1146                mcr     p15, 0, r0, c7, c5, 4   @ ISB
1147                mov     pc, r12
1148
1149/*
1150 * Clean and flush the cache to maintain consistency.
1151 *
1152 * On exit,
1153 *  r1, r2, r3, r9, r10, r11, r12 corrupted
1154 * This routine must preserve:
1155 *  r4, r6, r7, r8
1156 */
1157                .align  5
1158cache_clean_flush:
 1159                mov     r3, #16                 @ cache_flush function
1160                b       call_cache_fn
1161
1162__armv4_mpu_cache_flush:
1163                tst     r4, #1
1164                movne   pc, lr
1165                mov     r2, #1
1166                mov     r3, #0
1167                mcr     p15, 0, ip, c7, c6, 0   @ invalidate D cache
1168                mov     r1, #7 << 5             @ 8 segments
11691:              orr     r3, r1, #63 << 26       @ 64 entries
11702:              mcr     p15, 0, r3, c7, c14, 2  @ clean & invalidate D index
1171                subs    r3, r3, #1 << 26
1172                bcs     2b                      @ entries 63 to 0
1173                subs    r1, r1, #1 << 5
1174                bcs     1b                      @ segments 7 to 0
1175
1176                teq     r2, #0
1177                mcrne   p15, 0, ip, c7, c5, 0   @ invalidate I cache
1178                mcr     p15, 0, ip, c7, c10, 4  @ drain WB
1179                mov     pc, lr
1180                
1181__fa526_cache_flush:
1182                tst     r4, #1
1183                movne   pc, lr
1184                mov     r1, #0
1185                mcr     p15, 0, r1, c7, c14, 0  @ clean and invalidate D cache
1186                mcr     p15, 0, r1, c7, c5, 0   @ flush I cache
1187                mcr     p15, 0, r1, c7, c10, 4  @ drain WB
1188                mov     pc, lr
1189
1190__armv6_mmu_cache_flush:
1191                mov     r1, #0
1192                tst     r4, #1
1193                mcreq   p15, 0, r1, c7, c14, 0  @ clean+invalidate D
1194                mcr     p15, 0, r1, c7, c5, 0   @ invalidate I+BTB
1195                mcreq   p15, 0, r1, c7, c15, 0  @ clean+invalidate unified
1196                mcr     p15, 0, r1, c7, c10, 4  @ drain WB
1197                mov     pc, lr
1198
1199__armv7_mmu_cache_flush:
1200                tst     r4, #1
1201                bne     iflush
1202                mrc     p15, 0, r10, c0, c1, 5  @ read ID_MMFR1
1203                tst     r10, #0xf << 16         @ hierarchical cache (ARMv7)
1204                mov     r10, #0
1205                beq     hierarchical
1206                mcr     p15, 0, r10, c7, c14, 0 @ clean+invalidate D
1207                b       iflush
1208hierarchical:
1209                mcr     p15, 0, r10, c7, c10, 5 @ DMB
1210                stmfd   sp!, {r0-r7, r9-r11}
1211                mrc     p15, 1, r0, c0, c0, 1   @ read clidr
1212                ands    r3, r0, #0x7000000      @ extract loc from clidr
1213                mov     r3, r3, lsr #23         @ left align loc bit field
1214                beq     finished                @ if loc is 0, then no need to clean
1215                mov     r10, #0                 @ start clean at cache level 0
1216loop1:
1217                add     r2, r10, r10, lsr #1    @ work out 3x current cache level
1218                mov     r1, r0, lsr r2          @ extract cache type bits from clidr
 1219                and     r1, r1, #7              @ mask off the bits for the current cache only
1220                cmp     r1, #2                  @ see what cache we have at this level
1221                blt     skip                    @ skip if no cache, or just i-cache
1222                mcr     p15, 2, r10, c0, c0, 0  @ select current cache level in cssr
 1223                mcr     p15, 0, r10, c7, c5, 4  @ isb to sync the new cssr & csidr
1224                mrc     p15, 1, r1, c0, c0, 0   @ read the new csidr
1225                and     r2, r1, #7              @ extract the length of the cache lines
1226                add     r2, r2, #4              @ add 4 (line length offset)
1227                ldr     r4, =0x3ff
 1228                ands    r4, r4, r1, lsr #3      @ find maximum way number (ways - 1)
1229                clz     r5, r4                  @ find bit position of way size increment
1230                ldr     r7, =0x7fff
 1231                ands    r7, r7, r1, lsr #13     @ extract maximum set index (sets - 1)
1232loop2:
1233                mov     r9, r4                  @ create working copy of max way size
1234loop3:
1235 ARM(           orr     r11, r10, r9, lsl r5    ) @ factor way and cache number into r11
1236 ARM(           orr     r11, r11, r7, lsl r2    ) @ factor index number into r11
1237 THUMB(         lsl     r6, r9, r5              )
1238 THUMB(         orr     r11, r10, r6            ) @ factor way and cache number into r11
1239 THUMB(         lsl     r6, r7, r2              )
1240 THUMB(         orr     r11, r11, r6            ) @ factor index number into r11
1241                mcr     p15, 0, r11, c7, c14, 2 @ clean & invalidate by set/way
1242                subs    r9, r9, #1              @ decrement the way
1243                bge     loop3
1244                subs    r7, r7, #1              @ decrement the index
1245                bge     loop2
1246skip:
1247                add     r10, r10, #2            @ increment cache number
1248                cmp     r3, r10
1249                bgt     loop1
1250finished:
1251                ldmfd   sp!, {r0-r7, r9-r11}
1252                mov     r10, #0                 @ switch back to cache level 0
1253                mcr     p15, 2, r10, c0, c0, 0  @ select current cache level in cssr
1254iflush:
1255                mcr     p15, 0, r10, c7, c10, 4 @ DSB
1256                mcr     p15, 0, r10, c7, c5, 0  @ invalidate I+BTB
1257                mcr     p15, 0, r10, c7, c10, 4 @ DSB
1258                mcr     p15, 0, r10, c7, c5, 4  @ ISB
1259                mov     pc, lr
1260
1261__armv5tej_mmu_cache_flush:
1262                tst     r4, #1
1263                movne   pc, lr
12641:              mrc     p15, 0, r15, c7, c14, 3 @ test,clean,invalidate D cache
1265                bne     1b
1266                mcr     p15, 0, r0, c7, c5, 0   @ flush I cache
1267                mcr     p15, 0, r0, c7, c10, 4  @ drain WB
1268                mov     pc, lr
1269
1270__armv4_mmu_cache_flush:
1271                tst     r4, #1
1272                movne   pc, lr
1273                mov     r2, #64*1024            @ default: 32K dcache size (*2)
1274                mov     r11, #32                @ default: 32 byte line size
1275                mrc     p15, 0, r3, c0, c0, 1   @ read cache type
1276                teq     r3, r9                  @ cache ID register present?
1277                beq     no_cache_id
1278                mov     r1, r3, lsr #18
1279                and     r1, r1, #7
1280                mov     r2, #1024
1281                mov     r2, r2, lsl r1          @ base dcache size *2
1282                tst     r3, #1 << 14            @ test M bit
1283                addne   r2, r2, r2, lsr #1      @ +1/2 size if M == 1
1284                mov     r3, r3, lsr #12
1285                and     r3, r3, #3
1286                mov     r11, #8
1287                mov     r11, r11, lsl r3        @ cache line size in bytes
1288no_cache_id:
1289                mov     r1, pc
1290                bic     r1, r1, #63             @ align to longest cache line
1291                add     r2, r1, r2
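                     @ reading a dcache-sized window one line at a time evicts,
                     @ and thereby cleans, any dirty lines (software flush)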
12921:
1293 ARM(           ldr     r3, [r1], r11           ) @ s/w flush D cache
1294 THUMB(         ldr     r3, [r1]                ) @ s/w flush D cache
1295 THUMB(         add     r1, r1, r11             )
1296                teq     r1, r2
1297                bne     1b
1298
1299                mcr     p15, 0, r1, c7, c5, 0   @ flush I cache
1300                mcr     p15, 0, r1, c7, c6, 0   @ flush D cache
1301                mcr     p15, 0, r1, c7, c10, 4  @ drain WB
1302                mov     pc, lr
1303
1304__armv3_mmu_cache_flush:
1305__armv3_mpu_cache_flush:
1306                tst     r4, #1
1307                movne   pc, lr
1308                mov     r1, #0
1309                mcr     p15, 0, r1, c7, c0, 0   @ invalidate whole cache v3
1310                mov     pc, lr
1311
1312/*
1313 * Various debugging routines for printing hex characters and
1314 * memory, which again must be relocatable.
1315 */
1316#ifdef DEBUG
1317                .align  2
1318                .type   phexbuf,#object
1319phexbuf:        .space  12
1320                .size   phexbuf, . - phexbuf
1321
1322@ phex corrupts {r0, r1, r2, r3}
1323phex:           adr     r3, phexbuf
1324                mov     r2, #0
 1325                strb    r2, [r3, r1]            @ NUL-terminate the buffer
 13261:              subs    r1, r1, #1
 1327                movmi   r0, r3
 1328                bmi     puts                    @ all digits stored, print them
 1329                and     r2, r0, #15             @ take the low nibble
 1330                mov     r0, r0, lsr #4
 1331                cmp     r2, #10
 1332                addge   r2, r2, #7              @ 10..15 -> 'A'..'F'
 1333                add     r2, r2, #'0'
 1334                strb    r2, [r3, r1]            @ store digit, filling from the end
1335                b       1b
1336
1337@ puts corrupts {r0, r1, r2, r3}
1338puts:           loadsp  r3, r2, r1
13391:              ldrb    r2, [r0], #1
1340                teq     r2, #0
1341                moveq   pc, lr
13422:              writeb  r2, r3
 1343                mov     r1, #0x00020000         @ simple busy-wait delay
13443:              subs    r1, r1, #1
1345                bne     3b
1346                teq     r2, #'\n'
1347                moveq   r2, #'\r'
1348                beq     2b
1349                teq     r0, #0
1350                bne     1b
1351                mov     pc, lr
1352@ putc corrupts {r0, r1, r2, r3}
1353putc:
1354                mov     r2, r0
1355                loadsp  r3, r1, r0
1356                mov     r0, #0
1357                b       2b
1358
1359@ memdump corrupts {r0, r1, r2, r3, r10, r11, r12, lr}
1360memdump:        mov     r12, r0
1361                mov     r10, lr
1362                mov     r11, #0
13632:              mov     r0, r11, lsl #2
1364                add     r0, r0, r12
1365                mov     r1, #8
1366                bl      phex
1367                mov     r0, #':'
1368                bl      putc
13691:              mov     r0, #' '
1370                bl      putc
1371                ldr     r0, [r12, r11, lsl #2]
1372                mov     r1, #8
1373                bl      phex
1374                and     r0, r11, #7
1375                teq     r0, #3
1376                moveq   r0, #' '
1377                bleq    putc
1378                and     r0, r11, #7
1379                add     r11, r11, #1
1380                teq     r0, #7
1381                bne     1b
1382                mov     r0, #'\n'
1383                bl      putc
1384                cmp     r11, #64
1385                blt     2b
1386                mov     pc, r10
1387#endif
1388
1389                .ltorg
1390
1391#ifdef CONFIG_ARM_VIRT_EXT
1392.align 5
1393__hyp_reentry_vectors:
1394                W(b)    .                       @ reset
1395                W(b)    .                       @ undef
1396                W(b)    .                       @ svc
1397                W(b)    .                       @ pabort
1398                W(b)    .                       @ dabort
1399                W(b)    __enter_kernel          @ hyp
1400                W(b)    .                       @ irq
1401                W(b)    .                       @ fiq
1402#endif /* CONFIG_ARM_VIRT_EXT */
1403
1404__enter_kernel:
1405                mov     r0, #0                  @ must be 0
1406                mov     r1, r7                  @ restore architecture number
1407                mov     r2, r8                  @ restore atags pointer
1408 ARM(           mov     pc, r4          )       @ call kernel
1409 M_CLASS(       add     r4, r4, #1      )       @ enter in Thumb mode for M class
1410 THUMB(         bx      r4              )       @ entry point is always ARM for A/R classes
1411
1412reloc_code_end:
1413
1414#ifdef CONFIG_EFI_STUB
1415                .align  2
1416_start:         .long   start - .
1417
1418ENTRY(efi_stub_entry)
1419                @ allocate space on stack for passing current zImage address
 1420                @ and for the EFI stub to return the new entry point of the
 1421                @ zImage, as the EFI stub may copy the kernel. Pointer address
1422                @ is passed in r2. r0 and r1 are passed through from the
1423                @ EFI firmware to efi_entry
1424                adr     ip, _start
1425                ldr     r3, [ip]
1426                add     r3, r3, ip
1427                stmfd   sp!, {r3, lr}
1428                mov     r2, sp                  @ pass zImage address in r2
1429                bl      efi_entry
1430
1431                @ Check for error return from EFI stub. r0 has FDT address
1432                @ or error code.
1433                cmn     r0, #1
1434                beq     efi_load_fail
1435
1436                @ Preserve return value of efi_entry() in r4
1437                mov     r4, r0
1438
1439                @ our cache maintenance code relies on CP15 barrier instructions
1440                @ but since we arrived here with the MMU and caches configured
1441                @ by UEFI, we must check that the CP15BEN bit is set in SCTLR.
1442                @ Note that this bit is RAO/WI on v6 and earlier, so the ISB in
1443                @ the enable path will be executed on v7+ only.
1444                mrc     p15, 0, r1, c1, c0, 0   @ read SCTLR
1445                tst     r1, #(1 << 5)           @ CP15BEN bit set?
1446                bne     0f
1447                orr     r1, r1, #(1 << 5)       @ CP15 barrier instructions
1448                mcr     p15, 0, r1, c1, c0, 0   @ write SCTLR
1449 ARM(           .inst   0xf57ff06f              @ v7+ isb       )
1450 THUMB(         isb                                             )
1451
14520:              bl      cache_clean_flush
1453                bl      cache_off
1454
1455                @ Set parameters for booting zImage according to boot protocol
 1456                @ put the FDT address in r2; it was returned by efi_entry()
1457                @ r1 is the machine type, and r0 needs to be 0
1458                mov     r0, #0
 1459                mov     r1, #0xFFFFFFFF         @ invalid machine type (booting with DT)
1460                mov     r2, r4
1461
1462                @ Branch to (possibly) relocated zImage that is in [sp]
1463                ldr     lr, [sp]
1464                ldr     ip, =start_offset
1465                add     lr, lr, ip
1466                mov     pc, lr                          @ no mode switch
1467
1468efi_load_fail:
1469                @ Return EFI_LOAD_ERROR to EFI firmware on error.
1470                ldr     r0, =0x80000001
1471                ldmfd   sp!, {ip, pc}
1472ENDPROC(efi_stub_entry)
1473#endif
1474
1475                .align
1476                .section ".stack", "aw", %nobits
1477.L_user_stack:  .space  4096
1478.L_user_stack_end:
1479