/*
 *  linux/arch/arm/boot/compressed/head.S
 *
 *  Copyright (C) 1996-2002 Russell King
 *  Copyright (C) 2004 Hyok S. Choi (MPU support)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/v7m.h>

#include "efi-header.S"

 AR_CLASS(      .arch   armv7-a )
 M_CLASS(       .arch   armv7-m )

/*
 * Debugging stuff
 *
 * Note that these macros must not contain any code which is not
 * 100% relocatable.  Any attempt to do so will result in a crash.
 * Please select one of the following when turning on debugging.
 */
#ifdef DEBUG

#if defined(CONFIG_DEBUG_ICEDCC)

#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7)
                .macro  loadsp, rb, tmp
                .endm
                .macro  writeb, ch, rb
                mcr     p14, 0, \ch, c0, c5, 0
                .endm
#elif defined(CONFIG_CPU_XSCALE)
                .macro  loadsp, rb, tmp
                .endm
                .macro  writeb, ch, rb
                mcr     p14, 0, \ch, c8, c0, 0
                .endm
#else
                .macro  loadsp, rb, tmp
                .endm
                .macro  writeb, ch, rb
                mcr     p14, 0, \ch, c1, c0, 0
                .endm
#endif

#else

#include CONFIG_DEBUG_LL_INCLUDE

                .macro  writeb, ch, rb
                senduart \ch, \rb
                .endm

#if defined(CONFIG_ARCH_SA1100)
                .macro  loadsp, rb, tmp
                mov     \rb, #0x80000000        @ physical base address
#ifdef CONFIG_DEBUG_LL_SER3
                add     \rb, \rb, #0x00050000   @ Ser3
#else
                add     \rb, \rb, #0x00010000   @ Ser1
#endif
                .endm
#else
                .macro  loadsp, rb, tmp
                addruart \rb, \tmp
                .endm
#endif
#endif
#endif

                .macro  kputc,val
                mov     r0, \val
                bl      putc
                .endm

                .macro  kphex,val,len
                mov     r0, \val
                mov     r1, #\len
                bl      phex
                .endm

                .macro  debug_reloc_start
#ifdef DEBUG
                kputc   #'\n'
                kphex   r6, 8           /* processor id */
                kputc   #':'
                kphex   r7, 8           /* architecture id */
#ifdef CONFIG_CPU_CP15
                kputc   #':'
                mrc     p15, 0, r0, c1, c0
                kphex   r0, 8           /* control reg */
#endif
                kputc   #'\n'
                kphex   r5, 8           /* decompressed kernel start */
                kputc   #'-'
                kphex   r9, 8           /* decompressed kernel end  */
                kputc   #'>'
                kphex   r4, 8           /* kernel execution address */
                kputc   #'\n'
#endif
                .endm

                .macro  debug_reloc_end
#ifdef DEBUG
                kphex   r5, 8           /* end of kernel */
                kputc   #'\n'
                mov     r0, r4
                bl      memdump         /* dump 256 bytes at start of kernel */
#endif
                .endm

                .section ".start", #alloc, #execinstr
/*
 * sort out different calling conventions
 */
                .align
                /*
                 * Always enter in ARM state for CPUs that support the ARM ISA.
                 * As of today (2014) that's exactly the members of the A and R
                 * classes.
                 */
 AR_CLASS(      .arm    )
start:
                .type   start,#function
                .rept   7
                __nop
                .endr
   ARM(         mov     r0, r0          )
   ARM(         b       1f              )
 THUMB(         badr    r12, 1f         )
 THUMB(         bx      r12             )

                .word   _magic_sig      @ Magic numbers to help the loader
                .word   _magic_start    @ absolute load/run zImage address
                .word   _magic_end      @ zImage end address
                .word   0x04030201      @ endianness flag
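                /*
                 * Note on the flag above: 0x04030201 is stored in the
                 * image's native byte order, so a loader reading raw bytes
                 * sees 01 02 03 04 for a little-endian zImage and
                 * 04 03 02 01 for a big-endian one, and can detect the
                 * image's endianness without any other metadata.
                 */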

 THUMB(         .thumb                  )
1:              __EFI_HEADER

 ARM_BE8(       setend  be              )       @ go BE8 if compiled for BE8
 AR_CLASS(      mrs     r9, cpsr        )
#ifdef CONFIG_ARM_VIRT_EXT
                bl      __hyp_stub_install      @ get into SVC mode, reversibly
#endif
                mov     r7, r1                  @ save architecture ID
                mov     r8, r2                  @ save atags pointer

#ifndef CONFIG_CPU_V7M
                /*
                 * Booting from Angel - need to enter SVC mode and disable
                 * FIQs/IRQs (numeric definitions from angel arm.h source).
                 * We only do this if we were in user mode on entry.
                 */
                mrs     r2, cpsr                @ get current mode
                tst     r2, #3                  @ not user?
                bne     not_angel
                mov     r0, #0x17               @ angel_SWIreason_EnterSVC
 ARM(           swi     0x123456        )       @ angel_SWI_ARM
 THUMB(         svc     0xab            )       @ angel_SWI_THUMB
not_angel:
                safe_svcmode_maskall r0
                msr     spsr_cxsf, r9           @ Save the CPU boot mode in
                                                @ SPSR
#endif
                /*
                 * Note that some cache flushing and other stuff may
                 * be needed here - is there an Angel SWI call for this?
                 */

                /*
                 * some architecture specific code can be inserted
                 * by the linker here, but it should preserve r7, r8, and r9.
                 */

                .text

#ifdef CONFIG_AUTO_ZRELADDR
                /*
                 * Find the start of physical memory.  As we are executing
                 * without the MMU on, we are in the physical address space.
                 * We just need to get rid of any offset by aligning the
                 * address.
                 *
                 * This alignment is a balance between the requirements of
                 * different platforms - we have chosen 128MB to allow
                 * platforms which align the start of their physical memory
                 * to 128MB to use this feature, while allowing the zImage
                 * to be placed within the first 128MB of memory on other
                 * platforms.  Increasing the alignment means we place
                 * stricter alignment requirements on the start of physical
                 * memory, but relaxing it means that we break people who
                 * are already placing their zImage in (eg) the top 64MB
                 * of this range.
                 */
                mov     r4, pc
                and     r4, r4, #0xf8000000
                /* Determine final kernel image address. */
                add     r4, r4, #TEXT_OFFSET
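                /*
                 * Illustrative example (values are assumptions, not from
                 * this file): 0xf8000000 is ~(128MB - 1), so a PC of
                 * 0x80008040 masks down to r4 = 0x80000000; with a typical
                 * TEXT_OFFSET of 0x8000 the final kernel address becomes
                 * 0x80008000.
                 */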
#else
                ldr     r4, =zreladdr
#endif

                /*
                 * Set up a page table only if it won't overwrite ourselves.
                 * That means r4 < pc || r4 - 16k page directory > &_end.
                 * Given that r4 > &_end is most infrequent, we add a rough
                 * additional 1MB of room for a possible appended DTB.
                 */
                mov     r0, pc
                cmp     r0, r4
                ldrcc   r0, LC0+32
                addcc   r0, r0, pc
                cmpcc   r4, r0
                orrcc   r4, r4, #1              @ remember we skipped cache_on
                blcs    cache_on

restart:        adr     r0, LC0
                ldmia   r0, {r1, r2, r3, r6, r10, r11, r12}
                ldr     sp, [r0, #28]

                /*
                 * We might be running at a different address.  We need
                 * to fix up various pointers.
                 */
                sub     r0, r0, r1              @ calculate the delta offset
                add     r6, r6, r0              @ _edata
                add     r10, r10, r0            @ inflated kernel size location

                /*
                 * The kernel build system appends the size of the
                 * decompressed kernel at the end of the compressed data
                 * in little-endian form.
                 */
                ldrb    r9, [r10, #0]
                ldrb    lr, [r10, #1]
                orr     r9, r9, lr, lsl #8
                ldrb    lr, [r10, #2]
                ldrb    r10, [r10, #3]
                orr     r9, r9, lr, lsl #16
                orr     r9, r9, r10, lsl #24
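                /*
                 * Roughly equivalent C for the byte-wise load above; it is
                 * done a byte at a time because the size word may be
                 * unaligned and must read as little-endian on either kind
                 * of CPU:
                 *
                 *      r9 = p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24);
                 */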

#ifndef CONFIG_ZBOOT_ROM
                /* malloc space is above the relocated stack (64k max) */
                add     sp, sp, r0
                add     r10, sp, #0x10000
#else
                /*
                 * With ZBOOT_ROM the bss/stack is non-relocatable,
                 * but someone could still run this code from RAM,
                 * in which case our reference is _edata.
                 */
                mov     r10, r6
#endif

                mov     r5, #0                  @ init dtb size to 0
#ifdef CONFIG_ARM_APPENDED_DTB
/*
 *   r0  = delta
 *   r2  = BSS start
 *   r3  = BSS end
 *   r4  = final kernel address (possibly with LSB set)
 *   r5  = appended dtb size (still unknown)
 *   r6  = _edata
 *   r7  = architecture ID
 *   r8  = atags/device tree pointer
 *   r9  = size of decompressed image
 *   r10 = end of this image, including bss/stack/malloc space if non XIP
 *   r11 = GOT start
 *   r12 = GOT end
 *   sp  = stack pointer
 *
 * if there are device trees (dtb) appended to zImage, advance r10 so that the
 * dtb data will get relocated along with the kernel if necessary.
 */

                ldr     lr, [r6, #0]
#ifndef __ARMEB__
                ldr     r1, =0xedfe0dd0         @ sig is 0xd00dfeed big endian
#else
                ldr     r1, =0xd00dfeed
#endif
                cmp     lr, r1
                bne     dtb_check_done          @ not found

#ifdef CONFIG_ARM_ATAG_DTB_COMPAT
                /*
                 * OK... Let's do some funky business here.
                 * If we do have a DTB appended to zImage, and we do have
                 * an ATAG list around, we want the latter to be translated
                 * and folded into the former here. No GOT fixup has occurred
                 * yet, but none of the code we're about to call uses any
                 * global variable.
                 */

                /* Get the initial DTB size */
                ldr     r5, [r6, #4]
#ifndef __ARMEB__
                /* convert to little endian */
                eor     r1, r5, r5, ror #16
                bic     r1, r1, #0x00ff0000
                mov     r5, r5, ror #8
                eor     r5, r5, r1, lsr #8
#endif
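                /*
                 * The four instructions above are the classic 32-bit byte
                 * swap for CPUs that predate the v6 "rev" instruction,
                 * equivalent to r5 = __builtin_bswap32(r5) in C.
                 */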
                /* 50% DTB growth should be good enough */
                add     r5, r5, r5, lsr #1
                /* preserve 64-bit alignment */
                add     r5, r5, #7
                bic     r5, r5, #7
                /* clamp to 32KB min and 1MB max */
                cmp     r5, #(1 << 15)
                movlo   r5, #(1 << 15)
                cmp     r5, #(1 << 20)
                movhi   r5, #(1 << 20)
                /* temporarily relocate the stack past the DTB work space */
                add     sp, sp, r5

                stmfd   sp!, {r0-r3, ip, lr}
                mov     r0, r8
                mov     r1, r6
                mov     r2, r5
                bl      atags_to_fdt

                /*
                 * If the returned value is 1, there is no ATAG at the
                 * location pointed to by r8.  Try the typical 0x100 offset
                 * from the start of RAM and hope for the best.
                 */
                cmp     r0, #1
                sub     r0, r4, #TEXT_OFFSET
                bic     r0, r0, #1
                add     r0, r0, #0x100
                mov     r1, r6
                mov     r2, r5
                bleq    atags_to_fdt

                ldmfd   sp!, {r0-r3, ip, lr}
                sub     sp, sp, r5
#endif

                mov     r8, r6                  @ use the appended device tree

                /*
                 * Make sure that the DTB doesn't end up in the final
                 * kernel's .bss area. To do so, we adjust the decompressed
                 * kernel size to compensate if that .bss size is larger
                 * than the relocated code.
                 */
                ldr     r5, =_kernel_bss_size
                adr     r1, wont_overwrite
                sub     r1, r6, r1
                subs    r1, r5, r1
                addhi   r9, r9, r1

                /* Get the current DTB size */
                ldr     r5, [r6, #4]
#ifndef __ARMEB__
                /* convert r5 (dtb size) to little endian */
                eor     r1, r5, r5, ror #16
                bic     r1, r1, #0x00ff0000
                mov     r5, r5, ror #8
                eor     r5, r5, r1, lsr #8
#endif

                /* preserve 64-bit alignment */
                add     r5, r5, #7
                bic     r5, r5, #7

                /* relocate some pointers past the appended dtb */
                add     r6, r6, r5
                add     r10, r10, r5
                add     sp, sp, r5
dtb_check_done:
#endif

/*
 * Check to see if we will overwrite ourselves.
 *   r4  = final kernel address (possibly with LSB set)
 *   r9  = size of decompressed image
 *   r10 = end of this image, including bss/stack/malloc space if non XIP
 * We basically want:
 *   r4 - 16k page directory >= r10 -> OK
 *   r4 + image length <= address of wont_overwrite -> OK
 * Note: the possible LSB in r4 is harmless here.
 */
                add     r10, r10, #16384
                cmp     r4, r10
                bhs     wont_overwrite
                add     r10, r4, r9
                adr     r9, wont_overwrite
                cmp     r10, r9
                bls     wont_overwrite

/*
 * Relocate ourselves past the end of the decompressed kernel.
 *   r6  = _edata
 *   r10 = end of the decompressed kernel
 * Because we always copy ahead, we need to do it from the end and go
 * backward in case the source and destination overlap.
 */
                /*
                 * Bump to the next 256-byte boundary with the size of
                 * the relocation code added. This avoids overwriting
                 * ourselves when the offset is small.
                 */
                add     r10, r10, #((reloc_code_end - restart + 256) & ~255)
                bic     r10, r10, #255

                /* Get start of code we want to copy and align it down. */
                adr     r5, restart
                bic     r5, r5, #31

/* Relocate the hyp vector base if necessary */
#ifdef CONFIG_ARM_VIRT_EXT
                mrs     r0, spsr
                and     r0, r0, #MODE_MASK
                cmp     r0, #HYP_MODE
                bne     1f

                bl      __hyp_get_vectors
                sub     r0, r0, r5
                add     r0, r0, r10
                bl      __hyp_set_vectors
1:
#endif

                sub     r9, r6, r5              @ size to copy
                add     r9, r9, #31             @ rounded up to a multiple
                bic     r9, r9, #31             @ ... of 32 bytes
                add     r6, r9, r5
                add     r9, r9, r10

1:              ldmdb   r6!, {r0 - r3, r10 - r12, lr}
                cmp     r6, r5
                stmdb   r9!, {r0 - r3, r10 - r12, lr}
                bhi     1b
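                /*
                 * The loop above is effectively a memmove of the
                 * decompressor code: eight registers move 32 bytes per
                 * iteration, and copying from the end backwards is what
                 * makes the overlapping higher-address destination safe.
                 */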

                /* Preserve offset to relocated code. */
                sub     r6, r9, r6

#ifndef CONFIG_ZBOOT_ROM
                /* cache_clean_flush may use the stack, so relocate it */
                add     sp, sp, r6
#endif

                bl      cache_clean_flush

                badr    r0, restart
                add     r0, r0, r6
                mov     pc, r0

wont_overwrite:
/*
 * If delta is zero, we are running at the address we were linked at.
 *   r0  = delta
 *   r2  = BSS start
 *   r3  = BSS end
 *   r4  = kernel execution address (possibly with LSB set)
 *   r5  = appended dtb size (0 if not present)
 *   r7  = architecture ID
 *   r8  = atags pointer
 *   r11 = GOT start
 *   r12 = GOT end
 *   sp  = stack pointer
 */
                orrs    r1, r0, r5
                beq     not_relocated

                add     r11, r11, r0
                add     r12, r12, r0

#ifndef CONFIG_ZBOOT_ROM
                /*
                 * If we're running fully PIC === CONFIG_ZBOOT_ROM = n,
                 * we need to fix up pointers into the BSS region.
                 * Note that the stack pointer has already been fixed up.
                 */
                add     r2, r2, r0
                add     r3, r3, r0

                /*
                 * Relocate all entries in the GOT table.
                 * Bump bss entries to _edata + dtb size
                 */
1:              ldr     r1, [r11, #0]           @ relocate entries in the GOT
                add     r1, r1, r0              @ This fixes up C references
                cmp     r1, r2                  @ if entry >= bss_start &&
                cmphs   r3, r1                  @       bss_end > entry
                addhi   r1, r1, r5              @    entry += dtb size
                str     r1, [r11], #4           @ next entry
                cmp     r11, r12
                blo     1b
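                /*
                 * Roughly equivalent C for the fixup loop above
                 * (illustrative names):
                 *
                 *      for (p = got_start; p < got_end; p++) {
                 *              *p += delta;
                 *              if (*p >= bss_start && *p < bss_end)
                 *                      *p += dtb_size;
                 *      }
                 */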

                /* bump our bss pointers too */
                add     r2, r2, r5
                add     r3, r3, r5

#else

                /*
                 * Relocate entries in the GOT table.  We only relocate
                 * the entries that are outside the (relocated) BSS region.
                 */
1:              ldr     r1, [r11, #0]           @ relocate entries in the GOT
                cmp     r1, r2                  @ entry < bss_start ||
                cmphs   r3, r1                  @ _end < entry
                addlo   r1, r1, r0              @ table.  This fixes up the
                str     r1, [r11], #4           @ C references.
                cmp     r11, r12
                blo     1b
#endif

not_relocated:  mov     r0, #0
1:              str     r0, [r2], #4            @ clear bss
                str     r0, [r2], #4
                str     r0, [r2], #4
                str     r0, [r2], #4
                cmp     r2, r3
                blo     1b
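                /*
                 * Each pass above clears 16 bytes (four words) before
                 * testing r2 against r3, so up to 12 bytes past the BSS end
                 * may also be zeroed; the assumption is that the linker
                 * script leaves enough aligned slack after the BSS for this
                 * to be harmless.
                 */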

                /*
                 * Did we skip the cache setup earlier?
                 * That is indicated by the LSB in r4.
                 * Do it now if so.
                 */
                tst     r4, #1
                bic     r4, r4, #1
                blne    cache_on

/*
 * The C runtime environment should now be set up sufficiently.
 * Set up some pointers, and start decompressing.
 *   r4  = kernel execution address
 *   r7  = architecture ID
 *   r8  = atags pointer
 */
                mov     r0, r4
                mov     r1, sp                  @ malloc space above stack
                add     r2, sp, #0x10000        @ 64k max
                mov     r3, r7
                bl      decompress_kernel
                bl      cache_clean_flush
                bl      cache_off
                mov     r1, r7                  @ restore architecture number
                mov     r2, r8                  @ restore atags pointer

#ifdef CONFIG_ARM_VIRT_EXT
                mrs     r0, spsr                @ Get saved CPU boot mode
                and     r0, r0, #MODE_MASK
                cmp     r0, #HYP_MODE           @ if not booted in HYP mode...
                bne     __enter_kernel          @ boot kernel directly

                adr     r12, .L__hyp_reentry_vectors_offset
                ldr     r0, [r12]
                add     r0, r0, r12

                bl      __hyp_set_vectors
                __HVC(0)                        @ otherwise bounce to hyp mode

                b       .                       @ should never be reached

                .align  2
.L__hyp_reentry_vectors_offset: .long   __hyp_reentry_vectors - .
#else
                b       __enter_kernel
#endif

                .align  2
                .type   LC0, #object
LC0:            .word   LC0                     @ r1
                .word   __bss_start             @ r2
                .word   _end                    @ r3
                .word   _edata                  @ r6
                .word   input_data_end - 4      @ r10 (inflated size location)
                .word   _got_start              @ r11
                .word   _got_end                @ ip
                .word   .L_user_stack_end       @ sp
                .word   _end - restart + 16384 + 1024*1024
                .size   LC0, . - LC0
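                /*
                 * LC0's first entry is its own link-time address; restart
                 * subtracts it from the run-time address obtained with
                 * "adr r0, LC0" to compute the relocation delta used for
                 * all the fixups above.
                 */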

#ifdef CONFIG_ARCH_RPC
                .globl  params
params:         ldr     r0, =0x10000100         @ params_phys for RPC
                mov     pc, lr
                .ltorg
                .align
#endif

/*
 * Turn on the cache.  We need to set up some page tables so that we
 * can have both the I and D caches on.
 *
 * We place the page tables 16k down from the kernel execution address,
 * and we hope that nothing else is using it.  If we're using it, we
 * will go pop!
 *
 * On entry,
 *  r4 = kernel execution address
 *  r7 = architecture number
 *  r8 = atags pointer
 * On exit,
 *  r0, r1, r2, r3, r9, r10, r12 corrupted
 * This routine must preserve:
 *  r4, r7, r8
 */
                .align  5
cache_on:       mov     r3, #8                  @ cache_on function
                b       call_cache_fn

/*
 * Initialize the highest priority protection region, PR7,
 * to cover the whole 32-bit address space as cacheable and bufferable.
 */
__armv4_mpu_cache_on:
                mov     r0, #0x3f               @ 4G, the whole
                mcr     p15, 0, r0, c6, c7, 0   @ PR7 Area Setting
                mcr     p15, 0, r0, c6, c7, 1

                mov     r0, #0x80               @ PR7
                mcr     p15, 0, r0, c2, c0, 0   @ D-cache on
                mcr     p15, 0, r0, c2, c0, 1   @ I-cache on
                mcr     p15, 0, r0, c3, c0, 0   @ write-buffer on

                mov     r0, #0xc000
                mcr     p15, 0, r0, c5, c0, 1   @ I-access permission
                mcr     p15, 0, r0, c5, c0, 0   @ D-access permission

                mov     r0, #0
                mcr     p15, 0, r0, c7, c10, 4  @ drain write buffer
                mcr     p15, 0, r0, c7, c5, 0   @ flush(inval) I-Cache
                mcr     p15, 0, r0, c7, c6, 0   @ flush(inval) D-Cache
                mrc     p15, 0, r0, c1, c0, 0   @ read control reg
                                                @ ...I .... ..D. WC.M
                orr     r0, r0, #0x002d         @ .... .... ..1. 11.1
                orr     r0, r0, #0x1000         @ ...1 .... .... ....

                mcr     p15, 0, r0, c1, c0, 0   @ write control reg

                mov     r0, #0
                mcr     p15, 0, r0, c7, c5, 0   @ flush(inval) I-Cache
                mcr     p15, 0, r0, c7, c6, 0   @ flush(inval) D-Cache
                mov     pc, lr

__armv3_mpu_cache_on:
                mov     r0, #0x3f               @ 4G, the whole
                mcr     p15, 0, r0, c6, c7, 0   @ PR7 Area Setting

                mov     r0, #0x80               @ PR7
                mcr     p15, 0, r0, c2, c0, 0   @ cache on
                mcr     p15, 0, r0, c3, c0, 0   @ write-buffer on

                mov     r0, #0xc000
                mcr     p15, 0, r0, c5, c0, 0   @ access permission

                mov     r0, #0
                mcr     p15, 0, r0, c7, c0, 0   @ invalidate whole cache v3
                /*
                 * ?? ARMv3 MMU does not allow reading the control register,
                 * does this really work on ARMv3 MPU?
                 */
                mrc     p15, 0, r0, c1, c0, 0   @ read control reg
                                                @ .... .... .... WC.M
                orr     r0, r0, #0x000d         @ .... .... .... 11.1
                /* ?? this overwrites the value constructed above? */
                mov     r0, #0
                mcr     p15, 0, r0, c1, c0, 0   @ write control reg

                /* ?? invalidate for the second time? */
                mcr     p15, 0, r0, c7, c0, 0   @ invalidate whole cache v3
                mov     pc, lr

#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
#define CB_BITS 0x08
#else
#define CB_BITS 0x0c
#endif
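/*
 * In a section descriptor, bit 3 is C (cacheable) and bit 2 is B
 * (bufferable): CB_BITS = 0x0c marks RAM write-back cacheable, while the
 * CONFIG_CPU_DCACHE_WRITETHROUGH value 0x08 sets C only, i.e. write-through.
 */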

__setup_mmu:    sub     r3, r4, #16384          @ Page directory size
                bic     r3, r3, #0xff           @ Align the pointer
                bic     r3, r3, #0x3f00
/*
 * Initialise the page tables, turning on the cacheable and bufferable
 * bits for the RAM area only.
 */
                mov     r0, r3
                mov     r9, r0, lsr #18
                mov     r9, r9, lsl #18         @ start of RAM
                add     r10, r9, #0x10000000    @ a reasonable RAM size
                mov     r1, #0x12               @ XN|U + section mapping
                orr     r1, r1, #3 << 10        @ AP=11
                add     r2, r3, #16384
1:              cmp     r1, r9                  @ if virt > start of RAM
                cmphs   r10, r1                 @   && end of RAM > virt
                bic     r1, r1, #0x1c           @ clear XN|U + C + B
                orrlo   r1, r1, #0x10           @ Set XN|U for non-RAM
                orrhs   r1, r1, r6              @ set RAM section settings
                str     r1, [r0], #4            @ 1:1 mapping
                add     r1, r1, #1048576
                teq     r0, r2
                bne     1b
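/*
 * For reference: each word written above is a 1MB section descriptor.
 * Bits [1:0] = 0b10 mark a section, B and C live in bits 2 and 3, bit 4
 * is the XN|U bit the comments refer to, and AP = 0b11 in bits [11:10]
 * grants full access.  The section base occupies bits [31:20], which is
 * why incrementing r1 by 1MB per entry produces a 1:1 mapping.
 */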
/*
 * If ever we are running from Flash, then we surely want the cache
 * to be enabled also for our execution instance...  We map 2MB of it
 * so there is no map overlap problem for up to 1 MB compressed kernel.
 * If the execution is in RAM then we would only be duplicating the above.
 */
                orr     r1, r6, #0x04           @ ensure B is set for this
                orr     r1, r1, #3 << 10
                mov     r2, pc
                mov     r2, r2, lsr #20
                orr     r1, r1, r2, lsl #20
                add     r0, r3, r2, lsl #2
                str     r1, [r0], #4
                add     r1, r1, #1048576
                str     r1, [r0]
                mov     pc, lr
ENDPROC(__setup_mmu)

@ Enable unaligned access on v6, to allow better code generation
@ for the decompressor C code:
__armv6_mmu_cache_on:
                mrc     p15, 0, r0, c1, c0, 0   @ read SCTLR
                bic     r0, r0, #2              @ A (no unaligned access fault)
                orr     r0, r0, #1 << 22        @ U (v6 unaligned access model)
                mcr     p15, 0, r0, c1, c0, 0   @ write SCTLR
                b       __armv4_mmu_cache_on

__arm926ejs_mmu_cache_on:
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
                mov     r0, #4                  @ put dcache in WT mode
                mcr     p15, 7, r0, c15, c0, 0
#endif

__armv4_mmu_cache_on:
                mov     r12, lr
#ifdef CONFIG_MMU
                mov     r6, #CB_BITS | 0x12     @ U
                bl      __setup_mmu
                mov     r0, #0
                mcr     p15, 0, r0, c7, c10, 4  @ drain write buffer
                mcr     p15, 0, r0, c8, c7, 0   @ flush I,D TLBs
                mrc     p15, 0, r0, c1, c0, 0   @ read control reg
                orr     r0, r0, #0x5000         @ I-cache enable, RR cache replacement
                orr     r0, r0, #0x0030
 ARM_BE8(       orr     r0, r0, #1 << 25 )      @ big-endian page tables
                bl      __common_mmu_cache_on
                mov     r0, #0
                mcr     p15, 0, r0, c8, c7, 0   @ flush I,D TLBs
#endif
                mov     pc, r12

__armv7_mmu_cache_on:
                mov     r12, lr
#ifdef CONFIG_MMU
                mrc     p15, 0, r11, c0, c1, 4  @ read ID_MMFR0
                tst     r11, #0xf               @ VMSA
                movne   r6, #CB_BITS | 0x02     @ !XN
                blne    __setup_mmu
                mov     r0, #0
                mcr     p15, 0, r0, c7, c10, 4  @ drain write buffer
                tst     r11, #0xf               @ VMSA
                mcrne   p15, 0, r0, c8, c7, 0   @ flush I,D TLBs
#endif
                mrc     p15, 0, r0, c1, c0, 0   @ read control reg
                bic     r0, r0, #1 << 28        @ clear SCTLR.TRE
                orr     r0, r0, #0x5000         @ I-cache enable, RR cache replacement
                orr     r0, r0, #0x003c         @ write buffer
                bic     r0, r0, #2              @ A (no unaligned access fault)
                orr     r0, r0, #1 << 22        @ U (v6 unaligned access model)
                                                @ (needed for ARM1176)
#ifdef CONFIG_MMU
 ARM_BE8(       orr     r0, r0, #1 << 25 )      @ big-endian page tables
                mrcne   p15, 0, r6, c2, c0, 2   @ read ttb control reg
                orrne   r0, r0, #1              @ MMU enabled
                movne   r1, #0xfffffffd         @ domain 0 = client
                bic     r6, r6, #1 << 31        @ 32-bit translation system
                bic     r6, r6, #(7 << 0) | (1 << 4)    @ use only ttbr0
                mcrne   p15, 0, r3, c2, c0, 0   @ load page table pointer
                mcrne   p15, 0, r1, c3, c0, 0   @ load domain access control
                mcrne   p15, 0, r6, c2, c0, 2   @ load ttb control
#endif
                mcr     p15, 0, r0, c7, c5, 4   @ ISB
                mcr     p15, 0, r0, c1, c0, 0   @ load control register
                mrc     p15, 0, r0, c1, c0, 0   @ and read it back
                mov     r0, #0
                mcr     p15, 0, r0, c7, c5, 4   @ ISB
                mov     pc, r12

__fa526_cache_on:
                mov     r12, lr
                mov     r6, #CB_BITS | 0x12     @ U
                bl      __setup_mmu
                mov     r0, #0
                mcr     p15, 0, r0, c7, c7, 0   @ Invalidate whole cache
                mcr     p15, 0, r0, c7, c10, 4  @ drain write buffer
                mcr     p15, 0, r0, c8, c7, 0   @ flush UTLB
                mrc     p15, 0, r0, c1, c0, 0   @ read control reg
                orr     r0, r0, #0x1000         @ I-cache enable
                bl      __common_mmu_cache_on
                mov     r0, #0
                mcr     p15, 0, r0, c8, c7, 0   @ flush UTLB
                mov     pc, r12

__common_mmu_cache_on:
#ifndef CONFIG_THUMB2_KERNEL
#ifndef DEBUG
                orr     r0, r0, #0x000d         @ Write buffer, mmu
#endif
                mov     r1, #-1
                mcr     p15, 0, r3, c2, c0, 0   @ load page table pointer
                mcr     p15, 0, r1, c3, c0, 0   @ load domain access control
                b       1f
                .align  5                       @ cache line aligned
1:              mcr     p15, 0, r0, c1, c0, 0   @ load control register
                mrc     p15, 0, r0, c1, c0, 0   @ and read it back to
                sub     pc, lr, r0, lsr #32     @ properly flush pipeline
#endif

#define PROC_ENTRY_SIZE (4*5)

/*
 * Here follow the relocatable cache support functions for the
 * various processors.  This is a generic hook for locating an
 * entry and jumping to an instruction at the specified offset
 * from the start of the block.  Please note this is all position
 * independent code.
 *
 *  r1  = corrupted
 *  r2  = corrupted
 *  r3  = block offset
 *  r9  = corrupted
 *  r12 = corrupted
 */

call_cache_fn:  adr     r12, proc_types
#ifdef CONFIG_CPU_CP15
                mrc     p15, 0, r9, c0, c0      @ get processor ID
#elif defined(CONFIG_CPU_V7M)
                /*
                 * On v7-M the processor id is located in the V7M_SCB_CPUID
                 * register, but as cache handling is IMPLEMENTATION DEFINED
                 * on v7-M (if existent at all) we just return early here.
                 * If V7M_SCB_CPUID were used the cpu ID functions (i.e.
                 * __armv7_mmu_cache_{on,off,flush}) would be selected which
                 * use cp15 registers that are not implemented on v7-M.
                 */
                bx      lr
#else
                ldr     r9, =CONFIG_PROCESSOR_ID
#endif
1:              ldr     r1, [r12, #0]           @ get value
                ldr     r2, [r12, #4]           @ get mask
                eor     r1, r1, r9              @ (real ^ match)
                tst     r1, r2                  @       & mask
 ARM(           addeq   pc, r12, r3             ) @ call cache function
 THUMB(         addeq   r12, r3                 )
 THUMB(         moveq   pc, r12                 ) @ call cache function
                add     r12, r12, #PROC_ENTRY_SIZE
                b       1b

/*
 * Table for cache operations.  This is basically:
 *   - CPU ID match
 *   - CPU ID mask
 *   - 'cache on' method instruction
 *   - 'cache off' method instruction
 *   - 'cache flush' method instruction
 *
 * We match an entry using: ((real_id ^ match) & mask) == 0
 *
 * Writethrough caches generally only need 'on' and 'off'
 * methods.  Writeback caches _must_ have the flush method
 * defined.
 */
                .align  2
                .type   proc_types,#object
proc_types:
                .word   0x41000000              @ old ARM ID
                .word   0xff00f000
                mov     pc, lr
 THUMB(         nop                             )
                mov     pc, lr
 THUMB(         nop                             )
                mov     pc, lr
 THUMB(         nop                             )

                .word   0x41007000              @ ARM7/710
                .word   0xfff8fe00
                mov     pc, lr
 THUMB(         nop                             )
                mov     pc, lr
 THUMB(         nop                             )
                mov     pc, lr
 THUMB(         nop                             )

                .word   0x41807200              @ ARM720T (writethrough)
                .word   0xffffff00
                W(b)    __armv4_mmu_cache_on
                W(b)    __armv4_mmu_cache_off
                mov     pc, lr
 THUMB(         nop                             )

                .word   0x41007400              @ ARM74x
                .word   0xff00ff00
                W(b)    __armv3_mpu_cache_on
                W(b)    __armv3_mpu_cache_off
                W(b)    __armv3_mpu_cache_flush

                .word   0x41009400              @ ARM94x
                .word   0xff00ff00
                W(b)    __armv4_mpu_cache_on
                W(b)    __armv4_mpu_cache_off
                W(b)    __armv4_mpu_cache_flush

                .word   0x41069260              @ ARM926EJ-S (v5TEJ)
                .word   0xff0ffff0
                W(b)    __arm926ejs_mmu_cache_on
                W(b)    __armv4_mmu_cache_off
                W(b)    __armv5tej_mmu_cache_flush

                .word   0x00007000              @ ARM7 IDs
                .word   0x0000f000
                mov     pc, lr
 THUMB(         nop                             )
                mov     pc, lr
 THUMB(         nop                             )
                mov     pc, lr
 THUMB(         nop                             )

                @ Everything from here on will be the new ID system.

                .word   0x4401a100              @ sa110 / sa1100
                .word   0xffffffe0
                W(b)    __armv4_mmu_cache_on
                W(b)    __armv4_mmu_cache_off
                W(b)    __armv4_mmu_cache_flush

                .word   0x6901b110              @ sa1110
                .word   0xfffffff0
                W(b)    __armv4_mmu_cache_on
                W(b)    __armv4_mmu_cache_off
                W(b)    __armv4_mmu_cache_flush

                .word   0x56056900
                .word   0xffffff00              @ PXA9xx
                W(b)    __armv4_mmu_cache_on
                W(b)    __armv4_mmu_cache_off
                W(b)    __armv4_mmu_cache_flush

                .word   0x56158000              @ PXA168
                .word   0xfffff000
                W(b)    __armv4_mmu_cache_on
                W(b)    __armv4_mmu_cache_off
                W(b)    __armv5tej_mmu_cache_flush

                .word   0x56050000              @ Feroceon
                .word   0xff0f0000
                W(b)    __armv4_mmu_cache_on
                W(b)    __armv4_mmu_cache_off
                W(b)    __armv5tej_mmu_cache_flush

#ifdef CONFIG_CPU_FEROCEON_OLD_ID
                /* this conflicts with the standard ARMv5TE entry */
                .long   0x41009260              @ Old Feroceon
                .long   0xff00fff0
                b       __armv4_mmu_cache_on
                b       __armv4_mmu_cache_off
                b       __armv5tej_mmu_cache_flush
#endif

                .word   0x66015261              @ FA526
                .word   0xff01fff1
                W(b)    __fa526_cache_on
                W(b)    __armv4_mmu_cache_off
                W(b)    __fa526_cache_flush

                @ These match on the architecture ID

                .word   0x00020000              @ ARMv4T
                .word   0x000f0000
                W(b)    __armv4_mmu_cache_on
                W(b)    __armv4_mmu_cache_off
                W(b)    __armv4_mmu_cache_flush

                .word   0x00050000              @ ARMv5TE
                .word   0x000f0000
                W(b)    __armv4_mmu_cache_on
                W(b)    __armv4_mmu_cache_off
                W(b)    __armv4_mmu_cache_flush

                .word   0x00060000              @ ARMv5TEJ
                .word   0x000f0000
                W(b)    __armv4_mmu_cache_on
                W(b)    __armv4_mmu_cache_off
                W(b)    __armv5tej_mmu_cache_flush

                .word   0x0007b000              @ ARMv6
                .word   0x000ff000
                W(b)    __armv6_mmu_cache_on
                W(b)    __armv4_mmu_cache_off
                W(b)    __armv6_mmu_cache_flush

                .word   0x000f0000              @ new CPU Id
                .word   0x000f0000
                W(b)    __armv7_mmu_cache_on
                W(b)    __armv7_mmu_cache_off
                W(b)    __armv7_mmu_cache_flush

                .word   0                       @ unrecognised type
                .word   0
                mov     pc, lr
 THUMB(         nop                             )
                mov     pc, lr
 THUMB(         nop                             )
                mov     pc, lr
 THUMB(         nop                             )

                .size   proc_types, . - proc_types

                /*
                 * If you get a "non-constant expression in ".if" statement"
                 * error from the assembler on this line, check that you have
                 * not accidentally written a "b" instruction where you should
                 * have written W(b).
                 */
                .if (. - proc_types) % PROC_ENTRY_SIZE != 0
                .error "The size of one or more proc_types entries is wrong."
                .endif

/*
 * Turn off the Cache and MMU.  ARMv3 does not support
 * reading the control register, but ARMv4 does.
 *
 * On exit,
 *  r0, r1, r2, r3, r9, r12 corrupted
 * This routine must preserve:
 *  r4, r7, r8
 */
                .align  5
cache_off:      mov     r3, #12                 @ cache_off function
                b       call_cache_fn

__armv4_mpu_cache_off:
                mrc     p15, 0, r0, c1, c0
                bic     r0, r0, #0x000d
                mcr     p15, 0, r0, c1, c0      @ turn MPU and cache off
                mov     r0, #0
                mcr     p15, 0, r0, c7, c10, 4  @ drain write buffer
                mcr     p15, 0, r0, c7, c6, 0   @ flush D-Cache
                mcr     p15, 0, r0, c7, c5, 0   @ flush I-Cache
                mov     pc, lr

__armv3_mpu_cache_off:
                mrc     p15, 0, r0, c1, c0
                bic     r0, r0, #0x000d
                mcr     p15, 0, r0, c1, c0, 0   @ turn MPU and cache off
                mov     r0, #0
                mcr     p15, 0, r0, c7, c0, 0   @ invalidate whole cache v3
                mov     pc, lr

__armv4_mmu_cache_off:
#ifdef CONFIG_MMU
                mrc     p15, 0, r0, c1, c0
                bic     r0, r0, #0x000d
                mcr     p15, 0, r0, c1, c0      @ turn MMU and cache off
                mov     r0, #0
                mcr     p15, 0, r0, c7, c7      @ invalidate whole cache v4
                mcr     p15, 0, r0, c8, c7      @ invalidate whole TLB v4
#endif
                mov     pc, lr

__armv7_mmu_cache_off:
                mrc     p15, 0, r0, c1, c0
#ifdef CONFIG_MMU
                bic     r0, r0, #0x000d
#else
                bic     r0, r0, #0x000c
#endif
                mcr     p15, 0, r0, c1, c0      @ turn MMU and cache off
                mov     r12, lr
                bl      __armv7_mmu_cache_flush
                mov     r0, #0
#ifdef CONFIG_MMU
                mcr     p15, 0, r0, c8, c7, 0   @ invalidate whole TLB
#endif
                mcr     p15, 0, r0, c7, c5, 6   @ invalidate BTC
                mcr     p15, 0, r0, c7, c10, 4  @ DSB
                mcr     p15, 0, r0, c7, c5, 4   @ ISB
                mov     pc, r12

/*
 * Clean and flush the cache to maintain consistency.
 *
 * On exit,
 *  r1, r2, r3, r9, r10, r11, r12 corrupted
 * This routine must preserve:
 *  r4, r6, r7, r8
 */
                .align  5
cache_clean_flush:
                mov     r3, #16
                b       call_cache_fn

__armv4_mpu_cache_flush:
                tst     r4, #1
                movne   pc, lr
                mov     r2, #1
                mov     r3, #0
                mcr     p15, 0, ip, c7, c6, 0   @ invalidate D cache
                mov     r1, #7 << 5             @ 8 segments
1:              orr     r3, r1, #63 << 26       @ 64 entries
2:              mcr     p15, 0, r3, c7, c14, 2  @ clean & invalidate D index
                subs    r3, r3, #1 << 26
                bcs     2b                      @ entries 63 to 0
                subs    r1, r1, #1 << 5
                bcs     1b                      @ segments 7 to 0

                teq     r2, #0
                mcrne   p15, 0, ip, c7, c5, 0   @ invalidate I cache
                mcr     p15, 0, ip, c7, c10, 4  @ drain WB
                mov     pc, lr

__fa526_cache_flush:
                tst     r4, #1
                movne   pc, lr
                mov     r1, #0
                mcr     p15, 0, r1, c7, c14, 0  @ clean and invalidate D cache
                mcr     p15, 0, r1, c7, c5, 0   @ flush I cache
                mcr     p15, 0, r1, c7, c10, 4  @ drain WB
                mov     pc, lr

__armv6_mmu_cache_flush:
                mov     r1, #0
                tst     r4, #1
                mcreq   p15, 0, r1, c7, c14, 0  @ clean+invalidate D
                mcr     p15, 0, r1, c7, c5, 0   @ invalidate I+BTB
                mcreq   p15, 0, r1, c7, c15, 0  @ clean+invalidate unified
                mcr     p15, 0, r1, c7, c10, 4  @ drain WB
                mov     pc, lr

__armv7_mmu_cache_flush:
                tst     r4, #1
                bne     iflush
                mrc     p15, 0, r10, c0, c1, 5  @ read ID_MMFR1
                tst     r10, #0xf << 16         @ hierarchical cache (ARMv7)
                mov     r10, #0
                beq     hierarchical
                mcr     p15, 0, r10, c7, c14, 0 @ clean+invalidate D
                b       iflush
hierarchical:
                mcr     p15, 0, r10, c7, c10, 5 @ DMB
                stmfd   sp!, {r0-r7, r9-r11}
                mrc     p15, 1, r0, c0, c0, 1   @ read clidr
                ands    r3, r0, #0x7000000      @ extract loc from clidr
                mov     r3, r3, lsr #23         @ left align loc bit field
                beq     finished                @ if loc is 0, then no need to clean
                mov     r10, #0                 @ start clean at cache level 0
loop1:
                add     r2, r10, r10, lsr #1    @ work out 3x current cache level
                mov     r1, r0, lsr r2          @ extract cache type bits from clidr
                and     r1, r1, #7              @ mask of the bits for current cache only
                cmp     r1, #2                  @ see what cache we have at this level
                blt     skip                    @ skip if no cache, or just i-cache
                mcr     p15, 2, r10, c0, c0, 0  @ select current cache level in cssr
                mcr     p15, 0, r10, c7, c5, 4  @ isb to sync the new cssr&csidr
                mrc     p15, 1, r1, c0, c0, 0   @ read the new csidr
                and     r2, r1, #7              @ extract the length of the cache lines
                add     r2, r2, #4              @ add 4 (line length offset)
                ldr     r4, =0x3ff
                ands    r4, r4, r1, lsr #3      @ extract maximum way number
                clz     r5, r4                  @ find bit position of way size increment
                ldr     r7, =0x7fff
                ands    r7, r7, r1, lsr #13     @ extract maximum set index
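                /*
                 * For reference: loop2/loop3 below assemble the set/way
                 * operand for DCCISW from these values: the way index goes
                 * in the top bits (shifted left by r5 = clz of the maximum
                 * way number), the set index is shifted left by r2 (log2 of
                 * the line length in bytes), and r10 supplies the cache
                 * level in bits [3:1].
                 */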
loop2:
                mov     r9, r4                  @ create working copy of max way size
loop3:
 ARM(           orr     r11, r10, r9, lsl r5    ) @ factor way and cache number into r11
 ARM(           orr     r11, r11, r7, lsl r2    ) @ factor index number into r11
 THUMB(         lsl     r6, r9, r5              )
 THUMB(         orr     r11, r10, r6            ) @ factor way and cache number into r11
 THUMB(         lsl     r6, r7, r2              )
 THUMB(         orr     r11, r11, r6            ) @ factor index number into r11
                mcr     p15, 0, r11, c7, c14, 2 @ clean & invalidate by set/way
                subs    r9, r9, #1              @ decrement the way
                bge     loop3
                subs    r7, r7, #1              @ decrement the index
                bge     loop2
skip:
                add     r10, r10, #2            @ increment cache number
                cmp     r3, r10
                bgt     loop1
finished:
                ldmfd   sp!, {r0-r7, r9-r11}
                mov     r10, #0                 @ switch back to cache level 0
                mcr     p15, 2, r10, c0, c0, 0  @ select current cache level in cssr
iflush:
                mcr     p15, 0, r10, c7, c10, 4 @ DSB
                mcr     p15, 0, r10, c7, c5, 0  @ invalidate I+BTB
                mcr     p15, 0, r10, c7, c10, 4 @ DSB
                mcr     p15, 0, r10, c7, c5, 4  @ ISB
                mov     pc, lr

__armv5tej_mmu_cache_flush:
                tst     r4, #1
                movne   pc, lr
1:              mrc     p15, 0, r15, c7, c14, 3 @ test,clean,invalidate D cache
                bne     1b
                mcr     p15, 0, r0, c7, c5, 0   @ flush I cache
                mcr     p15, 0, r0, c7, c10, 4  @ drain WB
                mov     pc, lr

__armv4_mmu_cache_flush:
                tst     r4, #1
                movne   pc, lr
                mov     r2, #64*1024            @ default: 32K dcache size (*2)
                mov     r11, #32                @ default: 32 byte line size
                mrc     p15, 0, r3, c0, c0, 1   @ read cache type
                teq     r3, r9                  @ cache ID register present?
                beq     no_cache_id
                mov     r1, r3, lsr #18
                and     r1, r1, #7
                mov     r2, #1024
                mov     r2, r2, lsl r1          @ base dcache size *2
                tst     r3, #1 << 14            @ test M bit
                addne   r2, r2, r2, lsr #1      @ +1/2 size if M == 1
                mov     r3, r3, lsr #12
                and     r3, r3, #3
                mov     r11, #8
                mov     r11, r11, lsl r3        @ cache line size in bytes
no_cache_id:
                mov     r1, pc
                bic     r1, r1, #63             @ align to longest cache line
                add     r2, r1, r2
1:
 ARM(           ldr     r3, [r1], r11           ) @ s/w flush D cache
 THUMB(         ldr     r3, [r1]                ) @ s/w flush D cache
 THUMB(         add     r1, r1, r11             )
                teq     r1, r2
                bne     1b

                mcr     p15, 0, r1, c7, c5, 0   @ flush I cache
                mcr     p15, 0, r1, c7, c6, 0   @ flush D cache
                mcr     p15, 0, r1, c7, c10, 4  @ drain WB
                mov     pc, lr

__armv3_mmu_cache_flush:
__armv3_mpu_cache_flush:
                tst     r4, #1
                movne   pc, lr
                mov     r1, #0
                mcr     p15, 0, r1, c7, c0, 0   @ invalidate whole cache v3
                mov     pc, lr

/*
 * Various debugging routines for printing hex characters and
 * memory, which again must be relocatable.
 */
#ifdef DEBUG
                .align  2
                .type   phexbuf,#object
phexbuf:        .space  12
                .size   phexbuf, . - phexbuf

@ phex corrupts {r0, r1, r2, r3}
phex:           adr     r3, phexbuf
                mov     r2, #0
                strb    r2, [r3, r1]
1:              subs    r1, r1, #1
                movmi   r0, r3
                bmi     puts
                and     r2, r0, #15
                mov     r0, r0, lsr #4
                cmp     r2, #10
                addge   r2, r2, #7
                add     r2, r2, #'0'
                strb    r2, [r3, r1]
                b       1b
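@ Nibble-to-ASCII note: values 0-9 become '0'-'9' directly, while for
@ 10-15 the "addge r2, r2, #7" skips the seven characters between '9'
@ and 'A' in ASCII so they come out as 'A'-'F'.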

@ puts corrupts {r0, r1, r2, r3}
puts:           loadsp  r3, r1
1:              ldrb    r2, [r0], #1
                teq     r2, #0
                moveq   pc, lr
2:              writeb  r2, r3
                mov     r1, #0x00020000
3:              subs    r1, r1, #1
                bne     3b
                teq     r2, #'\n'
                moveq   r2, #'\r'
                beq     2b
                teq     r0, #0
                bne     1b
                mov     pc, lr
@ putc corrupts {r0, r1, r2, r3}
putc:
                mov     r2, r0
                mov     r0, #0
                loadsp  r3, r1
                b       2b

@ memdump corrupts {r0, r1, r2, r3, r10, r11, r12, lr}
memdump:        mov     r12, r0
                mov     r10, lr
                mov     r11, #0
2:              mov     r0, r11, lsl #2
                add     r0, r0, r12
                mov     r1, #8
                bl      phex
                mov     r0, #':'
                bl      putc
1:              mov     r0, #' '
                bl      putc
                ldr     r0, [r12, r11, lsl #2]
                mov     r1, #8
                bl      phex
                and     r0, r11, #7
                teq     r0, #3
                moveq   r0, #' '
                bleq    putc
                and     r0, r11, #7
                add     r11, r11, #1
                teq     r0, #7
                bne     1b
                mov     r0, #'\n'
                bl      putc
                cmp     r11, #64
                blt     2b
                mov     pc, r10
#endif

                .ltorg

#ifdef CONFIG_ARM_VIRT_EXT
.align 5
__hyp_reentry_vectors:
                W(b)    .                       @ reset
                W(b)    .                       @ undef
                W(b)    .                       @ svc
                W(b)    .                       @ pabort
                W(b)    .                       @ dabort
                W(b)    __enter_kernel          @ hyp
                W(b)    .                       @ irq
                W(b)    .                       @ fiq
#endif /* CONFIG_ARM_VIRT_EXT */

__enter_kernel:
                mov     r0, #0                  @ must be 0
 ARM(           mov     pc, r4          )       @ call kernel
 M_CLASS(       add     r4, r4, #1      )       @ enter in Thumb mode for M class
 THUMB(         bx      r4              )       @ entry point is always ARM for A/R classes

reloc_code_end:

#ifdef CONFIG_EFI_STUB
                .align  2
_start:         .long   start - .

ENTRY(efi_stub_entry)
                @ Allocate space on the stack for passing the current zImage
                @ address and for the EFI stub to return the new entry point
                @ of the zImage, as the EFI stub may copy the kernel. The
                @ pointer address is passed in r2; r0 and r1 are passed
                @ through from the EFI firmware to efi_entry.
                adr     ip, _start
                ldr     r3, [ip]
                add     r3, r3, ip
                stmfd   sp!, {r3, lr}
                mov     r2, sp                  @ pass zImage address in r2
                bl      efi_entry

                @ Check for error return from EFI stub. r0 has FDT address
                @ or error code.
                cmn     r0, #1
                beq     efi_load_fail

                @ Preserve return value of efi_entry() in r4
                mov     r4, r0
                bl      cache_clean_flush
                bl      cache_off

                @ Set parameters for booting zImage according to boot protocol
                @ put FDT address in r2, it was returned by efi_entry()
                @ r1 is the machine type, and r0 needs to be 0
                mov     r0, #0
                mov     r1, #0xFFFFFFFF
                mov     r2, r4

                @ Branch to (possibly) relocated zImage that is in [sp]
                ldr     lr, [sp]
                ldr     ip, =start_offset
                add     lr, lr, ip
                mov     pc, lr                          @ no mode switch

efi_load_fail:
                @ Return EFI_LOAD_ERROR to EFI firmware on error.
                ldr     r0, =0x80000001
                ldmfd   sp!, {ip, pc}
ENDPROC(efi_stub_entry)
#endif

                .align
                .section ".stack", "aw", %nobits
.L_user_stack:  .space  4096
.L_user_stack_end: