linux/arch/arm/boot/compressed/head.S
   1/*
   2 *  linux/arch/arm/boot/compressed/head.S
   3 *
   4 *  Copyright (C) 1996-2002 Russell King
   5 *  Copyright (C) 2004 Hyok S. Choi (MPU support)
   6 *
   7 * This program is free software; you can redistribute it and/or modify
   8 * it under the terms of the GNU General Public License version 2 as
   9 * published by the Free Software Foundation.
  10 */
  11#include <linux/linkage.h>
  12#include <asm/assembler.h>
  13#include <asm/v7m.h>
  14
  15 AR_CLASS(      .arch   armv7-a )
  16 M_CLASS(       .arch   armv7-m )
  17
  18/*
  19 * Debugging stuff
  20 *
  21 * Note that these macros must not contain any code which is not
  22 * 100% relocatable.  Any attempt to do so will result in a crash.
  23 * Please select one of the following when turning on debugging.
  24 */
  25#ifdef DEBUG
  26
  27#if defined(CONFIG_DEBUG_ICEDCC)
  28
  29#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7)
  30                .macro  loadsp, rb, tmp
  31                .endm
  32                .macro  writeb, ch, rb
  33                mcr     p14, 0, \ch, c0, c5, 0
  34                .endm
  35#elif defined(CONFIG_CPU_XSCALE)
  36                .macro  loadsp, rb, tmp
  37                .endm
  38                .macro  writeb, ch, rb
  39                mcr     p14, 0, \ch, c8, c0, 0
  40                .endm
  41#else
  42                .macro  loadsp, rb, tmp
  43                .endm
  44                .macro  writeb, ch, rb
  45                mcr     p14, 0, \ch, c1, c0, 0
  46                .endm
  47#endif
  48
  49#else
  50
  51#include CONFIG_DEBUG_LL_INCLUDE
  52
  53                .macro  writeb, ch, rb
  54                senduart \ch, \rb
  55                .endm
  56
  57#if defined(CONFIG_ARCH_SA1100)
  58                .macro  loadsp, rb, tmp
  59                mov     \rb, #0x80000000        @ physical base address
  60#ifdef CONFIG_DEBUG_LL_SER3
  61                add     \rb, \rb, #0x00050000   @ Ser3
  62#else
  63                add     \rb, \rb, #0x00010000   @ Ser1
  64#endif
  65                .endm
  66#else
  67                .macro  loadsp, rb, tmp
  68                addruart \rb, \tmp
  69                .endm
  70#endif
  71#endif
  72#endif
  73
  74                .macro  kputc,val
  75                mov     r0, \val
  76                bl      putc
  77                .endm
  78
  79                .macro  kphex,val,len
  80                mov     r0, \val
  81                mov     r1, #\len
  82                bl      phex
  83                .endm
  84
  85                .macro  debug_reloc_start
  86#ifdef DEBUG
  87                kputc   #'\n'
  88                kphex   r6, 8           /* processor id */
  89                kputc   #':'
  90                kphex   r7, 8           /* architecture id */
  91#ifdef CONFIG_CPU_CP15
  92                kputc   #':'
  93                mrc     p15, 0, r0, c1, c0
  94                kphex   r0, 8           /* control reg */
  95#endif
  96                kputc   #'\n'
  97                kphex   r5, 8           /* decompressed kernel start */
  98                kputc   #'-'
  99                kphex   r9, 8           /* decompressed kernel end  */
 100                kputc   #'>'
 101                kphex   r4, 8           /* kernel execution address */
 102                kputc   #'\n'
 103#endif
 104                .endm
 105
 106                .macro  debug_reloc_end
 107#ifdef DEBUG
 108                kphex   r5, 8           /* end of kernel */
 109                kputc   #'\n'
 110                mov     r0, r4
 111                bl      memdump         /* dump 256 bytes at start of kernel */
 112#endif
 113                .endm
 114
 115                .section ".start", #alloc, #execinstr
 116/*
 117 * sort out different calling conventions
 118 */
 119                .align
 120                /*
 121                 * Always enter in ARM state for CPUs that support the ARM ISA.
 122                 * As of today (2014) that's exactly the members of the A and R
 123                 * classes.
 124                 */
 125 AR_CLASS(      .arm    )
 126start:
 127                .type   start,#function
 128                .rept   7
 129                mov     r0, r0
 130                .endr
 131   ARM(         mov     r0, r0          )
 132   ARM(         b       1f              )
 133 THUMB(         badr    r12, 1f         )
 134 THUMB(         bx      r12             )
 135
 136                .word   _magic_sig      @ Magic numbers to help the loader
 137                .word   _magic_start    @ absolute load/run zImage address
 138                .word   _magic_end      @ zImage end address
 139                .word   0x04030201      @ endianness flag
 140
 141 THUMB(         .thumb                  )
 1421:
 143 ARM_BE8(       setend  be              )       @ go BE8 if compiled for BE8
 144 AR_CLASS(      mrs     r9, cpsr        )
 145#ifdef CONFIG_ARM_VIRT_EXT
 146                bl      __hyp_stub_install      @ get into SVC mode, reversibly
 147#endif
 148                mov     r7, r1                  @ save architecture ID
 149                mov     r8, r2                  @ save atags pointer
 150
 151#ifndef CONFIG_CPU_V7M
 152                /*
 153                 * Booting from Angel - need to enter SVC mode and disable
 154                 * FIQs/IRQs (numeric definitions from angel arm.h source).
 155                 * We only do this if we were in user mode on entry.
 156                 */
 157                mrs     r2, cpsr                @ get current mode
 158                tst     r2, #3                  @ not user?
 159                bne     not_angel
 160                mov     r0, #0x17               @ angel_SWIreason_EnterSVC
 161 ARM(           swi     0x123456        )       @ angel_SWI_ARM
 162 THUMB(         svc     0xab            )       @ angel_SWI_THUMB
 163not_angel:
 164                safe_svcmode_maskall r0
 165                msr     spsr_cxsf, r9           @ Save the CPU boot mode in
 166                                                @ SPSR
 167#endif
 168                /*
 169                 * Note that some cache flushing and other stuff may
 170                 * be needed here - is there an Angel SWI call for this?
 171                 */
 172
 173                /*
 174                 * some architecture specific code can be inserted
 175                 * by the linker here, but it should preserve r7, r8, and r9.
 176                 */
 177
 178                .text
 179
 180#ifdef CONFIG_AUTO_ZRELADDR
 181                /*
 182                 * Find the start of physical memory.  As we are executing
 183                 * without the MMU on, we are in the physical address space.
 184                 * We just need to get rid of any offset by aligning the
 185                 * address.
 186                 *
 187                 * This alignment is a balance between the requirements of
 188                 * different platforms - we have chosen 128MB to allow
 189                 * platforms which align the start of their physical memory
 190                 * to 128MB to use this feature, while allowing the zImage
 191                 * to be placed within the first 128MB of memory on other
 192                 * platforms.  Increasing the alignment means we place
 193                 * stricter alignment requirements on the start of physical
 194                 * memory, but relaxing it means that we break people who
  195 * are already placing their zImage in (e.g.) the top 64MB
 196                 * of this range.
 197                 */
 198                mov     r4, pc
 199                and     r4, r4, #0xf8000000
 200                /* Determine final kernel image address. */
 201                add     r4, r4, #TEXT_OFFSET
 202#else
 203                ldr     r4, =zreladdr
 204#endif
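                /*
                 * A rough C rendering of the two instructions above
                 * (illustrative only; 0x08000000 is the 128MB alignment
                 * discussed in the comment):
                 *
                 *     kernel_addr = (pc & ~(0x08000000 - 1)) + TEXT_OFFSET;
                 */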
 205
 206                /*
  207                 * Set up a page table only if it won't overwrite ourselves.
  208                 * That means r4 < pc || r4 - 16k page directory > &_end.
  209                 * Given that r4 > &_end is most infrequent, we add a rough
 210                 * additional 1MB of room for a possible appended DTB.
 211                 */
 212                mov     r0, pc
 213                cmp     r0, r4
 214                ldrcc   r0, LC0+32
 215                addcc   r0, r0, pc
 216                cmpcc   r4, r0
 217                orrcc   r4, r4, #1              @ remember we skipped cache_on
 218                blcs    cache_on
 219
 220restart:        adr     r0, LC0
 221                ldmia   r0, {r1, r2, r3, r6, r10, r11, r12}
 222                ldr     sp, [r0, #28]
 223
 224                /*
 225                 * We might be running at a different address.  We need
 226                 * to fix up various pointers.
 227                 */
 228                sub     r0, r0, r1              @ calculate the delta offset
 229                add     r6, r6, r0              @ _edata
 230                add     r10, r10, r0            @ inflated kernel size location
 231
 232                /*
 233                 * The kernel build system appends the size of the
 234                 * decompressed kernel at the end of the compressed data
 235                 * in little-endian form.
 236                 */
 237                ldrb    r9, [r10, #0]
 238                ldrb    lr, [r10, #1]
 239                orr     r9, r9, lr, lsl #8
 240                ldrb    lr, [r10, #2]
 241                ldrb    r10, [r10, #3]
 242                orr     r9, r9, lr, lsl #16
 243                orr     r9, r9, r10, lsl #24
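                /*
                 * In C terms the byte loads above amount to roughly this
                 * (illustrative only; p stands for the byte pointer held
                 * in r10):
                 *
                 *     size = p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24;
                 */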
 244
 245#ifndef CONFIG_ZBOOT_ROM
 246                /* malloc space is above the relocated stack (64k max) */
 247                add     sp, sp, r0
 248                add     r10, sp, #0x10000
 249#else
 250                /*
 251                 * With ZBOOT_ROM the bss/stack is non relocatable,
 252                 * but someone could still run this code from RAM,
 253                 * in which case our reference is _edata.
 254                 */
 255                mov     r10, r6
 256#endif
 257
 258                mov     r5, #0                  @ init dtb size to 0
 259#ifdef CONFIG_ARM_APPENDED_DTB
 260/*
 261 *   r0  = delta
 262 *   r2  = BSS start
 263 *   r3  = BSS end
 264 *   r4  = final kernel address (possibly with LSB set)
 265 *   r5  = appended dtb size (still unknown)
 266 *   r6  = _edata
 267 *   r7  = architecture ID
 268 *   r8  = atags/device tree pointer
 269 *   r9  = size of decompressed image
  270 *   r10 = end of this image, including bss/stack/malloc space if non-XIP
 271 *   r11 = GOT start
 272 *   r12 = GOT end
 273 *   sp  = stack pointer
 274 *
 275 * if there are device trees (dtb) appended to zImage, advance r10 so that the
 276 * dtb data will get relocated along with the kernel if necessary.
 277 */
 278
 279                ldr     lr, [r6, #0]
 280#ifndef __ARMEB__
 281                ldr     r1, =0xedfe0dd0         @ sig is 0xd00dfeed big endian
 282#else
 283                ldr     r1, =0xd00dfeed
 284#endif
 285                cmp     lr, r1
 286                bne     dtb_check_done          @ not found
 287
 288#ifdef CONFIG_ARM_ATAG_DTB_COMPAT
 289                /*
 290                 * OK... Let's do some funky business here.
 291                 * If we do have a DTB appended to zImage, and we do have
  292                 * an ATAG list around, we want the latter to be translated
  293                 * and folded into the former here. No GOT fixup has occurred
  294                 * yet, but none of the code we're about to call uses any
  295                 * global variables.
  296                 */
 297
 298                /* Get the initial DTB size */
 299                ldr     r5, [r6, #4]
 300#ifndef __ARMEB__
 301                /* convert to little endian */
 302                eor     r1, r5, r5, ror #16
 303                bic     r1, r1, #0x00ff0000
 304                mov     r5, r5, ror #8
 305                eor     r5, r5, r1, lsr #8
 306#endif
 307                /* 50% DTB growth should be good enough */
 308                add     r5, r5, r5, lsr #1
 309                /* preserve 64-bit alignment */
 310                add     r5, r5, #7
 311                bic     r5, r5, #7
 312                /* clamp to 32KB min and 1MB max */
 313                cmp     r5, #(1 << 15)
 314                movlo   r5, #(1 << 15)
 315                cmp     r5, #(1 << 20)
 316                movhi   r5, #(1 << 20)
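                /*
                 * Sketch of the sizing above (the eor/ror sequence is an
                 * open-coded 32-bit byte swap; helper names are placeholders,
                 * not part of this file):
                 *
                 *     dtb_size  = be32_to_cpu(raw_size);
                 *     dtb_size += dtb_size / 2;          // 50% growth
                 *     dtb_size  = (dtb_size + 7) & ~7;   // 64-bit alignment
                 *     dtb_size  = clamp(dtb_size, 32K, 1M);
                 */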
 317                /* temporarily relocate the stack past the DTB work space */
 318                add     sp, sp, r5
 319
 320                stmfd   sp!, {r0-r3, ip, lr}
 321                mov     r0, r8
 322                mov     r1, r6
 323                mov     r2, r5
 324                bl      atags_to_fdt
 325
 326                /*
 327                 * If returned value is 1, there is no ATAG at the location
 328                 * pointed by r8.  Try the typical 0x100 offset from start
 329                 * of RAM and hope for the best.
 330                 */
 331                cmp     r0, #1
 332                sub     r0, r4, #TEXT_OFFSET
 333                bic     r0, r0, #1
 334                add     r0, r0, #0x100
 335                mov     r1, r6
 336                mov     r2, r5
 337                bleq    atags_to_fdt
 338
 339                ldmfd   sp!, {r0-r3, ip, lr}
 340                sub     sp, sp, r5
 341#endif
 342
 343                mov     r8, r6                  @ use the appended device tree
 344
 345                /*
 346                 * Make sure that the DTB doesn't end up in the final
 347                 * kernel's .bss area. To do so, we adjust the decompressed
 348                 * kernel size to compensate if that .bss size is larger
 349                 * than the relocated code.
 350                 */
 351                ldr     r5, =_kernel_bss_size
 352                adr     r1, wont_overwrite
 353                sub     r1, r6, r1
 354                subs    r1, r5, r1
 355                addhi   r9, r9, r1
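                /*
                 * Roughly, the adjustment above does the following, with
                 * reloc_size naming the code that will be copied past the
                 * decompressed kernel (illustrative only):
                 *
                 *     reloc_size = _edata - wont_overwrite;
                 *     if (_kernel_bss_size > reloc_size)
                 *             image_size += _kernel_bss_size - reloc_size;
                 */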
 356
 357                /* Get the current DTB size */
 358                ldr     r5, [r6, #4]
 359#ifndef __ARMEB__
 360                /* convert r5 (dtb size) to little endian */
 361                eor     r1, r5, r5, ror #16
 362                bic     r1, r1, #0x00ff0000
 363                mov     r5, r5, ror #8
 364                eor     r5, r5, r1, lsr #8
 365#endif
 366
 367                /* preserve 64-bit alignment */
 368                add     r5, r5, #7
 369                bic     r5, r5, #7
 370
 371                /* relocate some pointers past the appended dtb */
 372                add     r6, r6, r5
 373                add     r10, r10, r5
 374                add     sp, sp, r5
 375dtb_check_done:
 376#endif
 377
 378/*
 379 * Check to see if we will overwrite ourselves.
 380 *   r4  = final kernel address (possibly with LSB set)
 381 *   r9  = size of decompressed image
  382 *   r10 = end of this image, including bss/stack/malloc space if non-XIP
 383 * We basically want:
 384 *   r4 - 16k page directory >= r10 -> OK
 385 *   r4 + image length <= address of wont_overwrite -> OK
 386 * Note: the possible LSB in r4 is harmless here.
 387 */
 388                add     r10, r10, #16384
 389                cmp     r4, r10
 390                bhs     wont_overwrite
 391                add     r10, r4, r9
 392                adr     r9, wont_overwrite
 393                cmp     r10, r9
 394                bls     wont_overwrite
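/*
 * The two tests above are, in rough C terms (illustrative only):
 *
 *     if (kernel_addr >= image_end + 16K)              // room below us
 *             goto wont_overwrite;
 *     if (kernel_addr + image_size <= &wont_overwrite) // room above the copier
 *             goto wont_overwrite;
 *     // otherwise fall through and relocate past the decompressed kernel
 */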
 395
 396/*
 397 * Relocate ourselves past the end of the decompressed kernel.
 398 *   r6  = _edata
 399 *   r10 = end of the decompressed kernel
 400 * Because we always copy ahead, we need to do it from the end and go
 401 * backward in case the source and destination overlap.
 402 */
 403                /*
 404                 * Bump to the next 256-byte boundary with the size of
 405                 * the relocation code added. This avoids overwriting
  406                 * ourselves when the offset is small.
 407                 */
 408                add     r10, r10, #((reloc_code_end - restart + 256) & ~255)
 409                bic     r10, r10, #255
 410
 411                /* Get start of code we want to copy and align it down. */
 412                adr     r5, restart
 413                bic     r5, r5, #31
 414
 415/* Relocate the hyp vector base if necessary */
 416#ifdef CONFIG_ARM_VIRT_EXT
 417                mrs     r0, spsr
 418                and     r0, r0, #MODE_MASK
 419                cmp     r0, #HYP_MODE
 420                bne     1f
 421
 422                bl      __hyp_get_vectors
 423                sub     r0, r0, r5
 424                add     r0, r0, r10
 425                bl      __hyp_set_vectors
 4261:
 427#endif
 428
 429                sub     r9, r6, r5              @ size to copy
 430                add     r9, r9, #31             @ rounded up to a multiple
 431                bic     r9, r9, #31             @ ... of 32 bytes
 432                add     r6, r9, r5
 433                add     r9, r9, r10
 434
 4351:              ldmdb   r6!, {r0 - r3, r10 - r12, lr}
 436                cmp     r6, r5
 437                stmdb   r9!, {r0 - r3, r10 - r12, lr}
 438                bhi     1b
 439
 440                /* Preserve offset to relocated code. */
 441                sub     r6, r9, r6
 442
 443#ifndef CONFIG_ZBOOT_ROM
 444                /* cache_clean_flush may use the stack, so relocate it */
 445                add     sp, sp, r6
 446#endif
 447
 448                bl      cache_clean_flush
 449
 450                badr    r0, restart
 451                add     r0, r0, r6
 452                mov     pc, r0
 453
 454wont_overwrite:
 455/*
 456 * If delta is zero, we are running at the address we were linked at.
 457 *   r0  = delta
 458 *   r2  = BSS start
 459 *   r3  = BSS end
 460 *   r4  = kernel execution address (possibly with LSB set)
 461 *   r5  = appended dtb size (0 if not present)
 462 *   r7  = architecture ID
 463 *   r8  = atags pointer
 464 *   r11 = GOT start
 465 *   r12 = GOT end
 466 *   sp  = stack pointer
 467 */
 468                orrs    r1, r0, r5
 469                beq     not_relocated
 470
 471                add     r11, r11, r0
 472                add     r12, r12, r0
 473
 474#ifndef CONFIG_ZBOOT_ROM
 475                /*
  476                 * If we're running fully PIC (i.e. CONFIG_ZBOOT_ROM=n),
 477                 * we need to fix up pointers into the BSS region.
 478                 * Note that the stack pointer has already been fixed up.
 479                 */
 480                add     r2, r2, r0
 481                add     r3, r3, r0
 482
 483                /*
 484                 * Relocate all entries in the GOT table.
 485                 * Bump bss entries to _edata + dtb size
 486                 */
 4871:              ldr     r1, [r11, #0]           @ relocate entries in the GOT
 488                add     r1, r1, r0              @ This fixes up C references
 489                cmp     r1, r2                  @ if entry >= bss_start &&
 490                cmphs   r3, r1                  @       bss_end > entry
 491                addhi   r1, r1, r5              @    entry += dtb size
 492                str     r1, [r11], #4           @ next entry
 493                cmp     r11, r12
 494                blo     1b
 495
 496                /* bump our bss pointers too */
 497                add     r2, r2, r5
 498                add     r3, r3, r5
 499
 500#else
 501
 502                /*
 503                 * Relocate entries in the GOT table.  We only relocate
 504                 * the entries that are outside the (relocated) BSS region.
 505                 */
 5061:              ldr     r1, [r11, #0]           @ relocate entries in the GOT
 507                cmp     r1, r2                  @ entry < bss_start ||
 508                cmphs   r3, r1                  @ _end < entry
 509                addlo   r1, r1, r0              @ table.  This fixes up the
 510                str     r1, [r11], #4           @ C references.
 511                cmp     r11, r12
 512                blo     1b
 513#endif
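                /*
                 * For the fully-PIC case (CONFIG_ZBOOT_ROM=n) the loop above
                 * is roughly this (illustrative only):
                 *
                 *     for (p = got_start; p < got_end; p++) {
                 *             *p += delta;                    // fix up C references
                 *             if (*p >= bss_start && *p < bss_end)
                 *                     *p += dtb_size;         // BSS moved past the DTB
                 *     }
                 */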
 514
 515not_relocated:  mov     r0, #0
 5161:              str     r0, [r2], #4            @ clear bss
 517                str     r0, [r2], #4
 518                str     r0, [r2], #4
 519                str     r0, [r2], #4
 520                cmp     r2, r3
 521                blo     1b
 522
 523                /*
 524                 * Did we skip the cache setup earlier?
 525                 * That is indicated by the LSB in r4.
 526                 * Do it now if so.
 527                 */
 528                tst     r4, #1
 529                bic     r4, r4, #1
 530                blne    cache_on
 531
 532/*
  533 * The C runtime environment should now be set up sufficiently.
 534 * Set up some pointers, and start decompressing.
 535 *   r4  = kernel execution address
 536 *   r7  = architecture ID
 537 *   r8  = atags pointer
 538 */
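                /*
                 * The call below hands over to the C decompressor in misc.c,
                 * whose prototype is roughly
                 *
                 *     decompress_kernel(output_start, free_mem_start,
                 *                       free_mem_end, arch_id);
                 *
                 * so r0 = destination, r1/r2 = 64k of malloc space above the
                 * stack, r3 = architecture number.
                 */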
 539                mov     r0, r4
 540                mov     r1, sp                  @ malloc space above stack
 541                add     r2, sp, #0x10000        @ 64k max
 542                mov     r3, r7
 543                bl      decompress_kernel
 544                bl      cache_clean_flush
 545                bl      cache_off
 546                mov     r1, r7                  @ restore architecture number
 547                mov     r2, r8                  @ restore atags pointer
 548
 549#ifdef CONFIG_ARM_VIRT_EXT
 550                mrs     r0, spsr                @ Get saved CPU boot mode
 551                and     r0, r0, #MODE_MASK
 552                cmp     r0, #HYP_MODE           @ if not booted in HYP mode...
 553                bne     __enter_kernel          @ boot kernel directly
 554
 555                adr     r12, .L__hyp_reentry_vectors_offset
 556                ldr     r0, [r12]
 557                add     r0, r0, r12
 558
 559                bl      __hyp_set_vectors
 560                __HVC(0)                        @ otherwise bounce to hyp mode
 561
 562                b       .                       @ should never be reached
 563
 564                .align  2
 565.L__hyp_reentry_vectors_offset: .long   __hyp_reentry_vectors - .
 566#else
 567                b       __enter_kernel
 568#endif
 569
 570                .align  2
 571                .type   LC0, #object
 572LC0:            .word   LC0                     @ r1
 573                .word   __bss_start             @ r2
 574                .word   _end                    @ r3
 575                .word   _edata                  @ r6
 576                .word   input_data_end - 4      @ r10 (inflated size location)
 577                .word   _got_start              @ r11
 578                .word   _got_end                @ ip
 579                .word   .L_user_stack_end       @ sp
 580                .word   _end - restart + 16384 + 1024*1024
 581                .size   LC0, . - LC0
 582
 583#ifdef CONFIG_ARCH_RPC
 584                .globl  params
 585params:         ldr     r0, =0x10000100         @ params_phys for RPC
 586                mov     pc, lr
 587                .ltorg
 588                .align
 589#endif
 590
 591/*
 592 * Turn on the cache.  We need to setup some page tables so that we
 593 * can have both the I and D caches on.
 594 *
 595 * We place the page tables 16k down from the kernel execution address,
 596 * and we hope that nothing else is using it.  If we're using it, we
 597 * will go pop!
 598 *
 599 * On entry,
 600 *  r4 = kernel execution address
 601 *  r7 = architecture number
 602 *  r8 = atags pointer
 603 * On exit,
 604 *  r0, r1, r2, r3, r9, r10, r12 corrupted
 605 * This routine must preserve:
 606 *  r4, r7, r8
 607 */
 608                .align  5
 609cache_on:       mov     r3, #8                  @ cache_on function
 610                b       call_cache_fn
 611
 612/*
 613 * Initialize the highest priority protection region, PR7
  614 * to cover the whole 32-bit address space as cacheable and bufferable.
 615 */
 616__armv4_mpu_cache_on:
 617                mov     r0, #0x3f               @ 4G, the whole
 618                mcr     p15, 0, r0, c6, c7, 0   @ PR7 Area Setting
 619                mcr     p15, 0, r0, c6, c7, 1
 620
 621                mov     r0, #0x80               @ PR7
 622                mcr     p15, 0, r0, c2, c0, 0   @ D-cache on
 623                mcr     p15, 0, r0, c2, c0, 1   @ I-cache on
 624                mcr     p15, 0, r0, c3, c0, 0   @ write-buffer on
 625
 626                mov     r0, #0xc000
 627                mcr     p15, 0, r0, c5, c0, 1   @ I-access permission
 628                mcr     p15, 0, r0, c5, c0, 0   @ D-access permission
 629
 630                mov     r0, #0
 631                mcr     p15, 0, r0, c7, c10, 4  @ drain write buffer
 632                mcr     p15, 0, r0, c7, c5, 0   @ flush(inval) I-Cache
 633                mcr     p15, 0, r0, c7, c6, 0   @ flush(inval) D-Cache
 634                mrc     p15, 0, r0, c1, c0, 0   @ read control reg
 635                                                @ ...I .... ..D. WC.M
 636                orr     r0, r0, #0x002d         @ .... .... ..1. 11.1
 637                orr     r0, r0, #0x1000         @ ...1 .... .... ....
 638
 639                mcr     p15, 0, r0, c1, c0, 0   @ write control reg
 640
 641                mov     r0, #0
 642                mcr     p15, 0, r0, c7, c5, 0   @ flush(inval) I-Cache
 643                mcr     p15, 0, r0, c7, c6, 0   @ flush(inval) D-Cache
 644                mov     pc, lr
 645
 646__armv3_mpu_cache_on:
 647                mov     r0, #0x3f               @ 4G, the whole
 648                mcr     p15, 0, r0, c6, c7, 0   @ PR7 Area Setting
 649
 650                mov     r0, #0x80               @ PR7
 651                mcr     p15, 0, r0, c2, c0, 0   @ cache on
 652                mcr     p15, 0, r0, c3, c0, 0   @ write-buffer on
 653
 654                mov     r0, #0xc000
 655                mcr     p15, 0, r0, c5, c0, 0   @ access permission
 656
 657                mov     r0, #0
 658                mcr     p15, 0, r0, c7, c0, 0   @ invalidate whole cache v3
 659                /*
 660                 * ?? ARMv3 MMU does not allow reading the control register,
 661                 * does this really work on ARMv3 MPU?
 662                 */
 663                mrc     p15, 0, r0, c1, c0, 0   @ read control reg
 664                                                @ .... .... .... WC.M
 665                orr     r0, r0, #0x000d         @ .... .... .... 11.1
 666                /* ?? this overwrites the value constructed above? */
 667                mov     r0, #0
 668                mcr     p15, 0, r0, c1, c0, 0   @ write control reg
 669
 670                /* ?? invalidate for the second time? */
 671                mcr     p15, 0, r0, c7, c0, 0   @ invalidate whole cache v3
 672                mov     pc, lr
 673
 674#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
 675#define CB_BITS 0x08
 676#else
 677#define CB_BITS 0x0c
 678#endif
 679
 680__setup_mmu:    sub     r3, r4, #16384          @ Page directory size
 681                bic     r3, r3, #0xff           @ Align the pointer
 682                bic     r3, r3, #0x3f00
 683/*
 684 * Initialise the page tables, turning on the cacheable and bufferable
 685 * bits for the RAM area only.
 686 */
 687                mov     r0, r3
 688                mov     r9, r0, lsr #18
 689                mov     r9, r9, lsl #18         @ start of RAM
 690                add     r10, r9, #0x10000000    @ a reasonable RAM size
 691                mov     r1, #0x12               @ XN|U + section mapping
 692                orr     r1, r1, #3 << 10        @ AP=11
 693                add     r2, r3, #16384
 6941:              cmp     r1, r9                  @ if virt > start of RAM
 695                cmphs   r10, r1                 @   && end of RAM > virt
 696                bic     r1, r1, #0x1c           @ clear XN|U + C + B
 697                orrlo   r1, r1, #0x10           @ Set XN|U for non-RAM
 698                orrhs   r1, r1, r6              @ set RAM section settings
 699                str     r1, [r0], #4            @ 1:1 mapping
 700                add     r1, r1, #1048576
 701                teq     r0, r2
 702                bne     1b
 703/*
 704 * If ever we are running from Flash, then we surely want the cache
 705 * to be enabled also for our execution instance...  We map 2MB of it
  706 * so there is no map overlap problem for a compressed kernel of up to 1 MB.
 707 * If the execution is in RAM then we would only be duplicating the above.
 708 */
 709                orr     r1, r6, #0x04           @ ensure B is set for this
 710                orr     r1, r1, #3 << 10
 711                mov     r2, pc
 712                mov     r2, r2, lsr #20
 713                orr     r1, r1, r2, lsl #20
 714                add     r0, r3, r2, lsl #2
 715                str     r1, [r0], #4
 716                add     r1, r1, #1048576
 717                str     r1, [r0]
 718                mov     pc, lr
 719ENDPROC(__setup_mmu)
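/*
 * The loop above builds a flat 1:1 map of 4096 1MB section entries, marking
 * only the presumed 256MB RAM window cacheable/bufferable.  Very roughly, and
 * with placeholder names (illustrative only):
 *
 *     for (addr = 0; addr < 4G; addr += 1M) {
 *             desc = addr | SECTION | AP_RW;
 *             if (addr >= ram_start && addr < ram_start + 256M)
 *                     desc |= cb_bits;        // C+B for RAM
 *             else
 *                     desc |= XN;             // uncached elsewhere
 *             *pgd++ = desc;
 *     }
 */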
 720
 721@ Enable unaligned access on v6, to allow better code generation
 722@ for the decompressor C code:
 723__armv6_mmu_cache_on:
 724                mrc     p15, 0, r0, c1, c0, 0   @ read SCTLR
 725                bic     r0, r0, #2              @ A (no unaligned access fault)
 726                orr     r0, r0, #1 << 22        @ U (v6 unaligned access model)
 727                mcr     p15, 0, r0, c1, c0, 0   @ write SCTLR
 728                b       __armv4_mmu_cache_on
 729
 730__arm926ejs_mmu_cache_on:
 731#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
 732                mov     r0, #4                  @ put dcache in WT mode
 733                mcr     p15, 7, r0, c15, c0, 0
 734#endif
 735
 736__armv4_mmu_cache_on:
 737                mov     r12, lr
 738#ifdef CONFIG_MMU
 739                mov     r6, #CB_BITS | 0x12     @ U
 740                bl      __setup_mmu
 741                mov     r0, #0
 742                mcr     p15, 0, r0, c7, c10, 4  @ drain write buffer
 743                mcr     p15, 0, r0, c8, c7, 0   @ flush I,D TLBs
 744                mrc     p15, 0, r0, c1, c0, 0   @ read control reg
 745                orr     r0, r0, #0x5000         @ I-cache enable, RR cache replacement
 746                orr     r0, r0, #0x0030
 747 ARM_BE8(       orr     r0, r0, #1 << 25 )      @ big-endian page tables
 748                bl      __common_mmu_cache_on
 749                mov     r0, #0
 750                mcr     p15, 0, r0, c8, c7, 0   @ flush I,D TLBs
 751#endif
 752                mov     pc, r12
 753
 754__armv7_mmu_cache_on:
 755                mov     r12, lr
 756#ifdef CONFIG_MMU
 757                mrc     p15, 0, r11, c0, c1, 4  @ read ID_MMFR0
 758                tst     r11, #0xf               @ VMSA
 759                movne   r6, #CB_BITS | 0x02     @ !XN
 760                blne    __setup_mmu
 761                mov     r0, #0
 762                mcr     p15, 0, r0, c7, c10, 4  @ drain write buffer
 763                tst     r11, #0xf               @ VMSA
 764                mcrne   p15, 0, r0, c8, c7, 0   @ flush I,D TLBs
 765#endif
 766                mrc     p15, 0, r0, c1, c0, 0   @ read control reg
 767                bic     r0, r0, #1 << 28        @ clear SCTLR.TRE
 768                orr     r0, r0, #0x5000         @ I-cache enable, RR cache replacement
 769                orr     r0, r0, #0x003c         @ write buffer
 770                bic     r0, r0, #2              @ A (no unaligned access fault)
 771                orr     r0, r0, #1 << 22        @ U (v6 unaligned access model)
 772                                                @ (needed for ARM1176)
 773#ifdef CONFIG_MMU
 774 ARM_BE8(       orr     r0, r0, #1 << 25 )      @ big-endian page tables
 775                mrcne   p15, 0, r6, c2, c0, 2   @ read ttb control reg
 776                orrne   r0, r0, #1              @ MMU enabled
 777                movne   r1, #0xfffffffd         @ domain 0 = client
 778                bic     r6, r6, #1 << 31        @ 32-bit translation system
 779                bic     r6, r6, #3 << 0         @ use only ttbr0
 780                mcrne   p15, 0, r3, c2, c0, 0   @ load page table pointer
 781                mcrne   p15, 0, r1, c3, c0, 0   @ load domain access control
 782                mcrne   p15, 0, r6, c2, c0, 2   @ load ttb control
 783#endif
 784                mcr     p15, 0, r0, c7, c5, 4   @ ISB
 785                mcr     p15, 0, r0, c1, c0, 0   @ load control register
 786                mrc     p15, 0, r0, c1, c0, 0   @ and read it back
 787                mov     r0, #0
 788                mcr     p15, 0, r0, c7, c5, 4   @ ISB
 789                mov     pc, r12
 790
 791__fa526_cache_on:
 792                mov     r12, lr
 793                mov     r6, #CB_BITS | 0x12     @ U
 794                bl      __setup_mmu
 795                mov     r0, #0
 796                mcr     p15, 0, r0, c7, c7, 0   @ Invalidate whole cache
 797                mcr     p15, 0, r0, c7, c10, 4  @ drain write buffer
 798                mcr     p15, 0, r0, c8, c7, 0   @ flush UTLB
 799                mrc     p15, 0, r0, c1, c0, 0   @ read control reg
 800                orr     r0, r0, #0x1000         @ I-cache enable
 801                bl      __common_mmu_cache_on
 802                mov     r0, #0
 803                mcr     p15, 0, r0, c8, c7, 0   @ flush UTLB
 804                mov     pc, r12
 805
 806__common_mmu_cache_on:
 807#ifndef CONFIG_THUMB2_KERNEL
 808#ifndef DEBUG
 809                orr     r0, r0, #0x000d         @ Write buffer, mmu
 810#endif
 811                mov     r1, #-1
 812                mcr     p15, 0, r3, c2, c0, 0   @ load page table pointer
 813                mcr     p15, 0, r1, c3, c0, 0   @ load domain access control
 814                b       1f
 815                .align  5                       @ cache line aligned
 8161:              mcr     p15, 0, r0, c1, c0, 0   @ load control register
 817                mrc     p15, 0, r0, c1, c0, 0   @ and read it back to
 818                sub     pc, lr, r0, lsr #32     @ properly flush pipeline
 819#endif
 820
 821#define PROC_ENTRY_SIZE (4*5)
 822
 823/*
 824 * Here follow the relocatable cache support functions for the
 825 * various processors.  This is a generic hook for locating an
 826 * entry and jumping to an instruction at the specified offset
 827 * from the start of the block.  Please note this is all position
 828 * independent code.
 829 *
 830 *  r1  = corrupted
 831 *  r2  = corrupted
 832 *  r3  = block offset
 833 *  r9  = corrupted
 834 *  r12 = corrupted
 835 */
 836
 837call_cache_fn:  adr     r12, proc_types
 838#ifdef CONFIG_CPU_CP15
 839                mrc     p15, 0, r9, c0, c0      @ get processor ID
 840#elif defined(CONFIG_CPU_V7M)
 841                /*
 842                 * On v7-M the processor id is located in the V7M_SCB_CPUID
 843                 * register, but as cache handling is IMPLEMENTATION DEFINED on
  844                 * v7-M (if existent at all), we just return early here.
 845                 * If V7M_SCB_CPUID were used the cpu ID functions (i.e.
 846                 * __armv7_mmu_cache_{on,off,flush}) would be selected which
 847                 * use cp15 registers that are not implemented on v7-M.
 848                 */
 849                bx      lr
 850#else
 851                ldr     r9, =CONFIG_PROCESSOR_ID
 852#endif
 8531:              ldr     r1, [r12, #0]           @ get value
 854                ldr     r2, [r12, #4]           @ get mask
 855                eor     r1, r1, r9              @ (real ^ match)
 856                tst     r1, r2                  @       & mask
 857 ARM(           addeq   pc, r12, r3             ) @ call cache function
 858 THUMB(         addeq   r12, r3                 )
 859 THUMB(         moveq   pc, r12                 ) @ call cache function
 860                add     r12, r12, #PROC_ENTRY_SIZE
 861                b       1b
 862
 863/*
 864 * Table for cache operations.  This is basically:
 865 *   - CPU ID match
 866 *   - CPU ID mask
 867 *   - 'cache on' method instruction
 868 *   - 'cache off' method instruction
 869 *   - 'cache flush' method instruction
 870 *
 871 * We match an entry using: ((real_id ^ match) & mask) == 0
 872 *
 873 * Writethrough caches generally only need 'on' and 'off'
 874 * methods.  Writeback caches _must_ have the flush method
 875 * defined.
 876 */
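/*
 * call_cache_fn walks this table doing roughly the following (illustrative
 * only):
 *
 *     for (e = proc_types; ; e++)
 *             if (((processor_id ^ e->match) & e->mask) == 0)
 *                     jump to the on/off/flush slot selected by the offset;
 *
 * which is why every entry must stay exactly PROC_ENTRY_SIZE bytes long.
 */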
 877                .align  2
 878                .type   proc_types,#object
 879proc_types:
 880                .word   0x41000000              @ old ARM ID
 881                .word   0xff00f000
 882                mov     pc, lr
 883 THUMB(         nop                             )
 884                mov     pc, lr
 885 THUMB(         nop                             )
 886                mov     pc, lr
 887 THUMB(         nop                             )
 888
 889                .word   0x41007000              @ ARM7/710
 890                .word   0xfff8fe00
 891                mov     pc, lr
 892 THUMB(         nop                             )
 893                mov     pc, lr
 894 THUMB(         nop                             )
 895                mov     pc, lr
 896 THUMB(         nop                             )
 897
 898                .word   0x41807200              @ ARM720T (writethrough)
 899                .word   0xffffff00
 900                W(b)    __armv4_mmu_cache_on
 901                W(b)    __armv4_mmu_cache_off
 902                mov     pc, lr
 903 THUMB(         nop                             )
 904
 905                .word   0x41007400              @ ARM74x
 906                .word   0xff00ff00
 907                W(b)    __armv3_mpu_cache_on
 908                W(b)    __armv3_mpu_cache_off
 909                W(b)    __armv3_mpu_cache_flush
 910                
 911                .word   0x41009400              @ ARM94x
 912                .word   0xff00ff00
 913                W(b)    __armv4_mpu_cache_on
 914                W(b)    __armv4_mpu_cache_off
 915                W(b)    __armv4_mpu_cache_flush
 916
 917                .word   0x41069260              @ ARM926EJ-S (v5TEJ)
 918                .word   0xff0ffff0
 919                W(b)    __arm926ejs_mmu_cache_on
 920                W(b)    __armv4_mmu_cache_off
 921                W(b)    __armv5tej_mmu_cache_flush
 922
 923                .word   0x00007000              @ ARM7 IDs
 924                .word   0x0000f000
 925                mov     pc, lr
 926 THUMB(         nop                             )
 927                mov     pc, lr
 928 THUMB(         nop                             )
 929                mov     pc, lr
 930 THUMB(         nop                             )
 931
 932                @ Everything from here on will be the new ID system.
 933
 934                .word   0x4401a100              @ sa110 / sa1100
 935                .word   0xffffffe0
 936                W(b)    __armv4_mmu_cache_on
 937                W(b)    __armv4_mmu_cache_off
 938                W(b)    __armv4_mmu_cache_flush
 939
 940                .word   0x6901b110              @ sa1110
 941                .word   0xfffffff0
 942                W(b)    __armv4_mmu_cache_on
 943                W(b)    __armv4_mmu_cache_off
 944                W(b)    __armv4_mmu_cache_flush
 945
 946                .word   0x56056900
 947                .word   0xffffff00              @ PXA9xx
 948                W(b)    __armv4_mmu_cache_on
 949                W(b)    __armv4_mmu_cache_off
 950                W(b)    __armv4_mmu_cache_flush
 951
 952                .word   0x56158000              @ PXA168
 953                .word   0xfffff000
 954                W(b)    __armv4_mmu_cache_on
 955                W(b)    __armv4_mmu_cache_off
 956                W(b)    __armv5tej_mmu_cache_flush
 957
 958                .word   0x56050000              @ Feroceon
 959                .word   0xff0f0000
 960                W(b)    __armv4_mmu_cache_on
 961                W(b)    __armv4_mmu_cache_off
 962                W(b)    __armv5tej_mmu_cache_flush
 963
 964#ifdef CONFIG_CPU_FEROCEON_OLD_ID
 965                /* this conflicts with the standard ARMv5TE entry */
 966                .long   0x41009260              @ Old Feroceon
 967                .long   0xff00fff0
 968                b       __armv4_mmu_cache_on
 969                b       __armv4_mmu_cache_off
 970                b       __armv5tej_mmu_cache_flush
 971#endif
 972
 973                .word   0x66015261              @ FA526
 974                .word   0xff01fff1
 975                W(b)    __fa526_cache_on
 976                W(b)    __armv4_mmu_cache_off
 977                W(b)    __fa526_cache_flush
 978
 979                @ These match on the architecture ID
 980
 981                .word   0x00020000              @ ARMv4T
 982                .word   0x000f0000
 983                W(b)    __armv4_mmu_cache_on
 984                W(b)    __armv4_mmu_cache_off
 985                W(b)    __armv4_mmu_cache_flush
 986
 987                .word   0x00050000              @ ARMv5TE
 988                .word   0x000f0000
 989                W(b)    __armv4_mmu_cache_on
 990                W(b)    __armv4_mmu_cache_off
 991                W(b)    __armv4_mmu_cache_flush
 992
 993                .word   0x00060000              @ ARMv5TEJ
 994                .word   0x000f0000
 995                W(b)    __armv4_mmu_cache_on
 996                W(b)    __armv4_mmu_cache_off
 997                W(b)    __armv5tej_mmu_cache_flush
 998
 999                .word   0x0007b000              @ ARMv6
1000                .word   0x000ff000
1001                W(b)    __armv6_mmu_cache_on
1002                W(b)    __armv4_mmu_cache_off
1003                W(b)    __armv6_mmu_cache_flush
1004
1005                .word   0x000f0000              @ new CPU Id
1006                .word   0x000f0000
1007                W(b)    __armv7_mmu_cache_on
1008                W(b)    __armv7_mmu_cache_off
1009                W(b)    __armv7_mmu_cache_flush
1010
1011                .word   0                       @ unrecognised type
1012                .word   0
1013                mov     pc, lr
1014 THUMB(         nop                             )
1015                mov     pc, lr
1016 THUMB(         nop                             )
1017                mov     pc, lr
1018 THUMB(         nop                             )
1019
1020                .size   proc_types, . - proc_types
1021
1022                /*
1023                 * If you get a "non-constant expression in ".if" statement"
1024                 * error from the assembler on this line, check that you have
1025                 * not accidentally written a "b" instruction where you should
1026                 * have written W(b).
1027                 */
1028                .if (. - proc_types) % PROC_ENTRY_SIZE != 0
1029                .error "The size of one or more proc_types entries is wrong."
1030                .endif
1031
1032/*
1033 * Turn off the Cache and MMU.  ARMv3 does not support
1034 * reading the control register, but ARMv4 does.
1035 *
1036 * On exit,
1037 *  r0, r1, r2, r3, r9, r12 corrupted
1038 * This routine must preserve:
1039 *  r4, r7, r8
1040 */
1041                .align  5
1042cache_off:      mov     r3, #12                 @ cache_off function
1043                b       call_cache_fn
1044
1045__armv4_mpu_cache_off:
1046                mrc     p15, 0, r0, c1, c0
1047                bic     r0, r0, #0x000d
1048                mcr     p15, 0, r0, c1, c0      @ turn MPU and cache off
1049                mov     r0, #0
1050                mcr     p15, 0, r0, c7, c10, 4  @ drain write buffer
1051                mcr     p15, 0, r0, c7, c6, 0   @ flush D-Cache
1052                mcr     p15, 0, r0, c7, c5, 0   @ flush I-Cache
1053                mov     pc, lr
1054
1055__armv3_mpu_cache_off:
1056                mrc     p15, 0, r0, c1, c0
1057                bic     r0, r0, #0x000d
1058                mcr     p15, 0, r0, c1, c0, 0   @ turn MPU and cache off
1059                mov     r0, #0
1060                mcr     p15, 0, r0, c7, c0, 0   @ invalidate whole cache v3
1061                mov     pc, lr
1062
1063__armv4_mmu_cache_off:
1064#ifdef CONFIG_MMU
1065                mrc     p15, 0, r0, c1, c0
1066                bic     r0, r0, #0x000d
1067                mcr     p15, 0, r0, c1, c0      @ turn MMU and cache off
1068                mov     r0, #0
1069                mcr     p15, 0, r0, c7, c7      @ invalidate whole cache v4
1070                mcr     p15, 0, r0, c8, c7      @ invalidate whole TLB v4
1071#endif
1072                mov     pc, lr
1073
1074__armv7_mmu_cache_off:
1075                mrc     p15, 0, r0, c1, c0
1076#ifdef CONFIG_MMU
1077                bic     r0, r0, #0x000d
1078#else
1079                bic     r0, r0, #0x000c
1080#endif
1081                mcr     p15, 0, r0, c1, c0      @ turn MMU and cache off
1082                mov     r12, lr
1083                bl      __armv7_mmu_cache_flush
1084                mov     r0, #0
1085#ifdef CONFIG_MMU
1086                mcr     p15, 0, r0, c8, c7, 0   @ invalidate whole TLB
1087#endif
1088                mcr     p15, 0, r0, c7, c5, 6   @ invalidate BTC
1089                mcr     p15, 0, r0, c7, c10, 4  @ DSB
1090                mcr     p15, 0, r0, c7, c5, 4   @ ISB
1091                mov     pc, r12
1092
1093/*
1094 * Clean and flush the cache to maintain consistency.
1095 *
1096 * On exit,
1097 *  r1, r2, r3, r9, r10, r11, r12 corrupted
1098 * This routine must preserve:
1099 *  r4, r6, r7, r8
1100 */
1101                .align  5
1102cache_clean_flush:
1103                mov     r3, #16
1104                b       call_cache_fn
1105
1106__armv4_mpu_cache_flush:
1107                tst     r4, #1
1108                movne   pc, lr
1109                mov     r2, #1
1110                mov     r3, #0
1111                mcr     p15, 0, ip, c7, c6, 0   @ invalidate D cache
1112                mov     r1, #7 << 5             @ 8 segments
11131:              orr     r3, r1, #63 << 26       @ 64 entries
11142:              mcr     p15, 0, r3, c7, c14, 2  @ clean & invalidate D index
1115                subs    r3, r3, #1 << 26
1116                bcs     2b                      @ entries 63 to 0
1117                subs    r1, r1, #1 << 5
1118                bcs     1b                      @ segments 7 to 0
1119
1120                teq     r2, #0
1121                mcrne   p15, 0, ip, c7, c5, 0   @ invalidate I cache
1122                mcr     p15, 0, ip, c7, c10, 4  @ drain WB
1123                mov     pc, lr
1124                
1125__fa526_cache_flush:
1126                tst     r4, #1
1127                movne   pc, lr
1128                mov     r1, #0
1129                mcr     p15, 0, r1, c7, c14, 0  @ clean and invalidate D cache
1130                mcr     p15, 0, r1, c7, c5, 0   @ flush I cache
1131                mcr     p15, 0, r1, c7, c10, 4  @ drain WB
1132                mov     pc, lr
1133
1134__armv6_mmu_cache_flush:
1135                mov     r1, #0
1136                tst     r4, #1
1137                mcreq   p15, 0, r1, c7, c14, 0  @ clean+invalidate D
1138                mcr     p15, 0, r1, c7, c5, 0   @ invalidate I+BTB
1139                mcreq   p15, 0, r1, c7, c15, 0  @ clean+invalidate unified
1140                mcr     p15, 0, r1, c7, c10, 4  @ drain WB
1141                mov     pc, lr
1142
1143__armv7_mmu_cache_flush:
1144                tst     r4, #1
1145                bne     iflush
1146                mrc     p15, 0, r10, c0, c1, 5  @ read ID_MMFR1
1147                tst     r10, #0xf << 16         @ hierarchical cache (ARMv7)
1148                mov     r10, #0
1149                beq     hierarchical
1150                mcr     p15, 0, r10, c7, c14, 0 @ clean+invalidate D
1151                b       iflush
1152hierarchical:
1153                mcr     p15, 0, r10, c7, c10, 5 @ DMB
1154                stmfd   sp!, {r0-r7, r9-r11}
1155                mrc     p15, 1, r0, c0, c0, 1   @ read clidr
1156                ands    r3, r0, #0x7000000      @ extract loc from clidr
1157                mov     r3, r3, lsr #23         @ left align loc bit field
1158                beq     finished                @ if loc is 0, then no need to clean
1159                mov     r10, #0                 @ start clean at cache level 0
1160loop1:
1161                add     r2, r10, r10, lsr #1    @ work out 3x current cache level
1162                mov     r1, r0, lsr r2          @ extract cache type bits from clidr
 1163                and     r1, r1, #7              @ keep only the bits for the current cache
1164                cmp     r1, #2                  @ see what cache we have at this level
1165                blt     skip                    @ skip if no cache, or just i-cache
1166                mcr     p15, 2, r10, c0, c0, 0  @ select current cache level in cssr
 1167                mcr     p15, 0, r10, c7, c5, 4  @ isb to sync the new cssr & csidr
1168                mrc     p15, 1, r1, c0, c0, 0   @ read the new csidr
1169                and     r2, r1, #7              @ extract the length of the cache lines
1170                add     r2, r2, #4              @ add 4 (line length offset)
1171                ldr     r4, =0x3ff
 1172                ands    r4, r4, r1, lsr #3      @ extract maximum way number (associativity - 1)
1173                clz     r5, r4                  @ find bit position of way size increment
1174                ldr     r7, =0x7fff
 1175                ands    r7, r7, r1, lsr #13     @ extract maximum set (index) number
1176loop2:
1177                mov     r9, r4                  @ create working copy of max way size
1178loop3:
1179 ARM(           orr     r11, r10, r9, lsl r5    ) @ factor way and cache number into r11
1180 ARM(           orr     r11, r11, r7, lsl r2    ) @ factor index number into r11
1181 THUMB(         lsl     r6, r9, r5              )
1182 THUMB(         orr     r11, r10, r6            ) @ factor way and cache number into r11
1183 THUMB(         lsl     r6, r7, r2              )
1184 THUMB(         orr     r11, r11, r6            ) @ factor index number into r11
1185                mcr     p15, 0, r11, c7, c14, 2 @ clean & invalidate by set/way
1186                subs    r9, r9, #1              @ decrement the way
1187                bge     loop3
1188                subs    r7, r7, #1              @ decrement the index
1189                bge     loop2
1190skip:
1191                add     r10, r10, #2            @ increment cache number
1192                cmp     r3, r10
1193                bgt     loop1
1194finished:
1195                ldmfd   sp!, {r0-r7, r9-r11}
 1196                mov     r10, #0                 @ switch back to cache level 0
1197                mcr     p15, 2, r10, c0, c0, 0  @ select current cache level in cssr
1198iflush:
1199                mcr     p15, 0, r10, c7, c10, 4 @ DSB
1200                mcr     p15, 0, r10, c7, c5, 0  @ invalidate I+BTB
1201                mcr     p15, 0, r10, c7, c10, 4 @ DSB
1202                mcr     p15, 0, r10, c7, c5, 4  @ ISB
1203                mov     pc, lr
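                /*
                 * The hierarchical path above is the usual set/way clean,
                 * roughly (illustrative only):
                 *
                 *     for (level = 0; level < LoC; level++) {
                 *             if (no data or unified cache at this level)
                 *                     continue;
                 *             select level, read CCSIDR for nsets/assoc/linelen;
                 *             for (set = nsets - 1; set >= 0; set--)
                 *                     for (way = assoc - 1; way >= 0; way--)
                 *                             clean+invalidate by set/way (DCCISW);
                 *     }
                 */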
1204
1205__armv5tej_mmu_cache_flush:
1206                tst     r4, #1
1207                movne   pc, lr
12081:              mrc     p15, 0, r15, c7, c14, 3 @ test,clean,invalidate D cache
1209                bne     1b
1210                mcr     p15, 0, r0, c7, c5, 0   @ flush I cache
1211                mcr     p15, 0, r0, c7, c10, 4  @ drain WB
1212                mov     pc, lr
1213
1214__armv4_mmu_cache_flush:
1215                tst     r4, #1
1216                movne   pc, lr
1217                mov     r2, #64*1024            @ default: 32K dcache size (*2)
1218                mov     r11, #32                @ default: 32 byte line size
1219                mrc     p15, 0, r3, c0, c0, 1   @ read cache type
1220                teq     r3, r9                  @ cache ID register present?
1221                beq     no_cache_id
1222                mov     r1, r3, lsr #18
1223                and     r1, r1, #7
1224                mov     r2, #1024
1225                mov     r2, r2, lsl r1          @ base dcache size *2
1226                tst     r3, #1 << 14            @ test M bit
1227                addne   r2, r2, r2, lsr #1      @ +1/2 size if M == 1
1228                mov     r3, r3, lsr #12
1229                and     r3, r3, #3
1230                mov     r11, #8
1231                mov     r11, r11, lsl r3        @ cache line size in bytes
1232no_cache_id:
1233                mov     r1, pc
1234                bic     r1, r1, #63             @ align to longest cache line
1235                add     r2, r1, r2
12361:
1237 ARM(           ldr     r3, [r1], r11           ) @ s/w flush D cache
1238 THUMB(         ldr     r3, [r1]                ) @ s/w flush D cache
1239 THUMB(         add     r1, r1, r11             )
1240                teq     r1, r2
1241                bne     1b
1242
1243                mcr     p15, 0, r1, c7, c5, 0   @ flush I cache
1244                mcr     p15, 0, r1, c7, c6, 0   @ flush D cache
1245                mcr     p15, 0, r1, c7, c10, 4  @ drain WB
1246                mov     pc, lr
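                /*
                 * The cache-type decode above is roughly (illustrative only):
                 *
                 *     dsize = 1024 << ((ctype >> 18) & 7);   // twice the D-cache size
                 *     if (ctype & (1 << 14))                 // M bit
                 *             dsize += dsize / 2;
                 *     line  = 8 << ((ctype >> 12) & 3);      // line length in bytes
                 *
                 * after which dsize bytes are read to displace any dirty lines.
                 */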
1247
1248__armv3_mmu_cache_flush:
1249__armv3_mpu_cache_flush:
1250                tst     r4, #1
1251                movne   pc, lr
1252                mov     r1, #0
1253                mcr     p15, 0, r1, c7, c0, 0   @ invalidate whole cache v3
1254                mov     pc, lr
1255
1256/*
1257 * Various debugging routines for printing hex characters and
1258 * memory, which again must be relocatable.
1259 */
1260#ifdef DEBUG
1261                .align  2
1262                .type   phexbuf,#object
1263phexbuf:        .space  12
1264                .size   phexbuf, . - phexbuf
1265
1266@ phex corrupts {r0, r1, r2, r3}
1267phex:           adr     r3, phexbuf
1268                mov     r2, #0
1269                strb    r2, [r3, r1]
12701:              subs    r1, r1, #1
1271                movmi   r0, r3
1272                bmi     puts
1273                and     r2, r0, #15
1274                mov     r0, r0, lsr #4
1275                cmp     r2, #10
1276                addge   r2, r2, #7
1277                add     r2, r2, #'0'
1278                strb    r2, [r3, r1]
1279                b       1b
1280
1281@ puts corrupts {r0, r1, r2, r3}
1282puts:           loadsp  r3, r1
12831:              ldrb    r2, [r0], #1
1284                teq     r2, #0
1285                moveq   pc, lr
12862:              writeb  r2, r3
1287                mov     r1, #0x00020000
12883:              subs    r1, r1, #1
1289                bne     3b
1290                teq     r2, #'\n'
1291                moveq   r2, #'\r'
1292                beq     2b
1293                teq     r0, #0
1294                bne     1b
1295                mov     pc, lr
1296@ putc corrupts {r0, r1, r2, r3}
1297putc:
1298                mov     r2, r0
1299                mov     r0, #0
1300                loadsp  r3, r1
1301                b       2b
1302
1303@ memdump corrupts {r0, r1, r2, r3, r10, r11, r12, lr}
1304memdump:        mov     r12, r0
1305                mov     r10, lr
1306                mov     r11, #0
13072:              mov     r0, r11, lsl #2
1308                add     r0, r0, r12
1309                mov     r1, #8
1310                bl      phex
1311                mov     r0, #':'
1312                bl      putc
13131:              mov     r0, #' '
1314                bl      putc
1315                ldr     r0, [r12, r11, lsl #2]
1316                mov     r1, #8
1317                bl      phex
1318                and     r0, r11, #7
1319                teq     r0, #3
1320                moveq   r0, #' '
1321                bleq    putc
1322                and     r0, r11, #7
1323                add     r11, r11, #1
1324                teq     r0, #7
1325                bne     1b
1326                mov     r0, #'\n'
1327                bl      putc
1328                cmp     r11, #64
1329                blt     2b
1330                mov     pc, r10
1331#endif
1332
1333                .ltorg
1334
1335#ifdef CONFIG_ARM_VIRT_EXT
1336.align 5
1337__hyp_reentry_vectors:
1338                W(b)    .                       @ reset
1339                W(b)    .                       @ undef
1340                W(b)    .                       @ svc
1341                W(b)    .                       @ pabort
1342                W(b)    .                       @ dabort
1343                W(b)    __enter_kernel          @ hyp
1344                W(b)    .                       @ irq
1345                W(b)    .                       @ fiq
1346#endif /* CONFIG_ARM_VIRT_EXT */
1347
1348__enter_kernel:
1349                mov     r0, #0                  @ must be 0
1350 ARM(           mov     pc, r4          )       @ call kernel
1351 M_CLASS(       add     r4, r4, #1      )       @ enter in Thumb mode for M class
1352 THUMB(         bx      r4              )       @ entry point is always ARM for A/R classes
1353
1354reloc_code_end:
1355
1356                .align
1357                .section ".stack", "aw", %nobits
1358.L_user_stack:  .space  4096
1359.L_user_stack_end:
1360