linux/arch/arm/lib/lib1funcs.S
<<
>>
Prefs
   1/*
   2 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
   3 *
   4 * Author: Nicolas Pitre <nico@fluxnic.net>
   5 *   - contributed to gcc-3.4 on Sep 30, 2003
   6 *   - adapted for the Linux kernel on Oct 2, 2003
   7 */
   8
   9/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
  10
  11This file is free software; you can redistribute it and/or modify it
  12under the terms of the GNU General Public License as published by the
  13Free Software Foundation; either version 2, or (at your option) any
  14later version.
  15
  16In addition to the permissions in the GNU General Public License, the
  17Free Software Foundation gives you unlimited permission to link the
  18compiled version of this file into combinations with other programs,
  19and to distribute those combinations without any restriction coming
  20from the use of this file.  (The General Public License restrictions
  21do apply in other respects; for example, they cover modification of
  22the file, and distribution when not linked into a combine
  23executable.)
  24
  25This file is distributed in the hope that it will be useful, but
  26WITHOUT ANY WARRANTY; without even the implied warranty of
  27MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  28General Public License for more details.
  29
  30You should have received a copy of the GNU General Public License
  31along with this program; see the file COPYING.  If not, write to
  32the Free Software Foundation, 59 Temple Place - Suite 330,
  33Boston, MA 02111-1307, USA.  */
  34
  35
  36#include <linux/linkage.h>
  37#include <asm/assembler.h>
  38#include <asm/unwind.h>
  39
  40.macro ARM_DIV_BODY dividend, divisor, result, curbit
            @ Unsigned shift-and-subtract division core.
            @   dividend: in = numerator; clobbered (see the last note)
            @   divisor:  in = denominator; clobbered
            @   result:   out = quotient
            @   curbit:   scratch; the quotient bit that matches the
            @             divisor position currently being tried
            @ Both users in this file expand the macro only after they
            @ have branched away for divisor == 0 and dividend <= divisor.
            @ On exit dividend is not guaranteed to be the remainder: the
            @ last pass can keep subtracting divisor fractions whose
            @ quotient bit has already been shifted out of curbit.
  41
  42#if __LINUX_ARM_ARCH__ >= 5
  43
            @ Shift the divisor left by clz(divisor) - clz(dividend) so
            @ that its top set bit lines up with the top set bit of the
            @ dividend, and start curbit at 1 shifted by the same amount.
  44        clz     \curbit, \divisor
  45        clz     \result, \dividend
  46        sub     \result, \curbit, \result
  47        mov     \curbit, #1
  48        mov     \divisor, \divisor, lsl \result
  49        mov     \curbit, \curbit, lsl \result
  50        mov     \result, #0
  51        
  52#else
  53
  54        @ Initially shift the divisor left 3 bits if possible,
  55        @ set curbit accordingly.  This allows for curbit to be located
  56        @ at the left end of each 4 bit nibble in the division loop
  57        @ to save one loop in most cases.
  58        tst     \divisor, #0xe0000000
  59        moveq   \divisor, \divisor, lsl #3
  60        moveq   \curbit, #8
  61        movne   \curbit, #1
  62
  63        @ Unless the divisor is very big, shift it up in multiples of
  64        @ four bits, since this is the amount of unwinding in the main
  65        @ division loop.  Continue shifting until the divisor is
  66        @ larger than the dividend.
  671:      cmp     \divisor, #0x10000000
  68        cmplo   \divisor, \dividend
  69        movlo   \divisor, \divisor, lsl #4
  70        movlo   \curbit, \curbit, lsl #4
  71        blo     1b
  72
  73        @ For very big divisors, we must shift it a bit at a time, or
  74        @ we will be in danger of overflowing.
  751:      cmp     \divisor, #0x80000000
  76        cmplo   \divisor, \dividend
  77        movlo   \divisor, \divisor, lsl #1
  78        movlo   \curbit, \curbit, lsl #1
  79        blo     1b
  80
  81        mov     \result, #0
  82
  83#endif
  84
  85        @ Division loop: four compare/subtract steps per pass, against
            @ divisor, divisor/2, divisor/4 and divisor/8.  Each step that
            @ subtracts also sets the matching bit (curbit, curbit/2, ...)
            @ in result.
  861:      cmp     \dividend, \divisor
  87        subhs   \dividend, \dividend, \divisor
  88        orrhs   \result,   \result,   \curbit
  89        cmp     \dividend, \divisor,  lsr #1
  90        subhs   \dividend, \dividend, \divisor, lsr #1
  91        orrhs   \result,   \result,   \curbit,  lsr #1
  92        cmp     \dividend, \divisor,  lsr #2
  93        subhs   \dividend, \dividend, \divisor, lsr #2
  94        orrhs   \result,   \result,   \curbit,  lsr #2
  95        cmp     \dividend, \divisor,  lsr #3
  96        subhs   \dividend, \dividend, \divisor, lsr #3
  97        orrhs   \result,   \result,   \curbit,  lsr #3
            @ Stop when the dividend has reached zero, or when moving
            @ curbit down four places shifts it out to zero.  The flag
            @ setting shift only runs while the dividend is non-zero, so
            @ the final bne sees whichever of the two tests applied.
  98        cmp     \dividend, #0                   @ Early termination?
  99        movnes  \curbit,   \curbit,  lsr #4     @ No, any more bits to do?
 100        movne   \divisor,  \divisor, lsr #4
 101        bne     1b
 102
 103.endm
 104
 105
 106.macro ARM_DIV2_ORDER divisor, order
            @ Compute order = log2(divisor) so that a division by a power
            @ of two can be done with a single right shift.
            @   divisor: in = power of two (the users in this file check
            @            divisor & (divisor - 1) == 0 before expanding
            @            this); clobbered on the pre-ARMv5 path only
            @   order:   out = index of the single set bit
 107
 108#if __LINUX_ARM_ARCH__ >= 5
 109
            @ order = 31 - (number of leading zero bits)
 110        clz     \order, \divisor
 111        rsb     \order, \order, #31
 112
 113#else
 114
            @ Binary search: whenever the divisor is at least 1 << n,
            @ shift it down by n and account for n in order.
 115        cmp     \divisor, #(1 << 16)
 116        movhs   \divisor, \divisor, lsr #16
 117        movhs   \order, #16
 118        movlo   \order, #0
 119
 120        cmp     \divisor, #(1 << 8)
 121        movhs   \divisor, \divisor, lsr #8
 122        addhs   \order, \order, #8
 123
 124        cmp     \divisor, #(1 << 4)
 125        movhs   \divisor, \divisor, lsr #4
 126        addhs   \order, \order, #4
 127
            @ For a power of two input the divisor is now 1, 2, 4 or 8.
            @ 8 adds 3; for 1, 2 and 4 the value divisor >> 1 (0, 1, 2)
            @ is exactly the remaining log2.
 128        cmp     \divisor, #(1 << 2)
 129        addhi   \order, \order, #3
 130        addls   \order, \order, \divisor, lsr #1
 131
 132#endif
 133
 134.endm
 135
 136
 137.macro ARM_MOD_BODY dividend, divisor, order, spare
            @ Unsigned shift-and-subtract remainder core.
            @   dividend: in = numerator, out = remainder
            @   divisor:  in = denominator; clobbered
            @   order:    scratch; number of bit positions the divisor
            @             was shifted left, then a countdown of the
            @             compare/subtract steps still to do
            @   spare:    scratch, only written on the ARMv5+ path
            @ Both users in this file expand the macro only after they
            @ have handled divisor == 0 and dividend <= divisor.
 138
 139#if __LINUX_ARM_ARCH__ >= 5
 140
            @ Line the top set bit of the divisor up with the top set
            @ bit of the dividend; order is the shift that was applied.
 141        clz     \order, \divisor
 142        clz     \spare, \dividend
 143        sub     \order, \order, \spare
 144        mov     \divisor, \divisor, lsl \order
 145
 146#else
 147
 148        mov     \order, #0
 149
 150        @ Unless the divisor is very big, shift it up in multiples of
 151        @ four bits, since this is the amount of unwinding in the main
 152        @ division loop.  Continue shifting until the divisor is
 153        @ larger than the dividend.
 1541:      cmp     \divisor, #0x10000000
 155        cmplo   \divisor, \dividend
 156        movlo   \divisor, \divisor, lsl #4
 157        addlo   \order, \order, #4
 158        blo     1b
 159
 160        @ For very big divisors, we must shift it a bit at a time, or
 161        @ we will be in danger of overflowing.
 1621:      cmp     \divisor, #0x80000000
 163        cmplo   \divisor, \dividend
 164        movlo   \divisor, \divisor, lsl #1
 165        addlo   \order, \order, #1
 166        blo     1b
 167
 168#endif
 169
 170        @ Perform all needed subtractions to keep only the remainder.
 171        @ Do comparisons in batches of 4 first.
            @ One step is needed per shift position from order down to 0,
            @ that is order + 1 steps, so a full batch of four is only
            @ possible when order >= 3.
 172        subs    \order, \order, #3              @ yes, 3 is intended here
 173        blt     2f
 174
 1751:      cmp     \dividend, \divisor
 176        subhs   \dividend, \dividend, \divisor
 177        cmp     \dividend, \divisor,  lsr #1
 178        subhs   \dividend, \dividend, \divisor, lsr #1
 179        cmp     \dividend, \divisor,  lsr #2
 180        subhs   \dividend, \dividend, \divisor, lsr #2
 181        cmp     \dividend, \divisor,  lsr #3
 182        subhs   \dividend, \dividend, \divisor, lsr #3
            @ Run another batch only while the dividend is still at least
            @ 1 and order, after dropping by 4, has not gone negative.
            @ The divisor shift does not touch the flags.
 183        cmp     \dividend, #1
 184        mov     \divisor, \divisor, lsr #4
 185        subges  \order, \order, #4
 186        bge     1b
 187
            @ Finished if order is a multiple of 4 (no partial batch is
            @ left) or if the dividend is already zero.
 188        tst     \order, #3
 189        teqne   \dividend, #0
 190        beq     5f
 191
 192        @ Either 1, 2 or 3 comparisons/subtractions are left.
            @ cmn compares order with -2: below -2 leaves one step (4f),
            @ equal to -2 leaves two (3f), otherwise all three are run.
 1932:      cmn     \order, #2
 194        blt     4f
 195        beq     3f
 196        cmp     \dividend, \divisor
 197        subhs   \dividend, \dividend, \divisor
 198        mov     \divisor,  \divisor,  lsr #1
 1993:      cmp     \dividend, \divisor
 200        subhs   \dividend, \dividend, \divisor
 201        mov     \divisor,  \divisor,  lsr #1
 2024:      cmp     \dividend, \divisor
 203        subhs   \dividend, \dividend, \divisor
 2045:
 205.endm
 206
 207
 208ENTRY(__udivsi3)
 209ENTRY(__aeabi_uidiv)
            @ Unsigned 32-bit division (one body, two entry names).
            @   in:  r0 = dividend, r1 = divisor
            @   out: r0 = quotient
            @   may clobber r1, r2, r3 and the condition flags
            @ A zero divisor is passed on to Ldiv0.
 210UNWIND(.fnstart)
 211
            @ r2 = divisor - 1.  Z set: divisor is 1, so r0 is already the
            @ answer.  Carry clear (borrow): divisor is 0.
 212        subs    r2, r1, #1
 213        moveq   pc, lr
 214        bcc     Ldiv0
            @ dividend <= divisor: the quotient can only be 1 or 0.
 215        cmp     r0, r1
 216        bls     11f
            @ divisor & (divisor - 1) == 0: power of two, use a shift.
 217        tst     r1, r2
 218        beq     12f
 219
 220        ARM_DIV_BODY r0, r1, r2, r3
 221
 222        mov     r0, r2
 223        mov     pc, lr
 224
            @ Flags are still those of cmp r0, r1: equal gives 1,
            @ dividend below divisor gives 0.
 22511:     moveq   r0, #1
 226        movne   r0, #0
 227        mov     pc, lr
 228
            @ Power of two divisor: r2 = log2(divisor), quotient = r0 >> r2.
 22912:     ARM_DIV2_ORDER r1, r2
 230
 231        mov     r0, r0, lsr r2
 232        mov     pc, lr
 233
 234UNWIND(.fnend)
 235ENDPROC(__udivsi3)
 236ENDPROC(__aeabi_uidiv)
 237
 238ENTRY(__umodsi3)
            @ Unsigned 32-bit remainder.
            @   in:  r0 = dividend, r1 = divisor
            @   out: r0 = dividend modulo divisor
            @   may clobber r1, r2, r3 and the condition flags
            @ A zero divisor is passed on to Ldiv0.
            @
            @ The predicated sequence below handles every easy case
            @ without a branch:
            @   divisor == 1 or dividend == divisor: r0 = 0, return
            @   dividend < divisor: r0 is already the remainder, return
            @   dividend > divisor, divisor a power of two:
            @       r0 = r0 & (divisor - 1), return
            @ Only dividend > divisor with a non power of two divisor
            @ leaves the flags at "hi" and falls through to ARM_MOD_BODY.
 239UNWIND(.fnstart)
 240
 241        subs    r2, r1, #1                      @ compare divisor with 1
 242        bcc     Ldiv0
 243        cmpne   r0, r1                          @ compare dividend with divisor
 244        moveq   r0, #0
 245        tsthi   r1, r2                          @ see if divisor is power of 2
 246        andeq   r0, r0, r2
 247        movls   pc, lr
 248
 249        ARM_MOD_BODY r0, r1, r2, r3
 250
 251        mov     pc, lr
 252
 253UNWIND(.fnend)
 254ENDPROC(__umodsi3)
 255
 256ENTRY(__divsi3)
 257ENTRY(__aeabi_idiv)
            @ Signed 32-bit division (one body, two entry names).
            @   in:  r0 = dividend, r1 = divisor
            @   out: r0 = quotient
            @   may clobber r1, r2, r3, ip and the condition flags
            @ The work is done on absolute values; bit 31 of ip
            @ (dividend eor divisor) records whether the result has to be
            @ negated.  A zero divisor is passed on to Ldiv0.
 258UNWIND(.fnstart)
 259
            @ eor does not touch the flags, so beq and rsbmi below still
            @ act on the result of cmp r1, #0.
 260        cmp     r1, #0
 261        eor     ip, r0, r1                      @ save the sign of the result.
 262        beq     Ldiv0
 263        rsbmi   r1, r1, #0                      @ loops below use unsigned.
 264        subs    r2, r1, #1                      @ division by 1 or -1 ?
 265        beq     10f
 266        movs    r3, r0
 267        rsbmi   r3, r0, #0                      @ positive dividend value
            @ abs(dividend) <= abs(divisor): the quotient is 0, 1 or -1.
 268        cmp     r3, r1
 269        bls     11f
 270        tst     r1, r2                          @ divisor is power of 2 ?
 271        beq     12f
 272
 273        ARM_DIV_BODY r3, r1, r0, r2
 274
            @ Negate the unsigned quotient when the operand signs differed.
 275        cmp     ip, #0
 276        rsbmi   r0, r0, #0
 277        mov     pc, lr
 278
            @ Divisor was 1 or -1.  r0 is still the untouched dividend and
            @ ip eor r0 gives back the original divisor, so mi here means
            @ the divisor was -1 and the dividend has to be negated.
 27910:     teq     ip, r0                          @ same sign ?
 280        rsbmi   r0, r0, #0
 281        mov     pc, lr
 282
            @ Flags are still those of cmp r3, r1.  lo: quotient is 0.
            @ eq: ip asr #31 is 0 or -1, and setting bit 0 turns that into
            @ +1 or -1 according to the sign of the result.
 28311:     movlo   r0, #0
 284        moveq   r0, ip, asr #31
 285        orreq   r0, r0, #1
 286        mov     pc, lr
 287
            @ Power of two divisor: shift abs(dividend) right by
            @ log2(divisor), then apply the sign.  The mov between cmp and
            @ rsbmi leaves the flags alone.
 28812:     ARM_DIV2_ORDER r1, r2
 289
 290        cmp     ip, #0
 291        mov     r0, r3, lsr r2
 292        rsbmi   r0, r0, #0
 293        mov     pc, lr
 294
 295UNWIND(.fnend)
 296ENDPROC(__divsi3)
 297ENDPROC(__aeabi_idiv)
 298
 299ENTRY(__modsi3)
            @ Signed 32-bit remainder.
            @   in:  r0 = dividend, r1 = divisor
            @   out: r0 = remainder, carrying the sign of the dividend
            @   may clobber r1, r2, r3, ip and the condition flags
            @ The work is done on absolute values; ip keeps the original
            @ dividend so that its sign can be applied at the end.
            @ A zero divisor is passed on to Ldiv0.
 300UNWIND(.fnstart)
 301
 302        cmp     r1, #0
 303        beq     Ldiv0
 304        rsbmi   r1, r1, #0                      @ loops below use unsigned.
 305        movs    ip, r0                          @ preserve sign of dividend
 306        rsbmi   r0, r0, #0                      @ if negative make positive
            @ Predicated fast paths on the absolute values:
            @   divisor == 1 or dividend == divisor: r0 = 0
            @   dividend < divisor: r0 is already the remainder
            @   dividend > divisor, divisor a power of two:
            @       r0 = r0 & (divisor - 1)
            @ All of these leave the flags at "ls" and skip to 10f.
 307        subs    r2, r1, #1                      @ compare divisor with 1
 308        cmpne   r0, r1                          @ compare dividend with divisor
 309        moveq   r0, #0
 310        tsthi   r1, r2                          @ see if divisor is power of 2
 311        andeq   r0, r0, r2
 312        bls     10f
 313
 314        ARM_MOD_BODY r0, r1, r2, r3
 315
            @ Negate the remainder when the original dividend was negative.
 31610:     cmp     ip, #0
 317        rsbmi   r0, r0, #0
 318        mov     pc, lr
 319
 320UNWIND(.fnend)
 321ENDPROC(__modsi3)
 322
 323#ifdef CONFIG_AEABI
 324
 325ENTRY(__aeabi_uidivmod)
            @ Unsigned divide that returns both results.
            @   in:  r0 = dividend, r1 = divisor
            @   out: r0 = quotient, r1 = remainder
            @   clobbers r2, r3 plus whatever __aeabi_uidiv clobbers
            @ The quotient comes from __aeabi_uidiv; the remainder is then
            @ rebuilt as dividend - quotient * divisor.
            @ NOTE(review): ip is not used between the save and the
            @ restore; it is presumably stacked only to keep the frame at
            @ four words (8-byte aligned) - confirm.
 326UNWIND(.fnstart)
 327UNWIND(.save {r0, r1, ip, lr}   )
 328
 329        stmfd   sp!, {r0, r1, ip, lr}
 330        bl      __aeabi_uidiv
            @ Reload the saved operands one register higher, so that
            @ r1 = original dividend and r2 = original divisor while r0
            @ keeps the quotient.
 331        ldmfd   sp!, {r1, r2, ip, lr}
 332        mul     r3, r0, r2
 333        sub     r1, r1, r3
 334        mov     pc, lr
 335
 336UNWIND(.fnend)
 337ENDPROC(__aeabi_uidivmod)
 338
 339ENTRY(__aeabi_idivmod)
            @ Signed divide that returns both results.
            @   in:  r0 = dividend, r1 = divisor
            @   out: r0 = quotient, r1 = remainder
            @   clobbers r2, r3 plus whatever __aeabi_idiv clobbers
            @ The quotient comes from __aeabi_idiv; the remainder is then
            @ rebuilt as dividend - quotient * divisor.
            @ NOTE(review): ip is not used between the save and the
            @ restore; it is presumably stacked only to keep the frame at
            @ four words (8-byte aligned) - confirm.
 340UNWIND(.fnstart)
 341UNWIND(.save {r0, r1, ip, lr}   )
 342        stmfd   sp!, {r0, r1, ip, lr}
 343        bl      __aeabi_idiv
            @ Reload the saved operands one register higher, so that
            @ r1 = original dividend and r2 = original divisor while r0
            @ keeps the quotient.
 344        ldmfd   sp!, {r1, r2, ip, lr}
 345        mul     r3, r0, r2
 346        sub     r1, r1, r3
 347        mov     pc, lr
 348
 349UNWIND(.fnend)
 350ENDPROC(__aeabi_idivmod)
 351
 352#endif
 353
 354Ldiv0:
            @ Shared divide-by-zero exit, reached by a plain branch from
            @ the division and modulo routines in this file, so lr still
            @ holds the return address of their caller.
            @ Calls __div0 and, if that returns, hands 0 back in r0.
            @ NOTE(review): __div0 is defined outside this file; it
            @ presumably reports the error - confirm.
 355UNWIND(.fnstart)
 356UNWIND(.pad #4)
 357UNWIND(.save {lr})
            @ Open an 8-byte frame and store lr at its base; the unwind
            @ annotations above describe it as lr plus 4 bytes of padding.
 358        str     lr, [sp, #-8]!
 359        bl      __div0
 360        mov     r0, #0                  @ About as wrong as it could be.
            @ Return by loading the saved lr into pc, then drop the frame.
 361        ldr     pc, [sp], #8
 362UNWIND(.fnend)
 363ENDPROC(Ldiv0)
 364