linux/arch/arm/lib/div64.S
/*
 *  linux/arch/arm/lib/div64.S
 *
 *  Optimized computation of 64-bit dividend / 32-bit divisor
 *
 *  Author:     Nicolas Pitre
 *  Created:    Oct 5, 2003
 *  Copyright:  Monta Vista Software, Inc.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License version 2 as
 *  published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/unwind.h>

#ifdef __ARMEB__
#define xh r0
#define xl r1
#define yh r2
#define yl r3
#else
#define xl r0
#define xh r1
#define yl r2
#define yh r3
#endif
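
/*
 * The 64-bit dividend arrives in r0-r1 and the 64-bit result is built in
 * r2-r3.  The mappings above follow the endian-dependent word order of a
 * 64-bit value held in a register pair: the high word sits in the
 * lower-numbered register on big-endian and in the higher-numbered one on
 * little-endian.
 */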

/*
 * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
 *
 * Note: Calling convention is totally non-standard for optimal code.
 *       This is meant to be used by do_div() from include/asm/div64.h only.
 *
 * Input parameters:
 *      xh-xl   = dividend (clobbered)
 *      r4      = divisor (preserved)
 *
 * Output values:
 *      yh-yl   = result
 *      xh      = remainder
 *
 * Clobbered regs: xl, ip
 */
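
/*
 * For reference, a rough C sketch of the same computation done as classic
 * shift-and-subtract long division.  This is only an illustration of the
 * algorithm implemented below (assuming u64/u32 as in <linux/types.h> and
 * a non-zero divisor); it is not the do_div() wrapper from asm/div64.h.
 * The returned q corresponds to yh-yl and the value stored in rem to xh.
 *
 *  static u64 div64_sketch(u64 n, u32 base, u32 *rem)
 *  {
 *          u64 q = 0, d = base;
 *          int shift = 0;
 *
 *          while (d <= (n >> 1)) {         // align divisor with dividend
 *                  d <<= 1;
 *                  shift++;
 *          }
 *          while (shift >= 0) {            // one quotient bit per position
 *                  q <<= 1;
 *                  if (n >= d) {
 *                          n -= d;
 *                          q |= 1;
 *                  }
 *                  d >>= 1;
 *                  shift--;
 *          }
 *          *rem = n;
 *          return q;
 *  }
 */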

ENTRY(__do_div64)
UNWIND(.fnstart)

        @ Test for easy paths first.
        subs    ip, r4, #1
        bls     9f                      @ divisor is 0 or 1
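        @ ((r4 - 1) & r4) == 0 exactly when r4 is a power of two (the
        @ r4 == 0 case was dispatched just above), e.g. 8 & 7 == 0 while
        @ 6 & 5 != 0.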
        tst     ip, r4
        beq     8f                      @ divisor is power of 2

        @ See if we need to handle upper 32-bit result.
        cmp     xh, r4
        mov     yh, #0
        blo     3f

        @ Align the divisor with the upper part of the dividend.
        @ The aligned divisor is stored in yl, preserving the original in r4.
        @ The corresponding quotient bit position is stored in ip.

#if __LINUX_ARM_ARCH__ >= 5

        clz     yl, r4
        clz     ip, xh
        sub     yl, yl, ip
        mov     ip, #1
        mov     ip, ip, lsl yl
        mov     yl, r4, lsl yl

#else

        mov     yl, r4
        mov     ip, #1
1:      cmp     yl, #0x80000000
        cmpcc   yl, xh
        movcc   yl, yl, lsl #1
        movcc   ip, ip, lsl #1
        bcc     1b

#endif
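
        @ Example: with r4 = 5 and xh = 0x00001000 the clz-based shift is
        @ clz(5) - clz(0x1000) = 29 - 19 = 10, giving yl = 5 << 10 = 0x1400
        @ (top bits aligned) and ip = 1 << 10 as the matching quotient bit.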

        @ The division loop over the needed upper bit positions.
        @ Break out early if the dividend reaches 0.
2:      cmp     xh, yl
        orrcs   yh, yh, ip
        subcss  xh, xh, yl
        movnes  ip, ip, lsr #1
        mov     yl, yl, lsr #1
        bne     2b

        @ See if we need to handle lower 32-bit result.
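        @ If the upper part is already 0 and xl is smaller than the divisor,
        @ no further quotient bits can be set and xl is the final remainder,
        @ so return early.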
3:      cmp     xh, #0
        mov     yl, #0
        cmpeq   xl, r4
        movlo   xh, xl
        movlo   pc, lr

        @ The division loop for the lower bit positions.
        @ Here we shift remainder bits leftwards rather than moving the
        @ divisor for comparisons, considering the carry-out bit as well.
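        @ Each pass shifts the next dividend bit into xh through the carry
        @ ("movs xl" then "adcs xh"); whenever the running remainder in xh,
        @ including a possible carried-out 33rd bit, reaches the divisor it
        @ is reduced and the quotient bit held in ip is set in yl.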
        mov     ip, #0x80000000
4:      movs    xl, xl, lsl #1
        adcs    xh, xh, xh
        beq     6f
        cmpcc   xh, r4
5:      orrcs   yl, yl, ip
        subcs   xh, xh, r4
        movs    ip, ip, lsr #1
        bne     4b
        mov     pc, lr

        @ The top part of the remainder became zero.  If carry is set
        @ (the 33rd bit) this is a false positive, so resume the loop.
        @ Otherwise, if the lower part is also zero then we are done.
6:      bcs     5b
        cmp     xl, #0
        moveq   pc, lr

        @ We still have remainder bits in the low part.  Bring them up.

#if __LINUX_ARM_ARCH__ >= 5

        clz     xh, xl                  @ we know xh is zero here so...
        add     xh, xh, #1
        mov     xl, xl, lsl xh
        mov     ip, ip, lsr xh

#else

7:      movs    xl, xl, lsl #1
        mov     ip, ip, lsr #1
        bcc     7b

#endif

        @ The current remainder is now 1.  It is pointless to compare it
        @ with the divisor at this point since the divisor cannot be
        @ smaller than 3 here.  If possible, branch for another shift in
        @ the division loop.  If no bit positions are left then we are done.
        movs    ip, ip, lsr #1
        mov     xh, #1
        bne     4b
        mov     pc, lr

8:      @ Division by a power of 2: determine what that divisor order is,
        @ then simply shift values around.

#if __LINUX_ARM_ARCH__ >= 5

        clz     ip, r4
        rsb     ip, ip, #31

#else

        mov     yl, r4
        cmp     r4, #(1 << 16)
        mov     ip, #0
        movhs   yl, yl, lsr #16
        movhs   ip, #16

        cmp     yl, #(1 << 8)
        movhs   yl, yl, lsr #8
        addhs   ip, ip, #8

        cmp     yl, #(1 << 4)
        movhs   yl, yl, lsr #4
        addhs   ip, ip, #4

        cmp     yl, #(1 << 2)
        addhi   ip, ip, #3
        addls   ip, ip, yl, lsr #1

#endif
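
        @ The pre-v5 sequence above is a branch-free binary search for the
        @ divisor's set bit.  Example: r4 = 1 << 17 gives ip = 16 and
        @ yl = 2 after the 1 << 16 step, no change at the 1 << 8 and
        @ 1 << 4 steps, then ip = 16 + (2 >> 1) = 17.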

        mov     yh, xh, lsr ip
        mov     yl, xl, lsr ip
        rsb     ip, ip, #32
 ARM(   orr     yl, yl, xh, lsl ip      )
 THUMB( lsl     xh, xh, ip              )
 THUMB( orr     yl, yl, xh              )
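        @ The lsl/lsr pair below clears the quotient bits out of xl,
        @ leaving xh = xl & (divisor - 1), i.e. the remainder.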
        mov     xh, xl, lsl ip
        mov     xh, xh, lsr ip
        mov     pc, lr

        @ eq -> division by 1: obvious enough...
        @ (otherwise the divisor was 0 and we fall through to Ldiv0_64)
9:      moveq   yl, xl
        moveq   yh, xh
        moveq   xh, #0
        moveq   pc, lr
UNWIND(.fnend)

UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
Ldiv0_64:
        @ Division by 0:
        str     lr, [sp, #-8]!
        bl      __div0

        @ as wrong as it could be...
        mov     yl, #0
        mov     yh, #0
        mov     xh, #0
        ldr     pc, [sp], #8

UNWIND(.fnend)
ENDPROC(__do_div64)