uboot/arch/arm/lib/div64.S
/*
 *  linux/arch/arm/lib/div64.S
 *
 *  Optimized computation of 64-bit dividend / 32-bit divisor
 *
 *  Author:     Nicolas Pitre
 *  Created:    Oct 5, 2003
 *  Copyright:  Monta Vista Software, Inc.
 *
 *  SPDX-License-Identifier:    GPL-2.0
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#ifdef __UBOOT__
#define UNWIND(x...)
#endif

#ifdef __ARMEB__
#define xh r0
#define xl r1
#define yh r2
#define yl r3
#else
#define xl r0
#define xh r1
#define yl r2
#define yh r3
#endif
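/*
 * Note: a 64-bit value in r0-r1 keeps its most significant word in the
 * lower-numbered register on big-endian and in the higher-numbered one
 * on little-endian, hence the two mappings above.
 */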

/*
 * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
 *
 * Note: Calling convention is totally non-standard for optimal code.
 *       This is meant to be used by do_div() from include/asm/div64.h only.
 *
 * Input parameters:
 *      xh-xl   = dividend (clobbered)
 *      r4      = divisor (preserved)
 *
 * Output values:
 *      yh-yl   = result
 *      xh      = remainder
 *
 * Clobbered regs: xl, ip
 */
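/*
 * Illustrative example (little-endian mapping, so xl=r0, xh=r1, yl=r2,
 * yh=r3): dividing 0x0000000500000000 by 7 means entering with
 *      r0 = 0x00000000 (xl), r1 = 0x00000005 (xh), r4 = 7
 * and returning with the quotient 0x00000000B6DB6DB6 in yh:yl (r3:r2)
 * and the remainder 6 in xh (r1).  The numbers are a worked example only;
 * the sole intended caller is the do_div() macro.
 */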

.pushsection .text.__do_div64, "ax"
ENTRY(__do_div64)
UNWIND(.fnstart)

        @ Test for easy paths first.
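        @ SUBS below sets Z for a divisor of 1 and clears C (borrow) for a
        @ divisor of 0, so BLS covers both trivial cases at label 9.  TST of
        @ r4 against r4 - 1 then catches the remaining powers of two (the
        @ classic "x & (x - 1) == 0" test), which are handled by shifting
        @ alone at label 8.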
        subs    ip, r4, #1
        bls     9f                      @ divisor is 0 or 1
        tst     ip, r4
        beq     8f                      @ divisor is power of 2

        @ See if we need to handle upper 32-bit result.
        cmp     xh, r4
        mov     yh, #0
        blo     3f

        @ Align divisor with upper part of dividend.
        @ The aligned divisor is stored in yl preserving the original.
        @ The bit position is stored in ip.

#if __LINUX_ARM_ARCH__ >= 5

        clz     yl, r4
        clz     ip, xh
        sub     yl, yl, ip
        mov     ip, #1
        mov     ip, ip, lsl yl
        mov     yl, r4, lsl yl

#else

        mov     yl, r4
        mov     ip, #1
1:      cmp     yl, #0x80000000
        cmpcc   yl, xh
        movcc   yl, yl, lsl #1
        movcc   ip, ip, lsl #1
        bcc     1b

#endif
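
        @ Both variants leave yl = r4 shifted up towards the top bit of xh
        @ and ip = 1 shifted up by the same amount, i.e. the first quotient
        @ bit the loop below will try.  Illustrative example for the clz
        @ path: with xh = 0x1234 and r4 = 5, the shift is 29 - 19 = 10, so
        @ yl = 5 << 10 = 0x1400 and ip = 1 << 10.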

        @ The division loop for needed upper bit positions.
        @ Break out early if dividend reaches 0.
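        @ This is plain binary long division: whenever the aligned divisor
        @ still fits into xh, the matching quotient bit in ip is set and the
        @ divisor subtracted, then both move down one position.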
2:      cmp     xh, yl
        orrcs   yh, yh, ip
        subscs  xh, xh, yl
        movsne  ip, ip, lsr #1
        mov     yl, yl, lsr #1
        bne     2b

        @ See if we need to handle lower 32-bit result.
3:      cmp     xh, #0
        mov     yl, #0
        cmpeq   xl, r4
        movlo   xh, xl
        retlo   lr

        @ The division loop for lower bit positions.
        @ Here we shift remainder bits leftwards rather than moving the
        @ divisor for comparisons, considering the carry-out bit as well.
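        @ After the "movs xl" / "adcs xh" pair below, C holds the bit that
        @ was shifted out of the top of xh, i.e. the 33rd bit of the shifted
        @ remainder.  If it is set the remainder already exceeds any 32-bit
        @ divisor, so the CC compare is skipped and the CS operations at
        @ label 5 fire directly on that carry.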
        mov     ip, #0x80000000
4:      movs    xl, xl, lsl #1
        adcs    xh, xh, xh
        beq     6f
        cmpcc   xh, r4
5:      orrcs   yl, yl, ip
        subcs   xh, xh, r4
        movs    ip, ip, lsr #1
        bne     4b
        ret     lr

        @ The top part of remainder became zero.  If carry is set
        @ (the 33rd bit) this is a false positive so resume the loop.
        @ Otherwise, if lower part is also null then we are done.
6:      bcs     5b
        cmp     xl, #0
        reteq   lr

        @ We still have remainder bits in the low part.  Bring them up.

#if __LINUX_ARM_ARCH__ >= 5

        clz     xh, xl                  @ we know xh is zero here so...
        add     xh, xh, #1
        mov     xl, xl, lsl xh
        mov     ip, ip, lsr xh

#else

7:      movs    xl, xl, lsl #1
        mov     ip, ip, lsr #1
        bcc     7b

#endif
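
        @ Either way xl is now shifted so that its former top set bit has
        @ just dropped out (it is re-introduced by "mov xh, #1" below) and
        @ ip has moved down by the same number of positions.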

        @ Current remainder is now 1.  It is worthless to compare with the
        @ divisor at this point since the divisor cannot be smaller than 3
        @ here (0, 1 and powers of 2 were dispatched at the top).
        @ If possible, branch for another shift in the division loop.
        @ If no bit position left then we are done.
        movs    ip, ip, lsr #1
        mov     xh, #1
        bne     4b
        ret     lr

8:      @ Division by a power of 2: determine the divisor's order (log2),
        @ then simply shift values around

#if __LINUX_ARM_ARCH__ >= 5

        clz     ip, r4
        rsb     ip, ip, #31

#else

        mov     yl, r4
        cmp     r4, #(1 << 16)
        mov     ip, #0
        movhs   yl, yl, lsr #16
        movhs   ip, #16

        cmp     yl, #(1 << 8)
        movhs   yl, yl, lsr #8
        addhs   ip, ip, #8

        cmp     yl, #(1 << 4)
        movhs   yl, yl, lsr #4
        addhs   ip, ip, #4

        cmp     yl, #(1 << 2)
        addhi   ip, ip, #3
        addls   ip, ip, yl, lsr #1

#endif
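
        @ Whichever variant ran, ip now holds log2(r4).  Worked example for
        @ the non-clz path with r4 = 0x800: 0x800 < 1 << 16 leaves ip = 0,
        @ 0x800 >= 1 << 8 gives yl = 8 and ip = 8, 8 < 1 << 4 changes
        @ nothing, and 8 > 1 << 2 adds 3, for ip = 11 as expected.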

        mov     yh, xh, lsr ip
        mov     yl, xl, lsr ip
        rsb     ip, ip, #32
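        @ Thumb-2 has no register-specified shift on the second operand of
        @ ORR, hence the separate LSL in the THUMB variant.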
 ARM(   orr     yl, yl, xh, lsl ip      )
 THUMB( lsl     xh, xh, ip              )
 THUMB( orr     yl, yl, xh              )
        mov     xh, xl, lsl ip
        mov     xh, xh, lsr ip
        ret     lr

        @ eq -> division by 1: obvious enough...
9:      moveq   yl, xl
        moveq   yh, xh
        moveq   xh, #0
        reteq   lr
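        @ ne here means the divisor was 0: fall through into Ldiv0_64.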
UNWIND(.fnend)

UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
Ldiv0_64:
        @ Division by 0:
        str     lr, [sp, #-8]!
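        @ The full 8-byte push (matching the .pad #4 above) keeps sp 8-byte
        @ aligned across the call to __div0, as the AAPCS requires.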
        bl      __div0

        @ as wrong as it could be...
        mov     yl, #0
        mov     yh, #0
        mov     xh, #0
        ldr     pc, [sp], #8

UNWIND(.fnend)
ENDPROC(__do_div64)
.popsection