LXR uboot/arch/arm/lib/div64.S

   1/* SPDX-License-Identifier: GPL-2.0 */
   2/*
   3 *  linux/arch/arm/lib/div64.S
   4 *
   5 *  Optimized computation of 64-bit dividend / 32-bit divisor
   6 *
   7 *  Author:     Nicolas Pitre
   8 *  Created:    Oct 5, 2003
   9 *  Copyright:  Monta Vista Software, Inc.
  10 */
  11
  12#include <linux/linkage.h>
  13#include <asm/assembler.h>
  14#ifdef __UBOOT__
    /*
     * When __UBOOT__ is defined, UNWIND(...) expands to nothing, so the
     * UNWIND(.fnstart), UNWIND(.pad), UNWIND(.save) and UNWIND(.fnend)
     * annotations used further down in this file are dropped entirely.
     */
  15#define UNWIND(x...)
  16#endif
  17
  18#ifdef __ARMEB__
    /*
     * Register aliases used by __do_div64.  Per that routine's header
     * comment, xh-xl is the 64-bit dividend and yh-yl the 64-bit result;
     * "h" and "l" name the high and low 32-bit words.
     *
     * With __ARMEB__ defined (presumably the toolchain's big-endian ARM
     * predefine -- confirm), the high word of each pair sits in the
     * lower-numbered register: r0/r2 are high, r1/r3 are low.
     */
  19#define xh r0
  20#define xl r1
  21#define yh r2
  22#define yl r3
  23#else
    /* Otherwise the pairing is mirrored: r0/r2 are low, r1/r3 are high. */
  24#define xl r0
  25#define xh r1
  26#define yl r2
  27#define yh r3
  28#endif
  29
  30/*
  31 * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
  32 *
  33 * Note: Calling convention is totally non standard for optimal code.
  34 *       This is meant to be used by do_div() from include/asm/div64.h only.
  35 *
  36 * Input parameters:
  37 *      xh-xl   = dividend (clobbered)
  38 *      r4      = divisor (preserved)
  39 *
  40 * Output values:
  41 *      yh-yl   = result
  42 *      xh      = remainder
  43 *
  44 * Clobbered regs: xl, ip
  45 */
  46
  47.pushsection .text.__do_div64, "ax"
  48ENTRY(__do_div64)
  49UNWIND(.fnstart)
  50
            @ Map of the numeric local labels used in this routine:
            @   1:    loop aligning the divisor with xh (build without clz)
            @   2:    compare-and-subtract loop producing the upper word yh
            @   3:    entry to the computation of the lower word yl
            @   4:/5: shift-and-subtract loop producing yl
            @   6:    xh became zero inside loop 4
            @   7:    loop skipping leading zero bits of xl (build without clz)
            @   8:    divisor is a power of two: result obtained by shifts
            @   9:    divisor is 1 (plain copy) or 0 (falls into Ldiv0_64)
            @ Register roles on entry and exit are given in the header
            @ comment above this function.

  51        @ Test for easy paths first.
  52        subs    ip, r4, #1              @ ip = divisor - 1, flags updated
  53        bls     9f                      @ divisor is 0 or 1
  54        tst     ip, r4                  @ (divisor - 1) & divisor
  55        beq     8f                      @ divisor is power of 2
  56
  57        @ See if we need to handle upper 32-bit result.
            @ yh is cleared between the compare and the branch; this mov
            @ leaves the flags alone, so blo still tests xh < divisor.
  58        cmp     xh, r4
  59        mov     yh, #0
  60        blo     3f
  61
  62        @ Align divisor with upper part of dividend.
  63        @ The aligned divisor is stored in yl preserving the original.
  64        @ The bit position is stored in ip.
  65
  66#if __LINUX_ARM_ARCH__ >= 5
  67
            @ Shift count = clz(divisor) - clz(xh).  xh >= divisor was
            @ established by the compare above, so the count is not negative.
  68        clz     yl, r4
  69        clz     ip, xh
  70        sub     yl, yl, ip
  71        mov     ip, #1
  72        mov     ip, ip, lsl yl          @ ip = quotient bit for this alignment
  73        mov     yl, r4, lsl yl          @ yl = divisor shifted up to match xh
  74
  75#else
  76
            @ Without clz: keep doubling yl and ip while yl is below
            @ 0x80000000 and also below xh.
  77        mov     yl, r4
  78        mov     ip, #1
  791:      cmp     yl, #0x80000000
  80        cmpcc   yl, xh
  81        movcc   yl, yl, lsl #1
  82        movcc   ip, ip, lsl #1
  83        bcc     1b
  84
  85#endif
  86
  87        @ The division loop for needed upper bit positions.
  88        @ Break out early if dividend reaches 0.
            @ Each pass: when xh >= yl, quotient bit ip is set in yh and yl is
            @ subtracted from xh.  The flags consumed by bne come either from
            @ that subtraction (xh reached 0, ip is then left unshifted) or
            @ from the shift of ip (ip reached 0).  yl is halved on every pass
            @ by a mov that does not touch the flags.
  892:      cmp     xh, yl
  90        orrcs   yh, yh, ip
  91        subscs  xh, xh, yl
  92        movsne  ip, ip, lsr #1
  93        mov     yl, yl, lsr #1
  94        bne     2b
  95
  96        @ See if we need to handle lower 32-bit result.
            @ If xh == 0 and xl < divisor, the low result word is 0 and xl is
            @ already the remainder: copy it to xh and return.  When xh != 0
            @ the first compare leaves carry set, so the lo-conditional
            @ instructions are skipped.
  973:      cmp     xh, #0
  98        mov     yl, #0
  99        cmpeq   xl, r4
 100        movlo   xh, xl
 101        retlo   lr
 102
 103        @ The division loop for lower bit positions.
 104        @ Here we shift remainder bits leftwards rather than moving the
 105        @ divisor for comparisons, considering the carry-out bit as well.
            @ ip holds a single quotient bit that moves from bit 31 towards
            @ bit 0; the loop ends when it is shifted out.
 106        mov     ip, #0x80000000
 1074:      movs    xl, xl, lsl #1          @ top bit of xl goes to carry
 108        adcs    xh, xh, xh              @ xh = 2 * xh + carry, carry = bit shifted out
 109        beq     6f                      @ low 32 bits of the remainder are zero
 110        cmpcc   xh, r4                  @ no carry-out: compare against divisor
 1115:      orrcs   yl, yl, ip              @ carry-out or xh >= divisor: set quotient bit
 112        subcs   xh, xh, r4
 113        movs    ip, ip, lsr #1
 114        bne     4b
 115        ret     lr
 116
 117        @ The top part of remainder became zero.  If carry is set
 118        @ (the 33rd bit) this is a false positive so resume the loop.
 119        @ Otherwise, if lower part is also null then we are done.
 1206:      bcs     5b
 121        cmp     xl, #0
 122        reteq   lr
 123
 124        @ We still have remainder bits in the low part.  Bring them up.
 125
 126#if __LINUX_ARM_ARCH__ >= 5
 127
            @ Shift xl left by clz(xl) + 1 so that its highest set bit is
            @ shifted out, and move the quotient bit in ip right by the same
            @ amount.  xh serves as scratch for the shift count.
 128        clz     xh, xl                  @ we know xh is zero here so...
 129        add     xh, xh, #1
 130        mov     xl, xl, lsl xh
 131        mov     ip, ip, lsr xh
 132
 133#else
 134
            @ Without clz: same effect one bit at a time, looping until a
            @ set bit is carried out of xl.
 1357:      movs    xl, xl, lsl #1
 136        mov     ip, ip, lsr #1
 137        bcc     7b
 138
 139#endif
 140
 141        @ Current remainder is now 1.  It is worthless to compare with
 142        @ divisor at this point since divisor can not be smaller than 3 here.
 143        @ If possible, branch for another shift in the division loop.
 144        @ If no bit position left then we are done.
            @ The mov that reloads xh with 1 leaves the flags from the
            @ preceding movs intact, so bne tests whether ip is still non-zero.
 145        movs    ip, ip, lsr #1
 146        mov     xh, #1
 147        bne     4b
 148        ret     lr
 149
 1508:      @ Division by a power of 2: determine what that divisor order is
 151        @ then simply shift values around
 152
 153#if __LINUX_ARM_ARCH__ >= 5
 154
 155        clz     ip, r4
 156        rsb     ip, ip, #31             @ ip = 31 - clz(divisor) = bit index of divisor
 157
 158#else
 159
            @ Without clz: locate the single set bit by narrowing in steps
            @ of 16, 8 and 4 bits.  yl holds what is left of the divisor
            @ (1, 2, 4 or 8) for the final step and ip accumulates the order.
 160        mov     yl, r4
 161        cmp     r4, #(1 << 16)
 162        mov     ip, #0
 163        movhs   yl, yl, lsr #16
 164        movhs   ip, #16
 165
 166        cmp     yl, #(1 << 8)
 167        movhs   yl, yl, lsr #8
 168        addhs   ip, ip, #8
 169
 170        cmp     yl, #(1 << 4)
 171        movhs   yl, yl, lsr #4
 172        addhs   ip, ip, #4
 173
 174        cmp     yl, #(1 << 2)
 175        addhi   ip, ip, #3              @ yl is 8
 176        addls   ip, ip, yl, lsr #1      @ yl is 1, 2 or 4: adds 0, 1 or 2
 177
 178#endif
 179
            @ Quotient = dividend shifted right by the order in ip.  After the
            @ rsb, ip = 32 - order: it moves the low bits of xh into the top
            @ of yl, and the final lsl/lsr pair keeps only the low order bits
            @ of xl, which form the remainder returned in xh.
 180        mov     yh, xh, lsr ip
 181        mov     yl, xl, lsr ip
 182        rsb     ip, ip, #32
 183 ARM(   orr     yl, yl, xh, lsl ip      )
 184 THUMB( lsl     xh, xh, ip              )
 185 THUMB( orr     yl, yl, xh              )
 186        mov     xh, xl, lsl ip
 187        mov     xh, xh, lsr ip
 188        ret     lr
 189
 190        @ eq -> division by 1: obvious enough...
            @ Reached through bls with the flags of the initial subs still
            @ live: Z set means the divisor was 1.  For a divisor of 0 none of
            @ the eq-conditional instructions run and execution falls through
            @ into Ldiv0_64 below.
 1919:      moveq   yl, xl
 192        moveq   yh, xh
 193        moveq   xh, #0
 194        reteq   lr
 195UNWIND(.fnend)
 196
 197UNWIND(.fnstart)
 198UNWIND(.pad #4)
 199UNWIND(.save {lr})
 200Ldiv0_64:
 201        @ Division by 0:
            @ lr is saved with an 8-byte pre-decrement of sp, __div0 is
            @ called, then quotient and remainder are both reported as zero.
 202        str     lr, [sp, #-8]!
 203        bl      __div0
 204
 205        @ as wrong as it could be...
 206        mov     yl, #0
 207        mov     yh, #0
 208        mov     xh, #0
 209        ldr     pc, [sp], #8            @ return: saved lr into pc, sp += 8
 210
 211UNWIND(.fnend)
 212ENDPROC(__do_div64)
 213.popsection
 214
 214