/* linux/arch/arm/lib/div64.S */
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 *  linux/arch/arm/lib/div64.S
 *
 *  Optimized computation of 64-bit dividend / 32-bit divisor
 *
 *  Author:     Nicolas Pitre
 *  Created:    Oct 5, 2003
 *  Copyright:  Monta Vista Software, Inc.
 */

  12#include <linux/linkage.h>
  13#include <asm/assembler.h>
  14#include <asm/unwind.h>
  15
  16#ifdef __ARMEB__
  17#define xh r0
  18#define xl r1
  19#define yh r2
  20#define yl r3
  21#else
  22#define xl r0
  23#define xh r1
  24#define yl r2
  25#define yh r3
  26#endif

/*
 * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
 *
 * Note: Calling convention is totally non standard for optimal code.
 *       This is meant to be used by do_div() from include/asm/div64.h only.
 *
 * Input parameters:
 *      xh-xl   = dividend (clobbered)
 *      r4      = divisor (preserved)
 *
 * Output values:
 *      yh-yl   = result
 *      xh      = remainder
 *
 * Clobbered regs: xl, ip
 */

  45ENTRY(__do_div64)
  46UNWIND(.fnstart)
  47
  48        @ Test for easy paths first.
  49        subs    ip, r4, #1
  50        bls     9f                      @ divisor is 0 or 1
  51        tst     ip, r4
  52        beq     8f                      @ divisor is power of 2
  53
  54        @ See if we need to handle upper 32-bit result.
  55        cmp     xh, r4
  56        mov     yh, #0
  57        blo     3f
  58
  59        @ Align divisor with upper part of dividend.
  60        @ The aligned divisor is stored in yl preserving the original.
  61        @ The bit position is stored in ip.
  62
  63#if __LINUX_ARM_ARCH__ >= 5
  64
  65        clz     yl, r4
  66        clz     ip, xh
  67        sub     yl, yl, ip
  68        mov     ip, #1
  69        mov     ip, ip, lsl yl
  70        mov     yl, r4, lsl yl
  71
  72#else
  73
  74        mov     yl, r4
  75        mov     ip, #1
  761:      cmp     yl, #0x80000000
  77        cmpcc   yl, xh
  78        movcc   yl, yl, lsl #1
  79        movcc   ip, ip, lsl #1
  80        bcc     1b
  81
  82#endif
  83
  84        @ The division loop for needed upper bit positions.
  85        @ Break out early if dividend reaches 0.
  862:      cmp     xh, yl
  87        orrcs   yh, yh, ip
  88        subscs  xh, xh, yl
  89        movsne  ip, ip, lsr #1
  90        mov     yl, yl, lsr #1
  91        bne     2b
  92
  93        @ See if we need to handle lower 32-bit result.
  943:      cmp     xh, #0
  95        mov     yl, #0
  96        cmpeq   xl, r4
  97        movlo   xh, xl
  98        retlo   lr
  99
 100        @ The division loop for lower bit positions.
 101        @ Here we shift remainer bits leftwards rather than moving the
 102        @ divisor for comparisons, considering the carry-out bit as well.
 103        mov     ip, #0x80000000
 1044:      movs    xl, xl, lsl #1
 105        adcs    xh, xh, xh
 106        beq     6f
 107        cmpcc   xh, r4
 1085:      orrcs   yl, yl, ip
 109        subcs   xh, xh, r4
 110        movs    ip, ip, lsr #1
 111        bne     4b
 112        ret     lr
 113
 114        @ The top part of remainder became zero.  If carry is set
 115        @ (the 33th bit) this is a false positive so resume the loop.
 116        @ Otherwise, if lower part is also null then we are done.
 1176:      bcs     5b
 118        cmp     xl, #0
 119        reteq   lr
 120
 121        @ We still have remainer bits in the low part.  Bring them up.
 122
 123#if __LINUX_ARM_ARCH__ >= 5
 124
 125        clz     xh, xl                  @ we know xh is zero here so...
 126        add     xh, xh, #1
 127        mov     xl, xl, lsl xh
 128        mov     ip, ip, lsr xh
 129
 130#else
 131
 1327:      movs    xl, xl, lsl #1
 133        mov     ip, ip, lsr #1
 134        bcc     7b
 135
 136#endif
 137
 138        @ Current remainder is now 1.  It is worthless to compare with
 139        @ divisor at this point since divisor can not be smaller than 3 here.
 140        @ If possible, branch for another shift in the division loop.
 141        @ If no bit position left then we are done.
 142        movs    ip, ip, lsr #1
 143        mov     xh, #1
 144        bne     4b
 145        ret     lr
 146
 1478:      @ Division by a power of 2: determine what that divisor order is
 148        @ then simply shift values around
 149
 150#if __LINUX_ARM_ARCH__ >= 5
 151
 152        clz     ip, r4
 153        rsb     ip, ip, #31
 154
 155#else
 156
 157        mov     yl, r4
 158        cmp     r4, #(1 << 16)
 159        mov     ip, #0
 160        movhs   yl, yl, lsr #16
 161        movhs   ip, #16
 162
 163        cmp     yl, #(1 << 8)
 164        movhs   yl, yl, lsr #8
 165        addhs   ip, ip, #8
 166
 167        cmp     yl, #(1 << 4)
 168        movhs   yl, yl, lsr #4
 169        addhs   ip, ip, #4
 170
 171        cmp     yl, #(1 << 2)
 172        addhi   ip, ip, #3
 173        addls   ip, ip, yl, lsr #1
 174
 175#endif
 176
 177        mov     yh, xh, lsr ip
 178        mov     yl, xl, lsr ip
 179        rsb     ip, ip, #32
 180 ARM(   orr     yl, yl, xh, lsl ip      )
 181 THUMB( lsl     xh, xh, ip              )
 182 THUMB( orr     yl, yl, xh              )
 183        mov     xh, xl, lsl ip
 184        mov     xh, xh, lsr ip
 185        ret     lr
 186
 187        @ eq -> division by 1: obvious enough...
 1889:      moveq   yl, xl
 189        moveq   yh, xh
 190        moveq   xh, #0
 191        reteq   lr
 192UNWIND(.fnend)
 193
 194UNWIND(.fnstart)
 195UNWIND(.pad #4)
 196UNWIND(.save {lr})
 197Ldiv0_64:
 198        @ Division by 0:
 199        str     lr, [sp, #-8]!
 200        bl      __div0
 201
 202        @ as wrong as it could be...
 203        mov     yl, #0
 204        mov     yh, #0
 205        mov     xh, #0
 206        ldr     pc, [sp], #8
 207
 208UNWIND(.fnend)
 209ENDPROC(__do_div64)