linux/arch/blackfin/lib/udivsi3.S
<<
>>
Prefs
   1/*
   2 * Copyright 2004-2009 Analog Devices Inc.
   3 *
   4 * Licensed under the Clear BSD license or the GPL-2 (or later)
   5 */
   6
   7#include <linux/linkage.h>
   8
   9#define CARRY AC0
  10
  11#ifdef CONFIG_ARITHMETIC_OPS_L1
  12.section .l1.text
  13#else
  14.text
  15#endif
  16
  17
  18ENTRY(___udivsi3)
  19
  20  CC = R0 < R1 (IU);    /* If X < Y, always return 0 */
  21  IF CC JUMP .Lreturn_ident;
  22
  23  R2 = R1 << 16;
  24  CC = R2 <= R0 (IU);
  25  IF CC JUMP .Lidents;
  26
  27  R2 = R0 >> 31;       /* if X is a 31-bit number */
  28  R3 = R1 >> 15;       /* and Y is a 15-bit number */
  29  R2 = R2 | R3;        /* then it's okay to use the DIVQ builtins (fallthrough to fast)*/
  30  CC = R2;
  31  IF CC JUMP .Ly_16bit;
  32
  33/* METHOD 1: FAST DIVQ
  34   We know we have a 31-bit dividend, and 15-bit divisor so we can use the
  35   simple divq approach (first setting AQ to 0 - implying unsigned division,
  36   then 16 DIVQ's).
  37*/
  38
  39  AQ = CC;             /* Clear AQ (CC==0) */
  40
  41/* ISR States: When dividing two integers (32.0/16.0) using divide primitives,
  42   we need to shift the dividend one bit to the left.
  43   We have already checked that we have a 31-bit number so we are safe to do
  44   that.
  45*/
  46  R0 <<= 1;
  47  DIVQ(R0, R1); // 1
  48  DIVQ(R0, R1); // 2
  49  DIVQ(R0, R1); // 3
  50  DIVQ(R0, R1); // 4
  51  DIVQ(R0, R1); // 5
  52  DIVQ(R0, R1); // 6
  53  DIVQ(R0, R1); // 7
  54  DIVQ(R0, R1); // 8
  55  DIVQ(R0, R1); // 9
  56  DIVQ(R0, R1); // 10
  57  DIVQ(R0, R1); // 11
  58  DIVQ(R0, R1); // 12
  59  DIVQ(R0, R1); // 13
  60  DIVQ(R0, R1); // 14
  61  DIVQ(R0, R1); // 15
  62  DIVQ(R0, R1); // 16
  63  R0 = R0.L (Z);
  64  RTS;
  65
  66.Ly_16bit:
  67  /* We know that the upper 17 bits of Y might have bits set,
  68  ** or that the sign bit of X might have a bit. If Y is a
  69  ** 16-bit number, but not bigger, then we can use the builtins
  70  ** with a post-divide correction.
  71  ** R3 currently holds Y>>15, which means R3's LSB is the
  72  ** bit we're interested in.
  73  */
  74
  75  /* According to the ISR, to use the Divide primitives for
  76  ** unsigned integer divide, the useable range is 31 bits
  77  */
  78  CC = ! BITTST(R0, 31);
  79
  80  /* IF condition is true we can scale our inputs and use the divide primitives,
  81  ** with some post-adjustment
  82  */
  83  R3 += -1;             /* if so, Y is 0x00008nnn */
  84  CC &= AZ;
  85
  86  /* If condition is true we can scale our inputs and use the divide primitives,
  87  ** with some post-adjustment
  88  */
  89  R3 = R1 >> 1;         /* Pre-scaled divisor for primitive case */
  90  R2 = R0 >> 16;
  91
  92  R2 = R3 - R2;         /* shifted divisor < upper 16 bits of dividend */
  93  CC &= CARRY;
  94  IF CC JUMP .Lshift_and_correct;
  95
  96  /* Fall through to the identities */
  97
  98/* METHOD 2: identities and manual calculation
  99   We are not able to use the divide primites, but may still catch some special
 100   cases.
 101*/
 102.Lidents:
 103  /* Test for common identities. Value to be returned is placed in R2. */
 104  CC = R0 == 0;        /* 0/Y => 0 */
 105  IF CC JUMP .Lreturn_r0;
 106  CC = R0 == R1;       /* X==Y => 1 */
 107  IF CC JUMP .Lreturn_ident;
 108  CC = R1 == 1;        /* X/1 => X */
 109  IF CC JUMP .Lreturn_ident;
 110
 111  R2.L = ONES R1;
 112  R2 = R2.L (Z);
 113  CC = R2 == 1;
 114  IF CC JUMP .Lpower_of_two;
 115
 116  [--SP] = (R7:5);                /* Push registers R5-R7 */
 117
 118  /* Idents don't match. Go for the full operation. */
 119
 120
 121  R6 = 2;                         /* assume we'll shift two */
 122  R3 = 1;
 123
 124  P2 = R1;
 125                                  /* If either R0 or R1 have sign set, */
 126                                  /* divide them by two, and note it's */
 127                                  /* been done. */
 128  CC = R1 < 0;
 129  R2 = R1 >> 1;
 130  IF CC R1 = R2;                  /* Possibly-shifted R1 */
 131  IF !CC R6 = R3;                 /* R1 doesn't, so at most 1 shifted */
 132
 133  P0 = 0;
 134  R3 = -R1;
 135  [--SP] = R3;
 136  R2 = R0 >> 1;
 137  R2 = R0 >> 1;
 138  CC = R0 < 0;
 139  IF CC P0 = R6;                  /* Number of values divided */
 140  IF !CC R2 = R0;                 /* Shifted R0 */
 141
 142                                  /* P0 is 0, 1 (NR/=2) or 2 (NR/=2, DR/=2) */
 143
 144                                  /* r2 holds Copy dividend  */
 145  R3 = 0;                         /* Clear partial remainder */
 146  R7 = 0;                         /* Initialise quotient bit */
 147
 148  P1 = 32;                        /* Set loop counter */
 149  LSETUP(.Lulst, .Lulend) LC0 = P1; /* Set loop counter */
 150.Lulst:  R6 = R2 >> 31;             /* R6 = sign bit of R2, for carry */
 151       R2 = R2 << 1;              /* Shift 64 bit dividend up by 1 bit */
 152       R3 = R3 << 1 || R5 = [SP];
 153       R3 = R3 | R6;              /* Include any carry */
 154       CC = R7 < 0;               /* Check quotient(AQ) */
 155                                  /* If AQ==0, we'll sub divisor */
 156       IF CC R5 = R1;             /* and if AQ==1, we'll add it. */
 157       R3 = R3 + R5;              /* Add/sub divsor to partial remainder */
 158       R7 = R3 ^ R1;              /* Generate next quotient bit */
 159
 160       R5 = R7 >> 31;             /* Get AQ */
 161       BITTGL(R5, 0);             /* Invert it, to get what we'll shift */
 162.Lulend: R2 = R2 + R5;              /* and "shift" it in. */
 163
 164  CC = P0 == 0;                   /* Check how many inputs we shifted */
 165  IF CC JUMP .Lno_mult;            /* if none... */
 166  R6 = R2 << 1;
 167  CC = P0 == 1;
 168  IF CC R2 = R6;                  /* if 1, Q = Q*2 */
 169  IF !CC R1 = P2;                 /* if 2, restore stored divisor */
 170
 171  R3 = R2;                        /* Copy of R2 */
 172  R3 *= R1;                       /* Q * divisor */
 173  R5 = R0 - R3;                   /* Z = (dividend - Q * divisor) */
 174  CC = R1 <= R5 (IU);             /* Check if divisor <= Z? */
 175  R6 = CC;                        /* if yes, R6 = 1 */
 176  R2 = R2 + R6;                   /* if yes, add one to quotient(Q) */
 177.Lno_mult:
 178  SP += 4;
 179  (R7:5) = [SP++];                /* Pop registers R5-R7 */
 180  R0 = R2;                        /* Store quotient */
 181  RTS;
 182
 183.Lreturn_ident:
 184  CC = R0 < R1 (IU);    /* If X < Y, always return 0 */
 185  R2 = 0;
 186  IF CC JUMP .Ltrue_return_ident;
 187  R2 = -1 (X);         /* X/0 => 0xFFFFFFFF */
 188  CC = R1 == 0;
 189  IF CC JUMP .Ltrue_return_ident;
 190  R2 = -R2;            /* R2 now 1 */
 191  CC = R0 == R1;       /* X==Y => 1 */
 192  IF CC JUMP .Ltrue_return_ident;
 193  R2 = R0;             /* X/1 => X */
 194  /*FALLTHRU*/
 195
 196.Ltrue_return_ident:
 197  R0 = R2;
 198.Lreturn_r0:
 199  RTS;
 200
 201.Lpower_of_two:
 202  /* Y has a single bit set, which means it's a power of two.
 203  ** That means we can perform the division just by shifting
 204  ** X to the right the appropriate number of bits
 205  */
 206
 207  /* signbits returns the number of sign bits, minus one.
 208  ** 1=>30, 2=>29, ..., 0x40000000=>0. Which means we need
 209  ** to shift right n-signbits spaces. It also means 0x80000000
 210  ** is a special case, because that *also* gives a signbits of 0
 211  */
 212
 213  R2 = R0 >> 31;
 214  CC = R1 < 0;
 215  IF CC JUMP .Ltrue_return_ident;
 216
 217  R1.l = SIGNBITS R1;
 218  R1 = R1.L (Z);
 219  R1 += -30;
 220  R0 = LSHIFT R0 by R1.L;
 221  RTS;
 222
 223/* METHOD 3: PRESCALE AND USE THE DIVIDE PRIMITIVES WITH SOME POST-CORRECTION
 224  Two scaling operations are required to use the divide primitives with a
 225  divisor > 0x7FFFF.
 226  Firstly (as in method 1) we need to shift the dividend 1 to the left for
 227  integer division.
 228  Secondly we need to shift both the divisor and dividend 1 to the right so
 229  both are in range for the primitives.
 230  The left/right shift of the dividend does nothing so we can skip it.
 231*/
 232.Lshift_and_correct:
 233  R2 = R0;
 234  // R3 is already R1 >> 1
 235  CC=!CC;
 236  AQ = CC;                        /* Clear AQ, got here with CC = 0 */
 237  DIVQ(R2, R3); // 1
 238  DIVQ(R2, R3); // 2
 239  DIVQ(R2, R3); // 3
 240  DIVQ(R2, R3); // 4
 241  DIVQ(R2, R3); // 5
 242  DIVQ(R2, R3); // 6
 243  DIVQ(R2, R3); // 7
 244  DIVQ(R2, R3); // 8
 245  DIVQ(R2, R3); // 9
 246  DIVQ(R2, R3); // 10
 247  DIVQ(R2, R3); // 11
 248  DIVQ(R2, R3); // 12
 249  DIVQ(R2, R3); // 13
 250  DIVQ(R2, R3); // 14
 251  DIVQ(R2, R3); // 15
 252  DIVQ(R2, R3); // 16
 253
 254  /* According to the Instruction Set Reference:
 255     To divide by a divisor > 0x7FFF,
 256     1. prescale and perform divide to obtain quotient (Q) (done above),
 257     2. multiply quotient by unscaled divisor (result M)
 258     3. subtract the product from the divident to get an error (E = X - M)
 259     4. if E < divisor (Y) subtract 1, if E > divisor (Y) add 1, else return quotient (Q)
 260   */
 261  R3 = R2.L (Z);                /* Q = X' / Y' */
 262  R2 = R3;              /* Preserve Q */
 263  R2 *= R1;             /* M = Q * Y */
 264  R2 = R0 - R2;         /* E = X - M */
 265  R0 = R3;              /* Copy Q into result reg */
 266
 267/* Correction: If result of the multiply is negative, we overflowed
 268   and need to correct the result by subtracting 1 from the result.*/
 269  R3 = 0xFFFF (Z);
 270  R2 = R2 >> 16;                /* E >> 16 */
 271  CC = R2 == R3;
 272  R3 = 1 ;
 273  R1 = R0 - R3;
 274  IF CC R0 = R1;
 275  RTS;
 276
 277ENDPROC(___udivsi3)
 278