LXR linux/arch/x86/math-emu/poly

   1/*---------------------------------------------------------------------------+
   2 |  poly_sin.c                                                               |
   3 |                                                                           |
   4 |  Computation of an approximation of the sin function and the cosine       |
   5 |  function by a polynomial.                                                |
   6 |                                                                           |
   7 | Copyright (C) 1992,1993,1994,1997,1999                                    |
   8 |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
   9 |                  E-mail   billm@melbpc.org.au                             |
  10 |                                                                           |
  11 |                                                                           |
  12 +---------------------------------------------------------------------------*/
  13
  14#include "exception.h"
  15#include "reg_constant.h"
  16#include "fpu_emu.h"
  17#include "fpu_system.h"
  18#include "control_w.h"
  19#include "poly.h"
  20
  21#define N_COEFF_P       4
  22#define N_COEFF_N       4
  23
  24static const unsigned long long pos_terms_l[N_COEFF_P] = {
  25        0xaaaaaaaaaaaaaaabLL,
  26        0x00d00d00d00cf906LL,
  27        0x000006b99159a8bbLL,
  28        0x000000000d7392e6LL
  29};
  30
  31static const unsigned long long neg_terms_l[N_COEFF_N] = {
  32        0x2222222222222167LL,
  33        0x0002e3bc74aab624LL,
  34        0x0000000b09229062LL,
  35        0x00000000000c7973LL
  36};
  37
  38#define N_COEFF_PH      4
  39#define N_COEFF_NH      4
  40static const unsigned long long pos_terms_h[N_COEFF_PH] = {
  41        0x0000000000000000LL,
  42        0x05b05b05b05b0406LL,
  43        0x000049f93edd91a9LL,
  44        0x00000000c9c9ed62LL
  45};
  46
  47static const unsigned long long neg_terms_h[N_COEFF_NH] = {
  48        0xaaaaaaaaaaaaaa98LL,
  49        0x001a01a01a019064LL,
  50        0x0000008f76c68a77LL,
  51        0x0000000000d58f5eLL
  52};
  53
  54/*--- poly_sine() -----------------------------------------------------------+
  55 |                                                                           |
  56 +---------------------------------------------------------------------------*/
  57void poly_sine(FPU_REG *st0_ptr)
  58{
  59        int exponent, echange;
  60        Xsig accumulator, argSqrd, argTo4;
  61        unsigned long fix_up, adj;
  62        unsigned long long fixed_arg;
  63        FPU_REG result;
  64
  65        exponent = exponent(st0_ptr);
  66
  67        accumulator.lsw = accumulator.midw = accumulator.msw = 0;
  68
  69        /* Split into two ranges, for arguments below and above 1.0 */
  70        /* The boundary between upper and lower is approx 0.88309101259 */
  71        if ((exponent < -1)
  72            || ((exponent == -1) && (st0_ptr->sigh <= 0xe21240aa))) {
  73                /* The argument is <= 0.88309101259 */
  74
  75                argSqrd.msw = st0_ptr->sigh;
  76                argSqrd.midw = st0_ptr->sigl;
  77                argSqrd.lsw = 0;
  78                mul64_Xsig(&argSqrd, &significand(st0_ptr));
  79                shr_Xsig(&argSqrd, 2 * (-1 - exponent));
  80                argTo4.msw = argSqrd.msw;
  81                argTo4.midw = argSqrd.midw;
  82                argTo4.lsw = argSqrd.lsw;
  83                mul_Xsig_Xsig(&argTo4, &argTo4);
  84
  85                polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
  86                                N_COEFF_N - 1);
  87                mul_Xsig_Xsig(&accumulator, &argSqrd);
  88                negate_Xsig(&accumulator);
  89
  90                polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
  91                                N_COEFF_P - 1);
  92
  93                shr_Xsig(&accumulator, 2);      /* Divide by four */
  94                accumulator.msw |= 0x80000000;  /* Add 1.0 */
  95
  96                mul64_Xsig(&accumulator, &significand(st0_ptr));
  97                mul64_Xsig(&accumulator, &significand(st0_ptr));
  98                mul64_Xsig(&accumulator, &significand(st0_ptr));
  99
 100                /* Divide by four, FPU_REG compatible, etc */
 101                exponent = 3 * exponent;
 102
 103                /* The minimum exponent difference is 3 */
 104                shr_Xsig(&accumulator, exponent(st0_ptr) - exponent);
 105
 106                negate_Xsig(&accumulator);
 107                XSIG_LL(accumulator) += significand(st0_ptr);
 108
 109                echange = round_Xsig(&accumulator);
 110
 111                setexponentpos(&result, exponent(st0_ptr) + echange);
 112        } else {
 113                /* The argument is > 0.88309101259 */
 114                /* We use sin(st(0)) = cos(pi/2-st(0)) */
 115
 116                fixed_arg = significand(st0_ptr);
 117
 118                if (exponent == 0) {
 119                        /* The argument is >= 1.0 */
 120
 121                        /* Put the binary point at the left. */
 122                        fixed_arg <<= 1;
 123                }
 124                /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
 125                fixed_arg = 0x921fb54442d18469LL - fixed_arg;
 126                /* There is a special case which arises due to rounding, to fix here. */
 127                if (fixed_arg == 0xffffffffffffffffLL)
 128                        fixed_arg = 0;
 129
 130                XSIG_LL(argSqrd) = fixed_arg;
 131                argSqrd.lsw = 0;
 132                mul64_Xsig(&argSqrd, &fixed_arg);
 133
 134                XSIG_LL(argTo4) = XSIG_LL(argSqrd);
 135                argTo4.lsw = argSqrd.lsw;
 136                mul_Xsig_Xsig(&argTo4, &argTo4);
 137
 138                polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
 139                                N_COEFF_NH - 1);
 140                mul_Xsig_Xsig(&accumulator, &argSqrd);
 141                negate_Xsig(&accumulator);
 142
 143                polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
 144                                N_COEFF_PH - 1);
 145                negate_Xsig(&accumulator);
 146
 147                mul64_Xsig(&accumulator, &fixed_arg);
 148                mul64_Xsig(&accumulator, &fixed_arg);
 149
 150                shr_Xsig(&accumulator, 3);
 151                negate_Xsig(&accumulator);
 152
 153                add_Xsig_Xsig(&accumulator, &argSqrd);
 154
 155                shr_Xsig(&accumulator, 1);
 156
 157                accumulator.lsw |= 1;   /* A zero accumulator here would cause problems */
 158                negate_Xsig(&accumulator);
 159
 160                /* The basic computation is complete. Now fix the answer to
 161                   compensate for the error due to the approximation used for
 162                   pi/2
 163                 */
 164
 165                /* This has an exponent of -65 */
 166                fix_up = 0x898cc517;
 167                /* The fix-up needs to be improved for larger args */
 168                if (argSqrd.msw & 0xffc00000) {
 169                        /* Get about 32 bit precision in these: */
 170                        fix_up -= mul_32_32(0x898cc517, argSqrd.msw) / 6;
 171                }
 172                fix_up = mul_32_32(fix_up, LL_MSW(fixed_arg));
 173
 174                adj = accumulator.lsw;  /* temp save */
 175                accumulator.lsw -= fix_up;
 176                if (accumulator.lsw > adj)
 177                        XSIG_LL(accumulator)--;
 178
 179                echange = round_Xsig(&accumulator);
 180
 181                setexponentpos(&result, echange - 1);
 182        }
 183
 184        significand(&result) = XSIG_LL(accumulator);
 185        setsign(&result, getsign(st0_ptr));
 186        FPU_copy_to_reg0(&result, TAG_Valid);
 187
 188#ifdef PARANOID
 189        if ((exponent(&result) >= 0)
 190            && (significand(&result) > 0x8000000000000000LL)) {
 191                EXCEPTION(EX_INTERNAL | 0x150);
 192        }
 193#endif /* PARANOID */
 194
 195}
 196
 197/*--- poly_cos() ------------------------------------------------------------+
 198 |                                                                           |
 199 +---------------------------------------------------------------------------*/
 200void poly_cos(FPU_REG *st0_ptr)
 201{
 202        FPU_REG result;
 203        long int exponent, exp2, echange;
 204        Xsig accumulator, argSqrd, fix_up, argTo4;
 205        unsigned long long fixed_arg;
 206
 207#ifdef PARANOID
 208        if ((exponent(st0_ptr) > 0)
 209            || ((exponent(st0_ptr) == 0)
 210                && (significand(st0_ptr) > 0xc90fdaa22168c234LL))) {
 211                EXCEPTION(EX_Invalid);
 212                FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
 213                return;
 214        }
 215#endif /* PARANOID */
 216
 217        exponent = exponent(st0_ptr);
 218
 219        accumulator.lsw = accumulator.midw = accumulator.msw = 0;
 220
 221        if ((exponent < -1)
 222            || ((exponent == -1) && (st0_ptr->sigh <= 0xb00d6f54))) {
 223                /* arg is < 0.687705 */
 224
 225                argSqrd.msw = st0_ptr->sigh;
 226                argSqrd.midw = st0_ptr->sigl;
 227                argSqrd.lsw = 0;
 228                mul64_Xsig(&argSqrd, &significand(st0_ptr));
 229
 230                if (exponent < -1) {
 231                        /* shift the argument right by the required places */
 232                        shr_Xsig(&argSqrd, 2 * (-1 - exponent));
 233                }
 234
 235                argTo4.msw = argSqrd.msw;
 236                argTo4.midw = argSqrd.midw;
 237                argTo4.lsw = argSqrd.lsw;
 238                mul_Xsig_Xsig(&argTo4, &argTo4);
 239
 240                polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
 241                                N_COEFF_NH - 1);
 242                mul_Xsig_Xsig(&accumulator, &argSqrd);
 243                negate_Xsig(&accumulator);
 244
 245                polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
 246                                N_COEFF_PH - 1);
 247                negate_Xsig(&accumulator);
 248
 249                mul64_Xsig(&accumulator, &significand(st0_ptr));
 250                mul64_Xsig(&accumulator, &significand(st0_ptr));
 251                shr_Xsig(&accumulator, -2 * (1 + exponent));
 252
 253                shr_Xsig(&accumulator, 3);
 254                negate_Xsig(&accumulator);
 255
 256                add_Xsig_Xsig(&accumulator, &argSqrd);
 257
 258                shr_Xsig(&accumulator, 1);
 259
 260                /* It doesn't matter if accumulator is all zero here, the
 261                   following code will work ok */
 262                negate_Xsig(&accumulator);
 263
 264                if (accumulator.lsw & 0x80000000)
 265                        XSIG_LL(accumulator)++;
 266                if (accumulator.msw == 0) {
 267                        /* The result is 1.0 */
 268                        FPU_copy_to_reg0(&CONST_1, TAG_Valid);
 269                        return;
 270                } else {
 271                        significand(&result) = XSIG_LL(accumulator);
 272
 273                        /* will be a valid positive nr with expon = -1 */
 274                        setexponentpos(&result, -1);
 275                }
 276        } else {
 277                fixed_arg = significand(st0_ptr);
 278
 279                if (exponent == 0) {
 280                        /* The argument is >= 1.0 */
 281
 282                        /* Put the binary point at the left. */
 283                        fixed_arg <<= 1;
 284                }
 285                /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
 286                fixed_arg = 0x921fb54442d18469LL - fixed_arg;
 287                /* There is a special case which arises due to rounding, to fix here. */
 288                if (fixed_arg == 0xffffffffffffffffLL)
 289                        fixed_arg = 0;
 290
 291                exponent = -1;
 292                exp2 = -1;
 293
 294                /* A shift is needed here only for a narrow range of arguments,
 295                   i.e. for fixed_arg approx 2^-32, but we pick up more... */
 296                if (!(LL_MSW(fixed_arg) & 0xffff0000)) {
 297                        fixed_arg <<= 16;
 298                        exponent -= 16;
 299                        exp2 -= 16;
 300                }
 301
 302                XSIG_LL(argSqrd) = fixed_arg;
 303                argSqrd.lsw = 0;
 304                mul64_Xsig(&argSqrd, &fixed_arg);
 305
 306                if (exponent < -1) {
 307                        /* shift the argument right by the required places */
 308                        shr_Xsig(&argSqrd, 2 * (-1 - exponent));
 309                }
 310
 311                argTo4.msw = argSqrd.msw;
 312                argTo4.midw = argSqrd.midw;
 313                argTo4.lsw = argSqrd.lsw;
 314                mul_Xsig_Xsig(&argTo4, &argTo4);
 315
 316                polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
 317                                N_COEFF_N - 1);
 318                mul_Xsig_Xsig(&accumulator, &argSqrd);
 319                negate_Xsig(&accumulator);
 320
 321                polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
 322                                N_COEFF_P - 1);
 323
 324                shr_Xsig(&accumulator, 2);      /* Divide by four */
 325                accumulator.msw |= 0x80000000;  /* Add 1.0 */
 326
 327                mul64_Xsig(&accumulator, &fixed_arg);
 328                mul64_Xsig(&accumulator, &fixed_arg);
 329                mul64_Xsig(&accumulator, &fixed_arg);
 330
 331                /* Divide by four, FPU_REG compatible, etc */
 332                exponent = 3 * exponent;
 333
 334                /* The minimum exponent difference is 3 */
 335                shr_Xsig(&accumulator, exp2 - exponent);
 336
 337                negate_Xsig(&accumulator);
 338                XSIG_LL(accumulator) += fixed_arg;
 339
 340                /* The basic computation is complete. Now fix the answer to
 341                   compensate for the error due to the approximation used for
 342                   pi/2
 343                 */
 344
 345                /* This has an exponent of -65 */
 346                XSIG_LL(fix_up) = 0x898cc51701b839a2ll;
 347                fix_up.lsw = 0;
 348
 349                /* The fix-up needs to be improved for larger args */
 350                if (argSqrd.msw & 0xffc00000) {
 351                        /* Get about 32 bit precision in these: */
 352                        fix_up.msw -= mul_32_32(0x898cc517, argSqrd.msw) / 2;
 353                        fix_up.msw += mul_32_32(0x898cc517, argTo4.msw) / 24;
 354                }
 355
 356                exp2 += norm_Xsig(&accumulator);
 357                shr_Xsig(&accumulator, 1);      /* Prevent overflow */
 358                exp2++;
 359                shr_Xsig(&fix_up, 65 + exp2);
 360
 361                add_Xsig_Xsig(&accumulator, &fix_up);
 362
 363                echange = round_Xsig(&accumulator);
 364
 365                setexponentpos(&result, exp2 + echange);
 366                significand(&result) = XSIG_LL(accumulator);
 367        }
 368
 369        FPU_copy_to_reg0(&result, TAG_Valid);
 370
 371#ifdef PARANOID
 372        if ((exponent(&result) >= 0)
 373            && (significand(&result) > 0x8000000000000000LL)) {
 374                EXCEPTION(EX_INTERNAL | 0x151);
 375        }
 376#endif /* PARANOID */
 377
 378}
 379