linux/arch/x86/math-emu/poly_sin.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*---------------------------------------------------------------------------+
   3 |  poly_sin.c                                                               |
   4 |                                                                           |
   5 |  Computation of an approximation of the sin function and the cosine       |
   6 |  function by a polynomial.                                                |
   7 |                                                                           |
   8 | Copyright (C) 1992,1993,1994,1997,1999                                    |
   9 |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
  10 |                  E-mail   billm@melbpc.org.au                             |
  11 |                                                                           |
  12 |                                                                           |
  13 +---------------------------------------------------------------------------*/
  14
  15#include "exception.h"
  16#include "reg_constant.h"
  17#include "fpu_emu.h"
  18#include "fpu_system.h"
  19#include "control_w.h"
  20#include "poly.h"
  21
  22#define N_COEFF_P       4
  23#define N_COEFF_N       4
  24
  25static const unsigned long long pos_terms_l[N_COEFF_P] = {
  26        0xaaaaaaaaaaaaaaabLL,
  27        0x00d00d00d00cf906LL,
  28        0x000006b99159a8bbLL,
  29        0x000000000d7392e6LL
  30};
  31
  32static const unsigned long long neg_terms_l[N_COEFF_N] = {
  33        0x2222222222222167LL,
  34        0x0002e3bc74aab624LL,
  35        0x0000000b09229062LL,
  36        0x00000000000c7973LL
  37};
  38
  39#define N_COEFF_PH      4
  40#define N_COEFF_NH      4
  41static const unsigned long long pos_terms_h[N_COEFF_PH] = {
  42        0x0000000000000000LL,
  43        0x05b05b05b05b0406LL,
  44        0x000049f93edd91a9LL,
  45        0x00000000c9c9ed62LL
  46};
  47
  48static const unsigned long long neg_terms_h[N_COEFF_NH] = {
  49        0xaaaaaaaaaaaaaa98LL,
  50        0x001a01a01a019064LL,
  51        0x0000008f76c68a77LL,
  52        0x0000000000d58f5eLL
  53};
  54
  55/*--- poly_sine() -----------------------------------------------------------+
  56 |                                                                           |
  57 +---------------------------------------------------------------------------*/
  58void poly_sine(FPU_REG *st0_ptr)
  59{
  60        int exponent, echange;
  61        Xsig accumulator, argSqrd, argTo4;
  62        unsigned long fix_up, adj;
  63        unsigned long long fixed_arg;
  64        FPU_REG result;
  65
  66        exponent = exponent(st0_ptr);
  67
  68        accumulator.lsw = accumulator.midw = accumulator.msw = 0;
  69
  70        /* Split into two ranges, for arguments below and above 1.0 */
  71        /* The boundary between upper and lower is approx 0.88309101259 */
  72        if ((exponent < -1)
  73            || ((exponent == -1) && (st0_ptr->sigh <= 0xe21240aa))) {
  74                /* The argument is <= 0.88309101259 */
  75
  76                argSqrd.msw = st0_ptr->sigh;
  77                argSqrd.midw = st0_ptr->sigl;
  78                argSqrd.lsw = 0;
  79                mul64_Xsig(&argSqrd, &significand(st0_ptr));
  80                shr_Xsig(&argSqrd, 2 * (-1 - exponent));
  81                argTo4.msw = argSqrd.msw;
  82                argTo4.midw = argSqrd.midw;
  83                argTo4.lsw = argSqrd.lsw;
  84                mul_Xsig_Xsig(&argTo4, &argTo4);
  85
  86                polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
  87                                N_COEFF_N - 1);
  88                mul_Xsig_Xsig(&accumulator, &argSqrd);
  89                negate_Xsig(&accumulator);
  90
  91                polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
  92                                N_COEFF_P - 1);
  93
  94                shr_Xsig(&accumulator, 2);      /* Divide by four */
  95                accumulator.msw |= 0x80000000;  /* Add 1.0 */
  96
  97                mul64_Xsig(&accumulator, &significand(st0_ptr));
  98                mul64_Xsig(&accumulator, &significand(st0_ptr));
  99                mul64_Xsig(&accumulator, &significand(st0_ptr));
 100
 101                /* Divide by four, FPU_REG compatible, etc */
 102                exponent = 3 * exponent;
 103
 104                /* The minimum exponent difference is 3 */
 105                shr_Xsig(&accumulator, exponent(st0_ptr) - exponent);
 106
 107                negate_Xsig(&accumulator);
 108                XSIG_LL(accumulator) += significand(st0_ptr);
 109
 110                echange = round_Xsig(&accumulator);
 111
 112                setexponentpos(&result, exponent(st0_ptr) + echange);
 113        } else {
 114                /* The argument is > 0.88309101259 */
 115                /* We use sin(st(0)) = cos(pi/2-st(0)) */
 116
 117                fixed_arg = significand(st0_ptr);
 118
 119                if (exponent == 0) {
 120                        /* The argument is >= 1.0 */
 121
 122                        /* Put the binary point at the left. */
 123                        fixed_arg <<= 1;
 124                }
 125                /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
 126                fixed_arg = 0x921fb54442d18469LL - fixed_arg;
 127                /* There is a special case which arises due to rounding, to fix here. */
 128                if (fixed_arg == 0xffffffffffffffffLL)
 129                        fixed_arg = 0;
 130
 131                XSIG_LL(argSqrd) = fixed_arg;
 132                argSqrd.lsw = 0;
 133                mul64_Xsig(&argSqrd, &fixed_arg);
 134
 135                XSIG_LL(argTo4) = XSIG_LL(argSqrd);
 136                argTo4.lsw = argSqrd.lsw;
 137                mul_Xsig_Xsig(&argTo4, &argTo4);
 138
 139                polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
 140                                N_COEFF_NH - 1);
 141                mul_Xsig_Xsig(&accumulator, &argSqrd);
 142                negate_Xsig(&accumulator);
 143
 144                polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
 145                                N_COEFF_PH - 1);
 146                negate_Xsig(&accumulator);
 147
 148                mul64_Xsig(&accumulator, &fixed_arg);
 149                mul64_Xsig(&accumulator, &fixed_arg);
 150
 151                shr_Xsig(&accumulator, 3);
 152                negate_Xsig(&accumulator);
 153
 154                add_Xsig_Xsig(&accumulator, &argSqrd);
 155
 156                shr_Xsig(&accumulator, 1);
 157
 158                accumulator.lsw |= 1;   /* A zero accumulator here would cause problems */
 159                negate_Xsig(&accumulator);
 160
 161                /* The basic computation is complete. Now fix the answer to
 162                   compensate for the error due to the approximation used for
 163                   pi/2
 164                 */
 165
 166                /* This has an exponent of -65 */
 167                fix_up = 0x898cc517;
 168                /* The fix-up needs to be improved for larger args */
 169                if (argSqrd.msw & 0xffc00000) {
 170                        /* Get about 32 bit precision in these: */
 171                        fix_up -= mul_32_32(0x898cc517, argSqrd.msw) / 6;
 172                }
 173                fix_up = mul_32_32(fix_up, LL_MSW(fixed_arg));
 174
 175                adj = accumulator.lsw;  /* temp save */
 176                accumulator.lsw -= fix_up;
 177                if (accumulator.lsw > adj)
 178                        XSIG_LL(accumulator)--;
 179
 180                echange = round_Xsig(&accumulator);
 181
 182                setexponentpos(&result, echange - 1);
 183        }
 184
 185        significand(&result) = XSIG_LL(accumulator);
 186        setsign(&result, getsign(st0_ptr));
 187        FPU_copy_to_reg0(&result, TAG_Valid);
 188
 189#ifdef PARANOID
 190        if ((exponent(&result) >= 0)
 191            && (significand(&result) > 0x8000000000000000LL)) {
 192                EXCEPTION(EX_INTERNAL | 0x150);
 193        }
 194#endif /* PARANOID */
 195
 196}
 197
 198/*--- poly_cos() ------------------------------------------------------------+
 199 |                                                                           |
 200 +---------------------------------------------------------------------------*/
 201void poly_cos(FPU_REG *st0_ptr)
 202{
 203        FPU_REG result;
 204        long int exponent, exp2, echange;
 205        Xsig accumulator, argSqrd, fix_up, argTo4;
 206        unsigned long long fixed_arg;
 207
 208#ifdef PARANOID
 209        if ((exponent(st0_ptr) > 0)
 210            || ((exponent(st0_ptr) == 0)
 211                && (significand(st0_ptr) > 0xc90fdaa22168c234LL))) {
 212                EXCEPTION(EX_Invalid);
 213                FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
 214                return;
 215        }
 216#endif /* PARANOID */
 217
 218        exponent = exponent(st0_ptr);
 219
 220        accumulator.lsw = accumulator.midw = accumulator.msw = 0;
 221
 222        if ((exponent < -1)
 223            || ((exponent == -1) && (st0_ptr->sigh <= 0xb00d6f54))) {
 224                /* arg is < 0.687705 */
 225
 226                argSqrd.msw = st0_ptr->sigh;
 227                argSqrd.midw = st0_ptr->sigl;
 228                argSqrd.lsw = 0;
 229                mul64_Xsig(&argSqrd, &significand(st0_ptr));
 230
 231                if (exponent < -1) {
 232                        /* shift the argument right by the required places */
 233                        shr_Xsig(&argSqrd, 2 * (-1 - exponent));
 234                }
 235
 236                argTo4.msw = argSqrd.msw;
 237                argTo4.midw = argSqrd.midw;
 238                argTo4.lsw = argSqrd.lsw;
 239                mul_Xsig_Xsig(&argTo4, &argTo4);
 240
 241                polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
 242                                N_COEFF_NH - 1);
 243                mul_Xsig_Xsig(&accumulator, &argSqrd);
 244                negate_Xsig(&accumulator);
 245
 246                polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
 247                                N_COEFF_PH - 1);
 248                negate_Xsig(&accumulator);
 249
 250                mul64_Xsig(&accumulator, &significand(st0_ptr));
 251                mul64_Xsig(&accumulator, &significand(st0_ptr));
 252                shr_Xsig(&accumulator, -2 * (1 + exponent));
 253
 254                shr_Xsig(&accumulator, 3);
 255                negate_Xsig(&accumulator);
 256
 257                add_Xsig_Xsig(&accumulator, &argSqrd);
 258
 259                shr_Xsig(&accumulator, 1);
 260
 261                /* It doesn't matter if accumulator is all zero here, the
 262                   following code will work ok */
 263                negate_Xsig(&accumulator);
 264
 265                if (accumulator.lsw & 0x80000000)
 266                        XSIG_LL(accumulator)++;
 267                if (accumulator.msw == 0) {
 268                        /* The result is 1.0 */
 269                        FPU_copy_to_reg0(&CONST_1, TAG_Valid);
 270                        return;
 271                } else {
 272                        significand(&result) = XSIG_LL(accumulator);
 273
 274                        /* will be a valid positive nr with expon = -1 */
 275                        setexponentpos(&result, -1);
 276                }
 277        } else {
 278                fixed_arg = significand(st0_ptr);
 279
 280                if (exponent == 0) {
 281                        /* The argument is >= 1.0 */
 282
 283                        /* Put the binary point at the left. */
 284                        fixed_arg <<= 1;
 285                }
 286                /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
 287                fixed_arg = 0x921fb54442d18469LL - fixed_arg;
 288                /* There is a special case which arises due to rounding, to fix here. */
 289                if (fixed_arg == 0xffffffffffffffffLL)
 290                        fixed_arg = 0;
 291
 292                exponent = -1;
 293                exp2 = -1;
 294
 295                /* A shift is needed here only for a narrow range of arguments,
 296                   i.e. for fixed_arg approx 2^-32, but we pick up more... */
 297                if (!(LL_MSW(fixed_arg) & 0xffff0000)) {
 298                        fixed_arg <<= 16;
 299                        exponent -= 16;
 300                        exp2 -= 16;
 301                }
 302
 303                XSIG_LL(argSqrd) = fixed_arg;
 304                argSqrd.lsw = 0;
 305                mul64_Xsig(&argSqrd, &fixed_arg);
 306
 307                if (exponent < -1) {
 308                        /* shift the argument right by the required places */
 309                        shr_Xsig(&argSqrd, 2 * (-1 - exponent));
 310                }
 311
 312                argTo4.msw = argSqrd.msw;
 313                argTo4.midw = argSqrd.midw;
 314                argTo4.lsw = argSqrd.lsw;
 315                mul_Xsig_Xsig(&argTo4, &argTo4);
 316
 317                polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
 318                                N_COEFF_N - 1);
 319                mul_Xsig_Xsig(&accumulator, &argSqrd);
 320                negate_Xsig(&accumulator);
 321
 322                polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
 323                                N_COEFF_P - 1);
 324
 325                shr_Xsig(&accumulator, 2);      /* Divide by four */
 326                accumulator.msw |= 0x80000000;  /* Add 1.0 */
 327
 328                mul64_Xsig(&accumulator, &fixed_arg);
 329                mul64_Xsig(&accumulator, &fixed_arg);
 330                mul64_Xsig(&accumulator, &fixed_arg);
 331
 332                /* Divide by four, FPU_REG compatible, etc */
 333                exponent = 3 * exponent;
 334
 335                /* The minimum exponent difference is 3 */
 336                shr_Xsig(&accumulator, exp2 - exponent);
 337
 338                negate_Xsig(&accumulator);
 339                XSIG_LL(accumulator) += fixed_arg;
 340
 341                /* The basic computation is complete. Now fix the answer to
 342                   compensate for the error due to the approximation used for
 343                   pi/2
 344                 */
 345
 346                /* This has an exponent of -65 */
 347                XSIG_LL(fix_up) = 0x898cc51701b839a2ll;
 348                fix_up.lsw = 0;
 349
 350                /* The fix-up needs to be improved for larger args */
 351                if (argSqrd.msw & 0xffc00000) {
 352                        /* Get about 32 bit precision in these: */
 353                        fix_up.msw -= mul_32_32(0x898cc517, argSqrd.msw) / 2;
 354                        fix_up.msw += mul_32_32(0x898cc517, argTo4.msw) / 24;
 355                }
 356
 357                exp2 += norm_Xsig(&accumulator);
 358                shr_Xsig(&accumulator, 1);      /* Prevent overflow */
 359                exp2++;
 360                shr_Xsig(&fix_up, 65 + exp2);
 361
 362                add_Xsig_Xsig(&accumulator, &fix_up);
 363
 364                echange = round_Xsig(&accumulator);
 365
 366                setexponentpos(&result, exp2 + echange);
 367                significand(&result) = XSIG_LL(accumulator);
 368        }
 369
 370        FPU_copy_to_reg0(&result, TAG_Valid);
 371
 372#ifdef PARANOID
 373        if ((exponent(&result) >= 0)
 374            && (significand(&result) > 0x8000000000000000LL)) {
 375                EXCEPTION(EX_INTERNAL | 0x151);
 376        }
 377#endif /* PARANOID */
 378
 379}
 380