linux/arch/parisc/math-emu/fmpyfadd.c
<<
>>
Prefs
   1/*
   2 * Linux/PA-RISC Project (http://www.parisc-linux.org/)
   3 *
   4 * Floating-point emulation code
   5 *  Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org>
   6 *
   7 *    This program is free software; you can redistribute it and/or modify
   8 *    it under the terms of the GNU General Public License as published by
   9 *    the Free Software Foundation; either version 2, or (at your option)
  10 *    any later version.
  11 *
  12 *    This program is distributed in the hope that it will be useful,
  13 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 *    GNU General Public License for more details.
  16 *
  17 *    You should have received a copy of the GNU General Public License
  18 *    along with this program; if not, write to the Free Software
  19 *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  20 */
  21/*
  22 * BEGIN_DESC
  23 *
  24 *  File:
  25 *      @(#)    pa/spmath/fmpyfadd.c            $Revision: 1.1 $
  26 *
  27 *  Purpose:
  28 *      Double Floating-point Multiply Fused Add
  29 *      Double Floating-point Multiply Negate Fused Add
  30 *      Single Floating-point Multiply Fused Add
  31 *      Single Floating-point Multiply Negate Fused Add
  32 *
  33 *  External Interfaces:
  34 *      dbl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
  35 *      dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
  36 *      sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
  37 *      sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
  38 *
  39 *  Internal Interfaces:
  40 *
  41 *  Theory:
  42 *      <<please update with a overview of the operation of this file>>
  43 *
  44 * END_DESC
  45*/
  46
  47
  48#include "float.h"
  49#include "sgl_float.h"
  50#include "dbl_float.h"
  51
  52
  53/*
  54 *  Double Floating-point Multiply Fused Add
  55 */
  56
  57int
  58dbl_fmpyfadd(
  59            dbl_floating_point *src1ptr,
  60            dbl_floating_point *src2ptr,
  61            dbl_floating_point *src3ptr,
  62            unsigned int *status,
  63            dbl_floating_point *dstptr)
  64{
  65        unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
  66        register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
  67        unsigned int rightp1, rightp2, rightp3, rightp4;
  68        unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0;
  69        register int mpy_exponent, add_exponent, count;
  70        boolean inexact = FALSE, is_tiny = FALSE;
  71
  72        unsigned int signlessleft1, signlessright1, save;
  73        register int result_exponent, diff_exponent;
  74        int sign_save, jumpsize;
  75        
  76        Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
  77        Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
  78        Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
  79
  80        /* 
  81         * set sign bit of result of multiply
  82         */
  83        if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1)) 
  84                Dbl_setnegativezerop1(resultp1); 
  85        else Dbl_setzerop1(resultp1);
  86
  87        /*
  88         * Generate multiply exponent 
  89         */
  90        mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
  91
  92        /*
  93         * check first operand for NaN's or infinity
  94         */
  95        if (Dbl_isinfinity_exponent(opnd1p1)) {
  96                if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
  97                        if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
  98                            Dbl_isnotnan(opnd3p1,opnd3p2)) {
  99                                if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
 100                                        /* 
 101                                         * invalid since operands are infinity 
 102                                         * and zero 
 103                                         */
 104                                        if (Is_invalidtrap_enabled())
 105                                                return(OPC_2E_INVALIDEXCEPTION);
 106                                        Set_invalidflag();
 107                                        Dbl_makequietnan(resultp1,resultp2);
 108                                        Dbl_copytoptr(resultp1,resultp2,dstptr);
 109                                        return(NOEXCEPTION);
 110                                }
 111                                /*
 112                                 * Check third operand for infinity with a
 113                                 *  sign opposite of the multiply result
 114                                 */
 115                                if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
 116                                    (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
 117                                        /* 
 118                                         * invalid since attempting a magnitude
 119                                         * subtraction of infinities
 120                                         */
 121                                        if (Is_invalidtrap_enabled())
 122                                                return(OPC_2E_INVALIDEXCEPTION);
 123                                        Set_invalidflag();
 124                                        Dbl_makequietnan(resultp1,resultp2);
 125                                        Dbl_copytoptr(resultp1,resultp2,dstptr);
 126                                        return(NOEXCEPTION);
 127                                }
 128
 129                                /*
 130                                 * return infinity
 131                                 */
 132                                Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
 133                                Dbl_copytoptr(resultp1,resultp2,dstptr);
 134                                return(NOEXCEPTION);
 135                        }
 136                }
 137                else {
 138                        /*
 139                         * is NaN; signaling or quiet?
 140                         */
 141                        if (Dbl_isone_signaling(opnd1p1)) {
 142                                /* trap if INVALIDTRAP enabled */
 143                                if (Is_invalidtrap_enabled()) 
 144                                        return(OPC_2E_INVALIDEXCEPTION);
 145                                /* make NaN quiet */
 146                                Set_invalidflag();
 147                                Dbl_set_quiet(opnd1p1);
 148                        }
 149                        /* 
 150                         * is second operand a signaling NaN? 
 151                         */
 152                        else if (Dbl_is_signalingnan(opnd2p1)) {
 153                                /* trap if INVALIDTRAP enabled */
 154                                if (Is_invalidtrap_enabled())
 155                                        return(OPC_2E_INVALIDEXCEPTION);
 156                                /* make NaN quiet */
 157                                Set_invalidflag();
 158                                Dbl_set_quiet(opnd2p1);
 159                                Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
 160                                return(NOEXCEPTION);
 161                        }
 162                        /* 
 163                         * is third operand a signaling NaN? 
 164                         */
 165                        else if (Dbl_is_signalingnan(opnd3p1)) {
 166                                /* trap if INVALIDTRAP enabled */
 167                                if (Is_invalidtrap_enabled())
 168                                        return(OPC_2E_INVALIDEXCEPTION);
 169                                /* make NaN quiet */
 170                                Set_invalidflag();
 171                                Dbl_set_quiet(opnd3p1);
 172                                Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
 173                                return(NOEXCEPTION);
 174                        }
 175                        /*
 176                         * return quiet NaN
 177                         */
 178                        Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
 179                        return(NOEXCEPTION);
 180                }
 181        }
 182
 183        /*
 184         * check second operand for NaN's or infinity
 185         */
 186        if (Dbl_isinfinity_exponent(opnd2p1)) {
 187                if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
 188                        if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
 189                                if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
 190                                        /* 
 191                                         * invalid since multiply operands are
 192                                         * zero & infinity
 193                                         */
 194                                        if (Is_invalidtrap_enabled())
 195                                                return(OPC_2E_INVALIDEXCEPTION);
 196                                        Set_invalidflag();
 197                                        Dbl_makequietnan(opnd2p1,opnd2p2);
 198                                        Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
 199                                        return(NOEXCEPTION);
 200                                }
 201
 202                                /*
 203                                 * Check third operand for infinity with a
 204                                 *  sign opposite of the multiply result
 205                                 */
 206                                if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
 207                                    (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
 208                                        /* 
 209                                         * invalid since attempting a magnitude
 210                                         * subtraction of infinities
 211                                         */
 212                                        if (Is_invalidtrap_enabled())
 213                                                return(OPC_2E_INVALIDEXCEPTION);
 214                                        Set_invalidflag();
 215                                        Dbl_makequietnan(resultp1,resultp2);
 216                                        Dbl_copytoptr(resultp1,resultp2,dstptr);
 217                                        return(NOEXCEPTION);
 218                                }
 219
 220                                /*
 221                                 * return infinity
 222                                 */
 223                                Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
 224                                Dbl_copytoptr(resultp1,resultp2,dstptr);
 225                                return(NOEXCEPTION);
 226                        }
 227                }
 228                else {
 229                        /*
 230                         * is NaN; signaling or quiet?
 231                         */
 232                        if (Dbl_isone_signaling(opnd2p1)) {
 233                                /* trap if INVALIDTRAP enabled */
 234                                if (Is_invalidtrap_enabled())
 235                                        return(OPC_2E_INVALIDEXCEPTION);
 236                                /* make NaN quiet */
 237                                Set_invalidflag();
 238                                Dbl_set_quiet(opnd2p1);
 239                        }
 240                        /* 
 241                         * is third operand a signaling NaN? 
 242                         */
 243                        else if (Dbl_is_signalingnan(opnd3p1)) {
 244                                /* trap if INVALIDTRAP enabled */
 245                                if (Is_invalidtrap_enabled())
 246                                                return(OPC_2E_INVALIDEXCEPTION);
 247                                /* make NaN quiet */
 248                                Set_invalidflag();
 249                                Dbl_set_quiet(opnd3p1);
 250                                Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
 251                                return(NOEXCEPTION);
 252                        }
 253                        /*
 254                         * return quiet NaN
 255                         */
 256                        Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
 257                        return(NOEXCEPTION);
 258                }
 259        }
 260
 261        /*
 262         * check third operand for NaN's or infinity
 263         */
 264        if (Dbl_isinfinity_exponent(opnd3p1)) {
 265                if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
 266                        /* return infinity */
 267                        Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
 268                        return(NOEXCEPTION);
 269                } else {
 270                        /*
 271                         * is NaN; signaling or quiet?
 272                         */
 273                        if (Dbl_isone_signaling(opnd3p1)) {
 274                                /* trap if INVALIDTRAP enabled */
 275                                if (Is_invalidtrap_enabled())
 276                                        return(OPC_2E_INVALIDEXCEPTION);
 277                                /* make NaN quiet */
 278                                Set_invalidflag();
 279                                Dbl_set_quiet(opnd3p1);
 280                        }
 281                        /*
 282                         * return quiet NaN
 283                         */
 284                        Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
 285                        return(NOEXCEPTION);
 286                }
 287        }
 288
 289        /*
 290         * Generate multiply mantissa
 291         */
 292        if (Dbl_isnotzero_exponent(opnd1p1)) {
 293                /* set hidden bit */
 294                Dbl_clear_signexponent_set_hidden(opnd1p1);
 295        }
 296        else {
 297                /* check for zero */
 298                if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
 299                        /*
 300                         * Perform the add opnd3 with zero here.
 301                         */
 302                        if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
 303                                if (Is_rounding_mode(ROUNDMINUS)) {
 304                                        Dbl_or_signs(opnd3p1,resultp1);
 305                                } else {
 306                                        Dbl_and_signs(opnd3p1,resultp1);
 307                                }
 308                        }
 309                        /*
 310                         * Now let's check for trapped underflow case.
 311                         */
 312                        else if (Dbl_iszero_exponent(opnd3p1) &&
 313                                 Is_underflowtrap_enabled()) {
 314                                /* need to normalize results mantissa */
 315                                sign_save = Dbl_signextendedsign(opnd3p1);
 316                                result_exponent = 0;
 317                                Dbl_leftshiftby1(opnd3p1,opnd3p2);
 318                                Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
 319                                Dbl_set_sign(opnd3p1,/*using*/sign_save);
 320                                Dbl_setwrapped_exponent(opnd3p1,result_exponent,
 321                                                        unfl);
 322                                Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
 323                                /* inexact = FALSE */
 324                                return(OPC_2E_UNDERFLOWEXCEPTION);
 325                        }
 326                        Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
 327                        return(NOEXCEPTION);
 328                }
 329                /* is denormalized, adjust exponent */
 330                Dbl_clear_signexponent(opnd1p1);
 331                Dbl_leftshiftby1(opnd1p1,opnd1p2);
 332                Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
 333        }
 334        /* opnd2 needs to have hidden bit set with msb in hidden bit */
 335        if (Dbl_isnotzero_exponent(opnd2p1)) {
 336                Dbl_clear_signexponent_set_hidden(opnd2p1);
 337        }
 338        else {
 339                /* check for zero */
 340                if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
 341                        /*
 342                         * Perform the add opnd3 with zero here.
 343                         */
 344                        if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
 345                                if (Is_rounding_mode(ROUNDMINUS)) {
 346                                        Dbl_or_signs(opnd3p1,resultp1);
 347                                } else {
 348                                        Dbl_and_signs(opnd3p1,resultp1);
 349                                }
 350                        }
 351                        /*
 352                         * Now let's check for trapped underflow case.
 353                         */
 354                        else if (Dbl_iszero_exponent(opnd3p1) &&
 355                            Is_underflowtrap_enabled()) {
 356                                /* need to normalize results mantissa */
 357                                sign_save = Dbl_signextendedsign(opnd3p1);
 358                                result_exponent = 0;
 359                                Dbl_leftshiftby1(opnd3p1,opnd3p2);
 360                                Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
 361                                Dbl_set_sign(opnd3p1,/*using*/sign_save);
 362                                Dbl_setwrapped_exponent(opnd3p1,result_exponent,
 363                                                        unfl);
 364                                Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
 365                                /* inexact = FALSE */
 366                                return(OPC_2E_UNDERFLOWEXCEPTION);
 367                        }
 368                        Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
 369                        return(NOEXCEPTION);
 370                }
 371                /* is denormalized; want to normalize */
 372                Dbl_clear_signexponent(opnd2p1);
 373                Dbl_leftshiftby1(opnd2p1,opnd2p2);
 374                Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
 375        }
 376
 377        /* Multiply the first two source mantissas together */
 378
 379        /* 
 380         * The intermediate result will be kept in tmpres,
 381         * which needs enough room for 106 bits of mantissa,
 382         * so lets call it a Double extended.
 383         */
 384        Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
 385
 386        /* 
 387         * Four bits at a time are inspected in each loop, and a 
 388         * simple shift and add multiply algorithm is used. 
 389         */ 
 390        for (count = DBL_P-1; count >= 0; count -= 4) {
 391                Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
 392                if (Dbit28p2(opnd1p2)) {
 393                        /* Fourword_add should be an ADD followed by 3 ADDC's */
 394                        Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4, 
 395                         opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0);
 396                }
 397                if (Dbit29p2(opnd1p2)) {
 398                        Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
 399                         opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0);
 400                }
 401                if (Dbit30p2(opnd1p2)) {
 402                        Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
 403                         opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0);
 404                }
 405                if (Dbit31p2(opnd1p2)) {
 406                        Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
 407                         opnd2p1, opnd2p2, 0, 0);
 408                }
 409                Dbl_rightshiftby4(opnd1p1,opnd1p2);
 410        }
 411        if (Is_dexthiddenoverflow(tmpresp1)) {
 412                /* result mantissa >= 2 (mantissa overflow) */
 413                mpy_exponent++;
 414                Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
 415        }
 416
 417        /*
 418         * Restore the sign of the mpy result which was saved in resultp1.
 419         * The exponent will continue to be kept in mpy_exponent.
 420         */
 421        Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
 422
 423        /* 
 424         * No rounding is required, since the result of the multiply
 425         * is exact in the extended format.
 426         */
 427
 428        /*
 429         * Now we are ready to perform the add portion of the operation.
 430         *
 431         * The exponents need to be kept as integers for now, since the
 432         * multiply result might not fit into the exponent field.  We
 433         * can't overflow or underflow because of this yet, since the
 434         * add could bring the final result back into range.
 435         */
 436        add_exponent = Dbl_exponent(opnd3p1);
 437
 438        /*
 439         * Check for denormalized or zero add operand.
 440         */
 441        if (add_exponent == 0) {
 442                /* check for zero */
 443                if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
 444                        /* right is zero */
 445                        /* Left can't be zero and must be result.
 446                         *
 447                         * The final result is now in tmpres and mpy_exponent,
 448                         * and needs to be rounded and squeezed back into
 449                         * double precision format from double extended.
 450                         */
 451                        result_exponent = mpy_exponent;
 452                        Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
 453                                resultp1,resultp2,resultp3,resultp4);
 454                        sign_save = Dbl_signextendedsign(resultp1);/*save sign*/
 455                        goto round;
 456                }
 457
 458                /* 
 459                 * Neither are zeroes.  
 460                 * Adjust exponent and normalize add operand.
 461                 */
 462                sign_save = Dbl_signextendedsign(opnd3p1);      /* save sign */
 463                Dbl_clear_signexponent(opnd3p1);
 464                Dbl_leftshiftby1(opnd3p1,opnd3p2);
 465                Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
 466                Dbl_set_sign(opnd3p1,sign_save);        /* restore sign */
 467        } else {
 468                Dbl_clear_exponent_set_hidden(opnd3p1);
 469        }
 470        /*
 471         * Copy opnd3 to the double extended variable called right.
 472         */
 473        Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
 474
 475        /*
 476         * A zero "save" helps discover equal operands (for later),
 477         * and is used in swapping operands (if needed).
 478         */
 479        Dblext_xortointp1(tmpresp1,rightp1,/*to*/save);
 480
 481        /*
 482         * Compare magnitude of operands.
 483         */
 484        Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
 485        Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
 486        if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
 487            Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
 488                /*
 489                 * Set the left operand to the larger one by XOR swap.
 490                 * First finish the first word "save".
 491                 */
 492                Dblext_xorfromintp1(save,rightp1,/*to*/rightp1);
 493                Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
 494                Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
 495                        rightp2,rightp3,rightp4);
 496                /* also setup exponents used in rest of routine */
 497                diff_exponent = add_exponent - mpy_exponent;
 498                result_exponent = add_exponent;
 499        } else {
 500                /* also setup exponents used in rest of routine */
 501                diff_exponent = mpy_exponent - add_exponent;
 502                result_exponent = mpy_exponent;
 503        }
 504        /* Invariant: left is not smaller than right. */
 505
 506        /*
 507         * Special case alignment of operands that would force alignment
 508         * beyond the extent of the extension.  A further optimization
 509         * could special case this but only reduces the path length for
 510         * this infrequent case.
 511         */
 512        if (diff_exponent > DBLEXT_THRESHOLD) {
 513                diff_exponent = DBLEXT_THRESHOLD;
 514        }
 515
 516        /* Align right operand by shifting it to the right */
 517        Dblext_clear_sign(rightp1);
 518        Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
 519                /*shifted by*/diff_exponent);
 520        
 521        /* Treat sum and difference of the operands separately. */
 522        if ((int)save < 0) {
 523                /*
 524                 * Difference of the two operands.  Overflow can occur if the
 525                 * multiply overflowed.  A borrow can occur out of the hidden
 526                 * bit and force a post normalization phase.
 527                 */
 528                Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
 529                        rightp1,rightp2,rightp3,rightp4,
 530                        resultp1,resultp2,resultp3,resultp4);
 531                sign_save = Dbl_signextendedsign(resultp1);
 532                if (Dbl_iszero_hidden(resultp1)) {
 533                        /* Handle normalization */
 534                /* A straight foward algorithm would now shift the
 535                 * result and extension left until the hidden bit
 536                 * becomes one.  Not all of the extension bits need
 537                 * participate in the shift.  Only the two most 
 538                 * significant bits (round and guard) are needed.
 539                 * If only a single shift is needed then the guard
 540                 * bit becomes a significant low order bit and the
 541                 * extension must participate in the rounding.
 542                 * If more than a single shift is needed, then all
 543                 * bits to the right of the guard bit are zeros, 
 544                 * and the guard bit may or may not be zero. */
 545                        Dblext_leftshiftby1(resultp1,resultp2,resultp3,
 546                                resultp4);
 547
 548                        /* Need to check for a zero result.  The sign and
 549                         * exponent fields have already been zeroed.  The more
 550                         * efficient test of the full object can be used.
 551                         */
 552                         if(Dblext_iszero(resultp1,resultp2,resultp3,resultp4)){
 553                                /* Must have been "x-x" or "x+(-x)". */
 554                                if (Is_rounding_mode(ROUNDMINUS))
 555                                        Dbl_setone_sign(resultp1);
 556                                Dbl_copytoptr(resultp1,resultp2,dstptr);
 557                                return(NOEXCEPTION);
 558                        }
 559                        result_exponent--;
 560
 561                        /* Look to see if normalization is finished. */
 562                        if (Dbl_isone_hidden(resultp1)) {
 563                                /* No further normalization is needed */
 564                                goto round;
 565                        }
 566
 567                        /* Discover first one bit to determine shift amount.
 568                         * Use a modified binary search.  We have already
 569                         * shifted the result one position right and still
 570                         * not found a one so the remainder of the extension
 571                         * must be zero and simplifies rounding. */
 572                        /* Scan bytes */
 573                        while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
 574                                Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
 575                                result_exponent -= 8;
 576                        }
 577                        /* Now narrow it down to the nibble */
 578                        if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
 579                                /* The lower nibble contains the
 580                                 * normalizing one */
 581                                Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
 582                                result_exponent -= 4;
 583                        }
 584                        /* Select case where first bit is set (already
 585                         * normalized) otherwise select the proper shift. */
 586                        jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
 587                        if (jumpsize <= 7) switch(jumpsize) {
 588                        case 1:
 589                                Dblext_leftshiftby3(resultp1,resultp2,resultp3,
 590                                        resultp4);
 591                                result_exponent -= 3;
 592                                break;
 593                        case 2:
 594                        case 3:
 595                                Dblext_leftshiftby2(resultp1,resultp2,resultp3,
 596                                        resultp4);
 597                                result_exponent -= 2;
 598                                break;
 599                        case 4:
 600                        case 5:
 601                        case 6:
 602                        case 7:
 603                                Dblext_leftshiftby1(resultp1,resultp2,resultp3,
 604                                        resultp4);
 605                                result_exponent -= 1;
 606                                break;
 607                        }
 608                } /* end if (hidden...)... */
 609        /* Fall through and round */
 610        } /* end if (save < 0)... */
 611        else {
 612                /* Add magnitudes */
 613                Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
 614                        rightp1,rightp2,rightp3,rightp4,
 615                        /*to*/resultp1,resultp2,resultp3,resultp4);
 616                sign_save = Dbl_signextendedsign(resultp1);
 617                if (Dbl_isone_hiddenoverflow(resultp1)) {
 618                        /* Prenormalization required. */
 619                        Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
 620                                resultp4);
 621                        result_exponent++;
 622                } /* end if hiddenoverflow... */
 623        } /* end else ...add magnitudes... */
 624
 625        /* Round the result.  If the extension and lower two words are
 626         * all zeros, then the result is exact.  Otherwise round in the
 627         * correct direction.  Underflow is possible. If a postnormalization
 628         * is necessary, then the mantissa is all zeros so no shift is needed.
 629         */
 630  round:
 631        if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
 632                Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
 633                        result_exponent,is_tiny);
 634        }
 635        Dbl_set_sign(resultp1,/*using*/sign_save);
 636        if (Dblext_isnotzero_mantissap3(resultp3) || 
 637            Dblext_isnotzero_mantissap4(resultp4)) {
 638                inexact = TRUE;
 639                switch(Rounding_mode()) {
 640                case ROUNDNEAREST: /* The default. */
 641                        if (Dblext_isone_highp3(resultp3)) {
 642                                /* at least 1/2 ulp */
 643                                if (Dblext_isnotzero_low31p3(resultp3) ||
 644                                    Dblext_isnotzero_mantissap4(resultp4) ||
 645                                    Dblext_isone_lowp2(resultp2)) {
 646                                        /* either exactly half way and odd or
 647                                         * more than 1/2ulp */
 648                                        Dbl_increment(resultp1,resultp2);
 649                                }
 650                        }
 651                        break;
 652
 653                case ROUNDPLUS:
 654                        if (Dbl_iszero_sign(resultp1)) {
 655                                /* Round up positive results */
 656                                Dbl_increment(resultp1,resultp2);
 657                        }
 658                        break;
 659            
 660                case ROUNDMINUS:
 661                        if (Dbl_isone_sign(resultp1)) {
 662                                /* Round down negative results */
 663                                Dbl_increment(resultp1,resultp2);
 664                        }
 665            
 666                case ROUNDZERO:;
 667                        /* truncate is simple */
 668                } /* end switch... */
 669                if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
 670        }
 671        if (result_exponent >= DBL_INFINITY_EXPONENT) {
 672                /* trap if OVERFLOWTRAP enabled */
 673                if (Is_overflowtrap_enabled()) {
 674                        /*
 675                         * Adjust bias of result
 676                         */
 677                        Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
 678                        Dbl_copytoptr(resultp1,resultp2,dstptr);
 679                        if (inexact)
 680                            if (Is_inexacttrap_enabled())
 681                                return (OPC_2E_OVERFLOWEXCEPTION |
 682                                        OPC_2E_INEXACTEXCEPTION);
 683                            else Set_inexactflag();
 684                        return (OPC_2E_OVERFLOWEXCEPTION);
 685                }
 686                inexact = TRUE;
 687                Set_overflowflag();
 688                /* set result to infinity or largest number */
 689                Dbl_setoverflow(resultp1,resultp2);
 690
 691        } else if (result_exponent <= 0) {      /* underflow case */
 692                if (Is_underflowtrap_enabled()) {
 693                        /*
 694                         * Adjust bias of result
 695                         */
 696                        Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
 697                        Dbl_copytoptr(resultp1,resultp2,dstptr);
 698                        if (inexact)
 699                            if (Is_inexacttrap_enabled())
 700                                return (OPC_2E_UNDERFLOWEXCEPTION |
 701                                        OPC_2E_INEXACTEXCEPTION);
 702                            else Set_inexactflag();
 703                        return(OPC_2E_UNDERFLOWEXCEPTION);
 704                }
 705                else if (inexact && is_tiny) Set_underflowflag();
 706        }
 707        else Dbl_set_exponent(resultp1,result_exponent);
 708        Dbl_copytoptr(resultp1,resultp2,dstptr);
 709        if (inexact) 
 710                if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
 711                else Set_inexactflag();
 712        return(NOEXCEPTION);
 713}
 714
 715/*
 716 *  Double Floating-point Multiply Negate Fused Add
 717 */
 718
 719dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
 720
 721dbl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
 722unsigned int *status;
 723{
 724        unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
 725        register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
 726        unsigned int rightp1, rightp2, rightp3, rightp4;
 727        unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0;
 728        register int mpy_exponent, add_exponent, count;
 729        boolean inexact = FALSE, is_tiny = FALSE;
 730
 731        unsigned int signlessleft1, signlessright1, save;
 732        register int result_exponent, diff_exponent;
 733        int sign_save, jumpsize;
 734        
 735        Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
 736        Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
 737        Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
 738
 739        /* 
 740         * set sign bit of result of multiply
 741         */
 742        if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1)) 
 743                Dbl_setzerop1(resultp1);
 744        else
 745                Dbl_setnegativezerop1(resultp1); 
 746
 747        /*
 748         * Generate multiply exponent 
 749         */
 750        mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
 751
 752        /*
 753         * check first operand for NaN's or infinity
 754         */
 755        if (Dbl_isinfinity_exponent(opnd1p1)) {
 756                if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
 757                        if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
 758                            Dbl_isnotnan(opnd3p1,opnd3p2)) {
 759                                if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
 760                                        /* 
 761                                         * invalid since operands are infinity 
 762                                         * and zero 
 763                                         */
 764                                        if (Is_invalidtrap_enabled())
 765                                                return(OPC_2E_INVALIDEXCEPTION);
 766                                        Set_invalidflag();
 767                                        Dbl_makequietnan(resultp1,resultp2);
 768                                        Dbl_copytoptr(resultp1,resultp2,dstptr);
 769                                        return(NOEXCEPTION);
 770                                }
 771                                /*
 772                                 * Check third operand for infinity with a
 773                                 *  sign opposite of the multiply result
 774                                 */
 775                                if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
 776                                    (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
 777                                        /* 
 778                                         * invalid since attempting a magnitude
 779                                         * subtraction of infinities
 780                                         */
 781                                        if (Is_invalidtrap_enabled())
 782                                                return(OPC_2E_INVALIDEXCEPTION);
 783                                        Set_invalidflag();
 784                                        Dbl_makequietnan(resultp1,resultp2);
 785                                        Dbl_copytoptr(resultp1,resultp2,dstptr);
 786                                        return(NOEXCEPTION);
 787                                }
 788
 789                                /*
 790                                 * return infinity
 791                                 */
 792                                Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
 793                                Dbl_copytoptr(resultp1,resultp2,dstptr);
 794                                return(NOEXCEPTION);
 795                        }
 796                }
 797                else {
 798                        /*
 799                         * is NaN; signaling or quiet?
 800                         */
 801                        if (Dbl_isone_signaling(opnd1p1)) {
 802                                /* trap if INVALIDTRAP enabled */
 803                                if (Is_invalidtrap_enabled()) 
 804                                        return(OPC_2E_INVALIDEXCEPTION);
 805                                /* make NaN quiet */
 806                                Set_invalidflag();
 807                                Dbl_set_quiet(opnd1p1);
 808                        }
 809                        /* 
 810                         * is second operand a signaling NaN? 
 811                         */
 812                        else if (Dbl_is_signalingnan(opnd2p1)) {
 813                                /* trap if INVALIDTRAP enabled */
 814                                if (Is_invalidtrap_enabled())
 815                                        return(OPC_2E_INVALIDEXCEPTION);
 816                                /* make NaN quiet */
 817                                Set_invalidflag();
 818                                Dbl_set_quiet(opnd2p1);
 819                                Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
 820                                return(NOEXCEPTION);
 821                        }
 822                        /* 
 823                         * is third operand a signaling NaN? 
 824                         */
 825                        else if (Dbl_is_signalingnan(opnd3p1)) {
 826                                /* trap if INVALIDTRAP enabled */
 827                                if (Is_invalidtrap_enabled())
 828                                        return(OPC_2E_INVALIDEXCEPTION);
 829                                /* make NaN quiet */
 830                                Set_invalidflag();
 831                                Dbl_set_quiet(opnd3p1);
 832                                Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
 833                                return(NOEXCEPTION);
 834                        }
 835                        /*
 836                         * return quiet NaN
 837                         */
 838                        Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
 839                        return(NOEXCEPTION);
 840                }
 841        }
 842
 843        /*
 844         * check second operand for NaN's or infinity
 845         */
 846        if (Dbl_isinfinity_exponent(opnd2p1)) {
 847                if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
 848                        if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
 849                                if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
 850                                        /* 
 851                                         * invalid since multiply operands are
 852                                         * zero & infinity
 853                                         */
 854                                        if (Is_invalidtrap_enabled())
 855                                                return(OPC_2E_INVALIDEXCEPTION);
 856                                        Set_invalidflag();
 857                                        Dbl_makequietnan(opnd2p1,opnd2p2);
 858                                        Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
 859                                        return(NOEXCEPTION);
 860                                }
 861
 862                                /*
 863                                 * Check third operand for infinity with a
 864                                 *  sign opposite of the multiply result
 865                                 */
 866                                if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
 867                                    (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
 868                                        /* 
 869                                         * invalid since attempting a magnitude
 870                                         * subtraction of infinities
 871                                         */
 872                                        if (Is_invalidtrap_enabled())
 873                                                return(OPC_2E_INVALIDEXCEPTION);
 874                                        Set_invalidflag();
 875                                        Dbl_makequietnan(resultp1,resultp2);
 876                                        Dbl_copytoptr(resultp1,resultp2,dstptr);
 877                                        return(NOEXCEPTION);
 878                                }
 879
 880                                /*
 881                                 * return infinity
 882                                 */
 883                                Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
 884                                Dbl_copytoptr(resultp1,resultp2,dstptr);
 885                                return(NOEXCEPTION);
 886                        }
 887                }
 888                else {
 889                        /*
 890                         * is NaN; signaling or quiet?
 891                         */
 892                        if (Dbl_isone_signaling(opnd2p1)) {
 893                                /* trap if INVALIDTRAP enabled */
 894                                if (Is_invalidtrap_enabled())
 895                                        return(OPC_2E_INVALIDEXCEPTION);
 896                                /* make NaN quiet */
 897                                Set_invalidflag();
 898                                Dbl_set_quiet(opnd2p1);
 899                        }
 900                        /* 
 901                         * is third operand a signaling NaN? 
 902                         */
 903                        else if (Dbl_is_signalingnan(opnd3p1)) {
 904                                /* trap if INVALIDTRAP enabled */
 905                                if (Is_invalidtrap_enabled())
 906                                                return(OPC_2E_INVALIDEXCEPTION);
 907                                /* make NaN quiet */
 908                                Set_invalidflag();
 909                                Dbl_set_quiet(opnd3p1);
 910                                Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
 911                                return(NOEXCEPTION);
 912                        }
 913                        /*
 914                         * return quiet NaN
 915                         */
 916                        Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
 917                        return(NOEXCEPTION);
 918                }
 919        }
 920
 921        /*
 922         * check third operand for NaN's or infinity
 923         */
 924        if (Dbl_isinfinity_exponent(opnd3p1)) {
 925                if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
 926                        /* return infinity */
 927                        Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
 928                        return(NOEXCEPTION);
 929                } else {
 930                        /*
 931                         * is NaN; signaling or quiet?
 932                         */
 933                        if (Dbl_isone_signaling(opnd3p1)) {
 934                                /* trap if INVALIDTRAP enabled */
 935                                if (Is_invalidtrap_enabled())
 936                                        return(OPC_2E_INVALIDEXCEPTION);
 937                                /* make NaN quiet */
 938                                Set_invalidflag();
 939                                Dbl_set_quiet(opnd3p1);
 940                        }
 941                        /*
 942                         * return quiet NaN
 943                         */
 944                        Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
 945                        return(NOEXCEPTION);
 946                }
 947        }
 948
 949        /*
 950         * Generate multiply mantissa
 951         */
 952        if (Dbl_isnotzero_exponent(opnd1p1)) {
 953                /* set hidden bit */
 954                Dbl_clear_signexponent_set_hidden(opnd1p1);
 955        }
 956        else {
 957                /* check for zero */
 958                if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
 959                        /*
 960                         * Perform the add opnd3 with zero here.
 961                         */
 962                        if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
 963                                if (Is_rounding_mode(ROUNDMINUS)) {
 964                                        Dbl_or_signs(opnd3p1,resultp1);
 965                                } else {
 966                                        Dbl_and_signs(opnd3p1,resultp1);
 967                                }
 968                        }
 969                        /*
 970                         * Now let's check for trapped underflow case.
 971                         */
 972                        else if (Dbl_iszero_exponent(opnd3p1) &&
 973                                 Is_underflowtrap_enabled()) {
 974                                /* need to normalize results mantissa */
 975                                sign_save = Dbl_signextendedsign(opnd3p1);
 976                                result_exponent = 0;
 977                                Dbl_leftshiftby1(opnd3p1,opnd3p2);
 978                                Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
 979                                Dbl_set_sign(opnd3p1,/*using*/sign_save);
 980                                Dbl_setwrapped_exponent(opnd3p1,result_exponent,
 981                                                        unfl);
 982                                Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
 983                                /* inexact = FALSE */
 984                                return(OPC_2E_UNDERFLOWEXCEPTION);
 985                        }
 986                        Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
 987                        return(NOEXCEPTION);
 988                }
 989                /* is denormalized, adjust exponent */
 990                Dbl_clear_signexponent(opnd1p1);
 991                Dbl_leftshiftby1(opnd1p1,opnd1p2);
 992                Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
 993        }
 994        /* opnd2 needs to have hidden bit set with msb in hidden bit */
 995        if (Dbl_isnotzero_exponent(opnd2p1)) {
 996                Dbl_clear_signexponent_set_hidden(opnd2p1);
 997        }
 998        else {
 999                /* check for zero */
1000                if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
1001                        /*
1002                         * Perform the add opnd3 with zero here.
1003                         */
1004                        if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
1005                                if (Is_rounding_mode(ROUNDMINUS)) {
1006                                        Dbl_or_signs(opnd3p1,resultp1);
1007                                } else {
1008                                        Dbl_and_signs(opnd3p1,resultp1);
1009                                }
1010                        }
1011                        /*
1012                         * Now let's check for trapped underflow case.
1013                         */
1014                        else if (Dbl_iszero_exponent(opnd3p1) &&
1015                            Is_underflowtrap_enabled()) {
1016                                /* need to normalize results mantissa */
1017                                sign_save = Dbl_signextendedsign(opnd3p1);
1018                                result_exponent = 0;
1019                                Dbl_leftshiftby1(opnd3p1,opnd3p2);
1020                                Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
1021                                Dbl_set_sign(opnd3p1,/*using*/sign_save);
1022                                Dbl_setwrapped_exponent(opnd3p1,result_exponent,
1023                                                        unfl);
1024                                Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
1025                                /* inexact = FALSE */
1026                                return(OPC_2E_UNDERFLOWEXCEPTION);
1027                        }
1028                        Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
1029                        return(NOEXCEPTION);
1030                }
1031                /* is denormalized; want to normalize */
1032                Dbl_clear_signexponent(opnd2p1);
1033                Dbl_leftshiftby1(opnd2p1,opnd2p2);
1034                Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
1035        }
1036
1037        /* Multiply the first two source mantissas together */
1038
1039        /* 
1040         * The intermediate result will be kept in tmpres,
1041         * which needs enough room for 106 bits of mantissa,
1042         * so lets call it a Double extended.
1043         */
1044        Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1045
1046        /* 
1047         * Four bits at a time are inspected in each loop, and a 
1048         * simple shift and add multiply algorithm is used. 
1049         */ 
1050        for (count = DBL_P-1; count >= 0; count -= 4) {
1051                Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1052                if (Dbit28p2(opnd1p2)) {
1053                        /* Fourword_add should be an ADD followed by 3 ADDC's */
1054                        Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4, 
1055                         opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0);
1056                }
1057                if (Dbit29p2(opnd1p2)) {
1058                        Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1059                         opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0);
1060                }
1061                if (Dbit30p2(opnd1p2)) {
1062                        Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1063                         opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0);
1064                }
1065                if (Dbit31p2(opnd1p2)) {
1066                        Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1067                         opnd2p1, opnd2p2, 0, 0);
1068                }
1069                Dbl_rightshiftby4(opnd1p1,opnd1p2);
1070        }
1071        if (Is_dexthiddenoverflow(tmpresp1)) {
1072                /* result mantissa >= 2 (mantissa overflow) */
1073                mpy_exponent++;
1074                Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1075        }
1076
1077        /*
1078         * Restore the sign of the mpy result which was saved in resultp1.
1079         * The exponent will continue to be kept in mpy_exponent.
1080         */
1081        Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
1082
1083        /* 
1084         * No rounding is required, since the result of the multiply
1085         * is exact in the extended format.
1086         */
1087
1088        /*
1089         * Now we are ready to perform the add portion of the operation.
1090         *
1091         * The exponents need to be kept as integers for now, since the
1092         * multiply result might not fit into the exponent field.  We
1093         * can't overflow or underflow because of this yet, since the
1094         * add could bring the final result back into range.
1095         */
1096        add_exponent = Dbl_exponent(opnd3p1);
1097
1098        /*
1099         * Check for denormalized or zero add operand.
1100         */
1101        if (add_exponent == 0) {
1102                /* check for zero */
1103                if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
1104                        /* right is zero */
1105                        /* Left can't be zero and must be result.
1106                         *
1107                         * The final result is now in tmpres and mpy_exponent,
1108                         * and needs to be rounded and squeezed back into
1109                         * double precision format from double extended.
1110                         */
1111                        result_exponent = mpy_exponent;
1112                        Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1113                                resultp1,resultp2,resultp3,resultp4);
1114                        sign_save = Dbl_signextendedsign(resultp1);/*save sign*/
1115                        goto round;
1116                }
1117
1118                /* 
1119                 * Neither are zeroes.  
1120                 * Adjust exponent and normalize add operand.
1121                 */
1122                sign_save = Dbl_signextendedsign(opnd3p1);      /* save sign */
1123                Dbl_clear_signexponent(opnd3p1);
1124                Dbl_leftshiftby1(opnd3p1,opnd3p2);
1125                Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
1126                Dbl_set_sign(opnd3p1,sign_save);        /* restore sign */
1127        } else {
1128                Dbl_clear_exponent_set_hidden(opnd3p1);
1129        }
1130        /*
1131         * Copy opnd3 to the double extended variable called right.
1132         */
1133        Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
1134
1135        /*
1136         * A zero "save" helps discover equal operands (for later),
1137         * and is used in swapping operands (if needed).
1138         */
1139        Dblext_xortointp1(tmpresp1,rightp1,/*to*/save);
1140
1141        /*
1142         * Compare magnitude of operands.
1143         */
1144        Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
1145        Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
1146        if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
1147            Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
1148                /*
1149                 * Set the left operand to the larger one by XOR swap.
1150                 * First finish the first word "save".
1151                 */
1152                Dblext_xorfromintp1(save,rightp1,/*to*/rightp1);
1153                Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
1154                Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
1155                        rightp2,rightp3,rightp4);
1156                /* also setup exponents used in rest of routine */
1157                diff_exponent = add_exponent - mpy_exponent;
1158                result_exponent = add_exponent;
1159        } else {
1160                /* also setup exponents used in rest of routine */
1161                diff_exponent = mpy_exponent - add_exponent;
1162                result_exponent = mpy_exponent;
1163        }
1164        /* Invariant: left is not smaller than right. */
1165
1166        /*
1167         * Special case alignment of operands that would force alignment
1168         * beyond the extent of the extension.  A further optimization
1169         * could special case this but only reduces the path length for
1170         * this infrequent case.
1171         */
1172        if (diff_exponent > DBLEXT_THRESHOLD) {
1173                diff_exponent = DBLEXT_THRESHOLD;
1174        }
1175
1176        /* Align right operand by shifting it to the right */
1177        Dblext_clear_sign(rightp1);
1178        Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
1179                /*shifted by*/diff_exponent);
1180        
1181        /* Treat sum and difference of the operands separately. */
1182        if ((int)save < 0) {
1183                /*
1184                 * Difference of the two operands.  Overflow can occur if the
1185                 * multiply overflowed.  A borrow can occur out of the hidden
1186                 * bit and force a post normalization phase.
1187                 */
1188                Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1189                        rightp1,rightp2,rightp3,rightp4,
1190                        resultp1,resultp2,resultp3,resultp4);
1191                sign_save = Dbl_signextendedsign(resultp1);
1192                if (Dbl_iszero_hidden(resultp1)) {
1193                        /* Handle normalization */
1194                /* A straight foward algorithm would now shift the
1195                 * result and extension left until the hidden bit
1196                 * becomes one.  Not all of the extension bits need
1197                 * participate in the shift.  Only the two most 
1198                 * significant bits (round and guard) are needed.
1199                 * If only a single shift is needed then the guard
1200                 * bit becomes a significant low order bit and the
1201                 * extension must participate in the rounding.
1202                 * If more than a single shift is needed, then all
1203                 * bits to the right of the guard bit are zeros, 
1204                 * and the guard bit may or may not be zero. */
1205                        Dblext_leftshiftby1(resultp1,resultp2,resultp3,
1206                                resultp4);
1207
1208                        /* Need to check for a zero result.  The sign and
1209                         * exponent fields have already been zeroed.  The more
1210                         * efficient test of the full object can be used.
1211                         */
1212                         if (Dblext_iszero(resultp1,resultp2,resultp3,resultp4)) {
1213                                /* Must have been "x-x" or "x+(-x)". */
1214                                if (Is_rounding_mode(ROUNDMINUS))
1215                                        Dbl_setone_sign(resultp1);
1216                                Dbl_copytoptr(resultp1,resultp2,dstptr);
1217                                return(NOEXCEPTION);
1218                        }
1219                        result_exponent--;
1220
1221                        /* Look to see if normalization is finished. */
1222                        if (Dbl_isone_hidden(resultp1)) {
1223                                /* No further normalization is needed */
1224                                goto round;
1225                        }
1226
1227                        /* Discover first one bit to determine shift amount.
1228                         * Use a modified binary search.  We have already
1229                         * shifted the result one position right and still
1230                         * not found a one so the remainder of the extension
1231                         * must be zero and simplifies rounding. */
1232                        /* Scan bytes */
1233                        while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
1234                                Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
1235                                result_exponent -= 8;
1236                        }
1237                        /* Now narrow it down to the nibble */
1238                        if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
1239                                /* The lower nibble contains the
1240                                 * normalizing one */
1241                                Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
1242                                result_exponent -= 4;
1243                        }
1244                        /* Select case where first bit is set (already
1245                         * normalized) otherwise select the proper shift. */
1246                        jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
1247                        if (jumpsize <= 7) switch(jumpsize) {
1248                        case 1:
1249                                Dblext_leftshiftby3(resultp1,resultp2,resultp3,
1250                                        resultp4);
1251                                result_exponent -= 3;
1252                                break;
1253                        case 2:
1254                        case 3:
1255                                Dblext_leftshiftby2(resultp1,resultp2,resultp3,
1256                                        resultp4);
1257                                result_exponent -= 2;
1258                                break;
1259                        case 4:
1260                        case 5:
1261                        case 6:
1262                        case 7:
1263                                Dblext_leftshiftby1(resultp1,resultp2,resultp3,
1264                                        resultp4);
1265                                result_exponent -= 1;
1266                                break;
1267                        }
1268                } /* end if (hidden...)... */
1269        /* Fall through and round */
1270        } /* end if (save < 0)... */
1271        else {
1272                /* Add magnitudes */
1273                Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1274                        rightp1,rightp2,rightp3,rightp4,
1275                        /*to*/resultp1,resultp2,resultp3,resultp4);
1276                sign_save = Dbl_signextendedsign(resultp1);
1277                if (Dbl_isone_hiddenoverflow(resultp1)) {
1278                        /* Prenormalization required. */
1279                        Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
1280                                resultp4);
1281                        result_exponent++;
1282                } /* end if hiddenoverflow... */
1283        } /* end else ...add magnitudes... */
1284
1285        /* Round the result.  If the extension and lower two words are
1286         * all zeros, then the result is exact.  Otherwise round in the
1287         * correct direction.  Underflow is possible. If a postnormalization
1288         * is necessary, then the mantissa is all zeros so no shift is needed.
1289         */
1290  round:
1291        if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
1292                Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
1293                        result_exponent,is_tiny);
1294        }
1295        Dbl_set_sign(resultp1,/*using*/sign_save);
1296        if (Dblext_isnotzero_mantissap3(resultp3) || 
1297            Dblext_isnotzero_mantissap4(resultp4)) {
1298                inexact = TRUE;
1299                switch(Rounding_mode()) {
1300                case ROUNDNEAREST: /* The default. */
1301                        if (Dblext_isone_highp3(resultp3)) {
1302                                /* at least 1/2 ulp */
1303                                if (Dblext_isnotzero_low31p3(resultp3) ||
1304                                    Dblext_isnotzero_mantissap4(resultp4) ||
1305                                    Dblext_isone_lowp2(resultp2)) {
1306                                        /* either exactly half way and odd or
1307                                         * more than 1/2ulp */
1308                                        Dbl_increment(resultp1,resultp2);
1309                                }
1310                        }
1311                        break;
1312
1313                case ROUNDPLUS:
1314                        if (Dbl_iszero_sign(resultp1)) {
1315                                /* Round up positive results */
1316                                Dbl_increment(resultp1,resultp2);
1317                        }
1318                        break;
1319            
1320                case ROUNDMINUS:
1321                        if (Dbl_isone_sign(resultp1)) {
1322                                /* Round down negative results */
1323                                Dbl_increment(resultp1,resultp2);
1324                        }
1325            
1326                case ROUNDZERO:;
1327                        /* truncate is simple */
1328                } /* end switch... */
1329                if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
1330        }
1331        if (result_exponent >= DBL_INFINITY_EXPONENT) {
1332                /* Overflow */
1333                if (Is_overflowtrap_enabled()) {
1334                        /*
1335                         * Adjust bias of result
1336                         */
1337                        Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
1338                        Dbl_copytoptr(resultp1,resultp2,dstptr);
1339                        if (inexact)
1340                            if (Is_inexacttrap_enabled())
1341                                return (OPC_2E_OVERFLOWEXCEPTION |
1342                                        OPC_2E_INEXACTEXCEPTION);
1343                            else Set_inexactflag();
1344                        return (OPC_2E_OVERFLOWEXCEPTION);
1345                }
1346                inexact = TRUE;
1347                Set_overflowflag();
1348                Dbl_setoverflow(resultp1,resultp2);
1349        } else if (result_exponent <= 0) {      /* underflow case */
1350                if (Is_underflowtrap_enabled()) {
1351                        /*
1352                         * Adjust bias of result
1353                         */
1354                        Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
1355                        Dbl_copytoptr(resultp1,resultp2,dstptr);
1356                        if (inexact)
1357                            if (Is_inexacttrap_enabled())
1358                                return (OPC_2E_UNDERFLOWEXCEPTION |
1359                                        OPC_2E_INEXACTEXCEPTION);
1360                            else Set_inexactflag();
1361                        return(OPC_2E_UNDERFLOWEXCEPTION);
1362                }
1363                else if (inexact && is_tiny) Set_underflowflag();
1364        }
1365        else Dbl_set_exponent(resultp1,result_exponent);
1366        Dbl_copytoptr(resultp1,resultp2,dstptr);
1367        if (inexact) 
1368                if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
1369                else Set_inexactflag();
1370        return(NOEXCEPTION);
1371}
1372
1373/*
1374 *  Single Floating-point Multiply Fused Add
1375 */
1376
1377sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
1378
1379sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
1380unsigned int *status;
1381{
1382        unsigned int opnd1, opnd2, opnd3;
1383        register unsigned int tmpresp1, tmpresp2;
1384        unsigned int rightp1, rightp2;
1385        unsigned int resultp1, resultp2 = 0;
1386        register int mpy_exponent, add_exponent, count;
1387        boolean inexact = FALSE, is_tiny = FALSE;
1388
1389        unsigned int signlessleft1, signlessright1, save;
1390        register int result_exponent, diff_exponent;
1391        int sign_save, jumpsize;
1392        
1393        Sgl_copyfromptr(src1ptr,opnd1);
1394        Sgl_copyfromptr(src2ptr,opnd2);
1395        Sgl_copyfromptr(src3ptr,opnd3);
1396
1397        /* 
1398         * set sign bit of result of multiply
1399         */
1400        if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2)) 
1401                Sgl_setnegativezero(resultp1); 
1402        else Sgl_setzero(resultp1);
1403
1404        /*
1405         * Generate multiply exponent 
1406         */
1407        mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
1408
1409        /*
1410         * check first operand for NaN's or infinity
1411         */
1412        if (Sgl_isinfinity_exponent(opnd1)) {
1413                if (Sgl_iszero_mantissa(opnd1)) {
1414                        if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
1415                                if (Sgl_iszero_exponentmantissa(opnd2)) {
1416                                        /* 
1417                                         * invalid since operands are infinity 
1418                                         * and zero 
1419                                         */
1420                                        if (Is_invalidtrap_enabled())
1421                                                return(OPC_2E_INVALIDEXCEPTION);
1422                                        Set_invalidflag();
1423                                        Sgl_makequietnan(resultp1);
1424                                        Sgl_copytoptr(resultp1,dstptr);
1425                                        return(NOEXCEPTION);
1426                                }
1427                                /*
1428                                 * Check third operand for infinity with a
1429                                 *  sign opposite of the multiply result
1430                                 */
1431                                if (Sgl_isinfinity(opnd3) &&
1432                                    (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
1433                                        /* 
1434                                         * invalid since attempting a magnitude
1435                                         * subtraction of infinities
1436                                         */
1437                                        if (Is_invalidtrap_enabled())
1438                                                return(OPC_2E_INVALIDEXCEPTION);
1439                                        Set_invalidflag();
1440                                        Sgl_makequietnan(resultp1);
1441                                        Sgl_copytoptr(resultp1,dstptr);
1442                                        return(NOEXCEPTION);
1443                                }
1444
1445                                /*
1446                                 * return infinity
1447                                 */
1448                                Sgl_setinfinity_exponentmantissa(resultp1);
1449                                Sgl_copytoptr(resultp1,dstptr);
1450                                return(NOEXCEPTION);
1451                        }
1452                }
1453                else {
1454                        /*
1455                         * is NaN; signaling or quiet?
1456                         */
1457                        if (Sgl_isone_signaling(opnd1)) {
1458                                /* trap if INVALIDTRAP enabled */
1459                                if (Is_invalidtrap_enabled()) 
1460                                        return(OPC_2E_INVALIDEXCEPTION);
1461                                /* make NaN quiet */
1462                                Set_invalidflag();
1463                                Sgl_set_quiet(opnd1);
1464                        }
1465                        /* 
1466                         * is second operand a signaling NaN? 
1467                         */
1468                        else if (Sgl_is_signalingnan(opnd2)) {
1469                                /* trap if INVALIDTRAP enabled */
1470                                if (Is_invalidtrap_enabled())
1471                                        return(OPC_2E_INVALIDEXCEPTION);
1472                                /* make NaN quiet */
1473                                Set_invalidflag();
1474                                Sgl_set_quiet(opnd2);
1475                                Sgl_copytoptr(opnd2,dstptr);
1476                                return(NOEXCEPTION);
1477                        }
1478                        /* 
1479                         * is third operand a signaling NaN? 
1480                         */
1481                        else if (Sgl_is_signalingnan(opnd3)) {
1482                                /* trap if INVALIDTRAP enabled */
1483                                if (Is_invalidtrap_enabled())
1484                                        return(OPC_2E_INVALIDEXCEPTION);
1485                                /* make NaN quiet */
1486                                Set_invalidflag();
1487                                Sgl_set_quiet(opnd3);
1488                                Sgl_copytoptr(opnd3,dstptr);
1489                                return(NOEXCEPTION);
1490                        }
1491                        /*
1492                         * return quiet NaN
1493                         */
1494                        Sgl_copytoptr(opnd1,dstptr);
1495                        return(NOEXCEPTION);
1496                }
1497        }
1498
1499        /*
1500         * check second operand for NaN's or infinity
1501         */
1502        if (Sgl_isinfinity_exponent(opnd2)) {
1503                if (Sgl_iszero_mantissa(opnd2)) {
1504                        if (Sgl_isnotnan(opnd3)) {
1505                                if (Sgl_iszero_exponentmantissa(opnd1)) {
1506                                        /* 
1507                                         * invalid since multiply operands are
1508                                         * zero & infinity
1509                                         */
1510                                        if (Is_invalidtrap_enabled())
1511                                                return(OPC_2E_INVALIDEXCEPTION);
1512                                        Set_invalidflag();
1513                                        Sgl_makequietnan(opnd2);
1514                                        Sgl_copytoptr(opnd2,dstptr);
1515                                        return(NOEXCEPTION);
1516                                }
1517
1518                                /*
1519                                 * Check third operand for infinity with a
1520                                 *  sign opposite of the multiply result
1521                                 */
1522                                if (Sgl_isinfinity(opnd3) &&
1523                                    (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
1524                                        /* 
1525                                         * invalid since attempting a magnitude
1526                                         * subtraction of infinities
1527                                         */
1528                                        if (Is_invalidtrap_enabled())
1529                                                return(OPC_2E_INVALIDEXCEPTION);
1530                                        Set_invalidflag();
1531                                        Sgl_makequietnan(resultp1);
1532                                        Sgl_copytoptr(resultp1,dstptr);
1533                                        return(NOEXCEPTION);
1534                                }
1535
1536                                /*
1537                                 * return infinity
1538                                 */
1539                                Sgl_setinfinity_exponentmantissa(resultp1);
1540                                Sgl_copytoptr(resultp1,dstptr);
1541                                return(NOEXCEPTION);
1542                        }
1543                }
1544                else {
1545                        /*
1546                         * is NaN; signaling or quiet?
1547                         */
1548                        if (Sgl_isone_signaling(opnd2)) {
1549                                /* trap if INVALIDTRAP enabled */
1550                                if (Is_invalidtrap_enabled())
1551                                        return(OPC_2E_INVALIDEXCEPTION);
1552                                /* make NaN quiet */
1553                                Set_invalidflag();
1554                                Sgl_set_quiet(opnd2);
1555                        }
1556                        /* 
1557                         * is third operand a signaling NaN? 
1558                         */
1559                        else if (Sgl_is_signalingnan(opnd3)) {
1560                                /* trap if INVALIDTRAP enabled */
1561                                if (Is_invalidtrap_enabled())
1562                                                return(OPC_2E_INVALIDEXCEPTION);
1563                                /* make NaN quiet */
1564                                Set_invalidflag();
1565                                Sgl_set_quiet(opnd3);
1566                                Sgl_copytoptr(opnd3,dstptr);
1567                                return(NOEXCEPTION);
1568                        }
1569                        /*
1570                         * return quiet NaN
1571                         */
1572                        Sgl_copytoptr(opnd2,dstptr);
1573                        return(NOEXCEPTION);
1574                }
1575        }
1576
1577        /*
1578         * check third operand for NaN's or infinity
1579         */
1580        if (Sgl_isinfinity_exponent(opnd3)) {
1581                if (Sgl_iszero_mantissa(opnd3)) {
1582                        /* return infinity */
1583                        Sgl_copytoptr(opnd3,dstptr);
1584                        return(NOEXCEPTION);
1585                } else {
1586                        /*
1587                         * is NaN; signaling or quiet?
1588                         */
1589                        if (Sgl_isone_signaling(opnd3)) {
1590                                /* trap if INVALIDTRAP enabled */
1591                                if (Is_invalidtrap_enabled())
1592                                        return(OPC_2E_INVALIDEXCEPTION);
1593                                /* make NaN quiet */
1594                                Set_invalidflag();
1595                                Sgl_set_quiet(opnd3);
1596                        }
1597                        /*
1598                         * return quiet NaN
1599                         */
1600                        Sgl_copytoptr(opnd3,dstptr);
1601                        return(NOEXCEPTION);
1602                }
1603        }
1604
1605        /*
1606         * Generate multiply mantissa
1607         */
1608        if (Sgl_isnotzero_exponent(opnd1)) {
1609                /* set hidden bit */
1610                Sgl_clear_signexponent_set_hidden(opnd1);
1611        }
1612        else {
1613                /* check for zero */
1614                if (Sgl_iszero_mantissa(opnd1)) {
1615                        /*
1616                         * Perform the add opnd3 with zero here.
1617                         */
1618                        if (Sgl_iszero_exponentmantissa(opnd3)) {
1619                                if (Is_rounding_mode(ROUNDMINUS)) {
1620                                        Sgl_or_signs(opnd3,resultp1);
1621                                } else {
1622                                        Sgl_and_signs(opnd3,resultp1);
1623                                }
1624                        }
1625                        /*
1626                         * Now let's check for trapped underflow case.
1627                         */
1628                        else if (Sgl_iszero_exponent(opnd3) &&
1629                                 Is_underflowtrap_enabled()) {
1630                                /* need to normalize results mantissa */
1631                                sign_save = Sgl_signextendedsign(opnd3);
1632                                result_exponent = 0;
1633                                Sgl_leftshiftby1(opnd3);
1634                                Sgl_normalize(opnd3,result_exponent);
1635                                Sgl_set_sign(opnd3,/*using*/sign_save);
1636                                Sgl_setwrapped_exponent(opnd3,result_exponent,
1637                                                        unfl);
1638                                Sgl_copytoptr(opnd3,dstptr);
1639                                /* inexact = FALSE */
1640                                return(OPC_2E_UNDERFLOWEXCEPTION);
1641                        }
1642                        Sgl_copytoptr(opnd3,dstptr);
1643                        return(NOEXCEPTION);
1644                }
1645                /* is denormalized, adjust exponent */
1646                Sgl_clear_signexponent(opnd1);
1647                Sgl_leftshiftby1(opnd1);
1648                Sgl_normalize(opnd1,mpy_exponent);
1649        }
1650        /* opnd2 needs to have hidden bit set with msb in hidden bit */
1651        if (Sgl_isnotzero_exponent(opnd2)) {
1652                Sgl_clear_signexponent_set_hidden(opnd2);
1653        }
1654        else {
1655                /* check for zero */
1656                if (Sgl_iszero_mantissa(opnd2)) {
1657                        /*
1658                         * Perform the add opnd3 with zero here.
1659                         */
1660                        if (Sgl_iszero_exponentmantissa(opnd3)) {
1661                                if (Is_rounding_mode(ROUNDMINUS)) {
1662                                        Sgl_or_signs(opnd3,resultp1);
1663                                } else {
1664                                        Sgl_and_signs(opnd3,resultp1);
1665                                }
1666                        }
1667                        /*
1668                         * Now let's check for trapped underflow case.
1669                         */
1670                        else if (Sgl_iszero_exponent(opnd3) &&
1671                            Is_underflowtrap_enabled()) {
1672                                /* need to normalize results mantissa */
1673                                sign_save = Sgl_signextendedsign(opnd3);
1674                                result_exponent = 0;
1675                                Sgl_leftshiftby1(opnd3);
1676                                Sgl_normalize(opnd3,result_exponent);
1677                                Sgl_set_sign(opnd3,/*using*/sign_save);
1678                                Sgl_setwrapped_exponent(opnd3,result_exponent,
1679                                                        unfl);
1680                                Sgl_copytoptr(opnd3,dstptr);
1681                                /* inexact = FALSE */
1682                                return(OPC_2E_UNDERFLOWEXCEPTION);
1683                        }
1684                        Sgl_copytoptr(opnd3,dstptr);
1685                        return(NOEXCEPTION);
1686                }
1687                /* is denormalized; want to normalize */
1688                Sgl_clear_signexponent(opnd2);
1689                Sgl_leftshiftby1(opnd2);
1690                Sgl_normalize(opnd2,mpy_exponent);
1691        }
1692
1693        /* Multiply the first two source mantissas together */
1694
1695        /* 
1696         * The intermediate result will be kept in tmpres,
1697         * which needs enough room for 106 bits of mantissa,
1698         * so lets call it a Double extended.
1699         */
1700        Sglext_setzero(tmpresp1,tmpresp2);
1701
1702        /* 
1703         * Four bits at a time are inspected in each loop, and a 
1704         * simple shift and add multiply algorithm is used. 
1705         */ 
1706        for (count = SGL_P-1; count >= 0; count -= 4) {
1707                Sglext_rightshiftby4(tmpresp1,tmpresp2);
1708                if (Sbit28(opnd1)) {
1709                        /* Twoword_add should be an ADD followed by 2 ADDC's */
1710                        Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0);
1711                }
1712                if (Sbit29(opnd1)) {
1713                        Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0);
1714                }
1715                if (Sbit30(opnd1)) {
1716                        Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0);
1717                }
1718                if (Sbit31(opnd1)) {
1719                        Twoword_add(tmpresp1, tmpresp2, opnd2, 0);
1720                }
1721                Sgl_rightshiftby4(opnd1);
1722        }
1723        if (Is_sexthiddenoverflow(tmpresp1)) {
1724                /* result mantissa >= 2 (mantissa overflow) */
1725                mpy_exponent++;
1726                Sglext_rightshiftby4(tmpresp1,tmpresp2);
1727        } else {
1728                Sglext_rightshiftby3(tmpresp1,tmpresp2);
1729        }
1730
1731        /*
1732         * Restore the sign of the mpy result which was saved in resultp1.
1733         * The exponent will continue to be kept in mpy_exponent.
1734         */
1735        Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
1736
1737        /* 
1738         * No rounding is required, since the result of the multiply
1739         * is exact in the extended format.
1740         */
1741
1742        /*
1743         * Now we are ready to perform the add portion of the operation.
1744         *
1745         * The exponents need to be kept as integers for now, since the
1746         * multiply result might not fit into the exponent field.  We
1747         * can't overflow or underflow because of this yet, since the
1748         * add could bring the final result back into range.
1749         */
1750        add_exponent = Sgl_exponent(opnd3);
1751
1752        /*
1753         * Check for denormalized or zero add operand.
1754         */
1755        if (add_exponent == 0) {
1756                /* check for zero */
1757                if (Sgl_iszero_mantissa(opnd3)) {
1758                        /* right is zero */
1759                        /* Left can't be zero and must be result.
1760                         *
1761                         * The final result is now in tmpres and mpy_exponent,
1762                         * and needs to be rounded and squeezed back into
1763                         * double precision format from double extended.
1764                         */
1765                        result_exponent = mpy_exponent;
1766                        Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
1767                        sign_save = Sgl_signextendedsign(resultp1);/*save sign*/
1768                        goto round;
1769                }
1770
1771                /* 
1772                 * Neither are zeroes.  
1773                 * Adjust exponent and normalize add operand.
1774                 */
1775                sign_save = Sgl_signextendedsign(opnd3);        /* save sign */
1776                Sgl_clear_signexponent(opnd3);
1777                Sgl_leftshiftby1(opnd3);
1778                Sgl_normalize(opnd3,add_exponent);
1779                Sgl_set_sign(opnd3,sign_save);          /* restore sign */
1780        } else {
1781                Sgl_clear_exponent_set_hidden(opnd3);
1782        }
1783        /*
1784         * Copy opnd3 to the double extended variable called right.
1785         */
1786        Sgl_copyto_sglext(opnd3,rightp1,rightp2);
1787
1788        /*
1789         * A zero "save" helps discover equal operands (for later),
1790         * and is used in swapping operands (if needed).
1791         */
1792        Sglext_xortointp1(tmpresp1,rightp1,/*to*/save);
1793
1794        /*
1795         * Compare magnitude of operands.
1796         */
1797        Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
1798        Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
1799        if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
1800            Sglext_ismagnitudeless(signlessleft1,signlessright1)) {
1801                /*
1802                 * Set the left operand to the larger one by XOR swap.
1803                 * First finish the first word "save".
1804                 */
1805                Sglext_xorfromintp1(save,rightp1,/*to*/rightp1);
1806                Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
1807                Sglext_swap_lower(tmpresp2,rightp2);
1808                /* also setup exponents used in rest of routine */
1809                diff_exponent = add_exponent - mpy_exponent;
1810                result_exponent = add_exponent;
1811        } else {
1812                /* also setup exponents used in rest of routine */
1813                diff_exponent = mpy_exponent - add_exponent;
1814                result_exponent = mpy_exponent;
1815        }
1816        /* Invariant: left is not smaller than right. */
1817
1818        /*
1819         * Special case alignment of operands that would force alignment
1820         * beyond the extent of the extension.  A further optimization
1821         * could special case this but only reduces the path length for
1822         * this infrequent case.
1823         */
1824        if (diff_exponent > SGLEXT_THRESHOLD) {
1825                diff_exponent = SGLEXT_THRESHOLD;
1826        }
1827
1828        /* Align right operand by shifting it to the right */
1829        Sglext_clear_sign(rightp1);
1830        Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent);
1831        
1832        /* Treat sum and difference of the operands separately. */
1833        if ((int)save < 0) {
1834                /*
1835                 * Difference of the two operands.  Overflow can occur if the
1836                 * multiply overflowed.  A borrow can occur out of the hidden
1837                 * bit and force a post normalization phase.
1838                 */
1839                Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
1840                        resultp1,resultp2);
1841                sign_save = Sgl_signextendedsign(resultp1);
1842                if (Sgl_iszero_hidden(resultp1)) {
1843                        /* Handle normalization */
1844                /* A straight foward algorithm would now shift the
1845                 * result and extension left until the hidden bit
1846                 * becomes one.  Not all of the extension bits need
1847                 * participate in the shift.  Only the two most 
1848                 * significant bits (round and guard) are needed.
1849                 * If only a single shift is needed then the guard
1850                 * bit becomes a significant low order bit and the
1851                 * extension must participate in the rounding.
1852                 * If more than a single shift is needed, then all
1853                 * bits to the right of the guard bit are zeros, 
1854                 * and the guard bit may or may not be zero. */
1855                        Sglext_leftshiftby1(resultp1,resultp2);
1856
1857                        /* Need to check for a zero result.  The sign and
1858                         * exponent fields have already been zeroed.  The more
1859                         * efficient test of the full object can be used.
1860                         */
1861                         if (Sglext_iszero(resultp1,resultp2)) {
1862                                /* Must have been "x-x" or "x+(-x)". */
1863                                if (Is_rounding_mode(ROUNDMINUS))
1864                                        Sgl_setone_sign(resultp1);
1865                                Sgl_copytoptr(resultp1,dstptr);
1866                                return(NOEXCEPTION);
1867                        }
1868                        result_exponent--;
1869
1870                        /* Look to see if normalization is finished. */
1871                        if (Sgl_isone_hidden(resultp1)) {
1872                                /* No further normalization is needed */
1873                                goto round;
1874                        }
1875
1876                        /* Discover first one bit to determine shift amount.
1877                         * Use a modified binary search.  We have already
1878                         * shifted the result one position right and still
1879                         * not found a one so the remainder of the extension
1880                         * must be zero and simplifies rounding. */
1881                        /* Scan bytes */
1882                        while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
1883                                Sglext_leftshiftby8(resultp1,resultp2);
1884                                result_exponent -= 8;
1885                        }
1886                        /* Now narrow it down to the nibble */
1887                        if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
1888                                /* The lower nibble contains the
1889                                 * normalizing one */
1890                                Sglext_leftshiftby4(resultp1,resultp2);
1891                                result_exponent -= 4;
1892                        }
1893                        /* Select case where first bit is set (already
1894                         * normalized) otherwise select the proper shift. */
1895                        jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
1896                        if (jumpsize <= 7) switch(jumpsize) {
1897                        case 1:
1898                                Sglext_leftshiftby3(resultp1,resultp2);
1899                                result_exponent -= 3;
1900                                break;
1901                        case 2:
1902                        case 3:
1903                                Sglext_leftshiftby2(resultp1,resultp2);
1904                                result_exponent -= 2;
1905                                break;
1906                        case 4:
1907                        case 5:
1908                        case 6:
1909                        case 7:
1910                                Sglext_leftshiftby1(resultp1,resultp2);
1911                                result_exponent -= 1;
1912                                break;
1913                        }
1914                } /* end if (hidden...)... */
1915        /* Fall through and round */
1916        } /* end if (save < 0)... */
1917        else {
1918                /* Add magnitudes */
1919                Sglext_addition(tmpresp1,tmpresp2,
1920                        rightp1,rightp2, /*to*/resultp1,resultp2);
1921                sign_save = Sgl_signextendedsign(resultp1);
1922                if (Sgl_isone_hiddenoverflow(resultp1)) {
1923                        /* Prenormalization required. */
1924                        Sglext_arithrightshiftby1(resultp1,resultp2);
1925                        result_exponent++;
1926                } /* end if hiddenoverflow... */
1927        } /* end else ...add magnitudes... */
1928
1929        /* Round the result.  If the extension and lower two words are
1930         * all zeros, then the result is exact.  Otherwise round in the
1931         * correct direction.  Underflow is possible. If a postnormalization
1932         * is necessary, then the mantissa is all zeros so no shift is needed.
1933         */
1934  round:
1935        if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
1936                Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
1937        }
1938        Sgl_set_sign(resultp1,/*using*/sign_save);
1939        if (Sglext_isnotzero_mantissap2(resultp2)) {
1940                inexact = TRUE;
1941                switch(Rounding_mode()) {
1942                case ROUNDNEAREST: /* The default. */
1943                        if (Sglext_isone_highp2(resultp2)) {
1944                                /* at least 1/2 ulp */
1945                                if (Sglext_isnotzero_low31p2(resultp2) ||
1946                                    Sglext_isone_lowp1(resultp1)) {
1947                                        /* either exactly half way and odd or
1948                                         * more than 1/2ulp */
1949                                        Sgl_increment(resultp1);
1950                                }
1951                        }
1952                        break;
1953
1954                case ROUNDPLUS:
1955                        if (Sgl_iszero_sign(resultp1)) {
1956                                /* Round up positive results */
1957                                Sgl_increment(resultp1);
1958                        }
1959                        break;
1960            
1961                case ROUNDMINUS:
1962                        if (Sgl_isone_sign(resultp1)) {
1963                                /* Round down negative results */
1964                                Sgl_increment(resultp1);
1965                        }
1966            
1967                case ROUNDZERO:;
1968                        /* truncate is simple */
1969                } /* end switch... */
1970                if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
1971        }
1972        if (result_exponent >= SGL_INFINITY_EXPONENT) {
1973                /* Overflow */
1974                if (Is_overflowtrap_enabled()) {
1975                        /*
1976                         * Adjust bias of result
1977                         */
1978                        Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
1979                        Sgl_copytoptr(resultp1,dstptr);
1980                        if (inexact)
1981                            if (Is_inexacttrap_enabled())
1982                                return (OPC_2E_OVERFLOWEXCEPTION |
1983                                        OPC_2E_INEXACTEXCEPTION);
1984                            else Set_inexactflag();
1985                        return (OPC_2E_OVERFLOWEXCEPTION);
1986                }
1987                inexact = TRUE;
1988                Set_overflowflag();
1989                Sgl_setoverflow(resultp1);
1990        } else if (result_exponent <= 0) {      /* underflow case */
1991                if (Is_underflowtrap_enabled()) {
1992                        /*
1993                         * Adjust bias of result
1994                         */
1995                        Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
1996                        Sgl_copytoptr(resultp1,dstptr);
1997                        if (inexact)
1998                            if (Is_inexacttrap_enabled())
1999                                return (OPC_2E_UNDERFLOWEXCEPTION |
2000                                        OPC_2E_INEXACTEXCEPTION);
2001                            else Set_inexactflag();
2002                        return(OPC_2E_UNDERFLOWEXCEPTION);
2003                }
2004                else if (inexact && is_tiny) Set_underflowflag();
2005        }
2006        else Sgl_set_exponent(resultp1,result_exponent);
2007        Sgl_copytoptr(resultp1,dstptr);
2008        if (inexact) 
2009                if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
2010                else Set_inexactflag();
2011        return(NOEXCEPTION);
2012}
2013
2014/*
2015 *  Single Floating-point Multiply Negate Fused Add
2016 */
2017
2018sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
2019
2020sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
2021unsigned int *status;
2022{
2023        unsigned int opnd1, opnd2, opnd3;
2024        register unsigned int tmpresp1, tmpresp2;
2025        unsigned int rightp1, rightp2;
2026        unsigned int resultp1, resultp2 = 0;
2027        register int mpy_exponent, add_exponent, count;
2028        boolean inexact = FALSE, is_tiny = FALSE;
2029
2030        unsigned int signlessleft1, signlessright1, save;
2031        register int result_exponent, diff_exponent;
2032        int sign_save, jumpsize;
2033        
2034        Sgl_copyfromptr(src1ptr,opnd1);
2035        Sgl_copyfromptr(src2ptr,opnd2);
2036        Sgl_copyfromptr(src3ptr,opnd3);
2037
2038        /* 
2039         * set sign bit of result of multiply
2040         */
2041        if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2)) 
2042                Sgl_setzero(resultp1);
2043        else 
2044                Sgl_setnegativezero(resultp1); 
2045
2046        /*
2047         * Generate multiply exponent 
2048         */
2049        mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
2050
2051        /*
2052         * check first operand for NaN's or infinity
2053         */
2054        if (Sgl_isinfinity_exponent(opnd1)) {
2055                if (Sgl_iszero_mantissa(opnd1)) {
2056                        if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
2057                                if (Sgl_iszero_exponentmantissa(opnd2)) {
2058                                        /* 
2059                                         * invalid since operands are infinity 
2060                                         * and zero 
2061                                         */
2062                                        if (Is_invalidtrap_enabled())
2063                                                return(OPC_2E_INVALIDEXCEPTION);
2064                                        Set_invalidflag();
2065                                        Sgl_makequietnan(resultp1);
2066                                        Sgl_copytoptr(resultp1,dstptr);
2067                                        return(NOEXCEPTION);
2068                                }
2069                                /*
2070                                 * Check third operand for infinity with a
2071                                 *  sign opposite of the multiply result
2072                                 */
2073                                if (Sgl_isinfinity(opnd3) &&
2074                                    (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
2075                                        /* 
2076                                         * invalid since attempting a magnitude
2077                                         * subtraction of infinities
2078                                         */
2079                                        if (Is_invalidtrap_enabled())
2080                                                return(OPC_2E_INVALIDEXCEPTION);
2081                                        Set_invalidflag();
2082                                        Sgl_makequietnan(resultp1);
2083                                        Sgl_copytoptr(resultp1,dstptr);
2084                                        return(NOEXCEPTION);
2085                                }
2086
2087                                /*
2088                                 * return infinity
2089                                 */
2090                                Sgl_setinfinity_exponentmantissa(resultp1);
2091                                Sgl_copytoptr(resultp1,dstptr);
2092                                return(NOEXCEPTION);
2093                        }
2094                }
2095                else {
2096                        /*
2097                         * is NaN; signaling or quiet?
2098                         */
2099                        if (Sgl_isone_signaling(opnd1)) {
2100                                /* trap if INVALIDTRAP enabled */
2101                                if (Is_invalidtrap_enabled()) 
2102                                        return(OPC_2E_INVALIDEXCEPTION);
2103                                /* make NaN quiet */
2104                                Set_invalidflag();
2105                                Sgl_set_quiet(opnd1);
2106                        }
2107                        /* 
2108                         * is second operand a signaling NaN? 
2109                         */
2110                        else if (Sgl_is_signalingnan(opnd2)) {
2111                                /* trap if INVALIDTRAP enabled */
2112                                if (Is_invalidtrap_enabled())
2113                                        return(OPC_2E_INVALIDEXCEPTION);
2114                                /* make NaN quiet */
2115                                Set_invalidflag();
2116                                Sgl_set_quiet(opnd2);
2117                                Sgl_copytoptr(opnd2,dstptr);
2118                                return(NOEXCEPTION);
2119                        }
2120                        /* 
2121                         * is third operand a signaling NaN? 
2122                         */
2123                        else if (Sgl_is_signalingnan(opnd3)) {
2124                                /* trap if INVALIDTRAP enabled */
2125                                if (Is_invalidtrap_enabled())
2126                                        return(OPC_2E_INVALIDEXCEPTION);
2127                                /* make NaN quiet */
2128                                Set_invalidflag();
2129                                Sgl_set_quiet(opnd3);
2130                                Sgl_copytoptr(opnd3,dstptr);
2131                                return(NOEXCEPTION);
2132                        }
2133                        /*
2134                         * return quiet NaN
2135                         */
2136                        Sgl_copytoptr(opnd1,dstptr);
2137                        return(NOEXCEPTION);
2138                }
2139        }
2140
2141        /*
2142         * check second operand for NaN's or infinity
2143         */
2144        if (Sgl_isinfinity_exponent(opnd2)) {
2145                if (Sgl_iszero_mantissa(opnd2)) {
2146                        if (Sgl_isnotnan(opnd3)) {
2147                                if (Sgl_iszero_exponentmantissa(opnd1)) {
2148                                        /* 
2149                                         * invalid since multiply operands are
2150                                         * zero & infinity
2151                                         */
2152                                        if (Is_invalidtrap_enabled())
2153                                                return(OPC_2E_INVALIDEXCEPTION);
2154                                        Set_invalidflag();
2155                                        Sgl_makequietnan(opnd2);
2156                                        Sgl_copytoptr(opnd2,dstptr);
2157                                        return(NOEXCEPTION);
2158                                }
2159
2160                                /*
2161                                 * Check third operand for infinity with a
2162                                 *  sign opposite of the multiply result
2163                                 */
2164                                if (Sgl_isinfinity(opnd3) &&
2165                                    (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
2166                                        /* 
2167                                         * invalid since attempting a magnitude
2168                                         * subtraction of infinities
2169                                         */
2170                                        if (Is_invalidtrap_enabled())
2171                                                return(OPC_2E_INVALIDEXCEPTION);
2172                                        Set_invalidflag();
2173                                        Sgl_makequietnan(resultp1);
2174                                        Sgl_copytoptr(resultp1,dstptr);
2175                                        return(NOEXCEPTION);
2176                                }
2177
2178                                /*
2179                                 * return infinity
2180                                 */
2181                                Sgl_setinfinity_exponentmantissa(resultp1);
2182                                Sgl_copytoptr(resultp1,dstptr);
2183                                return(NOEXCEPTION);
2184                        }
2185                }
2186                else {
2187                        /*
2188                         * is NaN; signaling or quiet?
2189                         */
2190                        if (Sgl_isone_signaling(opnd2)) {
2191                                /* trap if INVALIDTRAP enabled */
2192                                if (Is_invalidtrap_enabled())
2193                                        return(OPC_2E_INVALIDEXCEPTION);
2194                                /* make NaN quiet */
2195                                Set_invalidflag();
2196                                Sgl_set_quiet(opnd2);
2197                        }
2198                        /* 
2199                         * is third operand a signaling NaN? 
2200                         */
2201                        else if (Sgl_is_signalingnan(opnd3)) {
2202                                /* trap if INVALIDTRAP enabled */
2203                                if (Is_invalidtrap_enabled())
2204                                                return(OPC_2E_INVALIDEXCEPTION);
2205                                /* make NaN quiet */
2206                                Set_invalidflag();
2207                                Sgl_set_quiet(opnd3);
2208                                Sgl_copytoptr(opnd3,dstptr);
2209                                return(NOEXCEPTION);
2210                        }
2211                        /*
2212                         * return quiet NaN
2213                         */
2214                        Sgl_copytoptr(opnd2,dstptr);
2215                        return(NOEXCEPTION);
2216                }
2217        }
2218
2219        /*
2220         * check third operand for NaN's or infinity
2221         */
2222        if (Sgl_isinfinity_exponent(opnd3)) {
2223                if (Sgl_iszero_mantissa(opnd3)) {
2224                        /* return infinity */
2225                        Sgl_copytoptr(opnd3,dstptr);
2226                        return(NOEXCEPTION);
2227                } else {
2228                        /*
2229                         * is NaN; signaling or quiet?
2230                         */
2231                        if (Sgl_isone_signaling(opnd3)) {
2232                                /* trap if INVALIDTRAP enabled */
2233                                if (Is_invalidtrap_enabled())
2234                                        return(OPC_2E_INVALIDEXCEPTION);
2235                                /* make NaN quiet */
2236                                Set_invalidflag();
2237                                Sgl_set_quiet(opnd3);
2238                        }
2239                        /*
2240                         * return quiet NaN
2241                         */
2242                        Sgl_copytoptr(opnd3,dstptr);
2243                        return(NOEXCEPTION);
2244                }
2245        }
2246
2247        /*
2248         * Generate multiply mantissa
2249         */
2250        if (Sgl_isnotzero_exponent(opnd1)) {
2251                /* set hidden bit */
2252                Sgl_clear_signexponent_set_hidden(opnd1);
2253        }
2254        else {
2255                /* check for zero */
2256                if (Sgl_iszero_mantissa(opnd1)) {
2257                        /*
2258                         * Perform the add opnd3 with zero here.
2259                         */
2260                        if (Sgl_iszero_exponentmantissa(opnd3)) {
2261                                if (Is_rounding_mode(ROUNDMINUS)) {
2262                                        Sgl_or_signs(opnd3,resultp1);
2263                                } else {
2264                                        Sgl_and_signs(opnd3,resultp1);
2265                                }
2266                        }
2267                        /*
2268                         * Now let's check for trapped underflow case.
2269                         */
2270                        else if (Sgl_iszero_exponent(opnd3) &&
2271                                 Is_underflowtrap_enabled()) {
2272                                /* need to normalize results mantissa */
2273                                sign_save = Sgl_signextendedsign(opnd3);
2274                                result_exponent = 0;
2275                                Sgl_leftshiftby1(opnd3);
2276                                Sgl_normalize(opnd3,result_exponent);
2277                                Sgl_set_sign(opnd3,/*using*/sign_save);
2278                                Sgl_setwrapped_exponent(opnd3,result_exponent,
2279                                                        unfl);
2280                                Sgl_copytoptr(opnd3,dstptr);
2281                                /* inexact = FALSE */
2282                                return(OPC_2E_UNDERFLOWEXCEPTION);
2283                        }
2284                        Sgl_copytoptr(opnd3,dstptr);
2285                        return(NOEXCEPTION);
2286                }
2287                /* is denormalized, adjust exponent */
2288                Sgl_clear_signexponent(opnd1);
2289                Sgl_leftshiftby1(opnd1);
2290                Sgl_normalize(opnd1,mpy_exponent);
2291        }
2292        /* opnd2 needs to have hidden bit set with msb in hidden bit */
2293        if (Sgl_isnotzero_exponent(opnd2)) {
2294                Sgl_clear_signexponent_set_hidden(opnd2);
2295        }
2296        else {
2297                /* check for zero */
2298                if (Sgl_iszero_mantissa(opnd2)) {
2299                        /*
2300                         * Perform the add opnd3 with zero here.
2301                         */
2302                        if (Sgl_iszero_exponentmantissa(opnd3)) {
2303                                if (Is_rounding_mode(ROUNDMINUS)) {
2304                                        Sgl_or_signs(opnd3,resultp1);
2305                                } else {
2306                                        Sgl_and_signs(opnd3,resultp1);
2307                                }
2308                        }
2309                        /*
2310                         * Now let's check for trapped underflow case.
2311                         */
2312                        else if (Sgl_iszero_exponent(opnd3) &&
2313                            Is_underflowtrap_enabled()) {
2314                                /* need to normalize results mantissa */
2315                                sign_save = Sgl_signextendedsign(opnd3);
2316                                result_exponent = 0;
2317                                Sgl_leftshiftby1(opnd3);
2318                                Sgl_normalize(opnd3,result_exponent);
2319                                Sgl_set_sign(opnd3,/*using*/sign_save);
2320                                Sgl_setwrapped_exponent(opnd3,result_exponent,
2321                                                        unfl);
2322                                Sgl_copytoptr(opnd3,dstptr);
2323                                /* inexact = FALSE */
2324                                return(OPC_2E_UNDERFLOWEXCEPTION);
2325                        }
2326                        Sgl_copytoptr(opnd3,dstptr);
2327                        return(NOEXCEPTION);
2328                }
2329                /* is denormalized; want to normalize */
2330                Sgl_clear_signexponent(opnd2);
2331                Sgl_leftshiftby1(opnd2);
2332                Sgl_normalize(opnd2,mpy_exponent);
2333        }
2334
2335        /* Multiply the first two source mantissas together */
2336
2337        /* 
2338         * The intermediate result will be kept in tmpres,
2339         * which needs enough room for 106 bits of mantissa,
2340         * so lets call it a Double extended.
2341         */
2342        Sglext_setzero(tmpresp1,tmpresp2);
2343
2344        /* 
2345         * Four bits at a time are inspected in each loop, and a 
2346         * simple shift and add multiply algorithm is used. 
2347         */ 
2348        for (count = SGL_P-1; count >= 0; count -= 4) {
2349                Sglext_rightshiftby4(tmpresp1,tmpresp2);
2350                if (Sbit28(opnd1)) {
2351                        /* Twoword_add should be an ADD followed by 2 ADDC's */
2352                        Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0);
2353                }
2354                if (Sbit29(opnd1)) {
2355                        Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0);
2356                }
2357                if (Sbit30(opnd1)) {
2358                        Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0);
2359                }
2360                if (Sbit31(opnd1)) {
2361                        Twoword_add(tmpresp1, tmpresp2, opnd2, 0);
2362                }
2363                Sgl_rightshiftby4(opnd1);
2364        }
2365        if (Is_sexthiddenoverflow(tmpresp1)) {
2366                /* result mantissa >= 2 (mantissa overflow) */
2367                mpy_exponent++;
2368                Sglext_rightshiftby4(tmpresp1,tmpresp2);
2369        } else {
2370                Sglext_rightshiftby3(tmpresp1,tmpresp2);
2371        }
2372
2373        /*
2374         * Restore the sign of the mpy result which was saved in resultp1.
2375         * The exponent will continue to be kept in mpy_exponent.
2376         */
2377        Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
2378
2379        /* 
2380         * No rounding is required, since the result of the multiply
2381         * is exact in the extended format.
2382         */
2383
2384        /*
2385         * Now we are ready to perform the add portion of the operation.
2386         *
2387         * The exponents need to be kept as integers for now, since the
2388         * multiply result might not fit into the exponent field.  We
2389         * can't overflow or underflow because of this yet, since the
2390         * add could bring the final result back into range.
2391         */
2392        add_exponent = Sgl_exponent(opnd3);
2393
2394        /*
2395         * Check for denormalized or zero add operand.
2396         */
2397        if (add_exponent == 0) {
2398                /* check for zero */
2399                if (Sgl_iszero_mantissa(opnd3)) {
2400                        /* right is zero */
2401                        /* Left can't be zero and must be result.
2402                         *
2403                         * The final result is now in tmpres and mpy_exponent,
2404                         * and needs to be rounded and squeezed back into
2405                         * double precision format from double extended.
2406                         */
2407                        result_exponent = mpy_exponent;
2408                        Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
2409                        sign_save = Sgl_signextendedsign(resultp1);/*save sign*/
2410                        goto round;
2411                }
2412
2413                /* 
2414                 * Neither are zeroes.  
2415                 * Adjust exponent and normalize add operand.
2416                 */
2417                sign_save = Sgl_signextendedsign(opnd3);        /* save sign */
2418                Sgl_clear_signexponent(opnd3);
2419                Sgl_leftshiftby1(opnd3);
2420                Sgl_normalize(opnd3,add_exponent);
2421                Sgl_set_sign(opnd3,sign_save);          /* restore sign */
2422        } else {
2423                Sgl_clear_exponent_set_hidden(opnd3);
2424        }
2425        /*
2426         * Copy opnd3 to the double extended variable called right.
2427         */
2428        Sgl_copyto_sglext(opnd3,rightp1,rightp2);
2429
2430        /*
2431         * A zero "save" helps discover equal operands (for later),
2432         * and is used in swapping operands (if needed).
2433         */
2434        Sglext_xortointp1(tmpresp1,rightp1,/*to*/save);
2435
2436        /*
2437         * Compare magnitude of operands.
2438         */
2439        Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
2440        Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
2441        if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
2442            Sglext_ismagnitudeless(signlessleft1,signlessright1)) {
2443                /*
2444                 * Set the left operand to the larger one by XOR swap.
2445                 * First finish the first word "save".
2446                 */
2447                Sglext_xorfromintp1(save,rightp1,/*to*/rightp1);
2448                Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
2449                Sglext_swap_lower(tmpresp2,rightp2);
2450                /* also setup exponents used in rest of routine */
2451                diff_exponent = add_exponent - mpy_exponent;
2452                result_exponent = add_exponent;
2453        } else {
2454                /* also setup exponents used in rest of routine */
2455                diff_exponent = mpy_exponent - add_exponent;
2456                result_exponent = mpy_exponent;
2457        }
2458        /* Invariant: left is not smaller than right. */
2459
2460        /*
2461         * Special case alignment of operands that would force alignment
2462         * beyond the extent of the extension.  A further optimization
2463         * could special case this but only reduces the path length for
2464         * this infrequent case.
2465         */
2466        if (diff_exponent > SGLEXT_THRESHOLD) {
2467                diff_exponent = SGLEXT_THRESHOLD;
2468        }
2469
2470        /* Align right operand by shifting it to the right */
2471        Sglext_clear_sign(rightp1);
2472        Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent);
2473        
2474        /* Treat sum and difference of the operands separately. */
2475        if ((int)save < 0) {
2476                /*
2477                 * Difference of the two operands.  Overflow can occur if the
2478                 * multiply overflowed.  A borrow can occur out of the hidden
2479                 * bit and force a post normalization phase.
2480                 */
2481                Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
2482                        resultp1,resultp2);
2483                sign_save = Sgl_signextendedsign(resultp1);
2484                if (Sgl_iszero_hidden(resultp1)) {
2485                        /* Handle normalization */
2486                /* A straight foward algorithm would now shift the
2487                 * result and extension left until the hidden bit
2488                 * becomes one.  Not all of the extension bits need
2489                 * participate in the shift.  Only the two most 
2490                 * significant bits (round and guard) are needed.
2491                 * If only a single shift is needed then the guard
2492                 * bit becomes a significant low order bit and the
2493                 * extension must participate in the rounding.
2494                 * If more than a single shift is needed, then all
2495                 * bits to the right of the guard bit are zeros, 
2496                 * and the guard bit may or may not be zero. */
2497                        Sglext_leftshiftby1(resultp1,resultp2);
2498
2499                        /* Need to check for a zero result.  The sign and
2500                         * exponent fields have already been zeroed.  The more
2501                         * efficient test of the full object can be used.
2502                         */
2503                         if (Sglext_iszero(resultp1,resultp2)) {
2504                                /* Must have been "x-x" or "x+(-x)". */
2505                                if (Is_rounding_mode(ROUNDMINUS))
2506                                        Sgl_setone_sign(resultp1);
2507                                Sgl_copytoptr(resultp1,dstptr);
2508                                return(NOEXCEPTION);
2509                        }
2510                        result_exponent--;
2511
2512                        /* Look to see if normalization is finished. */
2513                        if (Sgl_isone_hidden(resultp1)) {
2514                                /* No further normalization is needed */
2515                                goto round;
2516                        }
2517
2518                        /* Discover first one bit to determine shift amount.
2519                         * Use a modified binary search.  We have already
2520                         * shifted the result one position right and still
2521                         * not found a one so the remainder of the extension
2522                         * must be zero and simplifies rounding. */
2523                        /* Scan bytes */
2524                        while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
2525                                Sglext_leftshiftby8(resultp1,resultp2);
2526                                result_exponent -= 8;
2527                        }
2528                        /* Now narrow it down to the nibble */
2529                        if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
2530                                /* The lower nibble contains the
2531                                 * normalizing one */
2532                                Sglext_leftshiftby4(resultp1,resultp2);
2533                                result_exponent -= 4;
2534                        }
2535                        /* Select case where first bit is set (already
2536                         * normalized) otherwise select the proper shift. */
2537                        jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
2538                        if (jumpsize <= 7) switch(jumpsize) {
2539                        case 1:
2540                                Sglext_leftshiftby3(resultp1,resultp2);
2541                                result_exponent -= 3;
2542                                break;
2543                        case 2:
2544                        case 3:
2545                                Sglext_leftshiftby2(resultp1,resultp2);
2546                                result_exponent -= 2;
2547                                break;
2548                        case 4:
2549                        case 5:
2550                        case 6:
2551                        case 7:
2552                                Sglext_leftshiftby1(resultp1,resultp2);
2553                                result_exponent -= 1;
2554                                break;
2555                        }
2556                } /* end if (hidden...)... */
2557        /* Fall through and round */
2558        } /* end if (save < 0)... */
2559        else {
2560                /* Add magnitudes */
2561                Sglext_addition(tmpresp1,tmpresp2,
2562                        rightp1,rightp2, /*to*/resultp1,resultp2);
2563                sign_save = Sgl_signextendedsign(resultp1);
2564                if (Sgl_isone_hiddenoverflow(resultp1)) {
2565                        /* Prenormalization required. */
2566                        Sglext_arithrightshiftby1(resultp1,resultp2);
2567                        result_exponent++;
2568                } /* end if hiddenoverflow... */
2569        } /* end else ...add magnitudes... */
2570
2571        /* Round the result.  If the extension and lower two words are
2572         * all zeros, then the result is exact.  Otherwise round in the
2573         * correct direction.  Underflow is possible. If a postnormalization
2574         * is necessary, then the mantissa is all zeros so no shift is needed.
2575         */
2576  round:
2577        if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
2578                Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
2579        }
2580        Sgl_set_sign(resultp1,/*using*/sign_save);
2581        if (Sglext_isnotzero_mantissap2(resultp2)) {
2582                inexact = TRUE;
2583                switch(Rounding_mode()) {
2584                case ROUNDNEAREST: /* The default. */
2585                        if (Sglext_isone_highp2(resultp2)) {
2586                                /* at least 1/2 ulp */
2587                                if (Sglext_isnotzero_low31p2(resultp2) ||
2588                                    Sglext_isone_lowp1(resultp1)) {
2589                                        /* either exactly half way and odd or
2590                                         * more than 1/2ulp */
2591                                        Sgl_increment(resultp1);
2592                                }
2593                        }
2594                        break;
2595
2596                case ROUNDPLUS:
2597                        if (Sgl_iszero_sign(resultp1)) {
2598                                /* Round up positive results */
2599                                Sgl_increment(resultp1);
2600                        }
2601                        break;
2602            
2603                case ROUNDMINUS:
2604                        if (Sgl_isone_sign(resultp1)) {
2605                                /* Round down negative results */
2606                                Sgl_increment(resultp1);
2607                        }
2608            
2609                case ROUNDZERO:;
2610                        /* truncate is simple */
2611                } /* end switch... */
2612                if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
2613        }
2614        if (result_exponent >= SGL_INFINITY_EXPONENT) {
2615                /* Overflow */
2616                if (Is_overflowtrap_enabled()) {
2617                        /*
2618                         * Adjust bias of result
2619                         */
2620                        Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
2621                        Sgl_copytoptr(resultp1,dstptr);
2622                        if (inexact)
2623                            if (Is_inexacttrap_enabled())
2624                                return (OPC_2E_OVERFLOWEXCEPTION |
2625                                        OPC_2E_INEXACTEXCEPTION);
2626                            else Set_inexactflag();
2627                        return (OPC_2E_OVERFLOWEXCEPTION);
2628                }
2629                inexact = TRUE;
2630                Set_overflowflag();
2631                Sgl_setoverflow(resultp1);
2632        } else if (result_exponent <= 0) {      /* underflow case */
2633                if (Is_underflowtrap_enabled()) {
2634                        /*
2635                         * Adjust bias of result
2636                         */
2637                        Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
2638                        Sgl_copytoptr(resultp1,dstptr);
2639                        if (inexact)
2640                            if (Is_inexacttrap_enabled())
2641                                return (OPC_2E_UNDERFLOWEXCEPTION |
2642                                        OPC_2E_INEXACTEXCEPTION);
2643                            else Set_inexactflag();
2644                        return(OPC_2E_UNDERFLOWEXCEPTION);
2645                }
2646                else if (inexact && is_tiny) Set_underflowflag();
2647        }
2648        else Sgl_set_exponent(resultp1,result_exponent);
2649        Sgl_copytoptr(resultp1,dstptr);
2650        if (inexact) 
2651                if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
2652                else Set_inexactflag();
2653        return(NOEXCEPTION);
2654}
2655
2656