qemu/fpu/softfloat-macros.h
<<
>>
Prefs
   1/*
   2 * QEMU float support macros
   3 *
   4 * The code in this source file is derived from release 2a of the SoftFloat
   5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
   6 * some later contributions) are provided under that license, as detailed below.
   7 * It has subsequently been modified by contributors to the QEMU Project,
   8 * so some portions are provided under:
   9 *  the SoftFloat-2a license
  10 *  the BSD license
  11 *  GPL-v2-or-later
  12 *
  13 * Any future contributions to this file after December 1st 2014 will be
  14 * taken to be licensed under the Softfloat-2a license unless specifically
  15 * indicated otherwise.
  16 */
  17
  18/*
  19===============================================================================
  20This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
  21Arithmetic Package, Release 2a.
  22
  23Written by John R. Hauser.  This work was made possible in part by the
  24International Computer Science Institute, located at Suite 600, 1947 Center
  25Street, Berkeley, California 94704.  Funding was partially provided by the
  26National Science Foundation under grant MIP-9311980.  The original version
  27of this code was written as part of a project to build a fixed-point vector
  28processor in collaboration with the University of California at Berkeley,
  29overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
  30is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
  31arithmetic/SoftFloat.html'.
  32
  33THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
  34has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
  35TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
  36PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
  37AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
  38
  39Derivative works are acceptable, even for commercial purposes, so long as
  40(1) they include prominent notice that the work is derivative, and (2) they
  41include prominent notice akin to these four paragraphs for those parts of
  42this code that are retained.
  43
  44===============================================================================
  45*/
  46
  47/* BSD licensing:
  48 * Copyright (c) 2006, Fabrice Bellard
  49 * All rights reserved.
  50 *
  51 * Redistribution and use in source and binary forms, with or without
  52 * modification, are permitted provided that the following conditions are met:
  53 *
  54 * 1. Redistributions of source code must retain the above copyright notice,
  55 * this list of conditions and the following disclaimer.
  56 *
  57 * 2. Redistributions in binary form must reproduce the above copyright notice,
  58 * this list of conditions and the following disclaimer in the documentation
  59 * and/or other materials provided with the distribution.
  60 *
  61 * 3. Neither the name of the copyright holder nor the names of its contributors
  62 * may be used to endorse or promote products derived from this software without
  63 * specific prior written permission.
  64 *
  65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  75 * THE POSSIBILITY OF SUCH DAMAGE.
  76 */
  77
  78/* Portions of this work are licensed under the terms of the GNU GPL,
  79 * version 2 or later. See the COPYING file in the top-level directory.
  80 */
  81
  82/*----------------------------------------------------------------------------
  83| This macro tests for minimum version of the GNU C compiler.
  84*----------------------------------------------------------------------------*/
  85#if defined(__GNUC__) && defined(__GNUC_MINOR__)
  86# define SOFTFLOAT_GNUC_PREREQ(maj, min) \
  87         ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
  88#else
  89# define SOFTFLOAT_GNUC_PREREQ(maj, min) 0
  90#endif
  91
  92
  93/*----------------------------------------------------------------------------
  94| Shifts `a' right by the number of bits given in `count'.  If any nonzero
  95| bits are shifted off, they are ``jammed'' into the least significant bit of
  96| the result by setting the least significant bit to 1.  The value of `count'
  97| can be arbitrarily large; in particular, if `count' is greater than 32, the
  98| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
  99| The result is stored in the location pointed to by `zPtr'.
 100*----------------------------------------------------------------------------*/
 101
 102static inline void shift32RightJamming(uint32_t a, int count, uint32_t *zPtr)
 103{
 104    uint32_t z;
 105
 106    if ( count == 0 ) {
 107        z = a;
 108    }
 109    else if ( count < 32 ) {
 110        z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
 111    }
 112    else {
 113        z = ( a != 0 );
 114    }
 115    *zPtr = z;
 116
 117}
 118
 119/*----------------------------------------------------------------------------
 120| Shifts `a' right by the number of bits given in `count'.  If any nonzero
 121| bits are shifted off, they are ``jammed'' into the least significant bit of
 122| the result by setting the least significant bit to 1.  The value of `count'
 123| can be arbitrarily large; in particular, if `count' is greater than 64, the
 124| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
 125| The result is stored in the location pointed to by `zPtr'.
 126*----------------------------------------------------------------------------*/
 127
 128static inline void shift64RightJamming(uint64_t a, int count, uint64_t *zPtr)
 129{
 130    uint64_t z;
 131
 132    if ( count == 0 ) {
 133        z = a;
 134    }
 135    else if ( count < 64 ) {
 136        z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
 137    }
 138    else {
 139        z = ( a != 0 );
 140    }
 141    *zPtr = z;
 142
 143}
 144
 145/*----------------------------------------------------------------------------
 146| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
 147| _plus_ the number of bits given in `count'.  The shifted result is at most
 148| 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
 149| bits shifted off form a second 64-bit result as follows:  The _last_ bit
 150| shifted off is the most-significant bit of the extra result, and the other
 151| 63 bits of the extra result are all zero if and only if _all_but_the_last_
 152| bits shifted off were all zero.  This extra result is stored in the location
 153| pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
 154|     (This routine makes more sense if `a0' and `a1' are considered to form a
 155| fixed-point value with binary point between `a0' and `a1'.  This fixed-point
 156| value is shifted right by the number of bits given in `count', and the
 157| integer part of the result is returned at the location pointed to by
 158| `z0Ptr'.  The fractional part of the result may be slightly corrupted as
 159| described above, and is returned at the location pointed to by `z1Ptr'.)
 160*----------------------------------------------------------------------------*/
 161
 162static inline void
 163 shift64ExtraRightJamming(
 164     uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr)
 165{
 166    uint64_t z0, z1;
 167    int8_t negCount = ( - count ) & 63;
 168
 169    if ( count == 0 ) {
 170        z1 = a1;
 171        z0 = a0;
 172    }
 173    else if ( count < 64 ) {
 174        z1 = ( a0<<negCount ) | ( a1 != 0 );
 175        z0 = a0>>count;
 176    }
 177    else {
 178        if ( count == 64 ) {
 179            z1 = a0 | ( a1 != 0 );
 180        }
 181        else {
 182            z1 = ( ( a0 | a1 ) != 0 );
 183        }
 184        z0 = 0;
 185    }
 186    *z1Ptr = z1;
 187    *z0Ptr = z0;
 188
 189}
 190
 191/*----------------------------------------------------------------------------
 192| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
 193| number of bits given in `count'.  Any bits shifted off are lost.  The value
 194| of `count' can be arbitrarily large; in particular, if `count' is greater
 195| than 128, the result will be 0.  The result is broken into two 64-bit pieces
 196| which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
 197*----------------------------------------------------------------------------*/
 198
 199static inline void
 200 shift128Right(
 201     uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr)
 202{
 203    uint64_t z0, z1;
 204    int8_t negCount = ( - count ) & 63;
 205
 206    if ( count == 0 ) {
 207        z1 = a1;
 208        z0 = a0;
 209    }
 210    else if ( count < 64 ) {
 211        z1 = ( a0<<negCount ) | ( a1>>count );
 212        z0 = a0>>count;
 213    }
 214    else {
 215        z1 = (count < 128) ? (a0 >> (count & 63)) : 0;
 216        z0 = 0;
 217    }
 218    *z1Ptr = z1;
 219    *z0Ptr = z0;
 220
 221}
 222
 223/*----------------------------------------------------------------------------
 224| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
 225| number of bits given in `count'.  If any nonzero bits are shifted off, they
 226| are ``jammed'' into the least significant bit of the result by setting the
 227| least significant bit to 1.  The value of `count' can be arbitrarily large;
 228| in particular, if `count' is greater than 128, the result will be either
 229| 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
 230| nonzero.  The result is broken into two 64-bit pieces which are stored at
 231| the locations pointed to by `z0Ptr' and `z1Ptr'.
 232*----------------------------------------------------------------------------*/
 233
 234static inline void
 235 shift128RightJamming(
 236     uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr)
 237{
 238    uint64_t z0, z1;
 239    int8_t negCount = ( - count ) & 63;
 240
 241    if ( count == 0 ) {
 242        z1 = a1;
 243        z0 = a0;
 244    }
 245    else if ( count < 64 ) {
 246        z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
 247        z0 = a0>>count;
 248    }
 249    else {
 250        if ( count == 64 ) {
 251            z1 = a0 | ( a1 != 0 );
 252        }
 253        else if ( count < 128 ) {
 254            z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
 255        }
 256        else {
 257            z1 = ( ( a0 | a1 ) != 0 );
 258        }
 259        z0 = 0;
 260    }
 261    *z1Ptr = z1;
 262    *z0Ptr = z0;
 263
 264}
 265
 266/*----------------------------------------------------------------------------
 267| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
 268| by 64 _plus_ the number of bits given in `count'.  The shifted result is
 269| at most 128 nonzero bits; these are broken into two 64-bit pieces which are
 270| stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
 271| off form a third 64-bit result as follows:  The _last_ bit shifted off is
 272| the most-significant bit of the extra result, and the other 63 bits of the
 273| extra result are all zero if and only if _all_but_the_last_ bits shifted off
 274| were all zero.  This extra result is stored in the location pointed to by
 275| `z2Ptr'.  The value of `count' can be arbitrarily large.
 276|     (This routine makes more sense if `a0', `a1', and `a2' are considered
 277| to form a fixed-point value with binary point between `a1' and `a2'.  This
 278| fixed-point value is shifted right by the number of bits given in `count',
 279| and the integer part of the result is returned at the locations pointed to
 280| by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
 281| corrupted as described above, and is returned at the location pointed to by
 282| `z2Ptr'.)
 283*----------------------------------------------------------------------------*/
 284
 285static inline void
 286 shift128ExtraRightJamming(
 287     uint64_t a0,
 288     uint64_t a1,
 289     uint64_t a2,
 290     int count,
 291     uint64_t *z0Ptr,
 292     uint64_t *z1Ptr,
 293     uint64_t *z2Ptr
 294 )
 295{
 296    uint64_t z0, z1, z2;
 297    int8_t negCount = ( - count ) & 63;
 298
 299    if ( count == 0 ) {
 300        z2 = a2;
 301        z1 = a1;
 302        z0 = a0;
 303    }
 304    else {
 305        if ( count < 64 ) {
 306            z2 = a1<<negCount;
 307            z1 = ( a0<<negCount ) | ( a1>>count );
 308            z0 = a0>>count;
 309        }
 310        else {
 311            if ( count == 64 ) {
 312                z2 = a1;
 313                z1 = a0;
 314            }
 315            else {
 316                a2 |= a1;
 317                if ( count < 128 ) {
 318                    z2 = a0<<negCount;
 319                    z1 = a0>>( count & 63 );
 320                }
 321                else {
 322                    z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
 323                    z1 = 0;
 324                }
 325            }
 326            z0 = 0;
 327        }
 328        z2 |= ( a2 != 0 );
 329    }
 330    *z2Ptr = z2;
 331    *z1Ptr = z1;
 332    *z0Ptr = z0;
 333
 334}
 335
 336/*----------------------------------------------------------------------------
 337| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
 338| number of bits given in `count'.  Any bits shifted off are lost.  The value
 339| of `count' must be less than 64.  The result is broken into two 64-bit
 340| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
 341*----------------------------------------------------------------------------*/
 342
 343static inline void
 344 shortShift128Left(
 345     uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr)
 346{
 347
 348    *z1Ptr = a1<<count;
 349    *z0Ptr =
 350        ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
 351
 352}
 353
 354/*----------------------------------------------------------------------------
 355| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
 356| by the number of bits given in `count'.  Any bits shifted off are lost.
 357| The value of `count' must be less than 64.  The result is broken into three
 358| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
 359| `z1Ptr', and `z2Ptr'.
 360*----------------------------------------------------------------------------*/
 361
 362static inline void
 363 shortShift192Left(
 364     uint64_t a0,
 365     uint64_t a1,
 366     uint64_t a2,
 367     int count,
 368     uint64_t *z0Ptr,
 369     uint64_t *z1Ptr,
 370     uint64_t *z2Ptr
 371 )
 372{
 373    uint64_t z0, z1, z2;
 374    int8_t negCount;
 375
 376    z2 = a2<<count;
 377    z1 = a1<<count;
 378    z0 = a0<<count;
 379    if ( 0 < count ) {
 380        negCount = ( ( - count ) & 63 );
 381        z1 |= a2>>negCount;
 382        z0 |= a1>>negCount;
 383    }
 384    *z2Ptr = z2;
 385    *z1Ptr = z1;
 386    *z0Ptr = z0;
 387
 388}
 389
 390/*----------------------------------------------------------------------------
 391| Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
 392| value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
 393| any carry out is lost.  The result is broken into two 64-bit pieces which
 394| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
 395*----------------------------------------------------------------------------*/
 396
 397static inline void
 398 add128(
 399     uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
 400{
 401    uint64_t z1;
 402
 403    z1 = a1 + b1;
 404    *z1Ptr = z1;
 405    *z0Ptr = a0 + b0 + ( z1 < a1 );
 406
 407}
 408
 409/*----------------------------------------------------------------------------
 410| Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
 411| 192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
 412| modulo 2^192, so any carry out is lost.  The result is broken into three
 413| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
 414| `z1Ptr', and `z2Ptr'.
 415*----------------------------------------------------------------------------*/
 416
 417static inline void
 418 add192(
 419     uint64_t a0,
 420     uint64_t a1,
 421     uint64_t a2,
 422     uint64_t b0,
 423     uint64_t b1,
 424     uint64_t b2,
 425     uint64_t *z0Ptr,
 426     uint64_t *z1Ptr,
 427     uint64_t *z2Ptr
 428 )
 429{
 430    uint64_t z0, z1, z2;
 431    int8_t carry0, carry1;
 432
 433    z2 = a2 + b2;
 434    carry1 = ( z2 < a2 );
 435    z1 = a1 + b1;
 436    carry0 = ( z1 < a1 );
 437    z0 = a0 + b0;
 438    z1 += carry1;
 439    z0 += ( z1 < carry1 );
 440    z0 += carry0;
 441    *z2Ptr = z2;
 442    *z1Ptr = z1;
 443    *z0Ptr = z0;
 444
 445}
 446
 447/*----------------------------------------------------------------------------
 448| Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
 449| 128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
 450| 2^128, so any borrow out (carry out) is lost.  The result is broken into two
 451| 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
 452| `z1Ptr'.
 453*----------------------------------------------------------------------------*/
 454
 455static inline void
 456 sub128(
 457     uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
 458{
 459
 460    *z1Ptr = a1 - b1;
 461    *z0Ptr = a0 - b0 - ( a1 < b1 );
 462
 463}
 464
 465/*----------------------------------------------------------------------------
 466| Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
 467| from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
 468| Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
 469| result is broken into three 64-bit pieces which are stored at the locations
 470| pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
 471*----------------------------------------------------------------------------*/
 472
 473static inline void
 474 sub192(
 475     uint64_t a0,
 476     uint64_t a1,
 477     uint64_t a2,
 478     uint64_t b0,
 479     uint64_t b1,
 480     uint64_t b2,
 481     uint64_t *z0Ptr,
 482     uint64_t *z1Ptr,
 483     uint64_t *z2Ptr
 484 )
 485{
 486    uint64_t z0, z1, z2;
 487    int8_t borrow0, borrow1;
 488
 489    z2 = a2 - b2;
 490    borrow1 = ( a2 < b2 );
 491    z1 = a1 - b1;
 492    borrow0 = ( a1 < b1 );
 493    z0 = a0 - b0;
 494    z0 -= ( z1 < borrow1 );
 495    z1 -= borrow1;
 496    z0 -= borrow0;
 497    *z2Ptr = z2;
 498    *z1Ptr = z1;
 499    *z0Ptr = z0;
 500
 501}
 502
 503/*----------------------------------------------------------------------------
 504| Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
 505| into two 64-bit pieces which are stored at the locations pointed to by
 506| `z0Ptr' and `z1Ptr'.
 507*----------------------------------------------------------------------------*/
 508
 509static inline void mul64To128( uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr )
 510{
 511    uint32_t aHigh, aLow, bHigh, bLow;
 512    uint64_t z0, zMiddleA, zMiddleB, z1;
 513
 514    aLow = a;
 515    aHigh = a>>32;
 516    bLow = b;
 517    bHigh = b>>32;
 518    z1 = ( (uint64_t) aLow ) * bLow;
 519    zMiddleA = ( (uint64_t) aLow ) * bHigh;
 520    zMiddleB = ( (uint64_t) aHigh ) * bLow;
 521    z0 = ( (uint64_t) aHigh ) * bHigh;
 522    zMiddleA += zMiddleB;
 523    z0 += ( ( (uint64_t) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
 524    zMiddleA <<= 32;
 525    z1 += zMiddleA;
 526    z0 += ( z1 < zMiddleA );
 527    *z1Ptr = z1;
 528    *z0Ptr = z0;
 529
 530}
 531
 532/*----------------------------------------------------------------------------
 533| Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
 534| `b' to obtain a 192-bit product.  The product is broken into three 64-bit
 535| pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
 536| `z2Ptr'.
 537*----------------------------------------------------------------------------*/
 538
 539static inline void
 540 mul128By64To192(
 541     uint64_t a0,
 542     uint64_t a1,
 543     uint64_t b,
 544     uint64_t *z0Ptr,
 545     uint64_t *z1Ptr,
 546     uint64_t *z2Ptr
 547 )
 548{
 549    uint64_t z0, z1, z2, more1;
 550
 551    mul64To128( a1, b, &z1, &z2 );
 552    mul64To128( a0, b, &z0, &more1 );
 553    add128( z0, more1, 0, z1, &z0, &z1 );
 554    *z2Ptr = z2;
 555    *z1Ptr = z1;
 556    *z0Ptr = z0;
 557
 558}
 559
 560/*----------------------------------------------------------------------------
 561| Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
 562| 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
 563| product.  The product is broken into four 64-bit pieces which are stored at
 564| the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
 565*----------------------------------------------------------------------------*/
 566
 567static inline void
 568 mul128To256(
 569     uint64_t a0,
 570     uint64_t a1,
 571     uint64_t b0,
 572     uint64_t b1,
 573     uint64_t *z0Ptr,
 574     uint64_t *z1Ptr,
 575     uint64_t *z2Ptr,
 576     uint64_t *z3Ptr
 577 )
 578{
 579    uint64_t z0, z1, z2, z3;
 580    uint64_t more1, more2;
 581
 582    mul64To128( a1, b1, &z2, &z3 );
 583    mul64To128( a1, b0, &z1, &more2 );
 584    add128( z1, more2, 0, z2, &z1, &z2 );
 585    mul64To128( a0, b0, &z0, &more1 );
 586    add128( z0, more1, 0, z1, &z0, &z1 );
 587    mul64To128( a0, b1, &more1, &more2 );
 588    add128( more1, more2, 0, z2, &more1, &z2 );
 589    add128( z0, z1, 0, more1, &z0, &z1 );
 590    *z3Ptr = z3;
 591    *z2Ptr = z2;
 592    *z1Ptr = z1;
 593    *z0Ptr = z0;
 594
 595}
 596
 597/*----------------------------------------------------------------------------
 598| Returns an approximation to the 64-bit integer quotient obtained by dividing
 599| `b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
 600| divisor `b' must be at least 2^63.  If q is the exact quotient truncated
 601| toward zero, the approximation returned lies between q and q + 2 inclusive.
 602| If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
 603| unsigned integer is returned.
 604*----------------------------------------------------------------------------*/
 605
 606static uint64_t estimateDiv128To64( uint64_t a0, uint64_t a1, uint64_t b )
 607{
 608    uint64_t b0, b1;
 609    uint64_t rem0, rem1, term0, term1;
 610    uint64_t z;
 611
 612    if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
 613    b0 = b>>32;
 614    z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
 615    mul64To128( b, z, &term0, &term1 );
 616    sub128( a0, a1, term0, term1, &rem0, &rem1 );
 617    while ( ( (int64_t) rem0 ) < 0 ) {
 618        z -= LIT64( 0x100000000 );
 619        b1 = b<<32;
 620        add128( rem0, rem1, b0, b1, &rem0, &rem1 );
 621    }
 622    rem0 = ( rem0<<32 ) | ( rem1>>32 );
 623    z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
 624    return z;
 625
 626}
 627
 628/*----------------------------------------------------------------------------
 629| Returns an approximation to the square root of the 32-bit significand given
 630| by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
 631| `aExp' (the least significant bit) is 1, the integer returned approximates
 632| 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
 633| is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
 634| case, the approximation returned lies strictly within +/-2 of the exact
 635| value.
 636*----------------------------------------------------------------------------*/
 637
 638static uint32_t estimateSqrt32(int aExp, uint32_t a)
 639{
 640    static const uint16_t sqrtOddAdjustments[] = {
 641        0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
 642        0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
 643    };
 644    static const uint16_t sqrtEvenAdjustments[] = {
 645        0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
 646        0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
 647    };
 648    int8_t index;
 649    uint32_t z;
 650
 651    index = ( a>>27 ) & 15;
 652    if ( aExp & 1 ) {
 653        z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ (int)index ];
 654        z = ( ( a / z )<<14 ) + ( z<<15 );
 655        a >>= 1;
 656    }
 657    else {
 658        z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ (int)index ];
 659        z = a / z + z;
 660        z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
 661        if ( z <= a ) return (uint32_t) ( ( (int32_t) a )>>1 );
 662    }
 663    return ( (uint32_t) ( ( ( (uint64_t) a )<<31 ) / z ) ) + ( z>>1 );
 664
 665}
 666
 667/*----------------------------------------------------------------------------
 668| Returns the number of leading 0 bits before the most-significant 1 bit of
 669| `a'.  If `a' is zero, 32 is returned.
 670*----------------------------------------------------------------------------*/
 671
 672static int8_t countLeadingZeros32( uint32_t a )
 673{
 674#if SOFTFLOAT_GNUC_PREREQ(3, 4)
 675    if (a) {
 676        return __builtin_clz(a);
 677    } else {
 678        return 32;
 679    }
 680#else
 681    static const int8_t countLeadingZerosHigh[] = {
 682        8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
 683        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 684        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 685        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 686        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 687        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 688        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 689        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 690        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 691        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 692        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 693        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 694        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 695        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 696        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 697        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 698    };
 699    int8_t shiftCount;
 700
 701    shiftCount = 0;
 702    if ( a < 0x10000 ) {
 703        shiftCount += 16;
 704        a <<= 16;
 705    }
 706    if ( a < 0x1000000 ) {
 707        shiftCount += 8;
 708        a <<= 8;
 709    }
 710    shiftCount += countLeadingZerosHigh[ a>>24 ];
 711    return shiftCount;
 712#endif
 713}
 714
 715/*----------------------------------------------------------------------------
 716| Returns the number of leading 0 bits before the most-significant 1 bit of
 717| `a'.  If `a' is zero, 64 is returned.
 718*----------------------------------------------------------------------------*/
 719
 720static int8_t countLeadingZeros64( uint64_t a )
 721{
 722#if SOFTFLOAT_GNUC_PREREQ(3, 4)
 723    if (a) {
 724        return __builtin_clzll(a);
 725    } else {
 726        return 64;
 727    }
 728#else
 729    int8_t shiftCount;
 730
 731    shiftCount = 0;
 732    if ( a < ( (uint64_t) 1 )<<32 ) {
 733        shiftCount += 32;
 734    }
 735    else {
 736        a >>= 32;
 737    }
 738    shiftCount += countLeadingZeros32( a );
 739    return shiftCount;
 740#endif
 741}
 742
 743/*----------------------------------------------------------------------------
 744| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
 745| is equal to the 128-bit value formed by concatenating `b0' and `b1'.
 746| Otherwise, returns 0.
 747*----------------------------------------------------------------------------*/
 748
 749static inline flag eq128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
 750{
 751
 752    return ( a0 == b0 ) && ( a1 == b1 );
 753
 754}
 755
 756/*----------------------------------------------------------------------------
 757| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
 758| than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
 759| Otherwise, returns 0.
 760*----------------------------------------------------------------------------*/
 761
 762static inline flag le128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
 763{
 764
 765    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
 766
 767}
 768
 769/*----------------------------------------------------------------------------
 770| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
 771| than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
 772| returns 0.
 773*----------------------------------------------------------------------------*/
 774
 775static inline flag lt128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
 776{
 777
 778    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
 779
 780}
 781
 782/*----------------------------------------------------------------------------
 783| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
 784| not equal to the 128-bit value formed by concatenating `b0' and `b1'.
 785| Otherwise, returns 0.
 786*----------------------------------------------------------------------------*/
 787
 788static inline flag ne128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
 789{
 790
 791    return ( a0 != b0 ) || ( a1 != b1 );
 792
 793}
 794