qemu/include/fpu/softfloat-macros.h
<<
>>
Prefs
   1/*
   2 * QEMU float support macros
   3 *
   4 * The code in this source file is derived from release 2a of the SoftFloat
   5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
   6 * some later contributions) are provided under that license, as detailed below.
   7 * It has subsequently been modified by contributors to the QEMU Project,
   8 * so some portions are provided under:
   9 *  the SoftFloat-2a license
  10 *  the BSD license
  11 *
  12 * Any future contributions to this file after December 1st 2014 will be
  13 * taken to be licensed under the Softfloat-2a license unless specifically
  14 * indicated otherwise.
  15 */
  16
  17/*
  18===============================================================================
  19This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
  20Arithmetic Package, Release 2a.
  21
  22Written by John R. Hauser.  This work was made possible in part by the
  23International Computer Science Institute, located at Suite 600, 1947 Center
  24Street, Berkeley, California 94704.  Funding was partially provided by the
  25National Science Foundation under grant MIP-9311980.  The original version
  26of this code was written as part of a project to build a fixed-point vector
  27processor in collaboration with the University of California at Berkeley,
  28overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
  29is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
  30arithmetic/SoftFloat.html'.
  31
  32THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
  33has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
  34TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
  35PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
  36AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
  37
  38Derivative works are acceptable, even for commercial purposes, so long as
  39(1) they include prominent notice that the work is derivative, and (2) they
  40include prominent notice akin to these four paragraphs for those parts of
  41this code that are retained.
  42
  43===============================================================================
  44*/
  45
  46/* BSD licensing:
  47 * Copyright (c) 2006, Fabrice Bellard
  48 * All rights reserved.
  49 *
  50 * Redistribution and use in source and binary forms, with or without
  51 * modification, are permitted provided that the following conditions are met:
  52 *
  53 * 1. Redistributions of source code must retain the above copyright notice,
  54 * this list of conditions and the following disclaimer.
  55 *
  56 * 2. Redistributions in binary form must reproduce the above copyright notice,
  57 * this list of conditions and the following disclaimer in the documentation
  58 * and/or other materials provided with the distribution.
  59 *
  60 * 3. Neither the name of the copyright holder nor the names of its contributors
  61 * may be used to endorse or promote products derived from this software without
  62 * specific prior written permission.
  63 *
  64 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  65 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  66 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  67 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  68 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  69 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  70 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  71 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  72 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  73 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  74 * THE POSSIBILITY OF SUCH DAMAGE.
  75 */
  76
  77#ifndef FPU_SOFTFLOAT_MACROS_H
  78#define FPU_SOFTFLOAT_MACROS_H
  79
  80#include "fpu/softfloat-types.h"
  81#include "qemu/host-utils.h"
  82
  83/**
  84 * shl_double: double-word merging left shift
  85 * @l: left or most-significant word
  86 * @r: right or least-significant word
  87 * @c: shift count
  88 *
  89 * Shift @l left by @c bits, shifting in bits from @r.
  90 */
  91static inline uint64_t shl_double(uint64_t l, uint64_t r, int c)
  92{
  93#if defined(__x86_64__)
  94    asm("shld %b2, %1, %0" : "+r"(l) : "r"(r), "ci"(c));
  95    return l;
  96#else
  97    return c ? (l << c) | (r >> (64 - c)) : l;
  98#endif
  99}
 100
 101/**
 102 * shr_double: double-word merging right shift
 103 * @l: left or most-significant word
 104 * @r: right or least-significant word
 105 * @c: shift count
 106 *
 107 * Shift @r right by @c bits, shifting in bits from @l.
 108 */
 109static inline uint64_t shr_double(uint64_t l, uint64_t r, int c)
 110{
 111#if defined(__x86_64__)
 112    asm("shrd %b2, %1, %0" : "+r"(r) : "r"(l), "ci"(c));
 113    return r;
 114#else
 115    return c ? (r >> c) | (l << (64 - c)) : r;
 116#endif
 117}
 118
 119/*----------------------------------------------------------------------------
 120| Shifts `a' right by the number of bits given in `count'.  If any nonzero
 121| bits are shifted off, they are ``jammed'' into the least significant bit of
 122| the result by setting the least significant bit to 1.  The value of `count'
 123| can be arbitrarily large; in particular, if `count' is greater than 32, the
 124| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
 125| The result is stored in the location pointed to by `zPtr'.
 126*----------------------------------------------------------------------------*/
 127
 128static inline void shift32RightJamming(uint32_t a, int count, uint32_t *zPtr)
 129{
 130    uint32_t z;
 131
 132    if ( count == 0 ) {
 133        z = a;
 134    }
 135    else if ( count < 32 ) {
 136        z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
 137    }
 138    else {
 139        z = ( a != 0 );
 140    }
 141    *zPtr = z;
 142
 143}
 144
 145/*----------------------------------------------------------------------------
 146| Shifts `a' right by the number of bits given in `count'.  If any nonzero
 147| bits are shifted off, they are ``jammed'' into the least significant bit of
 148| the result by setting the least significant bit to 1.  The value of `count'
 149| can be arbitrarily large; in particular, if `count' is greater than 64, the
 150| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
 151| The result is stored in the location pointed to by `zPtr'.
 152*----------------------------------------------------------------------------*/
 153
 154static inline void shift64RightJamming(uint64_t a, int count, uint64_t *zPtr)
 155{
 156    uint64_t z;
 157
 158    if ( count == 0 ) {
 159        z = a;
 160    }
 161    else if ( count < 64 ) {
 162        z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
 163    }
 164    else {
 165        z = ( a != 0 );
 166    }
 167    *zPtr = z;
 168
 169}
 170
 171/*----------------------------------------------------------------------------
 172| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
 173| _plus_ the number of bits given in `count'.  The shifted result is at most
 174| 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
 175| bits shifted off form a second 64-bit result as follows:  The _last_ bit
 176| shifted off is the most-significant bit of the extra result, and the other
 177| 63 bits of the extra result are all zero if and only if _all_but_the_last_
 178| bits shifted off were all zero.  This extra result is stored in the location
 179| pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
 180|     (This routine makes more sense if `a0' and `a1' are considered to form a
 181| fixed-point value with binary point between `a0' and `a1'.  This fixed-point
 182| value is shifted right by the number of bits given in `count', and the
 183| integer part of the result is returned at the location pointed to by
 184| `z0Ptr'.  The fractional part of the result may be slightly corrupted as
 185| described above, and is returned at the location pointed to by `z1Ptr'.)
 186*----------------------------------------------------------------------------*/
 187
 188static inline void
 189 shift64ExtraRightJamming(
 190     uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr)
 191{
 192    uint64_t z0, z1;
 193    int8_t negCount = ( - count ) & 63;
 194
 195    if ( count == 0 ) {
 196        z1 = a1;
 197        z0 = a0;
 198    }
 199    else if ( count < 64 ) {
 200        z1 = ( a0<<negCount ) | ( a1 != 0 );
 201        z0 = a0>>count;
 202    }
 203    else {
 204        if ( count == 64 ) {
 205            z1 = a0 | ( a1 != 0 );
 206        }
 207        else {
 208            z1 = ( ( a0 | a1 ) != 0 );
 209        }
 210        z0 = 0;
 211    }
 212    *z1Ptr = z1;
 213    *z0Ptr = z0;
 214
 215}
 216
 217/*----------------------------------------------------------------------------
 218| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
 219| number of bits given in `count'.  Any bits shifted off are lost.  The value
 220| of `count' can be arbitrarily large; in particular, if `count' is greater
 221| than 128, the result will be 0.  The result is broken into two 64-bit pieces
 222| which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
 223*----------------------------------------------------------------------------*/
 224
 225static inline void
 226 shift128Right(
 227     uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr)
 228{
 229    uint64_t z0, z1;
 230    int8_t negCount = ( - count ) & 63;
 231
 232    if ( count == 0 ) {
 233        z1 = a1;
 234        z0 = a0;
 235    }
 236    else if ( count < 64 ) {
 237        z1 = ( a0<<negCount ) | ( a1>>count );
 238        z0 = a0>>count;
 239    }
 240    else {
 241        z1 = (count < 128) ? (a0 >> (count & 63)) : 0;
 242        z0 = 0;
 243    }
 244    *z1Ptr = z1;
 245    *z0Ptr = z0;
 246
 247}
 248
 249/*----------------------------------------------------------------------------
 250| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
 251| number of bits given in `count'.  If any nonzero bits are shifted off, they
 252| are ``jammed'' into the least significant bit of the result by setting the
 253| least significant bit to 1.  The value of `count' can be arbitrarily large;
 254| in particular, if `count' is greater than 128, the result will be either
 255| 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
 256| nonzero.  The result is broken into two 64-bit pieces which are stored at
 257| the locations pointed to by `z0Ptr' and `z1Ptr'.
 258*----------------------------------------------------------------------------*/
 259
 260static inline void
 261 shift128RightJamming(
 262     uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr)
 263{
 264    uint64_t z0, z1;
 265    int8_t negCount = ( - count ) & 63;
 266
 267    if ( count == 0 ) {
 268        z1 = a1;
 269        z0 = a0;
 270    }
 271    else if ( count < 64 ) {
 272        z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
 273        z0 = a0>>count;
 274    }
 275    else {
 276        if ( count == 64 ) {
 277            z1 = a0 | ( a1 != 0 );
 278        }
 279        else if ( count < 128 ) {
 280            z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
 281        }
 282        else {
 283            z1 = ( ( a0 | a1 ) != 0 );
 284        }
 285        z0 = 0;
 286    }
 287    *z1Ptr = z1;
 288    *z0Ptr = z0;
 289
 290}
 291
 292/*----------------------------------------------------------------------------
 293| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
 294| by 64 _plus_ the number of bits given in `count'.  The shifted result is
 295| at most 128 nonzero bits; these are broken into two 64-bit pieces which are
 296| stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
 297| off form a third 64-bit result as follows:  The _last_ bit shifted off is
 298| the most-significant bit of the extra result, and the other 63 bits of the
 299| extra result are all zero if and only if _all_but_the_last_ bits shifted off
 300| were all zero.  This extra result is stored in the location pointed to by
 301| `z2Ptr'.  The value of `count' can be arbitrarily large.
 302|     (This routine makes more sense if `a0', `a1', and `a2' are considered
 303| to form a fixed-point value with binary point between `a1' and `a2'.  This
 304| fixed-point value is shifted right by the number of bits given in `count',
 305| and the integer part of the result is returned at the locations pointed to
 306| by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
 307| corrupted as described above, and is returned at the location pointed to by
 308| `z2Ptr'.)
 309*----------------------------------------------------------------------------*/
 310
 311static inline void
 312 shift128ExtraRightJamming(
 313     uint64_t a0,
 314     uint64_t a1,
 315     uint64_t a2,
 316     int count,
 317     uint64_t *z0Ptr,
 318     uint64_t *z1Ptr,
 319     uint64_t *z2Ptr
 320 )
 321{
 322    uint64_t z0, z1, z2;
 323    int8_t negCount = ( - count ) & 63;
 324
 325    if ( count == 0 ) {
 326        z2 = a2;
 327        z1 = a1;
 328        z0 = a0;
 329    }
 330    else {
 331        if ( count < 64 ) {
 332            z2 = a1<<negCount;
 333            z1 = ( a0<<negCount ) | ( a1>>count );
 334            z0 = a0>>count;
 335        }
 336        else {
 337            if ( count == 64 ) {
 338                z2 = a1;
 339                z1 = a0;
 340            }
 341            else {
 342                a2 |= a1;
 343                if ( count < 128 ) {
 344                    z2 = a0<<negCount;
 345                    z1 = a0>>( count & 63 );
 346                }
 347                else {
 348                    z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
 349                    z1 = 0;
 350                }
 351            }
 352            z0 = 0;
 353        }
 354        z2 |= ( a2 != 0 );
 355    }
 356    *z2Ptr = z2;
 357    *z1Ptr = z1;
 358    *z0Ptr = z0;
 359
 360}
 361
 362/*----------------------------------------------------------------------------
 363| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
 364| number of bits given in `count'.  Any bits shifted off are lost.  The value
 365| of `count' must be less than 64.  The result is broken into two 64-bit
 366| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
 367*----------------------------------------------------------------------------*/
 368
 369static inline void shortShift128Left(uint64_t a0, uint64_t a1, int count,
 370                                     uint64_t *z0Ptr, uint64_t *z1Ptr)
 371{
 372    *z1Ptr = a1 << count;
 373    *z0Ptr = count == 0 ? a0 : (a0 << count) | (a1 >> (-count & 63));
 374}
 375
 376/*----------------------------------------------------------------------------
 377| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
 378| number of bits given in `count'.  Any bits shifted off are lost.  The value
 379| of `count' may be greater than 64.  The result is broken into two 64-bit
 380| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
 381*----------------------------------------------------------------------------*/
 382
 383static inline void shift128Left(uint64_t a0, uint64_t a1, int count,
 384                                uint64_t *z0Ptr, uint64_t *z1Ptr)
 385{
 386    if (count < 64) {
 387        *z1Ptr = a1 << count;
 388        *z0Ptr = count == 0 ? a0 : (a0 << count) | (a1 >> (-count & 63));
 389    } else {
 390        *z1Ptr = 0;
 391        *z0Ptr = a1 << (count - 64);
 392    }
 393}
 394
 395/*----------------------------------------------------------------------------
 396| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
 397| by the number of bits given in `count'.  Any bits shifted off are lost.
 398| The value of `count' must be less than 64.  The result is broken into three
 399| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
 400| `z1Ptr', and `z2Ptr'.
 401*----------------------------------------------------------------------------*/
 402
 403static inline void
 404 shortShift192Left(
 405     uint64_t a0,
 406     uint64_t a1,
 407     uint64_t a2,
 408     int count,
 409     uint64_t *z0Ptr,
 410     uint64_t *z1Ptr,
 411     uint64_t *z2Ptr
 412 )
 413{
 414    uint64_t z0, z1, z2;
 415    int8_t negCount;
 416
 417    z2 = a2<<count;
 418    z1 = a1<<count;
 419    z0 = a0<<count;
 420    if ( 0 < count ) {
 421        negCount = ( ( - count ) & 63 );
 422        z1 |= a2>>negCount;
 423        z0 |= a1>>negCount;
 424    }
 425    *z2Ptr = z2;
 426    *z1Ptr = z1;
 427    *z0Ptr = z0;
 428
 429}
 430
 431/*----------------------------------------------------------------------------
 432| Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
 433| value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
 434| any carry out is lost.  The result is broken into two 64-bit pieces which
 435| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
 436*----------------------------------------------------------------------------*/
 437
 438static inline void add128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1,
 439                          uint64_t *z0Ptr, uint64_t *z1Ptr)
 440{
 441    bool c = 0;
 442    *z1Ptr = uadd64_carry(a1, b1, &c);
 443    *z0Ptr = uadd64_carry(a0, b0, &c);
 444}
 445
 446/*----------------------------------------------------------------------------
 447| Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
 448| 192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
 449| modulo 2^192, so any carry out is lost.  The result is broken into three
 450| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
 451| `z1Ptr', and `z2Ptr'.
 452*----------------------------------------------------------------------------*/
 453
 454static inline void add192(uint64_t a0, uint64_t a1, uint64_t a2,
 455                          uint64_t b0, uint64_t b1, uint64_t b2,
 456                          uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr)
 457{
 458    bool c = 0;
 459    *z2Ptr = uadd64_carry(a2, b2, &c);
 460    *z1Ptr = uadd64_carry(a1, b1, &c);
 461    *z0Ptr = uadd64_carry(a0, b0, &c);
 462}
 463
 464/*----------------------------------------------------------------------------
 465| Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
 466| 128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
 467| 2^128, so any borrow out (carry out) is lost.  The result is broken into two
 468| 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
 469| `z1Ptr'.
 470*----------------------------------------------------------------------------*/
 471
 472static inline void sub128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1,
 473                          uint64_t *z0Ptr, uint64_t *z1Ptr)
 474{
 475    bool c = 0;
 476    *z1Ptr = usub64_borrow(a1, b1, &c);
 477    *z0Ptr = usub64_borrow(a0, b0, &c);
 478}
 479
 480/*----------------------------------------------------------------------------
 481| Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
 482| from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
 483| Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
 484| result is broken into three 64-bit pieces which are stored at the locations
 485| pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
 486*----------------------------------------------------------------------------*/
 487
 488static inline void sub192(uint64_t a0, uint64_t a1, uint64_t a2,
 489                          uint64_t b0, uint64_t b1, uint64_t b2,
 490                          uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr)
 491{
 492    bool c = 0;
 493    *z2Ptr = usub64_borrow(a2, b2, &c);
 494    *z1Ptr = usub64_borrow(a1, b1, &c);
 495    *z0Ptr = usub64_borrow(a0, b0, &c);
 496}
 497
 498/*----------------------------------------------------------------------------
 499| Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
 500| into two 64-bit pieces which are stored at the locations pointed to by
 501| `z0Ptr' and `z1Ptr'.
 502*----------------------------------------------------------------------------*/
 503
 504static inline void
 505mul64To128(uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr)
 506{
 507    mulu64(z1Ptr, z0Ptr, a, b);
 508}
 509
 510/*----------------------------------------------------------------------------
 511| Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
 512| `b' to obtain a 192-bit product.  The product is broken into three 64-bit
 513| pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
 514| `z2Ptr'.
 515*----------------------------------------------------------------------------*/
 516
 517static inline void
 518mul128By64To192(uint64_t a0, uint64_t a1, uint64_t b,
 519                uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr)
 520{
 521    uint64_t z0, z1, m1;
 522
 523    mul64To128(a1, b, &m1, z2Ptr);
 524    mul64To128(a0, b, &z0, &z1);
 525    add128(z0, z1, 0, m1, z0Ptr, z1Ptr);
 526}
 527
 528/*----------------------------------------------------------------------------
 529| Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
 530| 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
 531| product.  The product is broken into four 64-bit pieces which are stored at
 532| the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
 533*----------------------------------------------------------------------------*/
 534
 535static inline void mul128To256(uint64_t a0, uint64_t a1,
 536                               uint64_t b0, uint64_t b1,
 537                               uint64_t *z0Ptr, uint64_t *z1Ptr,
 538                               uint64_t *z2Ptr, uint64_t *z3Ptr)
 539{
 540    uint64_t z0, z1, z2;
 541    uint64_t m0, m1, m2, n1, n2;
 542
 543    mul64To128(a1, b0, &m1, &m2);
 544    mul64To128(a0, b1, &n1, &n2);
 545    mul64To128(a1, b1, &z2, z3Ptr);
 546    mul64To128(a0, b0, &z0, &z1);
 547
 548    add192( 0, m1, m2,  0, n1, n2, &m0, &m1, &m2);
 549    add192(m0, m1, m2, z0, z1, z2, z0Ptr, z1Ptr, z2Ptr);
 550}
 551
 552/*----------------------------------------------------------------------------
 553| Returns an approximation to the 64-bit integer quotient obtained by dividing
 554| `b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
 555| divisor `b' must be at least 2^63.  If q is the exact quotient truncated
 556| toward zero, the approximation returned lies between q and q + 2 inclusive.
 557| If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
 558| unsigned integer is returned.
 559*----------------------------------------------------------------------------*/
 560
 561static inline uint64_t estimateDiv128To64(uint64_t a0, uint64_t a1, uint64_t b)
 562{
 563    uint64_t b0, b1;
 564    uint64_t rem0, rem1, term0, term1;
 565    uint64_t z;
 566
 567    if ( b <= a0 ) return UINT64_C(0xFFFFFFFFFFFFFFFF);
 568    b0 = b>>32;
 569    z = ( b0<<32 <= a0 ) ? UINT64_C(0xFFFFFFFF00000000) : ( a0 / b0 )<<32;
 570    mul64To128( b, z, &term0, &term1 );
 571    sub128( a0, a1, term0, term1, &rem0, &rem1 );
 572    while ( ( (int64_t) rem0 ) < 0 ) {
 573        z -= UINT64_C(0x100000000);
 574        b1 = b<<32;
 575        add128( rem0, rem1, b0, b1, &rem0, &rem1 );
 576    }
 577    rem0 = ( rem0<<32 ) | ( rem1>>32 );
 578    z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
 579    return z;
 580
 581}
 582
 583/*----------------------------------------------------------------------------
 584| Returns an approximation to the square root of the 32-bit significand given
 585| by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
 586| `aExp' (the least significant bit) is 1, the integer returned approximates
 587| 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
 588| is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
 589| case, the approximation returned lies strictly within +/-2 of the exact
 590| value.
 591*----------------------------------------------------------------------------*/
 592
 593static inline uint32_t estimateSqrt32(int aExp, uint32_t a)
 594{
 595    static const uint16_t sqrtOddAdjustments[] = {
 596        0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
 597        0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
 598    };
 599    static const uint16_t sqrtEvenAdjustments[] = {
 600        0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
 601        0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
 602    };
 603    int8_t index;
 604    uint32_t z;
 605
 606    index = ( a>>27 ) & 15;
 607    if ( aExp & 1 ) {
 608        z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ (int)index ];
 609        z = ( ( a / z )<<14 ) + ( z<<15 );
 610        a >>= 1;
 611    }
 612    else {
 613        z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ (int)index ];
 614        z = a / z + z;
 615        z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
 616        if ( z <= a ) return (uint32_t) ( ( (int32_t) a )>>1 );
 617    }
 618    return ( (uint32_t) ( ( ( (uint64_t) a )<<31 ) / z ) ) + ( z>>1 );
 619
 620}
 621
 622/*----------------------------------------------------------------------------
 623| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
 624| is equal to the 128-bit value formed by concatenating `b0' and `b1'.
 625| Otherwise, returns 0.
 626*----------------------------------------------------------------------------*/
 627
 628static inline bool eq128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
 629{
 630    return a0 == b0 && a1 == b1;
 631}
 632
 633/*----------------------------------------------------------------------------
 634| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
 635| than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
 636| Otherwise, returns 0.
 637*----------------------------------------------------------------------------*/
 638
 639static inline bool le128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
 640{
 641    return a0 < b0 || (a0 == b0 && a1 <= b1);
 642}
 643
 644/*----------------------------------------------------------------------------
 645| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
 646| than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
 647| returns 0.
 648*----------------------------------------------------------------------------*/
 649
 650static inline bool lt128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
 651{
 652    return a0 < b0 || (a0 == b0 && a1 < b1);
 653}
 654
 655/*----------------------------------------------------------------------------
 656| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
 657| not equal to the 128-bit value formed by concatenating `b0' and `b1'.
 658| Otherwise, returns 0.
 659*----------------------------------------------------------------------------*/
 660
 661static inline bool ne128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
 662{
 663    return a0 != b0 || a1 != b1;
 664}
 665
 666/*
 667 * Similarly, comparisons of 192-bit values.
 668 */
 669
 670static inline bool eq192(uint64_t a0, uint64_t a1, uint64_t a2,
 671                         uint64_t b0, uint64_t b1, uint64_t b2)
 672{
 673    return ((a0 ^ b0) | (a1 ^ b1) | (a2 ^ b2)) == 0;
 674}
 675
 676static inline bool le192(uint64_t a0, uint64_t a1, uint64_t a2,
 677                         uint64_t b0, uint64_t b1, uint64_t b2)
 678{
 679    if (a0 != b0) {
 680        return a0 < b0;
 681    }
 682    if (a1 != b1) {
 683        return a1 < b1;
 684    }
 685    return a2 <= b2;
 686}
 687
 688static inline bool lt192(uint64_t a0, uint64_t a1, uint64_t a2,
 689                         uint64_t b0, uint64_t b1, uint64_t b2)
 690{
 691    if (a0 != b0) {
 692        return a0 < b0;
 693    }
 694    if (a1 != b1) {
 695        return a1 < b1;
 696    }
 697    return a2 < b2;
 698}
 699
 700#endif
 701