qemu/include/qemu/host-utils.h
/*
 * Utility compute operations used by translated code.
 *
 * Copyright (c) 2007 Thiemo Seufer
 * Copyright (c) 2007 Jocelyn Mayer
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Portions of this work are licensed under the terms of the GNU GPL,
 * version 2 or later. See the COPYING file in the top-level directory.
 */

#ifndef HOST_UTILS_H
#define HOST_UTILS_H

#include "qemu/compiler.h"
#include "qemu/bswap.h"
#include "qemu/int128.h"

#ifdef CONFIG_INT128
static inline void mulu64(uint64_t *plow, uint64_t *phigh,
                          uint64_t a, uint64_t b)
{
    __uint128_t r = (__uint128_t)a * b;
    *plow = r;
    *phigh = r >> 64;
}

static inline void muls64(uint64_t *plow, uint64_t *phigh,
                          int64_t a, int64_t b)
{
    __int128_t r = (__int128_t)a * b;
    *plow = r;
    *phigh = r >> 64;
}

/* compute with 96 bit intermediate result: (a*b)/c */
static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
{
    return (__int128_t)a * b / c;
}

static inline uint64_t divu128(uint64_t *plow, uint64_t *phigh,
                               uint64_t divisor)
{
    __uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
    __uint128_t result = dividend / divisor;

    *plow = result;
    *phigh = result >> 64;
    return dividend % divisor;
}

static inline int64_t divs128(uint64_t *plow, int64_t *phigh,
                              int64_t divisor)
{
    __int128_t dividend = ((__int128_t)*phigh << 64) | *plow;
    __int128_t result = dividend / divisor;

    *plow = result;
    *phigh = result >> 64;
    return dividend % divisor;
}
#else
void muls64(uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b);
void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b);
uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
int64_t divs128(uint64_t *plow, int64_t *phigh, int64_t divisor);

static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
{
    union {
        uint64_t ll;
        struct {
#if HOST_BIG_ENDIAN
            uint32_t high, low;
#else
            uint32_t low, high;
#endif
        } l;
    } u, res;
    uint64_t rl, rh;

    u.ll = a;
    rl = (uint64_t)u.l.low * (uint64_t)b;
    rh = (uint64_t)u.l.high * (uint64_t)b;
    rh += (rl >> 32);
    res.l.high = rh / c;
    res.l.low = (((rh % c) << 32) + (rl & 0xffffffff)) / c;
    return res.ll;
}
#endif
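
/*
 * Illustrative usage (not part of the original header; the values below
 * are worked examples):
 *
 *     uint64_t lo, hi;
 *     mulu64(&lo, &hi, 0xffffffffffffffffull, 2);
 *     // full product is 2^65 - 2: hi == 1, lo == 0xfffffffffffffffe
 *
 *     uint64_t rem = divu128(&lo, &hi, 2);
 *     // quotient back in hi:lo == 0:0xffffffffffffffff, rem == 0
 *
 *     // muldiv64() keeps (a * b) / c exact by using a wide intermediate,
 *     // e.g. for scaling a tick count by a clock-frequency ratio:
 *     uint64_t scaled = muldiv64(1000000000ull, 3, 2);  // 1500000000
 */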

/**
 * clz32 - count leading zeros in a 32-bit value.
 * @val: The value to search
 *
 * Returns 32 if the value is zero.  Note that the GCC builtin is
 * undefined if the value is zero.
 */
static inline int clz32(uint32_t val)
{
    return val ? __builtin_clz(val) : 32;
}
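
/*
 * Worked example (added for illustration): clz32(0x00010000) == 15,
 * clz32(1) == 31, and clz32(0) is defined here to be 32, unlike the
 * raw __builtin_clz(), which is undefined for zero input.
 */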

/**
 * clo32 - count leading ones in a 32-bit value.
 * @val: The value to search
 *
 * Returns 32 if the value is -1.
 */
static inline int clo32(uint32_t val)
{
    return clz32(~val);
}

/**
 * clz64 - count leading zeros in a 64-bit value.
 * @val: The value to search
 *
 * Returns 64 if the value is zero.  Note that the GCC builtin is
 * undefined if the value is zero.
 */
static inline int clz64(uint64_t val)
{
    return val ? __builtin_clzll(val) : 64;
}

/**
 * clo64 - count leading ones in a 64-bit value.
 * @val: The value to search
 *
 * Returns 64 if the value is -1.
 */
static inline int clo64(uint64_t val)
{
    return clz64(~val);
}

/**
 * ctz32 - count trailing zeros in a 32-bit value.
 * @val: The value to search
 *
 * Returns 32 if the value is zero.  Note that the GCC builtin is
 * undefined if the value is zero.
 */
static inline int ctz32(uint32_t val)
{
    return val ? __builtin_ctz(val) : 32;
}

/**
 * cto32 - count trailing ones in a 32-bit value.
 * @val: The value to search
 *
 * Returns 32 if the value is -1.
 */
static inline int cto32(uint32_t val)
{
    return ctz32(~val);
}

/**
 * ctz64 - count trailing zeros in a 64-bit value.
 * @val: The value to search
 *
 * Returns 64 if the value is zero.  Note that the GCC builtin is
 * undefined if the value is zero.
 */
static inline int ctz64(uint64_t val)
{
    return val ? __builtin_ctzll(val) : 64;
}

/**
 * cto64 - count trailing ones in a 64-bit value.
 * @val: The value to search
 *
 * Returns 64 if the value is -1.
 */
static inline int cto64(uint64_t val)
{
    return ctz64(~val);
}
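
/*
 * Worked example (added for illustration): ctz32(0x00010000) == 16,
 * cto32(0x0000ffff) == 16, and ctz32(0) == 32 by the zero-input
 * convention above.
 */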

/**
 * clrsb32 - count leading redundant sign bits in a 32-bit value.
 * @val: The value to search
 *
 * Returns the number of bits following the sign bit that are equal to it.
 * No special cases; output range is [0-31].
 */
static inline int clrsb32(uint32_t val)
{
#if __has_builtin(__builtin_clrsb) || !defined(__clang__)
    return __builtin_clrsb(val);
#else
    return clz32(val ^ ((int32_t)val >> 1)) - 1;
#endif
}

/**
 * clrsb64 - count leading redundant sign bits in a 64-bit value.
 * @val: The value to search
 *
 * Returns the number of bits following the sign bit that are equal to it.
 * No special cases; output range is [0-63].
 */
static inline int clrsb64(uint64_t val)
{
#if __has_builtin(__builtin_clrsbll) || !defined(__clang__)
    return __builtin_clrsbll(val);
#else
    return clz64(val ^ ((int64_t)val >> 1)) - 1;
#endif
}
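
/*
 * Worked example (added for illustration): clrsb32(0) == 31 and
 * clrsb32(-1) == 31 (every bit after the sign bit matches it),
 * clrsb32(1) == 30, clrsb32(0xffff0000u) == 15.
 */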

/**
 * ctpop8 - count the population of one bits in an 8-bit value.
 * @val: The value to search
 */
static inline int ctpop8(uint8_t val)
{
    return __builtin_popcount(val);
}

/**
 * ctpop16 - count the population of one bits in a 16-bit value.
 * @val: The value to search
 */
static inline int ctpop16(uint16_t val)
{
    return __builtin_popcount(val);
}

/**
 * ctpop32 - count the population of one bits in a 32-bit value.
 * @val: The value to search
 */
static inline int ctpop32(uint32_t val)
{
    return __builtin_popcount(val);
}

/**
 * ctpop64 - count the population of one bits in a 64-bit value.
 * @val: The value to search
 */
static inline int ctpop64(uint64_t val)
{
    return __builtin_popcountll(val);
}
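
/*
 * Worked example (added for illustration): ctpop8(0xff) == 8,
 * ctpop32(0xf0f0f0f0u) == 16, ctpop64(0) == 0.
 */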

/**
 * revbit8 - reverse the bits in an 8-bit value.
 * @x: The value to modify.
 */
static inline uint8_t revbit8(uint8_t x)
{
#if __has_builtin(__builtin_bitreverse8)
    return __builtin_bitreverse8(x);
#else
    /* Assign the correct nibble position.  */
    x = ((x & 0xf0) >> 4)
      | ((x & 0x0f) << 4);
    /* Assign the correct bit position.  */
    x = ((x & 0x88) >> 3)
      | ((x & 0x44) >> 1)
      | ((x & 0x22) << 1)
      | ((x & 0x11) << 3);
    return x;
#endif
}

/**
 * revbit16 - reverse the bits in a 16-bit value.
 * @x: The value to modify.
 */
static inline uint16_t revbit16(uint16_t x)
{
#if __has_builtin(__builtin_bitreverse16)
    return __builtin_bitreverse16(x);
#else
    /* Assign the correct byte position.  */
    x = bswap16(x);
    /* Assign the correct nibble position.  */
    x = ((x & 0xf0f0) >> 4)
      | ((x & 0x0f0f) << 4);
    /* Assign the correct bit position.  */
    x = ((x & 0x8888) >> 3)
      | ((x & 0x4444) >> 1)
      | ((x & 0x2222) << 1)
      | ((x & 0x1111) << 3);
    return x;
#endif
}

/**
 * revbit32 - reverse the bits in a 32-bit value.
 * @x: The value to modify.
 */
static inline uint32_t revbit32(uint32_t x)
{
#if __has_builtin(__builtin_bitreverse32)
    return __builtin_bitreverse32(x);
#else
    /* Assign the correct byte position.  */
    x = bswap32(x);
    /* Assign the correct nibble position.  */
    x = ((x & 0xf0f0f0f0u) >> 4)
      | ((x & 0x0f0f0f0fu) << 4);
    /* Assign the correct bit position.  */
    x = ((x & 0x88888888u) >> 3)
      | ((x & 0x44444444u) >> 1)
      | ((x & 0x22222222u) << 1)
      | ((x & 0x11111111u) << 3);
    return x;
#endif
}

/**
 * revbit64 - reverse the bits in a 64-bit value.
 * @x: The value to modify.
 */
static inline uint64_t revbit64(uint64_t x)
{
#if __has_builtin(__builtin_bitreverse64)
    return __builtin_bitreverse64(x);
#else
    /* Assign the correct byte position.  */
    x = bswap64(x);
    /* Assign the correct nibble position.  */
    x = ((x & 0xf0f0f0f0f0f0f0f0ull) >> 4)
      | ((x & 0x0f0f0f0f0f0f0f0full) << 4);
    /* Assign the correct bit position.  */
    x = ((x & 0x8888888888888888ull) >> 3)
      | ((x & 0x4444444444444444ull) >> 1)
      | ((x & 0x2222222222222222ull) << 1)
      | ((x & 0x1111111111111111ull) << 3);
    return x;
#endif
}
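
/*
 * Worked example (added for illustration): the fallbacks swap
 * progressively smaller groups (bytes, then nibbles, then bits), so
 * revbit8(0x01) == 0x80, revbit16(0x0001) == 0x8000, and
 * revbit32(0x00000003u) == 0xc0000000u.
 */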

/**
 * Return the absolute value of a 64-bit integer as an unsigned 64-bit value
 */
static inline uint64_t uabs64(int64_t v)
{
    return v < 0 ? -v : v;
}

/**
 * sadd32_overflow - addition with overflow indication
 * @x, @y: addends
 * @ret: Output for sum
 *
 * Computes *@ret = @x + @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool sadd32_overflow(int32_t x, int32_t y, int32_t *ret)
{
    return __builtin_add_overflow(x, y, ret);
}

/**
 * sadd64_overflow - addition with overflow indication
 * @x, @y: addends
 * @ret: Output for sum
 *
 * Computes *@ret = @x + @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool sadd64_overflow(int64_t x, int64_t y, int64_t *ret)
{
    return __builtin_add_overflow(x, y, ret);
}

/**
 * uadd32_overflow - addition with overflow indication
 * @x, @y: addends
 * @ret: Output for sum
 *
 * Computes *@ret = @x + @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool uadd32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
{
    return __builtin_add_overflow(x, y, ret);
}

/**
 * uadd64_overflow - addition with overflow indication
 * @x, @y: addends
 * @ret: Output for sum
 *
 * Computes *@ret = @x + @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool uadd64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
{
    return __builtin_add_overflow(x, y, ret);
}
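
/*
 * Illustrative usage (not part of the original header):
 *
 *     int32_t s;
 *     uint32_t u;
 *     sadd32_overflow(INT32_MAX, 1, &s);   // returns true, s == INT32_MIN
 *     uadd32_overflow(UINT32_MAX, 1, &u);  // returns true, u == 0
 *     sadd32_overflow(1, 2, &s);           // returns false, s == 3
 */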

/**
 * ssub32_overflow - subtraction with overflow indication
 * @x: Minuend
 * @y: Subtrahend
 * @ret: Output for difference
 *
 * Computes *@ret = @x - @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool ssub32_overflow(int32_t x, int32_t y, int32_t *ret)
{
    return __builtin_sub_overflow(x, y, ret);
}

/**
 * ssub64_overflow - subtraction with overflow indication
 * @x: Minuend
 * @y: Subtrahend
 * @ret: Output for difference
 *
 * Computes *@ret = @x - @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool ssub64_overflow(int64_t x, int64_t y, int64_t *ret)
{
    return __builtin_sub_overflow(x, y, ret);
}

/**
 * usub32_overflow - subtraction with overflow indication
 * @x: Minuend
 * @y: Subtrahend
 * @ret: Output for difference
 *
 * Computes *@ret = @x - @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool usub32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
{
    return __builtin_sub_overflow(x, y, ret);
}

/**
 * usub64_overflow - subtraction with overflow indication
 * @x: Minuend
 * @y: Subtrahend
 * @ret: Output for difference
 *
 * Computes *@ret = @x - @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool usub64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
{
    return __builtin_sub_overflow(x, y, ret);
}

/**
 * smul32_overflow - multiplication with overflow indication
 * @x, @y: Input multipliers
 * @ret: Output for product
 *
 * Computes *@ret = @x * @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool smul32_overflow(int32_t x, int32_t y, int32_t *ret)
{
    return __builtin_mul_overflow(x, y, ret);
}

/**
 * smul64_overflow - multiplication with overflow indication
 * @x, @y: Input multipliers
 * @ret: Output for product
 *
 * Computes *@ret = @x * @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool smul64_overflow(int64_t x, int64_t y, int64_t *ret)
{
    return __builtin_mul_overflow(x, y, ret);
}

/**
 * umul32_overflow - multiplication with overflow indication
 * @x, @y: Input multipliers
 * @ret: Output for product
 *
 * Computes *@ret = @x * @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool umul32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
{
    return __builtin_mul_overflow(x, y, ret);
}

/**
 * umul64_overflow - multiplication with overflow indication
 * @x, @y: Input multipliers
 * @ret: Output for product
 *
 * Computes *@ret = @x * @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool umul64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
{
    return __builtin_mul_overflow(x, y, ret);
}
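
/*
 * Illustrative usage (not part of the original header):
 *
 *     uint32_t d, p;
 *     usub32_overflow(0, 1, &d);               // returns true, d == UINT32_MAX
 *     umul32_overflow(0x10000u, 0x10000u, &p); // returns true, p == 0
 *     umul32_overflow(6, 7, &p);               // returns false, p == 42
 */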

/*
 * Unsigned 128x64 multiplication: multiplies the 128-bit value in
 * *phigh:*plow by @factor, stores the low 128 bits of the product back
 * into *phigh:*plow, and returns true if and only if the product was
 * truncated to 128 bits.
 */
static inline bool mulu128(uint64_t *plow, uint64_t *phigh, uint64_t factor)
{
#if defined(CONFIG_INT128)
    bool res;
    __uint128_t r;
    __uint128_t f = ((__uint128_t)*phigh << 64) | *plow;
    res = __builtin_mul_overflow(f, factor, &r);

    *plow = r;
    *phigh = r >> 64;

    return res;
#else
    uint64_t dhi = *phigh;
    uint64_t dlo = *plow;
    uint64_t ahi;
    uint64_t blo, bhi;

    if (dhi == 0) {
        mulu64(plow, phigh, dlo, factor);
        return false;
    }

    mulu64(plow, &ahi, dlo, factor);
    mulu64(&blo, &bhi, dhi, factor);

    return uadd64_overflow(ahi, blo, phigh) || bhi != 0;
#endif
}
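
/*
 * Illustrative usage (not part of the original header):
 *
 *     uint64_t lo = 0, hi = 1;             // 128-bit value 2^64
 *     bool trunc = mulu128(&lo, &hi, 3);   // 3 * 2^64 still fits in 128 bits
 *     // trunc == false, hi == 3, lo == 0
 */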

/**
 * uadd64_carry - addition with carry-in and carry-out
 * @x, @y: addends
 * @pcarry: in-out carry value
 *
 * Computes @x + @y + *@pcarry, placing the carry-out back
 * into *@pcarry and returning the 64-bit sum.
 */
static inline uint64_t uadd64_carry(uint64_t x, uint64_t y, bool *pcarry)
{
#if __has_builtin(__builtin_addcll)
    unsigned long long c = *pcarry;
    x = __builtin_addcll(x, y, c, &c);
    *pcarry = c & 1;
    return x;
#else
    bool c = *pcarry;
    /* This is clang's internal expansion of __builtin_addc. */
    c = uadd64_overflow(x, c, &x);
    c |= uadd64_overflow(x, y, &x);
    *pcarry = c;
    return x;
#endif
}

/**
 * usub64_borrow - subtraction with borrow-in and borrow-out
 * @x: Minuend
 * @y: Subtrahend
 * @pborrow: in-out borrow value
 *
 * Computes @x - @y - *@pborrow, placing the borrow-out back
 * into *@pborrow and returning the 64-bit difference.
 */
static inline uint64_t usub64_borrow(uint64_t x, uint64_t y, bool *pborrow)
{
#if __has_builtin(__builtin_subcll) && !defined(BUILTIN_SUBCLL_BROKEN)
    unsigned long long b = *pborrow;
    x = __builtin_subcll(x, y, b, &b);
    *pborrow = b & 1;
    return x;
#else
    bool b = *pborrow;
    b = usub64_overflow(x, b, &x);
    b |= usub64_overflow(x, y, &x);
    *pborrow = b;
    return x;
#endif
}
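
/*
 * Sketch of multi-word arithmetic with the carry/borrow helpers (added
 * for illustration; the helper add256() below is hypothetical and not
 * part of QEMU):
 *
 *     // Add two 256-bit numbers stored as four little-endian 64-bit limbs.
 *     static inline void add256(uint64_t r[4],
 *                               const uint64_t a[4], const uint64_t b[4])
 *     {
 *         bool carry = false;
 *         for (int i = 0; i < 4; i++) {
 *             r[i] = uadd64_carry(a[i], b[i], &carry);
 *         }
 *     }
 *
 * usub64_borrow() chains the same way for multi-word subtraction.
 */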

/* Host type specific sizes of these routines.  */

#if ULONG_MAX == UINT32_MAX
# define clzl   clz32
# define ctzl   ctz32
# define clol   clo32
# define ctol   cto32
# define ctpopl ctpop32
# define revbitl revbit32
#elif ULONG_MAX == UINT64_MAX
# define clzl   clz64
# define ctzl   ctz64
# define clol   clo64
# define ctol   cto64
# define ctpopl ctpop64
# define revbitl revbit64
#else
# error Unknown sizeof long
#endif

static inline bool is_power_of_2(uint64_t value)
{
    if (!value) {
        return false;
    }

    return !(value & (value - 1));
}

/**
 * Return @value rounded down to the nearest power of two or zero.
 */
static inline uint64_t pow2floor(uint64_t value)
{
    if (!value) {
        /* Avoid undefined shift by 64 */
        return 0;
    }
    return 0x8000000000000000ull >> clz64(value);
}

/*
 * Return @value rounded up to the nearest power of two modulo 2^64.
 * This is *zero* for @value > 2^63, so be careful.
 */
static inline uint64_t pow2ceil(uint64_t value)
{
    int n = clz64(value - 1);

    if (!n) {
        /*
         * @value - 1 has no leading zeroes, thus @value - 1 >= 2^63
         * Therefore, either @value == 0 or @value > 2^63.
         * If it's 0, return 1, else return 0.
         */
        return !value;
    }
    return 0x8000000000000000ull >> (n - 1);
}
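
/*
 * Worked example (added for illustration): is_power_of_2(0) == false,
 * is_power_of_2(64) == true, pow2floor(1000) == 512, pow2ceil(1000) == 1024,
 * pow2ceil(1024) == 1024, and pow2ceil(0) == 1 per the note above.
 */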

/*
 * Return the smallest power of two strictly greater than @x, modulo 2^32.
 * Note that unlike pow2ceil(), an exact power of two is rounded up to the
 * next one (e.g. pow2roundup32(4) == 8), and pow2roundup32(0) == 1.
 */
static inline uint32_t pow2roundup32(uint32_t x)
{
    x |= (x >> 1);
    x |= (x >> 2);
    x |= (x >> 4);
    x |= (x >> 8);
    x |= (x >> 16);
    return x + 1;
}

/**
 * urshift - 128-bit Unsigned Right Shift.
 * @plow: in/out - lower 64-bit integer.
 * @phigh: in/out - higher 64-bit integer.
 * @shift: in - bits to shift, between 0 and 127.
 *
 * Result is zero-extended and stored in plow/phigh, which are
 * input/output variables. Shift values outside the range are
 * reduced modulo 128. In other words, the caller is responsible
 * for verifying/asserting both the shift range and the plow/phigh
 * pointers.
 */
void urshift(uint64_t *plow, uint64_t *phigh, int32_t shift);

/**
 * ulshift - 128-bit Unsigned Left Shift.
 * @plow: in/out - lower 64-bit integer.
 * @phigh: in/out - higher 64-bit integer.
 * @shift: in - bits to shift, between 0 and 127.
 * @overflow: out - true if any 1-bit is shifted out.
 *
 * Result is zero-extended and stored in plow/phigh, which are
 * input/output variables. Shift values outside the range are
 * reduced modulo 128. In other words, the caller is responsible
 * for verifying/asserting both the shift range and the plow/phigh
 * pointers.
 */
void ulshift(uint64_t *plow, uint64_t *phigh, int32_t shift, bool *overflow);

/* From the GNU Multi Precision Library - longlong.h __udiv_qrnnd
 * (https://gmplib.org/repo/gmp/file/tip/longlong.h)
 *
 * Licensed under the GPLv2/LGPLv3
 */
static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1,
                                  uint64_t n0, uint64_t d)
{
#if defined(__x86_64__)
    uint64_t q;
    asm("divq %4" : "=a"(q), "=d"(*r) : "0"(n0), "1"(n1), "rm"(d));
    return q;
#elif defined(__s390x__) && !defined(__clang__)
    /* Need to use a TImode type to get an even register pair for DLGR.  */
    unsigned __int128 n = (unsigned __int128)n1 << 64 | n0;
    asm("dlgr %0, %1" : "+r"(n) : "r"(d));
    *r = n >> 64;
    return n;
#elif defined(_ARCH_PPC64) && defined(_ARCH_PWR7)
    /* From Power ISA 2.06, programming note for divdeu.  */
    uint64_t q1, q2, Q, r1, r2, R;
    asm("divdeu %0,%2,%4; divdu %1,%3,%4"
        : "=&r"(q1), "=r"(q2)
        : "r"(n1), "r"(n0), "r"(d));
    r1 = -(q1 * d);         /* low part of (n1<<64) - (q1 * d) */
    r2 = n0 - (q2 * d);
    Q = q1 + q2;
    R = r1 + r2;
    if (R >= d || R < r2) { /* overflow implies R > d */
        Q += 1;
        R -= d;
    }
    *r = R;
    return Q;
#else
    uint64_t d0, d1, q0, q1, r1, r0, m;

    d0 = (uint32_t)d;
    d1 = d >> 32;

    r1 = n1 % d1;
    q1 = n1 / d1;
    m = q1 * d0;
    r1 = (r1 << 32) | (n0 >> 32);
    if (r1 < m) {
        q1 -= 1;
        r1 += d;
        if (r1 >= d) {
            if (r1 < m) {
                q1 -= 1;
                r1 += d;
            }
        }
    }
    r1 -= m;

    r0 = r1 % d1;
    q0 = r1 / d1;
    m = q0 * d0;
    r0 = (r0 << 32) | (uint32_t)n0;
    if (r0 < m) {
        q0 -= 1;
        r0 += d;
        if (r0 >= d) {
            if (r0 < m) {
                q0 -= 1;
                r0 += d;
            }
        }
    }
    r0 -= m;

    *r = r0;
    return (q1 << 32) | q0;
#endif
}
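
/*
 * Usage note (added for illustration): udiv_qrnnd() divides the 128-bit
 * value n1:n0 by d, returning the quotient and storing the remainder in *r.
 * As with GMP's __udiv_qrnnd, the caller is expected to ensure the quotient
 * fits in 64 bits (n1 < d); the generic fallback additionally assumes d is
 * normalized (most significant bit set).  For example, with n1 == 1, n0 == 0
 * and d == 0x8000000000000000ull (i.e. 2^64 / 2^63), the quotient is 2 and
 * *r is 0.
 */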

Int128 divu256(Int128 *plow, Int128 *phigh, Int128 divisor);
Int128 divs256(Int128 *plow, Int128 *phigh, Int128 divisor);
#endif