qemu/include/qemu/host-utils.h
/*
 * Utility compute operations used by translated code.
 *
 * Copyright (c) 2007 Thiemo Seufer
 * Copyright (c) 2007 Jocelyn Mayer
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Portions of this work are licensed under the terms of the GNU GPL,
 * version 2 or later. See the COPYING file in the top-level directory.
 */

#ifndef HOST_UTILS_H
#define HOST_UTILS_H

#include "qemu/compiler.h"
#include "qemu/bswap.h"

#ifdef CONFIG_INT128
static inline void mulu64(uint64_t *plow, uint64_t *phigh,
                          uint64_t a, uint64_t b)
{
    __uint128_t r = (__uint128_t)a * b;
    *plow = r;
    *phigh = r >> 64;
}

static inline void muls64(uint64_t *plow, uint64_t *phigh,
                          int64_t a, int64_t b)
{
    __int128_t r = (__int128_t)a * b;
    *plow = r;
    *phigh = r >> 64;
}

/* compute with 96 bit intermediate result: (a*b)/c */
static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
{
    return (__int128_t)a * b / c;
}

static inline uint64_t divu128(uint64_t *plow, uint64_t *phigh,
                               uint64_t divisor)
{
    __uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
    __uint128_t result = dividend / divisor;

    *plow = result;
    *phigh = result >> 64;
    return dividend % divisor;
}

static inline int64_t divs128(uint64_t *plow, int64_t *phigh,
                              int64_t divisor)
{
    __int128_t dividend = ((__int128_t)*phigh << 64) | *plow;
    __int128_t result = dividend / divisor;

    *plow = result;
    *phigh = result >> 64;
    return dividend % divisor;
}
#else
void muls64(uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b);
void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b);
uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
int64_t divs128(uint64_t *plow, int64_t *phigh, int64_t divisor);

static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
{
    union {
        uint64_t ll;
        struct {
#ifdef HOST_WORDS_BIGENDIAN
            uint32_t high, low;
#else
            uint32_t low, high;
#endif
        } l;
    } u, res;
    uint64_t rl, rh;

    u.ll = a;
    rl = (uint64_t)u.l.low * (uint64_t)b;
    rh = (uint64_t)u.l.high * (uint64_t)b;
    rh += (rl >> 32);
    res.l.high = rh / c;
    res.l.low = (((rh % c) << 32) + (rl & 0xffffffff)) / c;
    return res.ll;
}
#endif
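
/*
 * Illustrative sketch (not part of the original header): scaling a 64-bit
 * value by a 64/64 ratio without losing the 128-bit intermediate product,
 * built on the mulu64()/divu128() primitives above.  The helper name is
 * hypothetical; it assumes den != 0 and that the quotient fits in 64 bits.
 */
static inline uint64_t example_scale_u64(uint64_t a, uint64_t num, uint64_t den)
{
    uint64_t lo, hi;

    mulu64(&lo, &hi, a, num);   /* hi:lo = a * num, full 128-bit product */
    divu128(&lo, &hi, den);     /* hi:lo = (a * num) / den, remainder dropped */
    return lo;                  /* caller guarantees hi == 0 here */
}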

/**
 * clz32 - count leading zeros in a 32-bit value.
 * @val: The value to search
 *
 * Returns 32 if the value is zero.  Note that the GCC builtin is
 * undefined if the value is zero.
 */
static inline int clz32(uint32_t val)
{
    return val ? __builtin_clz(val) : 32;
}

/**
 * clo32 - count leading ones in a 32-bit value.
 * @val: The value to search
 *
 * Returns 32 if the value is -1.
 */
static inline int clo32(uint32_t val)
{
    return clz32(~val);
}

/**
 * clz64 - count leading zeros in a 64-bit value.
 * @val: The value to search
 *
 * Returns 64 if the value is zero.  Note that the GCC builtin is
 * undefined if the value is zero.
 */
static inline int clz64(uint64_t val)
{
    return val ? __builtin_clzll(val) : 64;
}

/**
 * clo64 - count leading ones in a 64-bit value.
 * @val: The value to search
 *
 * Returns 64 if the value is -1.
 */
static inline int clo64(uint64_t val)
{
    return clz64(~val);
}

/**
 * ctz32 - count trailing zeros in a 32-bit value.
 * @val: The value to search
 *
 * Returns 32 if the value is zero.  Note that the GCC builtin is
 * undefined if the value is zero.
 */
static inline int ctz32(uint32_t val)
{
    return val ? __builtin_ctz(val) : 32;
}

/**
 * cto32 - count trailing ones in a 32-bit value.
 * @val: The value to search
 *
 * Returns 32 if the value is -1.
 */
static inline int cto32(uint32_t val)
{
    return ctz32(~val);
}

/**
 * ctz64 - count trailing zeros in a 64-bit value.
 * @val: The value to search
 *
 * Returns 64 if the value is zero.  Note that the GCC builtin is
 * undefined if the value is zero.
 */
static inline int ctz64(uint64_t val)
{
    return val ? __builtin_ctzll(val) : 64;
}

/**
 * cto64 - count trailing ones in a 64-bit value.
 * @val: The value to search
 *
 * Returns 64 if the value is -1.
 */
static inline int cto64(uint64_t val)
{
    return ctz64(~val);
}

/**
 * clrsb32 - count leading redundant sign bits in a 32-bit value.
 * @val: The value to search
 *
 * Returns the number of bits following the sign bit that are equal to it.
 * No special cases; output range is [0-31].
 */
static inline int clrsb32(uint32_t val)
{
#if __has_builtin(__builtin_clrsb) || !defined(__clang__)
    return __builtin_clrsb(val);
#else
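    /*
     * Fallback: in val ^ (val >> 1) (arithmetic shift), a bit is clear iff
     * it equals the bit above it, so the XOR has one leading zero for the
     * sign bit plus one per redundant sign bit; the -1 removes the former.
     */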
    return clz32(val ^ ((int32_t)val >> 1)) - 1;
#endif
}

/**
 * clrsb64 - count leading redundant sign bits in a 64-bit value.
 * @val: The value to search
 *
 * Returns the number of bits following the sign bit that are equal to it.
 * No special cases; output range is [0-63].
 */
static inline int clrsb64(uint64_t val)
{
#if __has_builtin(__builtin_clrsbll) || !defined(__clang__)
    return __builtin_clrsbll(val);
#else
    return clz64(val ^ ((int64_t)val >> 1)) - 1;
#endif
}

/**
 * ctpop8 - count the population of one bits in an 8-bit value.
 * @val: The value to search
 */
static inline int ctpop8(uint8_t val)
{
    return __builtin_popcount(val);
}

/**
 * ctpop16 - count the population of one bits in a 16-bit value.
 * @val: The value to search
 */
static inline int ctpop16(uint16_t val)
{
    return __builtin_popcount(val);
}

/**
 * ctpop32 - count the population of one bits in a 32-bit value.
 * @val: The value to search
 */
static inline int ctpop32(uint32_t val)
{
    return __builtin_popcount(val);
}

/**
 * ctpop64 - count the population of one bits in a 64-bit value.
 * @val: The value to search
 */
static inline int ctpop64(uint64_t val)
{
    return __builtin_popcountll(val);
}

/**
 * revbit8 - reverse the bits in an 8-bit value.
 * @x: The value to modify.
 */
static inline uint8_t revbit8(uint8_t x)
{
#if __has_builtin(__builtin_bitreverse8)
    return __builtin_bitreverse8(x);
#else
    /* Assign the correct nibble position.  */
    x = ((x & 0xf0) >> 4)
      | ((x & 0x0f) << 4);
    /* Assign the correct bit position.  */
    x = ((x & 0x88) >> 3)
      | ((x & 0x44) >> 1)
      | ((x & 0x22) << 1)
      | ((x & 0x11) << 3);
    return x;
#endif
}

/**
 * revbit16 - reverse the bits in a 16-bit value.
 * @x: The value to modify.
 */
static inline uint16_t revbit16(uint16_t x)
{
#if __has_builtin(__builtin_bitreverse16)
    return __builtin_bitreverse16(x);
#else
    /* Assign the correct byte position.  */
    x = bswap16(x);
    /* Assign the correct nibble position.  */
    x = ((x & 0xf0f0) >> 4)
      | ((x & 0x0f0f) << 4);
    /* Assign the correct bit position.  */
    x = ((x & 0x8888) >> 3)
      | ((x & 0x4444) >> 1)
      | ((x & 0x2222) << 1)
      | ((x & 0x1111) << 3);
    return x;
#endif
}

/**
 * revbit32 - reverse the bits in a 32-bit value.
 * @x: The value to modify.
 */
static inline uint32_t revbit32(uint32_t x)
{
#if __has_builtin(__builtin_bitreverse32)
    return __builtin_bitreverse32(x);
#else
    /* Assign the correct byte position.  */
    x = bswap32(x);
    /* Assign the correct nibble position.  */
    x = ((x & 0xf0f0f0f0u) >> 4)
      | ((x & 0x0f0f0f0fu) << 4);
    /* Assign the correct bit position.  */
    x = ((x & 0x88888888u) >> 3)
      | ((x & 0x44444444u) >> 1)
      | ((x & 0x22222222u) << 1)
      | ((x & 0x11111111u) << 3);
    return x;
#endif
}

/**
 * revbit64 - reverse the bits in a 64-bit value.
 * @x: The value to modify.
 */
static inline uint64_t revbit64(uint64_t x)
{
#if __has_builtin(__builtin_bitreverse64)
    return __builtin_bitreverse64(x);
#else
    /* Assign the correct byte position.  */
    x = bswap64(x);
    /* Assign the correct nibble position.  */
    x = ((x & 0xf0f0f0f0f0f0f0f0ull) >> 4)
      | ((x & 0x0f0f0f0f0f0f0f0full) << 4);
    /* Assign the correct bit position.  */
    x = ((x & 0x8888888888888888ull) >> 3)
      | ((x & 0x4444444444444444ull) >> 1)
      | ((x & 0x2222222222222222ull) << 1)
      | ((x & 0x1111111111111111ull) << 3);
    return x;
#endif
}

/**
 * Return the absolute value of a 64-bit integer as an unsigned 64-bit value
 */
static inline uint64_t uabs64(int64_t v)
{
    /* Negate in the unsigned domain so that v == INT64_MIN does not overflow. */
    return v < 0 ? -(uint64_t)v : (uint64_t)v;
}

/**
 * sadd32_overflow - addition with overflow indication
 * @x, @y: addends
 * @ret: Output for sum
 *
 * Computes *@ret = @x + @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool sadd32_overflow(int32_t x, int32_t y, int32_t *ret)
{
#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
    return __builtin_add_overflow(x, y, ret);
#else
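    /*
     * Signed overflow occurred iff the addends have the same sign
     * (~(x ^ y) has its sign bit set) and the sum's sign differs from @x.
     */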
    *ret = x + y;
    return ((*ret ^ x) & ~(x ^ y)) < 0;
#endif
}

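/*
 * Illustrative sketch (not part of the original header): a saturating
 * signed 32-bit addition built on sadd32_overflow().  The helper name is
 * hypothetical.
 */
static inline int32_t example_sadd32_sat(int32_t x, int32_t y)
{
    int32_t r;

    if (sadd32_overflow(x, y, &r)) {
        /* Overflow means both addends share a sign; clamp toward it. */
        return x < 0 ? INT32_MIN : INT32_MAX;
    }
    return r;
}
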
/**
 * sadd64_overflow - addition with overflow indication
 * @x, @y: addends
 * @ret: Output for sum
 *
 * Computes *@ret = @x + @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool sadd64_overflow(int64_t x, int64_t y, int64_t *ret)
{
#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
    return __builtin_add_overflow(x, y, ret);
#else
    *ret = x + y;
    return ((*ret ^ x) & ~(x ^ y)) < 0;
#endif
}

/**
 * uadd32_overflow - addition with overflow indication
 * @x, @y: addends
 * @ret: Output for sum
 *
 * Computes *@ret = @x + @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool uadd32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
{
#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
    return __builtin_add_overflow(x, y, ret);
#else
    *ret = x + y;
    return *ret < x;
#endif
}

/**
 * uadd64_overflow - addition with overflow indication
 * @x, @y: addends
 * @ret: Output for sum
 *
 * Computes *@ret = @x + @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool uadd64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
{
#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
    return __builtin_add_overflow(x, y, ret);
#else
    *ret = x + y;
    return *ret < x;
#endif
}

/**
 * ssub32_overflow - subtraction with overflow indication
 * @x: Minuend
 * @y: Subtrahend
 * @ret: Output for difference
 *
 * Computes *@ret = @x - @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool ssub32_overflow(int32_t x, int32_t y, int32_t *ret)
{
#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
    return __builtin_sub_overflow(x, y, ret);
#else
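    /*
     * Signed overflow occurred iff the operands have different signs
     * (x ^ y has its sign bit set) and the difference's sign differs from @x.
     */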
    *ret = x - y;
    return ((*ret ^ x) & (x ^ y)) < 0;
#endif
}

/**
 * ssub64_overflow - subtraction with overflow indication
 * @x: Minuend
 * @y: Subtrahend
 * @ret: Output for difference
 *
 * Computes *@ret = @x - @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool ssub64_overflow(int64_t x, int64_t y, int64_t *ret)
{
#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
    return __builtin_sub_overflow(x, y, ret);
#else
    *ret = x - y;
    return ((*ret ^ x) & (x ^ y)) < 0;
#endif
}

/**
 * usub32_overflow - subtraction with overflow indication
 * @x: Minuend
 * @y: Subtrahend
 * @ret: Output for difference
 *
 * Computes *@ret = @x - @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool usub32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
{
#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
    return __builtin_sub_overflow(x, y, ret);
#else
    *ret = x - y;
    return x < y;
#endif
}

/**
 * usub64_overflow - subtraction with overflow indication
 * @x: Minuend
 * @y: Subtrahend
 * @ret: Output for difference
 *
 * Computes *@ret = @x - @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool usub64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
{
#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
    return __builtin_sub_overflow(x, y, ret);
#else
    *ret = x - y;
    return x < y;
#endif
}

/**
 * smul32_overflow - multiplication with overflow indication
 * @x, @y: Input multipliers
 * @ret: Output for product
 *
 * Computes *@ret = @x * @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool smul32_overflow(int32_t x, int32_t y, int32_t *ret)
{
#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
    return __builtin_mul_overflow(x, y, ret);
#else
    int64_t z = (int64_t)x * y;
    *ret = z;
    return *ret != z;
#endif
}

/**
 * smul64_overflow - multiplication with overflow indication
 * @x, @y: Input multipliers
 * @ret: Output for product
 *
 * Computes *@ret = @x * @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool smul64_overflow(int64_t x, int64_t y, int64_t *ret)
{
#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
    return __builtin_mul_overflow(x, y, ret);
#else
    uint64_t hi, lo;
    muls64(&lo, &hi, x, y);
    *ret = lo;
    return hi != ((int64_t)lo >> 63);
#endif
}

/**
 * umul32_overflow - multiplication with overflow indication
 * @x, @y: Input multipliers
 * @ret: Output for product
 *
 * Computes *@ret = @x * @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool umul32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
{
#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
    return __builtin_mul_overflow(x, y, ret);
#else
    uint64_t z = (uint64_t)x * y;
    *ret = z;
    return z > UINT32_MAX;
#endif
}

/**
 * umul64_overflow - multiplication with overflow indication
 * @x, @y: Input multipliers
 * @ret: Output for product
 *
 * Computes *@ret = @x * @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool umul64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
{
#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
    return __builtin_mul_overflow(x, y, ret);
#else
    uint64_t hi;
    mulu64(ret, &hi, x, y);
    return hi != 0;
#endif
}

/*
 * Unsigned 128x64 multiplication: *phigh:*plow = *phigh:*plow * factor.
 * The low 128 bits of the product are stored back through plow and phigh.
 * Returns true if the full product did not fit into 128 bits, i.e. the
 * stored result has been truncated; returns false otherwise.
 */
static inline bool mulu128(uint64_t *plow, uint64_t *phigh, uint64_t factor)
{
#if defined(CONFIG_INT128) && \
    (__has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5)
    bool res;
    __uint128_t r;
    __uint128_t f = ((__uint128_t)*phigh << 64) | *plow;
    res = __builtin_mul_overflow(f, factor, &r);

    *plow = r;
    *phigh = r >> 64;

    return res;
#else
    uint64_t dhi = *phigh;
    uint64_t dlo = *plow;
    uint64_t ahi;
    uint64_t blo, bhi;

    if (dhi == 0) {
        mulu64(plow, phigh, dlo, factor);
        return false;
    }

    mulu64(plow, &ahi, dlo, factor);
    mulu64(&blo, &bhi, dhi, factor);

    return uadd64_overflow(ahi, blo, phigh) || bhi != 0;
#endif
}

/**
 * uadd64_carry - addition with carry-in and carry-out
 * @x, @y: addends
 * @pcarry: in-out carry value
 *
 * Computes @x + @y + *@pcarry, placing the carry-out back
 * into *@pcarry and returning the 64-bit sum.
 */
static inline uint64_t uadd64_carry(uint64_t x, uint64_t y, bool *pcarry)
{
#if __has_builtin(__builtin_addcll)
    unsigned long long c = *pcarry;
    x = __builtin_addcll(x, y, c, &c);
    *pcarry = c & 1;
    return x;
#else
    bool c = *pcarry;
    /* This is clang's internal expansion of __builtin_addc. */
    c = uadd64_overflow(x, c, &x);
    c |= uadd64_overflow(x, y, &x);
    *pcarry = c;
    return x;
#endif
}

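/*
 * Illustrative sketch (not part of the original header): propagating the
 * carry through a two-word (128-bit) addition with uadd64_carry().  The
 * helper name is hypothetical.
 */
static inline void example_add128(uint64_t *rl, uint64_t *rh,
                                  uint64_t al, uint64_t ah,
                                  uint64_t bl, uint64_t bh)
{
    bool carry = false;

    *rl = uadd64_carry(al, bl, &carry);    /* low word, carry out */
    *rh = uadd64_carry(ah, bh, &carry);    /* high word consumes the carry */
}
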
/**
 * usub64_borrow - subtraction with borrow-in and borrow-out
 * @x: Minuend
 * @y: Subtrahend
 * @pborrow: in-out borrow value
 *
 * Computes @x - @y - *@pborrow, placing the borrow-out back
 * into *@pborrow and returning the 64-bit difference.
 */
static inline uint64_t usub64_borrow(uint64_t x, uint64_t y, bool *pborrow)
{
#if __has_builtin(__builtin_subcll)
    unsigned long long b = *pborrow;
    x = __builtin_subcll(x, y, b, &b);
    *pborrow = b & 1;
    return x;
#else
    bool b = *pborrow;
    b = usub64_overflow(x, b, &x);
    b |= usub64_overflow(x, y, &x);
    *pborrow = b;
    return x;
#endif
}

/* Versions of these routines sized for the host 'long' type.  */

#if ULONG_MAX == UINT32_MAX
# define clzl   clz32
# define ctzl   ctz32
# define clol   clo32
# define ctol   cto32
# define ctpopl ctpop32
# define revbitl revbit32
#elif ULONG_MAX == UINT64_MAX
# define clzl   clz64
# define ctzl   ctz64
# define clol   clo64
# define ctol   cto64
# define ctpopl ctpop64
# define revbitl revbit64
#else
# error Unknown sizeof long
#endif

static inline bool is_power_of_2(uint64_t value)
{
    if (!value) {
        return false;
    }

    return !(value & (value - 1));
}

/**
 * Return @value rounded down to the nearest power of two or zero.
 */
static inline uint64_t pow2floor(uint64_t value)
{
    if (!value) {
        /* Avoid undefined shift by 64 */
        return 0;
    }
    return 0x8000000000000000ull >> clz64(value);
}

/*
 * Return @value rounded up to the nearest power of two modulo 2^64.
 * This is *zero* for @value > 2^63, so be careful.
 */
static inline uint64_t pow2ceil(uint64_t value)
{
    int n = clz64(value - 1);

    if (!n) {
        /*
         * @value - 1 has no leading zeroes, thus @value - 1 >= 2^63
         * Therefore, either @value == 0 or @value > 2^63.
         * If it's 0, return 1, else return 0.
         */
        return !value;
    }
    return 0x8000000000000000ull >> (n - 1);
}

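/*
 * Worked examples for the helpers above (illustrative): pow2floor(40) == 32,
 * pow2ceil(40) == 64, pow2ceil(0) == 1, and pow2ceil(x) wraps to 0 for any
 * x > 0x8000000000000000ull.
 */

/**
 * pow2roundup32 - round up to a power of two, 32-bit version.
 * @x: The value to round
 *
 * Returns the smallest power of two strictly greater than @x;
 * wraps around to zero for @x >= 0x80000000.
 */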
static inline uint32_t pow2roundup32(uint32_t x)
{
    x |= (x >> 1);
    x |= (x >> 2);
    x |= (x >> 4);
    x |= (x >> 8);
    x |= (x >> 16);
    return x + 1;
}

/**
 * urshift - 128-bit unsigned right shift.
 * @plow: in/out - lower 64-bit integer.
 * @phigh: in/out - higher 64-bit integer.
 * @shift: in - bits to shift, between 0 and 127.
 *
 * The result is zero-extended and stored back in plow/phigh, which are
 * input/output variables.  Shift values outside the range are reduced
 * modulo 128; the caller is responsible for validating both the shift
 * range and the plow/phigh pointers.
 */
void urshift(uint64_t *plow, uint64_t *phigh, int32_t shift);

/**
 * ulshift - 128-bit unsigned left shift.
 * @plow: in/out - lower 64-bit integer.
 * @phigh: in/out - higher 64-bit integer.
 * @shift: in - bits to shift, between 0 and 127.
 * @overflow: out - true if any 1-bit is shifted out.
 *
 * The result is zero-extended and stored back in plow/phigh, which are
 * input/output variables.  Shift values outside the range are reduced
 * modulo 128; the caller is responsible for validating both the shift
 * range and the plow/phigh pointers.
 */
void ulshift(uint64_t *plow, uint64_t *phigh, int32_t shift, bool *overflow);

/* From the GNU Multi Precision Library - longlong.h __udiv_qrnnd
 * (https://gmplib.org/repo/gmp/file/tip/longlong.h)
 *
 * Licensed under the GPLv2/LGPLv3
 */
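/*
 * Divide the 128-bit value @n1:@n0 by @d, returning the 64-bit quotient and
 * storing the remainder in *@r.  As in the GMP original, the caller is
 * expected to ensure @n1 < @d so that the quotient fits in 64 bits; the
 * portable fallback further assumes @d is normalized (top bit set).
 */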
static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1,
                                  uint64_t n0, uint64_t d)
{
#if defined(__x86_64__)
    uint64_t q;
    asm("divq %4" : "=a"(q), "=d"(*r) : "0"(n0), "1"(n1), "rm"(d));
    return q;
#elif defined(__s390x__) && !defined(__clang__)
    /* Need to use a TImode type to get an even register pair for DLGR.  */
    unsigned __int128 n = (unsigned __int128)n1 << 64 | n0;
    asm("dlgr %0, %1" : "+r"(n) : "r"(d));
    *r = n >> 64;
    return n;
#elif defined(_ARCH_PPC64) && defined(_ARCH_PWR7)
    /* From Power ISA 2.06, programming note for divdeu.  */
    uint64_t q1, q2, Q, r1, r2, R;
    asm("divdeu %0,%2,%4; divdu %1,%3,%4"
        : "=&r"(q1), "=r"(q2)
        : "r"(n1), "r"(n0), "r"(d));
    r1 = -(q1 * d);         /* low part of (n1<<64) - (q1 * d) */
    r2 = n0 - (q2 * d);
    Q = q1 + q2;
    R = r1 + r2;
    if (R >= d || R < r2) { /* overflow implies R > d */
        Q += 1;
        R -= d;
    }
    *r = R;
    return Q;
#else
    uint64_t d0, d1, q0, q1, r1, r0, m;

    d0 = (uint32_t)d;
    d1 = d >> 32;

    r1 = n1 % d1;
    q1 = n1 / d1;
    m = q1 * d0;
    r1 = (r1 << 32) | (n0 >> 32);
    if (r1 < m) {
        q1 -= 1;
        r1 += d;
        if (r1 >= d) {
            if (r1 < m) {
                q1 -= 1;
                r1 += d;
            }
        }
    }
    r1 -= m;

    r0 = r1 % d1;
    q0 = r1 / d1;
    m = q0 * d0;
    r0 = (r0 << 32) | (uint32_t)n0;
    if (r0 < m) {
        q0 -= 1;
        r0 += d;
        if (r0 >= d) {
            if (r0 < m) {
                q0 -= 1;
                r0 += d;
            }
        }
    }
    r0 -= m;

    *r = r0;
    return (q1 << 32) | q0;
#endif
}

#endif