1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30#ifndef HOST_UTILS_H
31#define HOST_UTILS_H
32
33#include "qemu/compiler.h"
34#include "qemu/bswap.h"
35#include "qemu/int128.h"
36
37#ifdef CONFIG_INT128
/*
 * mulu64 - unsigned 64x64 -> 128 bit multiplication.
 *
 * Stores the low 64 bits of the product in @plow and the high
 * 64 bits in @phigh.
 */
static inline void mulu64(uint64_t *plow, uint64_t *phigh,
                          uint64_t a, uint64_t b)
{
    __uint128_t product = (__uint128_t)a * b;

    *phigh = (uint64_t)(product >> 64);
    *plow = (uint64_t)product;
}
45
/*
 * muls64 - signed 64x64 -> 128 bit multiplication.
 *
 * Stores the low 64 bits of the product in @plow and the (sign-extended)
 * high 64 bits in @phigh.
 */
static inline void muls64(uint64_t *plow, uint64_t *phigh,
                          int64_t a, int64_t b)
{
    __int128_t product = (__int128_t)a * b;

    *phigh = (uint64_t)(product >> 64);
    *plow = (uint64_t)product;
}
53
54
/*
 * muldiv64 - compute (a * b) / c without losing the intermediate product.
 *
 * The 96-bit product a * b is held in a 128-bit temporary, so no
 * precision is lost before the division.  The quotient is truncated
 * to 64 bits.
 */
static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
{
    __int128_t product = (__int128_t)a * b;

    return (uint64_t)(product / c);
}
59
/*
 * divu128 - unsigned 128-bit division.
 *
 * Divides the 128-bit value *phigh:*plow by @divisor, storing the
 * 128-bit quotient back into *phigh:*plow and returning the remainder.
 */
static inline uint64_t divu128(uint64_t *plow, uint64_t *phigh,
                               uint64_t divisor)
{
    __uint128_t n = ((__uint128_t)*phigh << 64) | *plow;
    __uint128_t quotient = n / divisor;

    *phigh = (uint64_t)(quotient >> 64);
    *plow = (uint64_t)quotient;
    return (uint64_t)(n % divisor);
}
70
/*
 * divs128 - signed 128-bit division.
 *
 * Divides the signed 128-bit value *phigh:*plow by @divisor, storing
 * the quotient back into *phigh:*plow and returning the remainder
 * (C semantics: truncation toward zero, remainder has dividend's sign).
 */
static inline int64_t divs128(uint64_t *plow, int64_t *phigh,
                              int64_t divisor)
{
    __int128_t n = ((__int128_t)*phigh << 64) | *plow;
    __int128_t quotient = n / divisor;

    *phigh = (int64_t)(quotient >> 64);
    *plow = (uint64_t)quotient;
    return (int64_t)(n % divisor);
}
81#else
82void muls64(uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b);
83void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b);
84uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
85int64_t divs128(uint64_t *plow, int64_t *phigh, int64_t divisor);
86
/*
 * muldiv64 - compute (a * b) / c on hosts without a 128-bit integer type.
 *
 * @a is split into 32-bit halves via the union; each half is multiplied
 * by @b and the quotient is formed with a two-step long division so the
 * full 96-bit intermediate product is preserved.
 *
 * NOTE(review): if the true quotient does not fit in 64 bits, the high
 * word assignment truncates — presumably callers guarantee the result
 * fits, matching the CONFIG_INT128 variant; confirm at call sites.
 */
static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
{
    union {
        uint64_t ll;
        struct {
#if HOST_BIG_ENDIAN
            uint32_t high, low;
#else
            uint32_t low, high;
#endif
        } l;
    } u, res;
    uint64_t rl, rh;

    u.ll = a;
    rl = (uint64_t)u.l.low * (uint64_t)b;
    rh = (uint64_t)u.l.high * (uint64_t)b;
    rh += (rl >> 32);   /* carry the upper half of the low product */
    res.l.high = rh / c;
    res.l.low = (((rh % c) << 32) + (rl & 0xffffffff)) / c;
    return res.ll;
}
109#endif
110
111
112
113
114
115
116
117
/*
 * clz32 - count leading zeros in a 32-bit value.
 *
 * Returns 32 if @val is zero (unlike raw __builtin_clz, which is
 * undefined for 0).
 */
static inline int clz32(uint32_t val)
{
    if (val == 0) {
        return 32;
    }
    return __builtin_clz(val);
}
122
123
124
125
126
127
128
/*
 * clo32 - count leading ones in a 32-bit value.
 *
 * Returns 32 if @val is all-ones.
 */
static inline int clo32(uint32_t val)
{
    uint32_t inverted = ~val;

    return inverted == 0 ? 32 : __builtin_clz(inverted);
}
133
134
135
136
137
138
139
140
/*
 * clz64 - count leading zeros in a 64-bit value.
 *
 * Returns 64 if @val is zero.
 */
static inline int clz64(uint64_t val)
{
    if (val == 0) {
        return 64;
    }
    return __builtin_clzll(val);
}
145
146
147
148
149
150
151
/*
 * clo64 - count leading ones in a 64-bit value.
 *
 * Returns 64 if @val is all-ones.
 */
static inline int clo64(uint64_t val)
{
    uint64_t inverted = ~val;

    return inverted == 0 ? 64 : __builtin_clzll(inverted);
}
156
157
158
159
160
161
162
163
/*
 * ctz32 - count trailing zeros in a 32-bit value.
 *
 * Returns 32 if @val is zero (unlike raw __builtin_ctz, which is
 * undefined for 0).
 */
static inline int ctz32(uint32_t val)
{
    if (val == 0) {
        return 32;
    }
    return __builtin_ctz(val);
}
168
169
170
171
172
173
174
/*
 * cto32 - count trailing ones in a 32-bit value.
 *
 * Returns 32 if @val is all-ones.
 */
static inline int cto32(uint32_t val)
{
    uint32_t inverted = ~val;

    return inverted == 0 ? 32 : __builtin_ctz(inverted);
}
179
180
181
182
183
184
185
186
/*
 * ctz64 - count trailing zeros in a 64-bit value.
 *
 * Returns 64 if @val is zero.
 */
static inline int ctz64(uint64_t val)
{
    if (val == 0) {
        return 64;
    }
    return __builtin_ctzll(val);
}
191
192
193
194
195
196
197
/*
 * cto64 - count trailing ones in a 64-bit value.
 *
 * Returns 64 if @val is all-ones.
 */
static inline int cto64(uint64_t val)
{
    uint64_t inverted = ~val;

    return inverted == 0 ? 64 : __builtin_ctzll(inverted);
}
202
203
204
205
206
207
208
209
/*
 * clrsb32 - count leading redundant sign bits.
 *
 * Returns the number of bits following the most significant bit that
 * are identical to it (__builtin_clrsb semantics; result is 0..31).
 */
static inline int clrsb32(uint32_t val)
{
#if __has_builtin(__builtin_clrsb) || !defined(__clang__)
    return __builtin_clrsb(val);
#else
    /* XOR folds the sign bit into the bit below it; the leading-zero
     * count of the result, minus the sign position, gives the answer. */
    return clz32(val ^ ((int32_t)val >> 1)) - 1;
#endif
}
218
219
220
221
222
223
224
225
/*
 * clrsb64 - count leading redundant sign bits.
 *
 * Returns the number of bits following the most significant bit that
 * are identical to it (__builtin_clrsbll semantics; result is 0..63).
 */
static inline int clrsb64(uint64_t val)
{
#if __has_builtin(__builtin_clrsbll) || !defined(__clang__)
    return __builtin_clrsbll(val);
#else
    /* Same folding trick as clrsb32, on 64 bits. */
    return clz64(val ^ ((int64_t)val >> 1)) - 1;
#endif
}
234
235
236
237
238
/*
 * ctpop8 - count the number of 1 bits in an 8-bit value.
 */
static inline int ctpop8(uint8_t val)
{
    int count = 0;

    /* Kernighan's trick: each iteration clears the lowest set bit. */
    while (val) {
        val &= val - 1;
        count++;
    }
    return count;
}
243
244
245
246
247
/*
 * ctpop16 - count the number of 1 bits in a 16-bit value.
 */
static inline int ctpop16(uint16_t val)
{
    int count = 0;

    /* Clear the lowest set bit until none remain. */
    while (val) {
        val &= val - 1;
        count++;
    }
    return count;
}
252
253
254
255
256
/*
 * ctpop32 - count the number of 1 bits in a 32-bit value.
 */
static inline int ctpop32(uint32_t val)
{
    int count = 0;

    /* Clear the lowest set bit until none remain. */
    while (val) {
        val &= val - 1;
        count++;
    }
    return count;
}
261
262
263
264
265
/*
 * ctpop64 - count the number of 1 bits in a 64-bit value.
 */
static inline int ctpop64(uint64_t val)
{
    int count = 0;

    /* Clear the lowest set bit until none remain. */
    while (val) {
        val &= val - 1;
        count++;
    }
    return count;
}
270
271
272
273
274
/*
 * revbit8 - reverse the order of the 8 bits in @x.
 */
static inline uint8_t revbit8(uint8_t x)
{
#if __has_builtin(__builtin_bitreverse8)
    return __builtin_bitreverse8(x);
#else
    uint8_t result = 0;

    /* Shift bits out of @x from the bottom and into @result from the top. */
    for (int i = 0; i < 8; i++) {
        result = (uint8_t)((result << 1) | (x & 1));
        x >>= 1;
    }
    return result;
#endif
}
291
292
293
294
295
/*
 * revbit16 - reverse the order of the 16 bits in @x.
 */
static inline uint16_t revbit16(uint16_t x)
{
#if __has_builtin(__builtin_bitreverse16)
    return __builtin_bitreverse16(x);
#else
    /* Reverse the bytes, then the nibbles within each byte, then the
     * bits within each nibble. */
    x = bswap16(x);
    x = ((x & 0xf0f0) >> 4)
      | ((x & 0x0f0f) << 4);
    x = ((x & 0x8888) >> 3)
      | ((x & 0x4444) >> 1)
      | ((x & 0x2222) << 1)
      | ((x & 0x1111) << 3);
    return x;
#endif
}
314
315
316
317
318
/*
 * revbit32 - reverse the order of the 32 bits in @x.
 */
static inline uint32_t revbit32(uint32_t x)
{
#if __has_builtin(__builtin_bitreverse32)
    return __builtin_bitreverse32(x);
#else
    /* Reverse the bytes, then the nibbles within each byte, then the
     * bits within each nibble. */
    x = bswap32(x);
    x = ((x & 0xf0f0f0f0u) >> 4)
      | ((x & 0x0f0f0f0fu) << 4);
    x = ((x & 0x88888888u) >> 3)
      | ((x & 0x44444444u) >> 1)
      | ((x & 0x22222222u) << 1)
      | ((x & 0x11111111u) << 3);
    return x;
#endif
}
337
338
339
340
341
/*
 * revbit64 - reverse the order of the 64 bits in @x.
 */
static inline uint64_t revbit64(uint64_t x)
{
#if __has_builtin(__builtin_bitreverse64)
    return __builtin_bitreverse64(x);
#else
    /* Reverse the bytes, then the nibbles within each byte, then the
     * bits within each nibble. */
    x = bswap64(x);
    x = ((x & 0xf0f0f0f0f0f0f0f0ull) >> 4)
      | ((x & 0x0f0f0f0f0f0f0f0full) << 4);
    x = ((x & 0x8888888888888888ull) >> 3)
      | ((x & 0x4444444444444444ull) >> 1)
      | ((x & 0x2222222222222222ull) << 1)
      | ((x & 0x1111111111111111ull) << 3);
    return x;
#endif
}
360
361
362
363
/*
 * uabs64 - absolute value of a signed 64-bit integer, as unsigned.
 *
 * Converting to uint64_t *before* negating keeps v == INT64_MIN well
 * defined: negating the int64_t directly would overflow (undefined
 * behavior unless the build uses -fwrapv), while unsigned negation
 * wraps and yields 2^63 as required.
 */
static inline uint64_t uabs64(int64_t v)
{
    return v < 0 ? -(uint64_t)v : (uint64_t)v;
}
368
369
370
371
372
373
374
375
376
/*
 * sadd32_overflow - signed 32-bit addition with overflow detection.
 *
 * Stores the wrapped sum in *ret; returns true iff the exact sum does
 * not fit in int32_t.
 */
static inline bool sadd32_overflow(int32_t x, int32_t y, int32_t *ret)
{
    int32_t sum;
    bool overflow = __builtin_add_overflow(x, y, &sum);

    *ret = sum;
    return overflow;
}
381
382
383
384
385
386
387
388
389
/*
 * sadd64_overflow - signed 64-bit addition with overflow detection.
 *
 * Stores the wrapped sum in *ret; returns true iff the exact sum does
 * not fit in int64_t.
 */
static inline bool sadd64_overflow(int64_t x, int64_t y, int64_t *ret)
{
    int64_t sum;
    bool overflow = __builtin_add_overflow(x, y, &sum);

    *ret = sum;
    return overflow;
}
394
395
396
397
398
399
400
401
402
/*
 * uadd32_overflow - unsigned 32-bit addition with carry-out detection.
 *
 * Stores the wrapped sum in *ret; returns true iff the addition wrapped.
 */
static inline bool uadd32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
{
    uint32_t sum = x + y;

    *ret = sum;
    /* Unsigned wrap occurred iff the sum is smaller than an addend. */
    return sum < x;
}
407
408
409
410
411
412
413
414
415
/*
 * uadd64_overflow - unsigned 64-bit addition with carry-out detection.
 *
 * Stores the wrapped sum in *ret; returns true iff the addition wrapped.
 */
static inline bool uadd64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
{
    uint64_t sum = x + y;

    *ret = sum;
    /* Unsigned wrap occurred iff the sum is smaller than an addend. */
    return sum < x;
}
420
421
422
423
424
425
426
427
428
429
/*
 * ssub32_overflow - signed 32-bit subtraction with overflow detection.
 *
 * Stores the wrapped difference in *ret; returns true iff the exact
 * difference does not fit in int32_t.
 */
static inline bool ssub32_overflow(int32_t x, int32_t y, int32_t *ret)
{
    int32_t diff;
    bool overflow = __builtin_sub_overflow(x, y, &diff);

    *ret = diff;
    return overflow;
}
434
435
436
437
438
439
440
441
442
443
/*
 * ssub64_overflow - signed 64-bit subtraction with overflow detection.
 *
 * Stores the wrapped difference in *ret; returns true iff the exact
 * difference does not fit in int64_t.
 */
static inline bool ssub64_overflow(int64_t x, int64_t y, int64_t *ret)
{
    int64_t diff;
    bool overflow = __builtin_sub_overflow(x, y, &diff);

    *ret = diff;
    return overflow;
}
448
449
450
451
452
453
454
455
456
457
/*
 * usub32_overflow - unsigned 32-bit subtraction with borrow detection.
 *
 * Stores the wrapped difference in *ret; returns true iff y > x
 * (i.e. the subtraction borrowed).
 */
static inline bool usub32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
{
    *ret = x - y;
    return x < y;
}
462
463
464
465
466
467
468
469
470
471
/*
 * usub64_overflow - unsigned 64-bit subtraction with borrow detection.
 *
 * Stores the wrapped difference in *ret; returns true iff y > x
 * (i.e. the subtraction borrowed).
 */
static inline bool usub64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
{
    *ret = x - y;
    return x < y;
}
476
477
478
479
480
481
482
483
484
/*
 * smul32_overflow - signed 32-bit multiplication with overflow detection.
 *
 * Stores the wrapped product in *ret; returns true iff the exact
 * product does not fit in int32_t.
 */
static inline bool smul32_overflow(int32_t x, int32_t y, int32_t *ret)
{
    int32_t product;
    bool overflow = __builtin_mul_overflow(x, y, &product);

    *ret = product;
    return overflow;
}
489
490
491
492
493
494
495
496
497
/*
 * smul64_overflow - signed 64-bit multiplication with overflow detection.
 *
 * Stores the wrapped product in *ret; returns true iff the exact
 * product does not fit in int64_t.
 */
static inline bool smul64_overflow(int64_t x, int64_t y, int64_t *ret)
{
    int64_t product;
    bool overflow = __builtin_mul_overflow(x, y, &product);

    *ret = product;
    return overflow;
}
502
503
504
505
506
507
508
509
510
/*
 * umul32_overflow - unsigned 32-bit multiplication with overflow detection.
 *
 * Stores the low 32 bits of the product in *ret; returns true iff the
 * exact product does not fit in uint32_t.
 */
static inline bool umul32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
{
    /* The full product always fits in 64 bits. */
    uint64_t product = (uint64_t)x * y;

    *ret = (uint32_t)product;
    return product > UINT32_MAX;
}
515
516
517
518
519
520
521
522
523
/*
 * umul64_overflow - unsigned 64-bit multiplication with overflow detection.
 *
 * Stores the low 64 bits of the product in *ret; returns true iff the
 * exact product does not fit in uint64_t.
 */
static inline bool umul64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
{
    uint64_t product;
    bool overflow = __builtin_mul_overflow(x, y, &product);

    *ret = product;
    return overflow;
}
528
529
530
531
532
533
/*
 * mulu128 - multiply the 128-bit value *phigh:*plow by @factor in place.
 *
 * Returns true if the product did not fit in 128 bits (overflow).
 */
static inline bool mulu128(uint64_t *plow, uint64_t *phigh, uint64_t factor)
{
#if defined(CONFIG_INT128)
    bool res;
    __uint128_t r;
    __uint128_t f = ((__uint128_t)*phigh << 64) | *plow;
    res = __builtin_mul_overflow(f, factor, &r);

    *plow = r;
    *phigh = r >> 64;

    return res;
#else
    uint64_t dhi = *phigh;
    uint64_t dlo = *plow;
    uint64_t ahi;
    uint64_t blo, bhi;

    /* If the high half is zero, a single 64x64 -> 128 multiply
     * cannot overflow 128 bits. */
    if (dhi == 0) {
        mulu64(plow, phigh, dlo, factor);
        return false;
    }

    /* Cross products: dlo * factor gives the low word plus a carry
     * (ahi); dhi * factor contributes blo to the high word and any
     * bhi bits overflow the 128-bit result. */
    mulu64(plow, &ahi, dlo, factor);
    mulu64(&blo, &bhi, dhi, factor);

    /* Overflow if bhi is nonzero or the high-word addition carries. */
    return uadd64_overflow(ahi, blo, phigh) || bhi != 0;
#endif
}
563
564
565
566
567
568
569
570
571
/*
 * uadd64_carry - 64-bit addition with carry-in and carry-out.
 *
 * Computes x + y + *pcarry, returning the low 64 bits of the sum and
 * storing the carry-out (0 or 1) back into *pcarry.
 */
static inline uint64_t uadd64_carry(uint64_t x, uint64_t y, bool *pcarry)
{
#if __has_builtin(__builtin_addcll)
    unsigned long long c = *pcarry;
    x = __builtin_addcll(x, y, c, &c);
    *pcarry = c & 1;
    return x;
#else
    bool c = *pcarry;
    /* Two chained adds; at most one of them can carry (x + 1 only
     * carries when x == UINT64_MAX, making the intermediate sum 0). */
    c = uadd64_overflow(x, c, &x);
    c |= uadd64_overflow(x, y, &x);
    *pcarry = c;
    return x;
#endif
}
588
589
590
591
592
593
594
595
596
/*
 * usub64_borrow - 64-bit subtraction with borrow-in and borrow-out.
 *
 * Computes x - y - *pborrow, returning the low 64 bits of the
 * difference and storing the borrow-out (0 or 1) back into *pborrow.
 *
 * NOTE(review): BUILTIN_SUBCLL_BROKEN presumably disables the builtin
 * on compilers with a known-bad __builtin_subcll — confirm where that
 * macro is defined.
 */
static inline uint64_t usub64_borrow(uint64_t x, uint64_t y, bool *pborrow)
{
#if __has_builtin(__builtin_subcll) && !defined(BUILTIN_SUBCLL_BROKEN)
    unsigned long long b = *pborrow;
    x = __builtin_subcll(x, y, b, &b);
    *pborrow = b & 1;
    return x;
#else
    bool b = *pborrow;
    /* Two chained subtractions; at most one of them can borrow. */
    b = usub64_overflow(x, b, &x);
    b |= usub64_overflow(x, y, &x);
    *pborrow = b;
    return x;
#endif
}
612
613
614
/*
 * 'long'-width aliases for the fixed-width bit helpers above, resolved
 * at preprocessing time from the host's unsigned long range.
 * NOTE(review): ULONG_MAX is assumed to arrive via an indirectly
 * included <limits.h> — confirm.
 */
#if ULONG_MAX == UINT32_MAX
# define clzl   clz32
# define ctzl   ctz32
# define clol   clo32
# define ctol   cto32
# define ctpopl ctpop32
# define revbitl revbit32
#elif ULONG_MAX == UINT64_MAX
# define clzl   clz64
# define ctzl   ctz64
# define clol   clo64
# define ctol   cto64
# define ctpopl ctpop64
# define revbitl revbit64
#else
# error Unknown sizeof long
#endif
632
/*
 * is_power_of_2 - true iff @value has exactly one bit set.
 *
 * Returns false for 0.
 */
static inline bool is_power_of_2(uint64_t value)
{
    /* A power of two has no bits in common with its predecessor. */
    return value != 0 && (value & (value - 1)) == 0;
}
641
642
643
644
/*
 * pow2floor - round down to the nearest power of 2.
 *
 * Returns 0 for an input of 0.
 */
static inline uint64_t pow2floor(uint64_t value)
{
    /* Clear the lowest set bit until only the highest remains. */
    while (value & (value - 1)) {
        value &= value - 1;
    }
    return value;
}
653
654
655
656
657
/*
 * pow2ceil - round up to the nearest power of 2.
 *
 * Returns 1 for an input of 0, and 0 when the result would not fit
 * in 64 bits (value > 2^63).
 */
static inline uint64_t pow2ceil(uint64_t value)
{
    int n = clz64(value - 1);

    if (!n) {
        /*
         * @value - 1 has no leading zeros, so either @value == 0
         * (0 - 1 wrapped to UINT64_MAX) or @value > 2^63.
         * Return 1 for the former and 0 (overflow) for the latter;
         * @value == 2^63 exactly is handled by the shift below.
         */
        return !value;
    }
    return 0x8000000000000000ull >> (n - 1);
}
672
/*
 * pow2roundup32 - round up to the next power of 2 strictly greater
 * than @x.
 *
 * Returns 1 for 0, and wraps to 0 when @x has bit 31 set.
 */
static inline uint32_t pow2roundup32(uint32_t x)
{
    /* Smear the highest set bit into every lower position, then add 1. */
    for (unsigned shift = 1; shift < 32; shift *= 2) {
        x |= x >> shift;
    }
    return x + 1;
}
682
683
684
685
686
687
688
689
690
691
692
693
694void urshift(uint64_t *plow, uint64_t *phigh, int32_t shift);
695
696
697
698
699
700
701
702
703
704
705
706
707
708void ulshift(uint64_t *plow, uint64_t *phigh, int32_t shift, bool *overflow);
709
710
711
712
713
714
/*
 * udiv_qrnnd - divide the 128-bit quantity n1:n0 by d.
 *
 * Returns the 64-bit quotient and stores the remainder in *r.
 *
 * NOTE(review): the generic fallback divides by d >> 32 and the
 * quotient must fit in 64 bits, so callers presumably guarantee a
 * normalized divisor (high bit of d set) and n1 < d, per the GMP
 * longlong.h contract this follows — confirm at call sites.
 */
static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1,
                                  uint64_t n0, uint64_t d)
{
#if defined(__x86_64__)
    /* divq takes the 128-bit dividend in rdx:rax ("1"/"0" below). */
    uint64_t q;
    asm("divq %4" : "=a"(q), "=d"(*r) : "0"(n0), "1"(n1), "rm"(d));
    return q;
#elif defined(__s390x__) && !defined(__clang__)
    /* DLGR wants an even/odd register pair, hence the __int128 operand. */
    unsigned __int128 n = (unsigned __int128)n1 << 64 | n0;
    asm("dlgr %0, %1" : "+r"(n) : "r"(d));
    *r = n >> 64;
    return n;
#elif defined(_ARCH_PPC64) && defined(_ARCH_PWR7)
    /* divdeu yields (n1 << 64) / d; combine with n0 / d and fix up. */
    uint64_t q1, q2, Q, r1, r2, R;
    asm("divdeu %0,%2,%4; divdu %1,%3,%4"
        : "=&r"(q1), "=r"(q2)
        : "r"(n1), "r"(n0), "r"(d));
    r1 = -(q1 * d);         /* low part of (n1 << 64) - q1 * d */
    r2 = n0 - (q2 * d);
    Q = q1 + q2;
    R = r1 + r2;
    /* If the combined remainder overflowed or exceeds d, correct by one. */
    if (R >= d || R < r2) {
        Q += 1;
        R -= d;
    }
    *r = R;
    return Q;
#else
    /* Generic schoolbook long division in 32-bit digits. */
    uint64_t d0, d1, q0, q1, r1, r0, m;

    d0 = (uint32_t)d;
    d1 = d >> 32;

    /* First digit: divide the top 96 bits by the top 32 bits of d,
     * then correct the estimate (at most twice). */
    r1 = n1 % d1;
    q1 = n1 / d1;
    m = q1 * d0;
    r1 = (r1 << 32) | (n0 >> 32);
    if (r1 < m) {
        q1 -= 1;
        r1 += d;
        if (r1 >= d) {
            if (r1 < m) {
                q1 -= 1;
                r1 += d;
            }
        }
    }
    r1 -= m;

    /* Second digit: same procedure on the remaining 96 bits. */
    r0 = r1 % d1;
    q0 = r1 / d1;
    m = q0 * d0;
    r0 = (r0 << 32) | (uint32_t)n0;
    if (r0 < m) {
        q0 -= 1;
        r0 += d;
        if (r0 >= d) {
            if (r0 < m) {
                q0 -= 1;
                r0 += d;
            }
        }
    }
    r0 -= m;

    *r = r0;
    return (q1 << 32) | q0;
#endif
}
786
787Int128 divu256(Int128 *plow, Int128 *phigh, Int128 divisor);
788Int128 divs256(Int128 *plow, Int128 *phigh, Int128 divisor);
789#endif
790