1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19#ifndef MULTI_ARITH_H
20#define MULTI_ARITH_H
21
22#if 0
23
24
/*
 * 128-bit integer support for the m68k FPU emulator (this region is
 * currently compiled out by the surrounding #if 0).
 *
 * An int128 is four 32-bit words stored most significant word first,
 * matching the m68k's big-endian word order.
 */
typedef unsigned int int128[4];

/* Word indices into an int128, most significant first (MSW128 == 0). */
enum {
	MSW128,		/* most significant word */
	NMSW128,	/* next most significant word */
	NLSW128,	/* next least significant word */
	LSW128		/* least significant word */
};

/*
 * Access the 32-bit halves of a 64-bit value through a word pointer.
 * NOTE(review): index 0 is assumed to be the HIGH word, i.e. this is
 * big-endian specific (true on m68k), and the cast type-puns through
 * an incompatible pointer, which violates strict aliasing on modern
 * compilers.
 */
#define LO_WORD(ll) (((unsigned int *) &ll)[1])
#define HI_WORD(ll) (((unsigned int *) &ll)[0])
38
39
40
41static inline void zero128(int128 a)
42{
43 a[LSW128] = a[NLSW128] = a[NMSW128] = a[MSW128] = 0;
44}
45
46
47static inline void set128(unsigned int i3, unsigned int i2, unsigned int i1,
48 unsigned int i0, int128 a)
49{
50 a[LSW128] = i0;
51 a[NLSW128] = i1;
52 a[NMSW128] = i2;
53 a[MSW128] = i3;
54}
55
56
57static inline void int64_to_128(unsigned long long src, int128 dest)
58{
59 dest[LSW128] = (unsigned int) src;
60 dest[NLSW128] = src >> 32;
61 dest[NMSW128] = dest[MSW128] = 0;
62}
63
64static inline void int128_to_64(const int128 src, unsigned long long *dest)
65{
66 *dest = src[LSW128] | (long long) src[NLSW128] << 32;
67}
68
/* Debug helper: print @a as four hex words, most significant first. */
static inline void put_i128(const int128 a)
{
	printk("%08x %08x %08x %08x\n", a[MSW128], a[NMSW128],
	       a[NLSW128], a[LSW128]);
}
74
75
76
77
78
79
/*
 * Shift @a left by @count bits.  Precondition: 1 <= count <= 31 --
 * count == 0 or >= 32 would make the "(32 - count)" shifts operate on
 * a 32-bit word by >= 32 bits, which is undefined behaviour.  The
 * caller (lsl128) guarantees this.
 */
static inline void _lsl128(unsigned int count, int128 a)
{
	a[MSW128] = (a[MSW128] << count) | (a[NMSW128] >> (32 - count));
	a[NMSW128] = (a[NMSW128] << count) | (a[NLSW128] >> (32 - count));
	a[NLSW128] = (a[NLSW128] << count) | (a[LSW128] >> (32 - count));
	a[LSW128] <<= count;
}
87
/*
 * Shift @a right by @count bits.  Precondition: 1 <= count <= 31 --
 * count == 0 or >= 32 would make the "(32 - count)" shifts undefined.
 * The caller (lsr128) guarantees this.
 */
static inline void _lsr128(unsigned int count, int128 a)
{
	a[LSW128] = (a[LSW128] >> count) | (a[NLSW128] << (32 - count));
	a[NLSW128] = (a[NLSW128] >> count) | (a[NMSW128] << (32 - count));
	a[NMSW128] = (a[NMSW128] >> count) | (a[MSW128] << (32 - count));
	a[MSW128] >>= count;
}
95
96
97
/*
 * Shift @a left by exactly one bit using the m68k lsl/roxl chain:
 * lsl.l shifts the low word, leaving the shifted-out bit in the X
 * flag, and each roxl.l rotates that bit into the next higher word.
 */
static inline void lslone128(int128 a)
{
	asm volatile ("lsl.l #1,%0\n"
		      "roxl.l #1,%1\n"
		      "roxl.l #1,%2\n"
		      "roxl.l #1,%3\n"
		      :
		      "=d" (a[LSW128]),
		      "=d"(a[NLSW128]),
		      "=d"(a[NMSW128]),
		      "=d"(a[MSW128])
		      :
		      "0"(a[LSW128]),
		      "1"(a[NLSW128]),
		      "2"(a[NMSW128]),
		      "3"(a[MSW128]));
}
115
/*
 * Shift @a right by exactly one bit using the m68k lsr/roxr chain,
 * starting from the most significant word and propagating the
 * shifted-out bit downwards through the X flag.
 */
static inline void lsrone128(int128 a)
{
	asm volatile ("lsr.l #1,%0\n"
		      "roxr.l #1,%1\n"
		      "roxr.l #1,%2\n"
		      "roxr.l #1,%3\n"
		      :
		      "=d" (a[MSW128]),
		      "=d"(a[NMSW128]),
		      "=d"(a[NLSW128]),
		      "=d"(a[LSW128])
		      :
		      "0"(a[MSW128]),
		      "1"(a[NMSW128]),
		      "2"(a[NLSW128]),
		      "3"(a[LSW128]));
}
133
134
135
136
137
138static inline void lsl128(unsigned int count, int128 a)
139{
140 int wordcount, i;
141
142 if (count % 32)
143 _lsl128(count % 32, a);
144
145 if (0 == (wordcount = count / 32))
146 return;
147
148
149 for (i = 0; i < 4 - wordcount; i++) {
150 a[i] = a[i + wordcount];
151 }
152 for (i = 3; i >= 4 - wordcount; --i) {
153 a[i] = 0;
154 }
155}
156
157static inline void lsr128(unsigned int count, int128 a)
158{
159 int wordcount, i;
160
161 if (count % 32)
162 _lsr128(count % 32, a);
163
164 if (0 == (wordcount = count / 32))
165 return;
166
167 for (i = 3; i >= wordcount; --i) {
168 a[i] = a[i - wordcount];
169 }
170 for (i = 0; i < wordcount; i++) {
171 a[i] = 0;
172 }
173}
174
175static inline int orl128(int a, int128 b)
176{
177 b[LSW128] |= a;
178}
179
/*
 * Test the most significant bit (bit 127) of @a.  Returns the raw
 * masked word (0 or 0x80000000 reinterpreted as int), so callers must
 * only rely on its truth value.
 */
static inline int btsthi128(const int128 a)
{
	return a[MSW128] & 0x80000000;
}
184
185
186static inline int bftestlo128(int top, const int128 a)
187{
188 int r = 0;
189
190 if (top > 31)
191 r |= a[LSW128];
192 if (top > 63)
193 r |= a[NLSW128];
194 if (top > 95)
195 r |= a[NMSW128];
196
197 r |= a[3 - (top / 32)] & ((1 << (top % 32 + 1)) - 1);
198
199 return (r != 0);
200}
201
202
203
204static inline void mask64(int pos, unsigned long long *mask)
205{
206 *mask = 0;
207
208 if (pos < 32) {
209 LO_WORD(*mask) = (1 << pos) - 1;
210 return;
211 }
212 LO_WORD(*mask) = -1;
213 HI_WORD(*mask) = (1 << (pos - 32)) - 1;
214}
215
/*
 * Set bit @pos (0..63) of *@dest using the m68k bset instruction on
 * the appropriate 32-bit half; the "id" constraint allows the bit
 * number to be an immediate or a data register.
 */
static inline void bset64(int pos, unsigned long long *dest)
{

	if (pos < 32)
		asm volatile ("bset %1,%0":"=m"
			      (LO_WORD(*dest)):"id"(pos));
	else
		asm volatile ("bset %1,%0":"=m"
			      (HI_WORD(*dest)):"id"(pos - 32));
}
226
/*
 * Return non-zero iff bit @pos (0..63) of @dest is set.
 *
 * Uses 1U so that pos == 31 (or pos - 32 == 31) does not left-shift a
 * signed 1 into the sign bit, which is undefined behaviour.
 */
static inline int btst64(int pos, unsigned long long dest)
{
	if (pos < 32)
		return (0 != (LO_WORD(dest) & (1U << pos)));
	else
		return (0 != (HI_WORD(dest) & (1U << (pos - 32))));
}
234
235static inline void lsl64(int count, unsigned long long *dest)
236{
237 if (count < 32) {
238 HI_WORD(*dest) = (HI_WORD(*dest) << count)
239 | (LO_WORD(*dest) >> count);
240 LO_WORD(*dest) <<= count;
241 return;
242 }
243 count -= 32;
244 HI_WORD(*dest) = LO_WORD(*dest) << count;
245 LO_WORD(*dest) = 0;
246}
247
248static inline void lsr64(int count, unsigned long long *dest)
249{
250 if (count < 32) {
251 LO_WORD(*dest) = (LO_WORD(*dest) >> count)
252 | (HI_WORD(*dest) << (32 - count));
253 HI_WORD(*dest) >>= count;
254 return;
255 }
256 count -= 32;
257 LO_WORD(*dest) = HI_WORD(*dest) >> count;
258 HI_WORD(*dest) = 0;
259}
260#endif
261
/*
 * Denormalize @reg: shift the mantissa right by @cnt bits and raise
 * the exponent by the same amount, leaving the represented value
 * unchanged.  The top 8 bits shifted out are kept in lowmant as guard
 * bits; anything lost below them is condensed into lowmant's bit 0
 * (the sticky bit) so rounding still sees it.
 *
 * NOTE(review): cnt == 0 makes the first case compute
 * m32[0] << (32 - 0), and cnt == 32 makes the second case compute
 * m32[1] >> 32 -- both 32-bit shifts by 32 and thus undefined
 * behaviour in C; presumably the m68k gcc semantics are relied upon
 * or callers avoid those values.  Verify.
 */
static inline void fp_denormalize(struct fp_ext *reg, unsigned int cnt)
{
	reg->exp += cnt;

	switch (cnt) {
	case 0 ... 8:
		/* everything shifted out still fits in the guard byte */
		reg->lowmant = reg->mant.m32[1] << (8 - cnt);
		reg->mant.m32[1] = (reg->mant.m32[1] >> cnt) |
			(reg->mant.m32[0] << (32 - cnt));
		reg->mant.m32[0] = reg->mant.m32[0] >> cnt;
		break;
	case 9 ... 32:
		/* guard byte comes from m32[1] ... */
		reg->lowmant = reg->mant.m32[1] >> (cnt - 8);
		/* ... plus a sticky bit for any lower non-zero bits */
		if (reg->mant.m32[1] << (40 - cnt))
			reg->lowmant |= 1;
		reg->mant.m32[1] = (reg->mant.m32[1] >> cnt) |
			(reg->mant.m32[0] << (32 - cnt));
		reg->mant.m32[0] = reg->mant.m32[0] >> cnt;
		break;
	case 33 ... 39:
		/* guard byte straddles the word boundary: extract 8
		 * bits starting at bit offset 64 - cnt with bfextu */
		asm volatile ("bfextu %1{%2,#8},%0" : "=d" (reg->lowmant)
			: "m" (reg->mant.m32[0]), "d" (64 - cnt));
		if (reg->mant.m32[1] << (40 - cnt))
			reg->lowmant |= 1;
		reg->mant.m32[1] = reg->mant.m32[0] >> (cnt - 32);
		reg->mant.m32[0] = 0;
		break;
	case 40 ... 71:
		/* guard byte comes entirely from m32[0] */
		reg->lowmant = reg->mant.m32[0] >> (cnt - 40);
		if ((reg->mant.m32[0] << (72 - cnt)) || reg->mant.m32[1])
			reg->lowmant |= 1;
		reg->mant.m32[1] = reg->mant.m32[0] >> (cnt - 32);
		reg->mant.m32[0] = 0;
		break;
	default:
		/* everything is shifted out: only a sticky bit remains */
		reg->lowmant = reg->mant.m32[0] || reg->mant.m32[1];
		reg->mant.m32[0] = 0;
		reg->mant.m32[1] = 0;
		break;
	}
}
303
/*
 * Normalize @reg's mantissa by shifting it left until the topmost set
 * bit reaches bit 63, using the m68k bfffo (find first one) to obtain
 * the shift count.  Returns the number of bits shifted; the exponent
 * is NOT adjusted here.
 *
 * NOTE(review): if m32[0] already has bit 31 set, shift == 0 and
 * "m32[1] >> (32 - 0)" is a shift by 32 (undefined behaviour in C) --
 * presumably callers only invoke this on unnormalized mantissas.
 * Verify.
 */
static inline int fp_overnormalize(struct fp_ext *reg)
{
	int shift;

	if (reg->mant.m32[0]) {
		asm ("bfffo %1{#0,#32},%0" : "=d" (shift) : "dm" (reg->mant.m32[0]));
		reg->mant.m32[0] = (reg->mant.m32[0] << shift) | (reg->mant.m32[1] >> (32 - shift));
		reg->mant.m32[1] = (reg->mant.m32[1] << shift);
	} else {
		/* top word empty: promote the low word and add 32 */
		asm ("bfffo %1{#0,#32},%0" : "=d" (shift) : "dm" (reg->mant.m32[1]));
		reg->mant.m32[0] = (reg->mant.m32[1] << shift);
		reg->mant.m32[1] = 0;
		shift += 32;
	}

	return shift;
}
321
/*
 * dest += src over the full 72-bit mantissa (8-bit lowmant plus the
 * 64-bit mant), using the m68k add/addx chain so the carry propagates
 * through the X flag from lowmant upwards.  Returns the carry out of
 * the most significant word (0 or 1).
 */
static inline int fp_addmant(struct fp_ext *dest, struct fp_ext *src)
{
	int carry;


	asm volatile ("add.b %1,%0" : "=d,g" (dest->lowmant)
		: "g,d" (src->lowmant), "0,0" (dest->lowmant));
	asm volatile ("addx.l %1,%0" : "=d" (dest->mant.m32[1])
		: "d" (src->mant.m32[1]), "0" (dest->mant.m32[1]));
	asm volatile ("addx.l %1,%0" : "=d" (dest->mant.m32[0])
		: "d" (src->mant.m32[0]), "0" (dest->mant.m32[0]));
	/* addx of 0 to 0 captures the final X flag as 0 or 1 */
	asm volatile ("addx.l %0,%0" : "=d" (carry) : "0" (0));

	return carry;
}
337
/*
 * Handle a carry out of the mantissa after an addition: increment the
 * exponent and shift the 72-bit mantissa right by one, re-inserting
 * the carry as the new top bit (0x80000000 in m32[0]).
 *
 * If the exponent reaches 0x7fff the result overflows: the mantissa
 * is cleared, FPSR_EXC_OVFL is raised (plus FPSR_EXC_INEX2 if any
 * mantissa bits were lost) and 0 is returned.  Otherwise returns 1.
 */
static inline int fp_addcarry(struct fp_ext *reg)
{
	if (++reg->exp == 0x7fff) {
		if (reg->mant.m64)
			fp_set_sr(FPSR_EXC_INEX2);
		reg->mant.m64 = 0;
		fp_set_sr(FPSR_EXC_OVFL);
		return 0;
	}
	/* bit 0 of m32[1] moves into lowmant; old lowmant collapses
	 * into the sticky bit */
	reg->lowmant = (reg->mant.m32[1] << 7) | (reg->lowmant ? 1 : 0);
	reg->mant.m32[1] = (reg->mant.m32[1] >> 1) |
			   (reg->mant.m32[0] << 31);
	reg->mant.m32[0] = (reg->mant.m32[0] >> 1) | 0x80000000;

	return 1;
}
354
/*
 * dest = src1 - src2 over the full 72-bit mantissa, using the m68k
 * sub/subx chain so the borrow propagates through the X flag from
 * lowmant upwards.
 */
static inline void fp_submant(struct fp_ext *dest, struct fp_ext *src1,
			      struct fp_ext *src2)
{

	asm volatile ("sub.b %1,%0" : "=d,g" (dest->lowmant)
		: "g,d" (src2->lowmant), "0,0" (src1->lowmant));
	asm volatile ("subx.l %1,%0" : "=d" (dest->mant.m32[1])
		: "d" (src2->mant.m32[1]), "0" (src1->mant.m32[1]));
	asm volatile ("subx.l %1,%0" : "=d" (dest->mant.m32[0])
		: "d" (src2->mant.m32[0]), "0" (src1->mant.m32[0]));
}
366
/* 32x32 -> 64 bit multiply: desth:destl = src1 * src2 (m68k mulu.l). */
#define fp_mul64(desth, destl, src1, src2) ({ \
	asm ("mulu.l %2,%1:%0" : "=d" (destl), "=d" (desth) \
		: "dm" (src1), "0" (src2)); \
})
/* 64/32 bit divide: quot = srch:srcl / div, rem = remainder (divu.l). */
#define fp_div64(quot, rem, srch, srcl, div) \
	asm ("divu.l %2,%1:%0" : "=d" (quot), "=d" (rem) \
		: "dm" (div), "1" (srch), "0" (srcl))
/* 64-bit add: dest1:dest2 += src1:src2, carry via the X flag. */
#define fp_add64(dest1, dest2, src1, src2) ({ \
	asm ("add.l %1,%0" : "=d,dm" (dest2) \
		: "dm,d" (src2), "0,0" (dest2)); \
	asm ("addx.l %1,%0" : "=d" (dest1) \
		: "d" (src1), "0" (dest1)); \
})
/*
 * Add a 64-bit value into the top 96 bits of a 128-bit mantissa.
 * NOTE(review): the "src" parameter is unused -- the body hard-codes
 * the caller's local variable "temp" (see fp_multiplymant), so this
 * macro only works where such a variable is in scope.
 */
#define fp_addx96(dest, src) ({ \
	\
	asm volatile ("add.l %1,%0" : "=d,g" (dest->m32[2]) \
		: "g,d" (temp.m32[1]), "0,0" (dest->m32[2])); \
	asm volatile ("addx.l %1,%0" : "=d" (dest->m32[1]) \
		: "d" (temp.m32[0]), "0" (dest->m32[1])); \
	asm volatile ("addx.l %1,%0" : "=d" (dest->m32[0]) \
		: "d" (0), "0" (dest->m32[0])); \
})
/* 64-bit subtract on two m32[] word pairs: dest -= src (sub/subx). */
#define fp_sub64(dest, src) ({ \
	asm ("sub.l %1,%0" : "=d,dm" (dest.m32[1]) \
		: "dm,d" (src.m32[1]), "0,0" (dest.m32[1])); \
	asm ("subx.l %1,%0" : "=d" (dest.m32[0]) \
		: "d" (src.m32[0]), "0" (dest.m32[0])); \
})
/*
 * 96-bit subtract: dest -= srch:srcm:srcl; evaluates to the borrow
 * flag (scs sets the byte to 0xff on carry/borrow, 0 otherwise).
 */
#define fp_sub96c(dest, srch, srcm, srcl) ({ \
	char carry; \
	asm ("sub.l %1,%0" : "=d,dm" (dest.m32[2]) \
		: "dm,d" (srcl), "0,0" (dest.m32[2])); \
	asm ("subx.l %1,%0" : "=d" (dest.m32[1]) \
		: "d" (srcm), "0" (dest.m32[1])); \
	asm ("subx.l %2,%1; scs %0" : "=d" (carry), "=d" (dest.m32[0]) \
		: "d" (srch), "1" (dest.m32[0])); \
	carry; \
})
405
/*
 * dest (128 bit) = src1->mant * src2->mant, a 64x64 -> 128 bit
 * multiply built from four 32x32 -> 64 partial products.  The two
 * cross products are accumulated into the upper 96 bits via
 * fp_addx96, which implicitly reads the local variable "temp" (its
 * second argument is only documentation).
 */
static inline void fp_multiplymant(union fp_mant128 *dest, struct fp_ext *src1,
				   struct fp_ext *src2)
{
	union fp_mant64 temp;

	/* low*low fills the low 64 bits, high*high the high 64 bits */
	fp_mul64(dest->m32[0], dest->m32[1], src1->mant.m32[0], src2->mant.m32[0]);
	fp_mul64(dest->m32[2], dest->m32[3], src1->mant.m32[1], src2->mant.m32[1]);

	fp_mul64(temp.m32[0], temp.m32[1], src1->mant.m32[0], src2->mant.m32[1]);
	fp_addx96(dest, temp);

	fp_mul64(temp.m32[0], temp.m32[1], src1->mant.m32[1], src2->mant.m32[0]);
	fp_addx96(dest, temp);
}
420
/*
 * Divide src->mant by div->mant, producing the quotient in dest->m32:
 * a leading 0/1 digit followed by three 32-bit digits.  src is
 * clobbered -- it carries the running remainder between digits.
 *
 * Each digit is first estimated by dividing by only the top divisor
 * word (divu.l), then corrected using "fix", a scale factor derived
 * from the top divisor word, and finally adjusted upwards while the
 * 96-bit remainder still admits another subtraction of the divisor.
 * NOTE(review): this summary is reconstructed from the code; the
 * original explanatory comments were stripped -- verify against the
 * kernel history before relying on it.
 */
static inline void fp_dividemant(union fp_mant128 *dest, struct fp_ext *src,
				 struct fp_ext *div)
{
	union fp_mant128 tmp;
	union fp_mant64 tmp64;
	unsigned long *mantp = dest->m32;
	unsigned long fix, rem, first, dummy;
	int i;

	/* the leading quotient digit is 0 or 1: subtract the divisor
	 * once if the dividend is not smaller */
	if (src->mant.m64 >= div->mant.m64) {
		fp_sub64(src->mant, div->mant);
		*mantp = 1;
	} else
		*mantp = 0;
	mantp++;

	/* build the correction factor "fix" from the divisor words */
	fix = 0x80000000;
	dummy = div->mant.m32[1] / div->mant.m32[0] + 1;
	dummy = (dummy >> 1) | fix;
	fp_div64(fix, dummy, fix, 0, dummy);
	fix--;

	for (i = 0; i < 3; i++, mantp++) {
		if (src->mant.m32[0] == div->mant.m32[0]) {
			/* divu.l would overflow here: estimate via the
			 * low remainder word and add "fix" */
			fp_div64(first, rem, 0, src->mant.m32[1], div->mant.m32[0]);

			fp_mul64(*mantp, dummy, first, fix);
			*mantp += fix;
		} else {
			fp_div64(first, rem, src->mant.m32[0], src->mant.m32[1], div->mant.m32[0]);

			fp_mul64(*mantp, dummy, first, fix);
		}

		/* recompute the 96-bit remainder for this digit */
		fp_mul64(tmp.m32[0], tmp.m32[1], div->mant.m32[0], first - *mantp);
		fp_add64(tmp.m32[0], tmp.m32[1], 0, rem);
		tmp.m32[2] = 0;

		fp_mul64(tmp64.m32[0], tmp64.m32[1], *mantp, div->mant.m32[1]);
		fp_sub96c(tmp, 0, tmp64.m32[0], tmp64.m32[1]);

		src->mant.m32[0] = tmp.m32[1];
		src->mant.m32[1] = tmp.m32[2];

		/* correct the digit upwards while the remainder still
		 * allows subtracting the divisor without borrow */
		while (!fp_sub96c(tmp, 0, div->mant.m32[0], div->mant.m32[1])) {
			src->mant.m32[0] = tmp.m32[1];
			src->mant.m32[1] = tmp.m32[2];
			*mantp += 1;
		}
	}
}
483
484#if 0
/*
 * Find the first set bit of a 128-bit mantissa, counted from the most
 * significant bit (0 == bit 127 set), using bfffo on the first
 * non-zero word.  Returns 128 if the whole value is zero.
 */
static inline unsigned int fp_fls128(union fp_mant128 *src)
{
	unsigned long data;
	unsigned int res, off;

	if ((data = src->m32[0]))
		off = 0;
	else if ((data = src->m32[1]))
		off = 32;
	else if ((data = src->m32[2]))
		off = 64;
	else if ((data = src->m32[3]))
		off = 96;
	else
		return 128;

	asm ("bfffo %1{#0,#32},%0" : "=d" (res) : "dm" (data));
	return res + off;
}
504
/*
 * Shift a 128-bit mantissa by @shift bits: positive values shift left
 * (towards m32[0]), negative values shift right (towards m32[3]).
 * Right shifts condense every bit shifted out into a sticky bit in
 * the least significant position so rounding still sees it.  Shifts
 * of magnitude >= 128 fall through the switch and clear the value,
 * keeping only a sticky bit for right shifts.
 */
static inline void fp_shiftmant128(union fp_mant128 *src, int shift)
{
	unsigned long sticky;

	switch (shift) {
	case 0:
		return;
	case 1:
		/* single-bit left shift via the X flag */
		asm volatile ("lsl.l #1,%0"
			: "=d" (src->m32[3]) : "0" (src->m32[3]));
		asm volatile ("roxl.l #1,%0"
			: "=d" (src->m32[2]) : "0" (src->m32[2]));
		asm volatile ("roxl.l #1,%0"
			: "=d" (src->m32[1]) : "0" (src->m32[1]));
		asm volatile ("roxl.l #1,%0"
			: "=d" (src->m32[0]) : "0" (src->m32[0]));
		return;
	case 2 ... 31:
		src->m32[0] = (src->m32[0] << shift) | (src->m32[1] >> (32 - shift));
		src->m32[1] = (src->m32[1] << shift) | (src->m32[2] >> (32 - shift));
		src->m32[2] = (src->m32[2] << shift) | (src->m32[3] >> (32 - shift));
		src->m32[3] = (src->m32[3] << shift);
		return;
	case 32 ... 63:
		shift -= 32;
		src->m32[0] = (src->m32[1] << shift) | (src->m32[2] >> (32 - shift));
		src->m32[1] = (src->m32[2] << shift) | (src->m32[3] >> (32 - shift));
		src->m32[2] = (src->m32[3] << shift);
		src->m32[3] = 0;
		return;
	case 64 ... 95:
		shift -= 64;
		src->m32[0] = (src->m32[2] << shift) | (src->m32[3] >> (32 - shift));
		src->m32[1] = (src->m32[3] << shift);
		src->m32[2] = src->m32[3] = 0;
		return;
	case 96 ... 127:
		shift -= 96;
		src->m32[0] = (src->m32[3] << shift);
		src->m32[1] = src->m32[2] = src->m32[3] = 0;
		return;
	case -31 ... -1:
		/* right shift: collect lost bits into the sticky bit */
		shift = -shift;
		sticky = 0;
		if (src->m32[3] << (32 - shift))
			sticky = 1;
		src->m32[3] = (src->m32[3] >> shift) | (src->m32[2] << (32 - shift)) | sticky;
		src->m32[2] = (src->m32[2] >> shift) | (src->m32[1] << (32 - shift));
		src->m32[1] = (src->m32[1] >> shift) | (src->m32[0] << (32 - shift));
		src->m32[0] = (src->m32[0] >> shift);
		return;
	case -63 ... -32:
		shift = -shift - 32;
		sticky = 0;
		if ((src->m32[2] << (32 - shift)) || src->m32[3])
			sticky = 1;
		src->m32[3] = (src->m32[2] >> shift) | (src->m32[1] << (32 - shift)) | sticky;
		src->m32[2] = (src->m32[1] >> shift) | (src->m32[0] << (32 - shift));
		src->m32[1] = (src->m32[0] >> shift);
		src->m32[0] = 0;
		return;
	case -95 ... -64:
		shift = -shift - 64;
		sticky = 0;
		if ((src->m32[1] << (32 - shift)) || src->m32[2] || src->m32[3])
			sticky = 1;
		src->m32[3] = (src->m32[1] >> shift) | (src->m32[0] << (32 - shift)) | sticky;
		src->m32[2] = (src->m32[0] >> shift);
		src->m32[1] = src->m32[0] = 0;
		return;
	case -127 ... -96:
		shift = -shift - 96;
		sticky = 0;
		if ((src->m32[0] << (32 - shift)) || src->m32[1] || src->m32[2] || src->m32[3])
			sticky = 1;
		src->m32[3] = (src->m32[0] >> shift) | sticky;
		src->m32[2] = src->m32[1] = src->m32[0] = 0;
		return;
	}

	/* |shift| >= 128: the value is gone; keep a sticky bit for
	 * right shifts if anything was set */
	if (shift < 0 && (src->m32[0] || src->m32[1] || src->m32[2] || src->m32[3]))
		src->m32[3] = 1;
	else
		src->m32[3] = 0;
	src->m32[2] = 0;
	src->m32[1] = 0;
	src->m32[0] = 0;
}
593#endif
594
/*
 * Pack the 128-bit mantissa @src into @dest: after shifting left by
 * @shift, the top 64 bits go into dest->mant, the next 8 bits into
 * dest->lowmant, and any remaining non-zero bits set lowmant's sticky
 * bit (bit 0).  Only shift values 0, 1, 31 and 32 are handled; other
 * values leave dest unchanged.
 */
static inline void fp_putmant128(struct fp_ext *dest, union fp_mant128 *src,
				 int shift)
{
	unsigned long tmp;

	switch (shift) {
	case 0:
		dest->mant.m64 = src->m64[0];
		dest->lowmant = src->m32[2] >> 24;
		if (src->m32[3] || (src->m32[2] << 8))
			dest->lowmant |= 1;
		break;
	case 1:
		/* one-bit left shift through the X flag */
		asm volatile ("lsl.l #1,%0"
			: "=d" (tmp) : "0" (src->m32[2]));
		asm volatile ("roxl.l #1,%0"
			: "=d" (dest->mant.m32[1]) : "0" (src->m32[1]));
		asm volatile ("roxl.l #1,%0"
			: "=d" (dest->mant.m32[0]) : "0" (src->m32[0]));
		dest->lowmant = tmp >> 24;
		if (src->m32[3] || (tmp << 8))
			dest->lowmant |= 1;
		break;
	case 31:
		/* shift left 31 == take words one down, right by 1 */
		asm volatile ("lsr.l #1,%1; roxr.l #1,%0"
			: "=d" (dest->mant.m32[0])
			: "d" (src->m32[0]), "0" (src->m32[1]));
		asm volatile ("roxr.l #1,%0"
			: "=d" (dest->mant.m32[1]) : "0" (src->m32[2]));
		asm volatile ("roxr.l #1,%0"
			: "=d" (tmp) : "0" (src->m32[3]));
		dest->lowmant = tmp >> 24;
		if (src->m32[3] << 7)
			dest->lowmant |= 1;
		break;
	case 32:
		dest->mant.m32[0] = src->m32[1];
		dest->mant.m32[1] = src->m32[2];
		dest->lowmant = src->m32[3] >> 24;
		if (src->m32[3] << 8)
			dest->lowmant |= 1;
		break;
	}
}
639
640#if 0
/*
 * Find first set bit with bfffo: returns the bit offset counted from
 * the most significant bit (0 when bit 31 is set), or 32 when @a is
 * zero (bfffo yields offset + width for an all-zero field).
 */
static inline int fls(unsigned int a)
{
	int r;

	asm volatile ("bfffo %1{#0,#32},%0"
		      : "=d" (r) : "md" (a));
	return r;
}
649
650
651static inline int fls128(const int128 a)
652{
653 if (a[MSW128])
654 return fls(a[MSW128]);
655 if (a[NMSW128])
656 return fls(a[NMSW128]) + 32;
657
658
659
660
661
662 if (a[NLSW128])
663 return fls(a[NLSW128]) + 64;
664 if (a[LSW128])
665 return fls(a[LSW128]) + 96;
666 else
667 return -1;
668}
669
670static inline int zerop128(const int128 a)
671{
672 return !(a[LSW128] | a[NLSW128] | a[NMSW128] | a[MSW128]);
673}
674
675static inline int nonzerop128(const int128 a)
676{
677 return (a[LSW128] | a[NLSW128] | a[NMSW128] | a[MSW128]);
678}
679
680
681
682
683static inline void add128(const int128 a, int128 b)
684{
685
686 unsigned int carry[2];
687
688 carry[0] = a[LSW128] > (0xffffffff - b[LSW128]);
689 b[LSW128] += a[LSW128];
690
691 carry[1] = a[NLSW128] > (0xffffffff - b[NLSW128] - carry[0]);
692 b[NLSW128] = a[NLSW128] + b[NLSW128] + carry[0];
693
694 carry[0] = a[NMSW128] > (0xffffffff - b[NMSW128] - carry[1]);
695 b[NMSW128] = a[NMSW128] + b[NMSW128] + carry[1];
696
697 b[MSW128] = a[MSW128] + b[MSW128] + carry[0];
698}
699
700
701static inline void sub128(const int128 a, int128 b)
702{
703
704 unsigned int borrow[2];
705
706 borrow[0] = b[LSW128] < a[LSW128];
707 b[LSW128] -= a[LSW128];
708
709 borrow[1] = b[NLSW128] < a[NLSW128] + borrow[0];
710 b[NLSW128] = b[NLSW128] - a[NLSW128] - borrow[0];
711
712 borrow[0] = b[NMSW128] < a[NMSW128] + borrow[1];
713 b[NMSW128] = b[NMSW128] - a[NMSW128] - borrow[1];
714
715 b[MSW128] = b[MSW128] - a[MSW128] - borrow[0];
716}
717
718
/*
 * c = a * b (64 x 64 -> 128 bit), built from the four 32x32 -> 64 bit
 * partial products.  Products with a zero factor are skipped; the two
 * cross products land in the middle 64 bits and may carry into the
 * top word, hence the 128-bit add128() accumulation.
 */
static inline void mul64(unsigned long long a, unsigned long long b, int128 c)
{
	unsigned long long acc;
	int128 acc128;

	zero128(acc128);
	zero128(c);

	/* low * low fills the low 64 bits of the result */
	if (LO_WORD(a) && LO_WORD(b)) {
		acc = (long long) LO_WORD(a) * LO_WORD(b);
		c[NLSW128] = HI_WORD(acc);
		c[LSW128] = LO_WORD(acc);
	}

	/* high * high fills the high 64 bits */
	if (HI_WORD(a) && HI_WORD(b)) {
		acc = (long long) HI_WORD(a) * HI_WORD(b);
		c[MSW128] = HI_WORD(acc);
		c[NMSW128] = LO_WORD(acc);
	}

	/* cross products: accumulate with full 128-bit adds */
	if (LO_WORD(a) && HI_WORD(b)) {
		acc = (long long) LO_WORD(a) * HI_WORD(b);
		acc128[NMSW128] = HI_WORD(acc);
		acc128[NLSW128] = LO_WORD(acc);
		add128(acc128, c);
	}

	if (HI_WORD(a) && LO_WORD(b)) {
		acc = (long long) HI_WORD(a) * LO_WORD(b);
		acc128[NMSW128] = HI_WORD(acc);
		acc128[NLSW128] = LO_WORD(acc);
		add128(acc128, c);
	}
}
754
755
756static inline int cmp128(int128 a, int128 b)
757{
758 if (a[MSW128] < b[MSW128])
759 return -1;
760 if (a[MSW128] > b[MSW128])
761 return 1;
762 if (a[NMSW128] < b[NMSW128])
763 return -1;
764 if (a[NMSW128] > b[NMSW128])
765 return 1;
766 if (a[NLSW128] < b[NLSW128])
767 return -1;
768 if (a[NLSW128] > b[NLSW128])
769 return 1;
770
771 return (signed) a[LSW128] - b[LSW128];
772}
773
/*
 * c = a / b (unsigned 128 bit) by classic shift-and-subtract long
 * division: shift the divisor (and a matching quotient-bit mask) left
 * until it is no longer smaller than the dividend or its top bit is
 * set, then walk back down, subtracting and setting quotient bits
 * where possible.  Both a and b are clobbered.
 *
 * NOTE(review): b == 0 with a != 0 never exits the alignment loop
 * (shifting zero stays zero while cmp128(b, a) < 0 holds) -- confirm
 * callers never pass a zero divisor.
 */
inline void div128(int128 a, int128 b, int128 c)
{
	int128 mask;

	/* align the divisor just below the dividend, tracking the
	 * corresponding quotient bit in mask */
	set128(0, 0, 0, 1, mask);
	while (cmp128(b, a) < 0 && !btsthi128(b)) {
		lslone128(b);
		lslone128(mask);
	}

	/* subtract and shift back down, collecting quotient bits */
	zero128(c);
	do {
		if (cmp128(a, b) >= 0) {
			sub128(b, a);
			add128(mask, c);
		}
		lsrone128(mask);
		lsrone128(b);
	} while (nonzerop128(mask));
}
817#endif
818
819#endif
820