1
2
3
4
5
6#define __STDC_WANT_IEC_60559_BFP_EXT__
7#define __STDC_WANT_IEC_60559_TYPES_EXT__
8
9#include <stdio.h>
10#include <inttypes.h>
11#include <math.h>
12#include <float.h>
13#include <fenv.h>
14
15#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
16
17static char flag_str[256];
18
19static char *get_flag_state(int flags)
20{
21 if (flags) {
22 snprintf(flag_str, sizeof(flag_str), "%s %s %s %s %s",
23 flags & FE_OVERFLOW ? "OVERFLOW" : "",
24 flags & FE_UNDERFLOW ? "UNDERFLOW" : "",
25 flags & FE_DIVBYZERO ? "DIV0" : "",
26 flags & FE_INEXACT ? "INEXACT" : "",
27 flags & FE_INVALID ? "INVALID" : "");
28 } else {
29 snprintf(flag_str, sizeof(flag_str), "OK");
30 }
31
32 return flag_str;
33}
34
35static void print_double_number(int i, double num)
36{
37 uint64_t double_as_hex = *(uint64_t *) #
38 int flags = fetestexcept(FE_ALL_EXCEPT);
39 char *fstr = get_flag_state(flags);
40
41 printf("%02d DOUBLE: %02.20e / %#020" PRIx64 " (%#x => %s)\n",
42 i, num, double_as_hex, flags, fstr);
43}
44
45static void print_single_number(int i, float num)
46{
47 uint32_t single_as_hex = *(uint32_t *) #
48 int flags = fetestexcept(FE_ALL_EXCEPT);
49 char *fstr = get_flag_state(flags);
50
51 printf("%02d SINGLE: %02.20e / %#010x (%#x => %s)\n",
52 i, num, single_as_hex, flags, fstr);
53}
54
55static void print_half_number(int i, uint16_t num)
56{
57 int flags = fetestexcept(FE_ALL_EXCEPT);
58 char *fstr = get_flag_state(flags);
59
60 printf("%02d HALF: %#04x (%#x => %s)\n",
61 i, num, flags, fstr);
62}
63
64static void print_int64(int i, int64_t num)
65{
66 uint64_t int64_as_hex = *(uint64_t *) #
67 int flags = fetestexcept(FE_ALL_EXCEPT);
68 char *fstr = get_flag_state(flags);
69
70 printf("%02d INT64: %20" PRId64 "/%#020" PRIx64 " (%#x => %s)\n",
71 i, num, int64_as_hex, flags, fstr);
72}
73
74#ifndef SNANF
75
76# define SNANF (__builtin_nansf (""))
77# define SNAN (__builtin_nans (""))
78# define SNANL (__builtin_nansl (""))
79#endif
80
81float single_numbers[] = { -SNANF,
82 -NAN,
83 -INFINITY,
84 -FLT_MAX,
85 -1.111E+31,
86 -1.111E+30,
87 -1.08700982e-12,
88 -1.78051176e-20,
89 -FLT_MIN,
90 0.0,
91 FLT_MIN,
92 2.98023224e-08,
93 5.96046E-8,
94 6.09756E-5,
95 6.10352E-5,
96 1.0,
97 1.0009765625,
98 2.0,
99 M_E, M_PI,
100 65503.0,
101 65504.0,
102 65505.0,
103 131007.0,
104 131008.0,
105 131009.0,
106 1.111E+30,
107 FLT_MAX,
108 INFINITY,
109 NAN,
110 SNANF };
111
112static void convert_single_to_half(void)
113{
114 int i;
115
116 printf("Converting single-precision to half-precision\n");
117
118 for (i = 0; i < ARRAY_SIZE(single_numbers); ++i) {
119 float input = single_numbers[i];
120
121 feclearexcept(FE_ALL_EXCEPT);
122
123 print_single_number(i, input);
124#if defined(__arm__)
125 uint32_t output;
126 asm("vcvtb.f16.f32 %0, %1" : "=t" (output) : "x" (input));
127#else
128 uint16_t output;
129 asm("fcvt %h0, %s1" : "=w" (output) : "x" (input));
130#endif
131 print_half_number(i, output);
132 }
133}
134
135static void convert_single_to_double(void)
136{
137 int i;
138
139 printf("Converting single-precision to double-precision\n");
140
141 for (i = 0; i < ARRAY_SIZE(single_numbers); ++i) {
142 float input = single_numbers[i];
143
144 double output;
145
146 feclearexcept(FE_ALL_EXCEPT);
147
148 print_single_number(i, input);
149#if defined(__arm__)
150 asm("vcvt.f64.f32 %P0, %1" : "=w" (output) : "t" (input));
151#else
152 asm("fcvt %d0, %s1" : "=w" (output) : "x" (input));
153#endif
154 print_double_number(i, output);
155 }
156}
157
158static void convert_single_to_integer(void)
159{
160 int i;
161
162 printf("Converting single-precision to integer\n");
163
164 for (i = 0; i < ARRAY_SIZE(single_numbers); ++i) {
165 float input = single_numbers[i];
166 int64_t output;
167
168 feclearexcept(FE_ALL_EXCEPT);
169
170 print_single_number(i, input);
171#if defined(__arm__)
172
173 output = input;
174#else
175 asm("fcvtzs %0, %s1" : "=r" (output) : "w" (input));
176#endif
177 print_int64(i, output);
178 }
179}
180
181
182typedef union {
183 double d;
184 uint64_t h;
185} test_doubles;
186
187test_doubles double_numbers[] = {
188 {SNAN},
189 {-NAN},
190 {-INFINITY},
191 {-DBL_MAX},
192 {-FLT_MAX-1.0},
193 {-FLT_MAX},
194 {-1.111E+31},
195 {-1.111E+30},
196 {-2.0}, {-1.0},
197 {-DBL_MIN},
198 {-FLT_MIN},
199 {0.0},
200 {FLT_MIN},
201 {2.98023224e-08},
202 {5.96046E-8},
203 {6.09756E-5},
204 {6.10352E-5},
205 {1.0},
206 {1.0009765625},
207 {DBL_MIN},
208 {1.3789972848607228e-308},
209 {1.4914738736681624e-308},
210 {1.0}, {2.0},
211 {M_E}, {M_PI},
212 {65503.0},
213 {65504.0},
214 {65505.0},
215 {131007.0},
216 {131008.0},
217 {131009.0},
218 {.h = 0x41dfffffffc00000 },
219 {FLT_MAX},
220 {FLT_MAX + 1.0},
221 {DBL_MAX},
222 {INFINITY},
223 {NAN},
224 {.h = 0x7ff0000000000001},
225 {SNAN},
226};
227
228static void convert_double_to_half(void)
229{
230 int i;
231
232 printf("Converting double-precision to half-precision\n");
233
234 for (i = 0; i < ARRAY_SIZE(double_numbers); ++i) {
235 double input = double_numbers[i].d;
236 uint16_t output;
237
238 feclearexcept(FE_ALL_EXCEPT);
239
240 print_double_number(i, input);
241
242
243#if defined(__arm__)
244
245 output = input;
246#else
247 asm("fcvt %h0, %d1" : "=w" (output) : "x" (input));
248#endif
249 print_half_number(i, output);
250 }
251}
252
253static void convert_double_to_single(void)
254{
255 int i;
256
257 printf("Converting double-precision to single-precision\n");
258
259 for (i = 0; i < ARRAY_SIZE(double_numbers); ++i) {
260 double input = double_numbers[i].d;
261 uint32_t output;
262
263 feclearexcept(FE_ALL_EXCEPT);
264
265 print_double_number(i, input);
266
267#if defined(__arm__)
268 asm("vcvt.f32.f64 %0, %P1" : "=w" (output) : "x" (input));
269#else
270 asm("fcvt %s0, %d1" : "=w" (output) : "x" (input));
271#endif
272
273 print_single_number(i, output);
274 }
275}
276
277static void convert_double_to_integer(void)
278{
279 int i;
280
281 printf("Converting double-precision to integer\n");
282
283 for (i = 0; i < ARRAY_SIZE(double_numbers); ++i) {
284 double input = double_numbers[i].d;
285 int64_t output;
286
287 feclearexcept(FE_ALL_EXCEPT);
288
289 print_double_number(i, input);
290#if defined(__arm__)
291
292 output = input;
293#else
294 asm("fcvtzs %0, %d1" : "=r" (output) : "w" (input));
295#endif
296 print_int64(i, output);
297 }
298}
299
300
301uint16_t half_numbers[] = {
302 0xffff,
303 0xfcff,
304 0xfc01,
305 0xfc00,
306 0xfbff,
307 0xc000,
308 0xbc00,
309 0x8001,
310 0x8000,
311 0x0000,
312 0x0001,
313 0x3c00,
314 0x7bff,
315 0x7c00,
316 0x7c01,
317 0x7cff,
318 0x7fff,
319};
320
321static void convert_half_to_double(void)
322{
323 int i;
324
325 printf("Converting half-precision to double-precision\n");
326
327 for (i = 0; i < ARRAY_SIZE(half_numbers); ++i) {
328 uint16_t input = half_numbers[i];
329 double output;
330
331 feclearexcept(FE_ALL_EXCEPT);
332
333 print_half_number(i, input);
334#if defined(__arm__)
335
336 output = input;
337#else
338 asm("fcvt %d0, %h1" : "=w" (output) : "x" (input));
339#endif
340 print_double_number(i, output);
341 }
342}
343
344static void convert_half_to_single(void)
345{
346 int i;
347
348 printf("Converting half-precision to single-precision\n");
349
350 for (i = 0; i < ARRAY_SIZE(half_numbers); ++i) {
351 uint16_t input = half_numbers[i];
352 float output;
353
354 feclearexcept(FE_ALL_EXCEPT);
355
356 print_half_number(i, input);
357#if defined(__arm__)
358 asm("vcvtb.f32.f16 %0, %1" : "=w" (output) : "x" ((uint32_t)input));
359#else
360 asm("fcvt %s0, %h1" : "=w" (output) : "x" (input));
361#endif
362 print_single_number(i, output);
363 }
364}
365
366static void convert_half_to_integer(void)
367{
368 int i;
369
370 printf("Converting half-precision to integer\n");
371
372 for (i = 0; i < ARRAY_SIZE(half_numbers); ++i) {
373 uint16_t input = half_numbers[i];
374 int64_t output;
375
376 feclearexcept(FE_ALL_EXCEPT);
377
378 print_half_number(i, input);
379#if defined(__arm__)
380
381 output = input;
382#else
383 asm("fcvt %s0, %h1" : "=w" (output) : "x" (input));
384#endif
385 print_int64(i, output);
386 }
387}
388
389typedef struct {
390 int flag;
391 char *desc;
392} float_mapping;
393
394float_mapping round_flags[] = {
395 { FE_TONEAREST, "to nearest" },
396 { FE_UPWARD, "upwards" },
397 { FE_DOWNWARD, "downwards" },
398 { FE_TOWARDZERO, "to zero" }
399};
400
401int main(int argc, char *argv[argc])
402{
403 int i;
404
405 printf("#### Enabling IEEE Half Precision\n");
406
407 for (i = 0; i < ARRAY_SIZE(round_flags); ++i) {
408 fesetround(round_flags[i].flag);
409 printf("### Rounding %s\n", round_flags[i].desc);
410 convert_single_to_half();
411 convert_single_to_double();
412 convert_double_to_half();
413 convert_double_to_single();
414 convert_half_to_single();
415 convert_half_to_double();
416 }
417
418
419 convert_single_to_integer();
420 convert_double_to_integer();
421 convert_half_to_integer();
422
423
424#if defined(__arm__)
425
426 asm("mrc p10, 7, r1, cr1, cr0, 0\n\t"
427 "orr r1, r1, %[flags]\n\t"
428 "mcr p10, 7, r1, cr1, cr0, 0\n\t"
429 : : [flags] "n" (1 << 26) : "r1" );
430#else
431 asm("mrs x1, fpcr\n\t"
432 "orr x1, x1, %[flags]\n\t"
433 "msr fpcr, x1\n\t"
434 : : [flags] "n" (1 << 26) : "x1" );
435#endif
436
437 printf("#### Enabling ARM Alternative Half Precision\n");
438
439 for (i = 0; i < ARRAY_SIZE(round_flags); ++i) {
440 fesetround(round_flags[i].flag);
441 printf("### Rounding %s\n", round_flags[i].desc);
442 convert_single_to_half();
443 convert_single_to_double();
444 convert_double_to_half();
445 convert_double_to_single();
446 convert_half_to_single();
447 convert_half_to_double();
448 }
449
450
451 convert_single_to_integer();
452 convert_double_to_integer();
453 convert_half_to_integer();
454
455 return 0;
456}
457