1
2
3
4
5
6#define __STDC_WANT_IEC_60559_BFP_EXT__
7#define __STDC_WANT_IEC_60559_TYPES_EXT__
8
9#include <stdio.h>
10#include <inttypes.h>
11#include <math.h>
12#include <float.h>
13#include <fenv.h>
14
15#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
16
17static char flag_str[256];
18
19static char *get_flag_state(int flags)
20{
21 if (flags) {
22 snprintf(flag_str, sizeof(flag_str), "%s %s %s %s %s",
23 flags & FE_OVERFLOW ? "OVERFLOW" : "",
24 flags & FE_UNDERFLOW ? "UNDERFLOW" : "",
25 flags & FE_DIVBYZERO ? "DIV0" : "",
26 flags & FE_INEXACT ? "INEXACT" : "",
27 flags & FE_INVALID ? "INVALID" : "");
28 } else {
29 snprintf(flag_str, sizeof(flag_str), "OK");
30 }
31
32 return flag_str;
33}
34
35static void print_double_number(int i, double num)
36{
37 uint64_t double_as_hex = *(uint64_t *) #
38 int flags = fetestexcept(FE_ALL_EXCEPT);
39 char *fstr = get_flag_state(flags);
40
41 printf("%02d DOUBLE: %02.20e / %#020" PRIx64 " (%#x => %s)\n",
42 i, num, double_as_hex, flags, fstr);
43}
44
45static void print_single_number(int i, float num)
46{
47 uint32_t single_as_hex = *(uint32_t *) #
48 int flags = fetestexcept(FE_ALL_EXCEPT);
49 char *fstr = get_flag_state(flags);
50
51 printf("%02d SINGLE: %02.20e / %#010x (%#x => %s)\n",
52 i, num, single_as_hex, flags, fstr);
53}
54
55static void print_half_number(int i, uint16_t num)
56{
57 int flags = fetestexcept(FE_ALL_EXCEPT);
58 char *fstr = get_flag_state(flags);
59
60 printf("%02d HALF: %#04x (%#x => %s)\n",
61 i, num, flags, fstr);
62}
63
64static void print_int64(int i, int64_t num)
65{
66 uint64_t int64_as_hex = *(uint64_t *) #
67 int flags = fetestexcept(FE_ALL_EXCEPT);
68 char *fstr = get_flag_state(flags);
69
70 printf("%02d INT64: %20" PRId64 "/%#020" PRIx64 " (%#x => %s)\n",
71 i, num, int64_as_hex, flags, fstr);
72}
73
74#ifndef SNANF
75
76# if __GNUC_PREREQ(3, 3)
77# define SNANF (__builtin_nansf (""))
78# define SNAN (__builtin_nans (""))
79# define SNANL (__builtin_nansl (""))
80# endif
81#endif
82
83float single_numbers[] = { -SNANF,
84 -NAN,
85 -INFINITY,
86 -FLT_MAX,
87 -1.111E+31,
88 -1.111E+30,
89 -1.08700982e-12,
90 -1.78051176e-20,
91 -FLT_MIN,
92 0.0,
93 FLT_MIN,
94 2.98023224e-08,
95 5.96046E-8,
96 6.09756E-5,
97 6.10352E-5,
98 1.0,
99 1.0009765625,
100 2.0,
101 M_E, M_PI,
102 65503.0,
103 65504.0,
104 65505.0,
105 131007.0,
106 131008.0,
107 131009.0,
108 1.111E+30,
109 FLT_MAX,
110 INFINITY,
111 NAN,
112 SNANF };
113
114static void convert_single_to_half(void)
115{
116 int i;
117
118 printf("Converting single-precision to half-precision\n");
119
120 for (i = 0; i < ARRAY_SIZE(single_numbers); ++i) {
121 float input = single_numbers[i];
122
123 feclearexcept(FE_ALL_EXCEPT);
124
125 print_single_number(i, input);
126#if defined(__arm__)
127 uint32_t output;
128 asm("vcvtb.f16.f32 %0, %1" : "=t" (output) : "x" (input));
129#else
130 uint16_t output;
131 asm("fcvt %h0, %s1" : "=w" (output) : "x" (input));
132#endif
133 print_half_number(i, output);
134 }
135}
136
137static void convert_single_to_double(void)
138{
139 int i;
140
141 printf("Converting single-precision to double-precision\n");
142
143 for (i = 0; i < ARRAY_SIZE(single_numbers); ++i) {
144 float input = single_numbers[i];
145
146 double output;
147
148 feclearexcept(FE_ALL_EXCEPT);
149
150 print_single_number(i, input);
151#if defined(__arm__)
152 asm("vcvt.f64.f32 %P0, %1" : "=w" (output) : "t" (input));
153#else
154 asm("fcvt %d0, %s1" : "=w" (output) : "x" (input));
155#endif
156 print_double_number(i, output);
157 }
158}
159
160static void convert_single_to_integer(void)
161{
162 int i;
163
164 printf("Converting single-precision to integer\n");
165
166 for (i = 0; i < ARRAY_SIZE(single_numbers); ++i) {
167 float input = single_numbers[i];
168 int64_t output;
169
170 feclearexcept(FE_ALL_EXCEPT);
171
172 print_single_number(i, input);
173#if defined(__arm__)
174
175 output = input;
176#else
177 asm("fcvtzs %0, %s1" : "=r" (output) : "w" (input));
178#endif
179 print_int64(i, output);
180 }
181}
182
183
184typedef union {
185 double d;
186 uint64_t h;
187} test_doubles;
188
189test_doubles double_numbers[] = {
190 {SNAN},
191 {-NAN},
192 {-INFINITY},
193 {-DBL_MAX},
194 {-FLT_MAX-1.0},
195 {-FLT_MAX},
196 {-1.111E+31},
197 {-1.111E+30},
198 {-2.0}, {-1.0},
199 {-DBL_MIN},
200 {-FLT_MIN},
201 {0.0},
202 {FLT_MIN},
203 {2.98023224e-08},
204 {5.96046E-8},
205 {6.09756E-5},
206 {6.10352E-5},
207 {1.0},
208 {1.0009765625},
209 {DBL_MIN},
210 {1.3789972848607228e-308},
211 {1.4914738736681624e-308},
212 {1.0}, {2.0},
213 {M_E}, {M_PI},
214 {65503.0},
215 {65504.0},
216 {65505.0},
217 {131007.0},
218 {131008.0},
219 {131009.0},
220 {.h = 0x41dfffffffc00000 },
221 {FLT_MAX},
222 {FLT_MAX + 1.0},
223 {DBL_MAX},
224 {INFINITY},
225 {NAN},
226 {.h = 0x7ff0000000000001},
227 {SNAN},
228};
229
230static void convert_double_to_half(void)
231{
232 int i;
233
234 printf("Converting double-precision to half-precision\n");
235
236 for (i = 0; i < ARRAY_SIZE(double_numbers); ++i) {
237 double input = double_numbers[i].d;
238 uint16_t output;
239
240 feclearexcept(FE_ALL_EXCEPT);
241
242 print_double_number(i, input);
243
244
245#if defined(__arm__)
246
247 output = input;
248#else
249 asm("fcvt %h0, %d1" : "=w" (output) : "x" (input));
250#endif
251 print_half_number(i, output);
252 }
253}
254
255static void convert_double_to_single(void)
256{
257 int i;
258
259 printf("Converting double-precision to single-precision\n");
260
261 for (i = 0; i < ARRAY_SIZE(double_numbers); ++i) {
262 double input = double_numbers[i].d;
263 uint32_t output;
264
265 feclearexcept(FE_ALL_EXCEPT);
266
267 print_double_number(i, input);
268
269#if defined(__arm__)
270 asm("vcvt.f32.f64 %0, %P1" : "=w" (output) : "x" (input));
271#else
272 asm("fcvt %s0, %d1" : "=w" (output) : "x" (input));
273#endif
274
275 print_single_number(i, output);
276 }
277}
278
279static void convert_double_to_integer(void)
280{
281 int i;
282
283 printf("Converting double-precision to integer\n");
284
285 for (i = 0; i < ARRAY_SIZE(double_numbers); ++i) {
286 double input = double_numbers[i].d;
287 int64_t output;
288
289 feclearexcept(FE_ALL_EXCEPT);
290
291 print_double_number(i, input);
292#if defined(__arm__)
293
294 output = input;
295#else
296 asm("fcvtzs %0, %d1" : "=r" (output) : "w" (input));
297#endif
298 print_int64(i, output);
299 }
300}
301
302
303uint16_t half_numbers[] = {
304 0xffff,
305 0xfcff,
306 0xfc01,
307 0xfc00,
308 0xfbff,
309 0xc000,
310 0xbc00,
311 0x8001,
312 0x8000,
313 0x0000,
314 0x0001,
315 0x3c00,
316 0x7bff,
317 0x7c00,
318 0x7c01,
319 0x7cff,
320 0x7fff,
321};
322
323static void convert_half_to_double(void)
324{
325 int i;
326
327 printf("Converting half-precision to double-precision\n");
328
329 for (i = 0; i < ARRAY_SIZE(half_numbers); ++i) {
330 uint16_t input = half_numbers[i];
331 double output;
332
333 feclearexcept(FE_ALL_EXCEPT);
334
335 print_half_number(i, input);
336#if defined(__arm__)
337
338 output = input;
339#else
340 asm("fcvt %d0, %h1" : "=w" (output) : "x" (input));
341#endif
342 print_double_number(i, output);
343 }
344}
345
346static void convert_half_to_single(void)
347{
348 int i;
349
350 printf("Converting half-precision to single-precision\n");
351
352 for (i = 0; i < ARRAY_SIZE(half_numbers); ++i) {
353 uint16_t input = half_numbers[i];
354 float output;
355
356 feclearexcept(FE_ALL_EXCEPT);
357
358 print_half_number(i, input);
359#if defined(__arm__)
360 asm("vcvtb.f32.f16 %0, %1" : "=w" (output) : "x" ((uint32_t)input));
361#else
362 asm("fcvt %s0, %h1" : "=w" (output) : "x" (input));
363#endif
364 print_single_number(i, output);
365 }
366}
367
368static void convert_half_to_integer(void)
369{
370 int i;
371
372 printf("Converting half-precision to integer\n");
373
374 for (i = 0; i < ARRAY_SIZE(half_numbers); ++i) {
375 uint16_t input = half_numbers[i];
376 int64_t output;
377
378 feclearexcept(FE_ALL_EXCEPT);
379
380 print_half_number(i, input);
381#if defined(__arm__)
382
383 output = input;
384#else
385 asm("fcvt %s0, %h1" : "=w" (output) : "x" (input));
386#endif
387 print_int64(i, output);
388 }
389}
390
391typedef struct {
392 int flag;
393 char *desc;
394} float_mapping;
395
396float_mapping round_flags[] = {
397 { FE_TONEAREST, "to nearest" },
398 { FE_UPWARD, "upwards" },
399 { FE_DOWNWARD, "downwards" },
400 { FE_TOWARDZERO, "to zero" }
401};
402
403int main(int argc, char *argv[argc])
404{
405 int i;
406
407 printf("#### Enabling IEEE Half Precision\n");
408
409 for (i = 0; i < ARRAY_SIZE(round_flags); ++i) {
410 fesetround(round_flags[i].flag);
411 printf("### Rounding %s\n", round_flags[i].desc);
412 convert_single_to_half();
413 convert_single_to_double();
414 convert_double_to_half();
415 convert_double_to_single();
416 convert_half_to_single();
417 convert_half_to_double();
418 }
419
420
421 convert_single_to_integer();
422 convert_double_to_integer();
423 convert_half_to_integer();
424
425
426#if defined(__arm__)
427
428 asm("mrc p10, 7, r1, cr1, cr0, 0\n\t"
429 "orr r1, r1, %[flags]\n\t"
430 "mcr p10, 7, r1, cr1, cr0, 0\n\t"
431 : : [flags] "n" (1 << 26) : "r1" );
432#else
433 asm("mrs x1, fpcr\n\t"
434 "orr x1, x1, %[flags]\n\t"
435 "msr fpcr, x1\n\t"
436 : : [flags] "n" (1 << 26) : "x1" );
437#endif
438
439 printf("#### Enabling ARM Alternative Half Precision\n");
440
441 for (i = 0; i < ARRAY_SIZE(round_flags); ++i) {
442 fesetround(round_flags[i].flag);
443 printf("### Rounding %s\n", round_flags[i].desc);
444 convert_single_to_half();
445 convert_single_to_double();
446 convert_double_to_half();
447 convert_double_to_single();
448 convert_half_to_single();
449 convert_half_to_double();
450 }
451
452
453 convert_single_to_integer();
454 convert_double_to_integer();
455 convert_half_to_integer();
456
457 return 0;
458}
459