1|
2| res_func.sa 3.9 7/29/91
3|
4| Normalizes denormalized numbers if necessary and updates the
5| stack frame. The function is then restored back into the
6| machine and the 040 completes the operation. This routine
7| is only used by the unsupported data type/format handler.
8| (Exception vector 55).
9|
10| For packed move out (fmove.p fpm,<ea>) the operation is
11| completed here; data is packed and moved to user memory.
12| The stack is restored to the 040 only in the case of a
13| reportable exception in the conversion.
14|
15|
16| Copyright (C) Motorola, Inc. 1990
17| All Rights Reserved
18|
19| For details on the license for this file, please see the
20| file, README, in this same directory.
21
22RES_FUNC: |idnt 2,1 | Motorola 040 Floating Point Software Package
23
24 |section 8
25
26#include "fpsp.h"
27
28sp_bnds: .short 0x3f81,0x407e
29 .short 0x3f6a,0x0000
30dp_bnds: .short 0x3c01,0x43fe
31 .short 0x3bcd,0x0000
32
33 |xref mem_write
34 |xref bindec
35 |xref get_fline
36 |xref round
37 |xref denorm
38 |xref dest_ext
39 |xref dest_dbl
40 |xref dest_sgl
41 |xref unf_sub
42 |xref nrm_set
43 |xref dnrm_lp
44 |xref ovf_res
45 |xref reg_dest
46 |xref t_ovfl
47 |xref t_unfl
48
49 .global res_func
50 .global p_move
51
52res_func:
53 clrb DNRM_FLG(%a6)
54 clrb RES_FLG(%a6)
55 clrb CU_ONLY(%a6)
56 tstb DY_MO_FLG(%a6)
57 beqs monadic
58dyadic:
59 btstb
60| ;inf=010 or nan=011
61 beqs monadic |then branch
62| ;else denorm
63| HANDLE DESTINATION DENORM HERE
64| ;set dtag to norm
65| ;write the tag & fpte15 to the fstack
66 leal FPTEMP(%a6),%a0
67
68 bclrb
69 sne LOCAL_SGN(%a0)
70
71 bsr nrm_set |normalize number (exp will go negative)
72 bclrb
73 bfclr LOCAL_SGN(%a0){
74 beqs dpos
75 bsetb
76dpos:
77 bfclr DTAG(%a6){
78 bsetb
79 orb
80monadic:
81 leal ETEMP(%a6),%a0
82 btstb
83 bne opclass3 |it is a mv out
84|
85| At this point, only opclass 0 and 2 possible
86|
87 btstb
88| ;inf=010 or nan=011
89 bne mon_dnrm |else denorm
90 tstb DY_MO_FLG(%a6) |all cases of dyadic instructions would
91 bne normal |require normalization of denorm
92
93| At this point:
94| monadic instructions: fabs = $18 fneg = $1a ftst = $3a
95| fmove = $00 fsmove = $40 fdmove = $44
96| fsqrt = $05* fssqrt = $41 fdsqrt = $45
97| (*fsqrt reencoded to $05)
98|
99 movew CMDREG1B(%a6),%d0 |get command register
100 andil
101|
102| At this point, fabs, fneg, fsmove, fdmove, ftst, fsqrt, fssqrt, and
103| fdsqrt are possible.
104| For cases fabs, fneg, fsmove, and fdmove goto spos (do not normalize)
105| For cases fsqrt, fssqrt, and fdsqrt goto nrm_src (do normalize)
106|
107 btstl
108 bne normal |weed out fsqrt instructions
109|
110| cu_norm handles fmove in instructions with normalized inputs.
111| The routine round is used to correctly round the input for the
112| destination precision and mode.
113|
114cu_norm:
115 st CU_ONLY(%a6) |set cu-only inst flag
116 movew CMDREG1B(%a6),%d0
117 andib
118 tstb %d0
119 beql cu_nmove |if zero, it is an fmove
120 cmpib
121 beql cu_nabs |if $18, it is fabs
122 cmpib
123 beql cu_nneg |if $1a, it is fneg
124|
125| Inst is ftst. Check the source operand and set the cc's accordingly.
126| No write is done, so simply rts.
127|
128cu_ntst:
129 movew LOCAL_EX(%a0),%d0
130 bclrl
131 sne LOCAL_SGN(%a0)
132 beqs cu_ntpo
133 orl
134cu_ntpo:
135 cmpiw
136 bnes cu_ntcz
137 tstl LOCAL_HI(%a0)
138 bnes cu_ntn
139 tstl LOCAL_LO(%a0)
140 bnes cu_ntn
141 orl
142 rts
143cu_ntn:
144 orl
145 movel ETEMP_EX(%a6),FPTEMP_EX(%a6) |set up fptemp sign for
146| ;snan handler
147
148 rts
149cu_ntcz:
150 tstl LOCAL_HI(%a0)
151 bnel cu_ntsx
152 tstl LOCAL_LO(%a0)
153 bnel cu_ntsx
154 orl
155cu_ntsx:
156 rts
157|
158| Inst is fabs. Execute the absolute value function on the input.
159| Branch to the fmove code. If the operand is NaN, do nothing.
160|
161cu_nabs:
162 moveb STAG(%a6),%d0
163 btstl
164 bne wr_etemp |if either, simply write it
165 bclrb
166 bras cu_nmove |fmove code will finish
167|
168| Inst is fneg. Execute the negate value function on the input.
169| Fall though to the fmove code. If the operand is NaN, do nothing.
170|
171cu_nneg:
172 moveb STAG(%a6),%d0
173 btstl
174 bne wr_etemp |if either, simply write it
175 bchgb
176|
177| Inst is fmove. This code also handles all result writes.
178| If bit 2 is set, round is forced to double. If it is clear,
179| and bit 6 is set, round is forced to single. If both are clear,
180| the round precision is found in the fpcr. If the rounding precision
181| is double or single, round the result before the write.
182|
183cu_nmove:
184 moveb STAG(%a6),%d0
185 andib
186 bne wr_etemp |if not norm, simply write it
187 btstb
188 bne cu_nmrd
189 btstb
190 bne cu_nmrs
191|
192| The move or operation is not with forced precision. Test for
193| nan or inf as the input; if so, simply write it to FPn. Use the
194| FPCR_MODE byte to get rounding on norms and zeros.
195|
196cu_nmnr:
197 bfextu FPCR_MODE(%a6){
198 tstb %d0 |check for extended
199 beq cu_wrexn |if so, just write result
200 cmpib
201 beq cu_nmrs |fall through to double
202|
203| The move is fdmove or round precision is double.
204|
205cu_nmrd:
206 movel
207 movew LOCAL_EX(%a0),%d1 |compare exponent to double threshold
208 andw
209 cmpw
210 bls cu_nunfl
211 bfextu FPCR_MODE(%a6){
212 orl
213 clrl %d0 |clear g,r,s for round
214 bclrb
215 sne LOCAL_SGN(%a0)
216 bsrl round
217 bfclr LOCAL_SGN(%a0){
218 beqs cu_nmrdc
219 bsetb
220cu_nmrdc:
221 movew LOCAL_EX(%a0),%d1 |check for overflow
222 andw
223 cmpw
224 bge cu_novfl |take care of overflow case
225 bra cu_wrexn
226|
227| The move is fsmove or round precision is single.
228|
229cu_nmrs:
230 movel
231 movew LOCAL_EX(%a0),%d1
232 andw
233 cmpw
234 bls cu_nunfl
235 bfextu FPCR_MODE(%a6){
236 orl
237 clrl %d0
238 bclrb
239 sne LOCAL_SGN(%a0)
240 bsrl round
241 bfclr LOCAL_SGN(%a0){
242 beqs cu_nmrsc
243 bsetb
244cu_nmrsc:
245 movew LOCAL_EX(%a0),%d1
246 andw
247 cmpw
248 blt cu_wrexn
249|
250| The operand is above precision boundaries. Use t_ovfl to
251| generate the correct value.
252|
253cu_novfl:
254 bsr t_ovfl
255 bra cu_wrexn
256|
257| The operand is below precision boundaries. Use denorm to
258| generate the correct value.
259|
260cu_nunfl:
261 bclrb
262 sne LOCAL_SGN(%a0)
263 bsr denorm
264 bfclr LOCAL_SGN(%a0){
265 beqs cu_nucont
266 bsetb
267cu_nucont:
268 bfextu FPCR_MODE(%a6){
269 btstb
270 bne inst_d
271 btstb
272 bne inst_s
273 swap %d1
274 moveb FPCR_MODE(%a6),%d1
275 lsrb
276 swap %d1
277 bra inst_sd
278inst_d:
279 orl
280 bra inst_sd
281inst_s:
282 orl
283inst_sd:
284 bclrb
285 sne LOCAL_SGN(%a0)
286 bsrl round
287 bfclr LOCAL_SGN(%a0){
288 beqs cu_nuflp
289 bsetb
290cu_nuflp:
291 btstb
292 beqs cu_nuninx
293 orl
294cu_nuninx:
295 tstl LOCAL_HI(%a0) |test for zero
296 bnes cu_nunzro
297 tstl LOCAL_LO(%a0)
298 bnes cu_nunzro
299|
300| The mantissa is zero from the denorm loop. Check sign and rmode
301| to see if rounding should have occurred which would leave the lsb.
302|
303 movel USER_FPCR(%a6),%d0
304 andil
305 cmpil
306 blts cu_nzro
307 bnes cu_nrp
308cu_nrm:
309 tstw LOCAL_EX(%a0) |if positive, set lsb
310 bges cu_nzro
311 btstb
312 beqs cu_nincs
313 bras cu_nincd
314cu_nrp:
315 tstw LOCAL_EX(%a0) |if positive, set lsb
316 blts cu_nzro
317 btstb
318 beqs cu_nincs
319cu_nincd:
320 orl
321 bra cu_nunzro
322cu_nincs:
323 orl
324 bra cu_nunzro
325cu_nzro:
326 orl
327 moveb STAG(%a6),%d0
328 andib
329 cmpib
330 beqs cu_numv
331cu_nunzro:
332 orl
333cu_numv:
334 movel (%a0),ETEMP(%a6)
335 movel 4(%a0),ETEMP_HI(%a6)
336 movel 8(%a0),ETEMP_LO(%a6)
337|
338| Write the result to memory, setting the fpsr cc bits. NaN and Inf
339| bypass cu_wrexn.
340|
341cu_wrexn:
342 tstw LOCAL_EX(%a0) |test for zero
343 beqs cu_wrzero
344 cmpw
345 bnes cu_wreon
346cu_wrzero:
347 orl
348cu_wreon:
349 tstw LOCAL_EX(%a0)
350 bpl wr_etemp
351 orl
352 bra wr_etemp
353
354|
355| HANDLE SOURCE DENORM HERE
356|
357| ;clear denorm stag to norm
358| ;write the new tag & ete15 to the fstack
359mon_dnrm:
360|
361| At this point, check for the cases in which normalizing the
362| denorm produces incorrect results.
363|
364 tstb DY_MO_FLG(%a6) |all cases of dyadic instructions would
365 bnes nrm_src |require normalization of denorm
366
367| At this point:
368| monadic instructions: fabs = $18 fneg = $1a ftst = $3a
369| fmove = $00 fsmove = $40 fdmove = $44
370| fsqrt = $05* fssqrt = $41 fdsqrt = $45
371| (*fsqrt reencoded to $05)
372|
373 movew CMDREG1B(%a6),%d0 |get command register
374 andil
375|
376| At this point, fabs, fneg, fsmove, fdmove, ftst, fsqrt, fssqrt, and
377| fdsqrt are possible.
378| For cases fabs, fneg, fsmove, and fdmove goto spos (do not normalize)
379| For cases fsqrt, fssqrt, and fdsqrt goto nrm_src (do normalize)
380|
381 btstl
382 bnes nrm_src |weed out fsqrt instructions
383 st CU_ONLY(%a6) |set cu-only inst flag
384 bra cu_dnrm |fmove, fabs, fneg, ftst
385| ;cases go to cu_dnrm
386nrm_src:
387 bclrb
388 sne LOCAL_SGN(%a0)
389 bsr nrm_set |normalize number (exponent will go
390| ; negative)
391 bclrb
392
393 bfclr LOCAL_SGN(%a0){
394 beqs spos
395 bsetb
396spos:
397 bfclr STAG(%a6){
398 bsetb
399 orb
400normal:
401 tstb DNRM_FLG(%a6) |check if any of the ops were denorms
402 bne ck_wrap |if so, check if it is a potential
403| ;wrap-around case
404fix_stk:
405 moveb
406 bclrb
407
408 clrw NMNEXC(%a6)
409
410 st RES_FLG(%a6) |indicate that a restore is needed
411 rts
412
413|
414| cu_dnrm handles all cu-only instructions (fmove, fabs, fneg, and
415| ftst) completely in software without an frestore to the 040.
416|
417cu_dnrm:
418 st CU_ONLY(%a6)
419 movew CMDREG1B(%a6),%d0
420 andib
421 tstb %d0
422 beql cu_dmove |if zero, it is an fmove
423 cmpib
424 beql cu_dabs |if $18, it is fabs
425 cmpib
426 beql cu_dneg |if $1a, it is fneg
427|
428| Inst is ftst. Check the source operand and set the cc's accordingly.
429| No write is done, so simply rts.
430|
431cu_dtst:
432 movew LOCAL_EX(%a0),%d0
433 bclrl
434 sne LOCAL_SGN(%a0)
435 beqs cu_dtpo
436 orl
437cu_dtpo:
438 cmpiw
439 bnes cu_dtcz
440 tstl LOCAL_HI(%a0)
441 bnes cu_dtn
442 tstl LOCAL_LO(%a0)
443 bnes cu_dtn
444 orl
445 rts
446cu_dtn:
447 orl
448 movel ETEMP_EX(%a6),FPTEMP_EX(%a6) |set up fptemp sign for
449| ;snan handler
450 rts
451cu_dtcz:
452 tstl LOCAL_HI(%a0)
453 bnel cu_dtsx
454 tstl LOCAL_LO(%a0)
455 bnel cu_dtsx
456 orl
457cu_dtsx:
458 rts
459|
460| Inst is fabs. Execute the absolute value function on the input.
461| Branch to the fmove code.
462|
463cu_dabs:
464 bclrb
465 bras cu_dmove |fmove code will finish
466|
467| Inst is fneg. Execute the negate value function on the input.
468| Fall though to the fmove code.
469|
470cu_dneg:
471 bchgb
472|
473| Inst is fmove. This code also handles all result writes.
474| If bit 2 is set, round is forced to double. If it is clear,
475| and bit 6 is set, round is forced to single. If both are clear,
476| the round precision is found in the fpcr. If the rounding precision
477| is double or single, the result is zero, and the mode is checked
478| to determine if the lsb of the result should be set.
479|
480cu_dmove:
481 btstb
482 bne cu_dmrd
483 btstb
484 bne cu_dmrs
485|
486| The move or operation is not with forced precision. Use the
487| FPCR_MODE byte to get rounding.
488|
489cu_dmnr:
490 bfextu FPCR_MODE(%a6){
491 tstb %d0 |check for extended
492 beq cu_wrexd |if so, just write result
493 cmpib
494 beq cu_dmrs |fall through to double
495|
496| The move is fdmove or round precision is double. Result is zero.
497| Check rmode for rp or rm and set lsb accordingly.
498|
499cu_dmrd:
500 bfextu FPCR_MODE(%a6){
501 tstw LOCAL_EX(%a0) |check sign
502 blts cu_dmdn
503 cmpib
504 bne cu_dpd |load double pos zero
505 bra cu_dpdr |load double pos zero w/lsb
506cu_dmdn:
507 cmpib
508 bne cu_dnd |load double neg zero
509 bra cu_dndr |load double neg zero w/lsb
510|
511| The move is fsmove or round precision is single. Result is zero.
512| Check for rp or rm and set lsb accordingly.
513|
514cu_dmrs:
515 bfextu FPCR_MODE(%a6){
516 tstw LOCAL_EX(%a0) |check sign
517 blts cu_dmsn
518 cmpib
519 bne cu_spd |load single pos zero
520 bra cu_spdr |load single pos zero w/lsb
521cu_dmsn:
522 cmpib
523 bne cu_snd |load single neg zero
524 bra cu_sndr |load single neg zero w/lsb
525|
526| The precision is extended, so the result in etemp is correct.
527| Simply set unfl (not inex2 or aunfl) and write the result to
528| the correct fp register.
529cu_wrexd:
530 orl
531 tstw LOCAL_EX(%a0)
532 beq wr_etemp
533 orl
534 bra wr_etemp
535|
536| These routines write +/- zero in double format. The routines
537| cu_dpdr and cu_dndr set the double lsb.
538|
539cu_dpd:
540 movel
541 clrl LOCAL_HI(%a0)
542 clrl LOCAL_LO(%a0)
543 orl
544 orl
545 bra wr_etemp
546cu_dpdr:
547 movel
548 clrl LOCAL_HI(%a0)
549 movel
550 orl
551 bra wr_etemp
552cu_dnd:
553 movel
554 clrl LOCAL_HI(%a0)
555 clrl LOCAL_LO(%a0)
556 orl
557 orl
558 orl
559 bra wr_etemp
560cu_dndr:
561 movel
562 clrl LOCAL_HI(%a0)
563 movel
564 orl
565 orl
566 bra wr_etemp
567|
568| These routines write +/- zero in single format. The routines
569| cu_dpdr and cu_dndr set the single lsb.
570|
571cu_spd:
572 movel
573 clrl LOCAL_HI(%a0)
574 clrl LOCAL_LO(%a0)
575 orl
576 orl
577 bra wr_etemp
578cu_spdr:
579 movel
580 movel
581 clrl LOCAL_LO(%a0)
582 orl
583 bra wr_etemp
584cu_snd:
585 movel
586 clrl LOCAL_HI(%a0)
587 clrl LOCAL_LO(%a0)
588 orl
589 orl
590 orl
591 bra wr_etemp
592cu_sndr:
593 movel
594 movel
595 clrl LOCAL_LO(%a0)
596 orl
597 orl
598 bra wr_etemp
599
600|
601| This code checks for 16-bit overflow conditions on dyadic
602| operations which are not restorable into the floating-point
603| unit and must be completed in software. Basically, this
604| condition exists with a very large norm and a denorm. One
605| of the operands must be denormalized to enter this code.
606|
607| Flags used:
608| DY_MO_FLG contains 0 for monadic op, $ff for dyadic
609| DNRM_FLG contains $00 for neither op denormalized
610| $0f for the destination op denormalized
611| $f0 for the source op denormalized
612| $ff for both ops denormalized
613|
614| The wrap-around condition occurs for add, sub, div, and cmp
615| when
616|
617| abs(dest_exp - src_exp) >= $8000
618|
619| and for mul when
620|
621| (dest_exp + src_exp) < $0
622|
623| we must process the operation here if this case is true.
624|
625| The rts following the frcfpn routine is the exit from res_func
626| for this condition. The restore flag (RES_FLG) is left clear.
627| No frestore is done unless an exception is to be reported.
628|
629| For fadd:
630| if(sign_of(dest) != sign_of(src))
631| replace exponent of src with $3fff (keep sign)
632| use fpu to perform dest+new_src (user's rmode and X)
633| clr sticky
634| else
635| set sticky
636| call round with user's precision and mode
637| move result to fpn and wbtemp
638|
639| For fsub:
640| if(sign_of(dest) == sign_of(src))
641| replace exponent of src with $3fff (keep sign)
642| use fpu to perform dest+new_src (user's rmode and X)
643| clr sticky
644| else
645| set sticky
646| call round with user's precision and mode
647| move result to fpn and wbtemp
648|
649| For fdiv/fsgldiv:
650| if(both operands are denorm)
651| restore_to_fpu;
652| if(dest is norm)
653| force_ovf;
654| else(dest is denorm)
655| force_unf:
656|
657| For fcmp:
658| if(dest is norm)
659| N = sign_of(dest);
660| else(dest is denorm)
661| N = sign_of(src);
662|
663| For fmul:
664| if(both operands are denorm)
665| force_unf;
666| if((dest_exp + src_exp) < 0)
667| force_unf:
668| else
669| restore_to_fpu;
670|
671| local equates:
672 .set addcode,0x22
673 .set subcode,0x28
674 .set mulcode,0x23
675 .set divcode,0x20
676 .set cmpcode,0x38
677ck_wrap:
678 | tstb DY_MO_FLG(%a6) ;check for fsqrt
679 beq fix_stk |if zero, it is fsqrt
680 movew CMDREG1B(%a6),%d0
681 andiw
682 cmpiw
683 beq wrap_add
684 cmpiw
685 beq wrap_sub
686 cmpiw
687 beq wrap_mul
688 cmpiw
689 beq wrap_cmp
690|
691| Inst is fdiv.
692|
693wrap_div:
694 cmpb
695 beq fix_stk |restore to fpu
696|
697| One of the ops is denormalized. Test for wrap condition
698| and force the result.
699|
700 cmpb
701 bnes div_srcd
702div_destd:
703 bsrl ckinf_ns
704 bne fix_stk
705 bfextu ETEMP_EX(%a6){
706 bfexts FPTEMP_EX(%a6){
707 subl %d1,%d0 |subtract dest from src
708 cmpl
709 blt fix_stk |if less, not wrap case
710 clrb WBTEMP_SGN(%a6)
711 movew ETEMP_EX(%a6),%d0 |find the sign of the result
712 movew FPTEMP_EX(%a6),%d1
713 eorw %d1,%d0
714 andiw
715 beq force_unf
716 st WBTEMP_SGN(%a6)
717 bra force_unf
718
719ckinf_ns:
720 moveb STAG(%a6),%d0 |check source tag for inf or nan
721 bra ck_in_com
722ckinf_nd:
723 moveb DTAG(%a6),%d0 |check destination tag for inf or nan
724ck_in_com:
725 andib
726 cmpb
727 beq nan_or_inf |not wrap case
728 cmpb
729 beq nan_or_inf |yes, not wrap case?
730 cmpb
731 beq nan_or_inf |yes
732 clrl %d0
733 rts |then ; it is either a zero of norm,
734| ;check wrap case
735nan_or_inf:
736 moveql
737 rts
738
739
740
741div_srcd:
742 bsrl ckinf_nd
743 bne fix_stk
744 bfextu FPTEMP_EX(%a6){
745 bfexts ETEMP_EX(%a6){
746 subl %d1,%d0 |subtract src from dest
747 cmpl
748 blt fix_stk |if less, not wrap case
749 clrb WBTEMP_SGN(%a6)
750 movew ETEMP_EX(%a6),%d0 |find the sign of the result
751 movew FPTEMP_EX(%a6),%d1
752 eorw %d1,%d0
753 andiw
754 beqs force_ovf
755 st WBTEMP_SGN(%a6)
756|
757| This code handles the case of the instruction resulting in
758| an overflow condition.
759|
760force_ovf:
761 bclrb
762 orl
763 clrw NMNEXC(%a6)
764 leal WBTEMP(%a6),%a0 |point a0 to memory location
765 movew CMDREG1B(%a6),%d0
766 btstl
767 beqs frcovf_fpcr
768 btstl
769 bnes frcovf_dbl
770 movel
771 bras frcovf_rnd
772frcovf_dbl:
773 movel
774 bras frcovf_rnd
775frcovf_fpcr:
776 bfextu FPCR_MODE(%a6){
777frcovf_rnd:
778
779| The 881/882 does not set inex2 for the following case, so the
780| line is commented out to be compatible with 881/882
781| tst.b %d0
782| beq.b frcovf_x
783| or.l
784
785|frcovf_x:
786 bsrl ovf_res |get correct result based on
787| ;round precision/mode. This
788| ;sets FPSR_CC correctly
789| ;returns in external format
790 bfclr WBTEMP_SGN(%a6){
791 beq frcfpn
792 bsetb
793 bra frcfpn
794|
795| Inst is fadd.
796|
797wrap_add:
798 cmpb
799 beq fix_stk |restore to fpu
800|
801| One of the ops is denormalized. Test for wrap condition
802| and complete the instruction.
803|
804 cmpb
805 bnes add_srcd
806add_destd:
807 bsrl ckinf_ns
808 bne fix_stk
809 bfextu ETEMP_EX(%a6){
810 bfexts FPTEMP_EX(%a6){
811 subl %d1,%d0 |subtract dest from src
812 cmpl
813 blt fix_stk |if less, not wrap case
814 bra add_wrap
815add_srcd:
816 bsrl ckinf_nd
817 bne fix_stk
818 bfextu FPTEMP_EX(%a6){
819 bfexts ETEMP_EX(%a6){
820 subl %d1,%d0 |subtract src from dest
821 cmpl
822 blt fix_stk |if less, not wrap case
823|
824| Check the signs of the operands. If they are unlike, the fpu
825| can be used to add the norm and 1.0 with the sign of the
826| denorm and it will correctly generate the result in extended
827| precision. We can then call round with no sticky and the result
828| will be correct for the user's rounding mode and precision. If
829| the signs are the same, we call round with the sticky bit set
830| and the result will be correct for the user's rounding mode and
831| precision.
832|
833add_wrap:
834 movew ETEMP_EX(%a6),%d0
835 movew FPTEMP_EX(%a6),%d1
836 eorw %d1,%d0
837 andiw
838 beq add_same
839|
840| The signs are unlike.
841|
842 cmpb
843 bnes add_u_srcd
844 movew FPTEMP_EX(%a6),%d0
845 andiw
846 orw
847 movew %d0,FPTEMP_EX(%a6) |in the denorm
848 movel USER_FPCR(%a6),%d0
849 andil
850 fmovel %d0,%fpcr |set up users rmode and X
851 fmovex ETEMP(%a6),%fp0
852 faddx FPTEMP(%a6),%fp0
853 leal WBTEMP(%a6),%a0 |point a0 to wbtemp in frame
854 fmovel %fpsr,%d1
855 orl %d1,USER_FPSR(%a6) |capture cc's and inex from fadd
856 fmovex %fp0,WBTEMP(%a6) |write result to memory
857 lsrl
858 movel USER_FPCR(%a6),%d1
859 andil
860 lsrl
861 swap %d1
862 orl %d0,%d1 |set up for round call
863 clrl %d0 |force sticky to zero
864 bclrb
865 sne WBTEMP_SGN(%a6)
866 bsrl round |round result to users rmode & prec
867 bfclr WBTEMP_SGN(%a6){
868 beq frcfpnr
869 bsetb
870 bra frcfpnr
871add_u_srcd:
872 movew ETEMP_EX(%a6),%d0
873 andiw
874 orw
875 movew %d0,ETEMP_EX(%a6) |in the denorm
876 movel USER_FPCR(%a6),%d0
877 andil
878 fmovel %d0,%fpcr |set up users rmode and X
879 fmovex ETEMP(%a6),%fp0
880 faddx FPTEMP(%a6),%fp0
881 fmovel %fpsr,%d1
882 orl %d1,USER_FPSR(%a6) |capture cc's and inex from fadd
883 leal WBTEMP(%a6),%a0 |point a0 to wbtemp in frame
884 fmovex %fp0,WBTEMP(%a6) |write result to memory
885 lsrl
886 movel USER_FPCR(%a6),%d1
887 andil
888 lsrl
889 swap %d1
890 orl %d0,%d1 |set up for round call
891 clrl %d0 |force sticky to zero
892 bclrb
893 sne WBTEMP_SGN(%a6) |use internal format for round
894 bsrl round |round result to users rmode & prec
895 bfclr WBTEMP_SGN(%a6){
896 beq frcfpnr
897 bsetb
898 bra frcfpnr
899|
900| Signs are alike:
901|
902add_same:
903 cmpb
904 bnes add_s_srcd
905add_s_destd:
906 leal ETEMP(%a6),%a0
907 movel USER_FPCR(%a6),%d0
908 andil
909 lsrl
910 movel USER_FPCR(%a6),%d1
911 andil
912 lsrl
913 swap %d1
914 orl %d0,%d1 |set up for round call
915 movel
916 bclrb
917 sne ETEMP_SGN(%a6)
918 bsrl round |round result to users rmode & prec
919 bfclr ETEMP_SGN(%a6){
920 beqs add_s_dclr
921 bsetb
922add_s_dclr:
923 leal WBTEMP(%a6),%a0
924 movel ETEMP(%a6),(%a0) |write result to wbtemp
925 movel ETEMP_HI(%a6),4(%a0)
926 movel ETEMP_LO(%a6),8(%a0)
927 tstw ETEMP_EX(%a6)
928 bgt add_ckovf
929 orl
930 bra add_ckovf
931add_s_srcd:
932 leal FPTEMP(%a6),%a0
933 movel USER_FPCR(%a6),%d0
934 andil
935 lsrl
936 movel USER_FPCR(%a6),%d1
937 andil
938 lsrl
939 swap %d1
940 orl %d0,%d1 |set up for round call
941 movel
942 bclrb
943 sne FPTEMP_SGN(%a6)
944 bsrl round |round result to users rmode & prec
945 bfclr FPTEMP_SGN(%a6){
946 beqs add_s_sclr
947 bsetb
948add_s_sclr:
949 leal WBTEMP(%a6),%a0
950 movel FPTEMP(%a6),(%a0) |write result to wbtemp
951 movel FPTEMP_HI(%a6),4(%a0)
952 movel FPTEMP_LO(%a6),8(%a0)
953 tstw FPTEMP_EX(%a6)
954 bgt add_ckovf
955 orl
956add_ckovf:
957 movew WBTEMP_EX(%a6),%d0
958 andiw
959 cmpiw
960 bne frcfpnr
961|
962| The result has overflowed to $7fff exponent. Set I, ovfl,
963| and aovfl, and clr the mantissa (incorrectly set by the
964| round routine.)
965|
966 orl
967 clrl 4(%a0)
968 bra frcfpnr
969|
970| Inst is fsub.
971|
972wrap_sub:
973 cmpb
974 beq fix_stk |restore to fpu
975|
976| One of the ops is denormalized. Test for wrap condition
977| and complete the instruction.
978|
979 cmpb
980 bnes sub_srcd
981sub_destd:
982 bsrl ckinf_ns
983 bne fix_stk
984 bfextu ETEMP_EX(%a6){
985 bfexts FPTEMP_EX(%a6){
986 subl %d1,%d0 |subtract src from dest
987 cmpl
988 blt fix_stk |if less, not wrap case
989 bra sub_wrap
990sub_srcd:
991 bsrl ckinf_nd
992 bne fix_stk
993 bfextu FPTEMP_EX(%a6){
994 bfexts ETEMP_EX(%a6){
995 subl %d1,%d0 |subtract dest from src
996 cmpl
997 blt fix_stk |if less, not wrap case
998|
999| Check the signs of the operands. If they are alike, the fpu
1000| can be used to subtract from the norm 1.0 with the sign of the
1001| denorm and it will correctly generate the result in extended
1002| precision. We can then call round with no sticky and the result
1003| will be correct for the user's rounding mode and precision. If
1004| the signs are unlike, we call round with the sticky bit set
1005| and the result will be correct for the user's rounding mode and
1006| precision.
1007|
1008sub_wrap:
1009 movew ETEMP_EX(%a6),%d0
1010 movew FPTEMP_EX(%a6),%d1
1011 eorw %d1,%d0
1012 andiw
1013 bne sub_diff
1014|
1015| The signs are alike.
1016|
1017 cmpb
1018 bnes sub_u_srcd
1019 movew FPTEMP_EX(%a6),%d0
1020 andiw
1021 orw
1022 movew %d0,FPTEMP_EX(%a6) |in the denorm
1023 movel USER_FPCR(%a6),%d0
1024 andil
1025 fmovel %d0,%fpcr |set up users rmode and X
1026 fmovex FPTEMP(%a6),%fp0
1027 fsubx ETEMP(%a6),%fp0
1028 fmovel %fpsr,%d1
1029 orl %d1,USER_FPSR(%a6) |capture cc's and inex from fadd
1030 leal WBTEMP(%a6),%a0 |point a0 to wbtemp in frame
1031 fmovex %fp0,WBTEMP(%a6) |write result to memory
1032 lsrl
1033 movel USER_FPCR(%a6),%d1
1034 andil
1035 lsrl
1036 swap %d1
1037 orl %d0,%d1 |set up for round call
1038 clrl %d0 |force sticky to zero
1039 bclrb
1040 sne WBTEMP_SGN(%a6)
1041 bsrl round |round result to users rmode & prec
1042 bfclr WBTEMP_SGN(%a6){
1043 beq frcfpnr
1044 bsetb
1045 bra frcfpnr
1046sub_u_srcd:
1047 movew ETEMP_EX(%a6),%d0
1048 andiw
1049 orw
1050 movew %d0,ETEMP_EX(%a6) |in the denorm
1051 movel USER_FPCR(%a6),%d0
1052 andil
1053 fmovel %d0,%fpcr |set up users rmode and X
1054 fmovex FPTEMP(%a6),%fp0
1055 fsubx ETEMP(%a6),%fp0
1056 fmovel %fpsr,%d1
1057 orl %d1,USER_FPSR(%a6) |capture cc's and inex from fadd
1058 leal WBTEMP(%a6),%a0 |point a0 to wbtemp in frame
1059 fmovex %fp0,WBTEMP(%a6) |write result to memory
1060 lsrl
1061 movel USER_FPCR(%a6),%d1
1062 andil
1063 lsrl
1064 swap %d1
1065 orl %d0,%d1 |set up for round call
1066 clrl %d0 |force sticky to zero
1067 bclrb
1068 sne WBTEMP_SGN(%a6)
1069 bsrl round |round result to users rmode & prec
1070 bfclr WBTEMP_SGN(%a6){
1071 beq frcfpnr
1072 bsetb
1073 bra frcfpnr
1074|
1075| Signs are unlike:
1076|
1077sub_diff:
1078 cmpb
1079 bnes sub_s_srcd
1080sub_s_destd:
1081 leal ETEMP(%a6),%a0
1082 movel USER_FPCR(%a6),%d0
1083 andil
1084 lsrl
1085 movel USER_FPCR(%a6),%d1
1086 andil
1087 lsrl
1088 swap %d1
1089 orl %d0,%d1 |set up for round call
1090 movel
1091|
1092| Since the dest is the denorm, the sign is the opposite of the
1093| norm sign.
1094|
1095 eoriw
1096 tstw ETEMP_EX(%a6)
1097 bgts sub_s_dwr
1098 orl
1099sub_s_dwr:
1100 bclrb
1101 sne ETEMP_SGN(%a6)
1102 bsrl round |round result to users rmode & prec
1103 bfclr ETEMP_SGN(%a6){
1104 beqs sub_s_dclr
1105 bsetb
1106sub_s_dclr:
1107 leal WBTEMP(%a6),%a0
1108 movel ETEMP(%a6),(%a0) |write result to wbtemp
1109 movel ETEMP_HI(%a6),4(%a0)
1110 movel ETEMP_LO(%a6),8(%a0)
1111 bra sub_ckovf
1112sub_s_srcd:
1113 leal FPTEMP(%a6),%a0
1114 movel USER_FPCR(%a6),%d0
1115 andil
1116 lsrl
1117 movel USER_FPCR(%a6),%d1
1118 andil
1119 lsrl
1120 swap %d1
1121 orl %d0,%d1 |set up for round call
1122 movel
1123 bclrb
1124 sne FPTEMP_SGN(%a6)
1125 bsrl round |round result to users rmode & prec
1126 bfclr FPTEMP_SGN(%a6){
1127 beqs sub_s_sclr
1128 bsetb
1129sub_s_sclr:
1130 leal WBTEMP(%a6),%a0
1131 movel FPTEMP(%a6),(%a0) |write result to wbtemp
1132 movel FPTEMP_HI(%a6),4(%a0)
1133 movel FPTEMP_LO(%a6),8(%a0)
1134 tstw FPTEMP_EX(%a6)
1135 bgt sub_ckovf
1136 orl
1137sub_ckovf:
1138 movew WBTEMP_EX(%a6),%d0
1139 andiw
1140 cmpiw
1141 bne frcfpnr
1142|
1143| The result has overflowed to $7fff exponent. Set I, ovfl,
1144| and aovfl, and clr the mantissa (incorrectly set by the
1145| round routine.)
1146|
1147 orl
1148 clrl 4(%a0)
1149 bra frcfpnr
1150|
1151| Inst is fcmp.
1152|
1153wrap_cmp:
1154 cmpb
1155 beq fix_stk |restore to fpu
1156|
1157| One of the ops is denormalized. Test for wrap condition
1158| and complete the instruction.
1159|
1160 cmpb
1161 bnes cmp_srcd
1162cmp_destd:
1163 bsrl ckinf_ns
1164 bne fix_stk
1165 bfextu ETEMP_EX(%a6){
1166 bfexts FPTEMP_EX(%a6){
1167 subl %d1,%d0 |subtract dest from src
1168 cmpl
1169 blt fix_stk |if less, not wrap case
1170 tstw ETEMP_EX(%a6) |set N to ~sign_of(src)
1171 bge cmp_setn
1172 rts
1173cmp_srcd:
1174 bsrl ckinf_nd
1175 bne fix_stk
1176 bfextu FPTEMP_EX(%a6){
1177 bfexts ETEMP_EX(%a6){
1178 subl %d1,%d0 |subtract src from dest
1179 cmpl
1180 blt fix_stk |if less, not wrap case
1181 tstw FPTEMP_EX(%a6) |set N to sign_of(dest)
1182 blt cmp_setn
1183 rts
1184cmp_setn:
1185 orl
1186 rts
1187
1188|
1189| Inst is fmul.
1190|
1191wrap_mul:
1192 cmpb
1193 beq force_unf |force an underflow (really!)
1194|
1195| One of the ops is denormalized. Test for wrap condition
1196| and complete the instruction.
1197|
1198 cmpb
1199 bnes mul_srcd
1200mul_destd:
1201 bsrl ckinf_ns
1202 bne fix_stk
1203 bfextu ETEMP_EX(%a6){
1204 bfexts FPTEMP_EX(%a6){
1205 addl %d1,%d0 |subtract dest from src
1206 bgt fix_stk
1207 bra force_unf
1208mul_srcd:
1209 bsrl ckinf_nd
1210 bne fix_stk
1211 bfextu FPTEMP_EX(%a6){
1212 bfexts ETEMP_EX(%a6){
1213 addl %d1,%d0 |subtract src from dest
1214 bgt fix_stk
1215
1216|
1217| This code handles the case of the instruction resulting in
1218| an underflow condition.
1219|
1220force_unf:
1221 bclrb
1222 orl
1223 clrw NMNEXC(%a6)
1224 clrb WBTEMP_SGN(%a6)
1225 movew ETEMP_EX(%a6),%d0 |find the sign of the result
1226 movew FPTEMP_EX(%a6),%d1
1227 eorw %d1,%d0
1228 andiw
1229 beqs frcunfcont
1230 st WBTEMP_SGN(%a6)
1231frcunfcont:
1232 lea WBTEMP(%a6),%a0 |point a0 to memory location
1233 movew CMDREG1B(%a6),%d0
1234 btstl
1235 beqs frcunf_fpcr
1236 btstl
1237 bnes frcunf_dbl
1238 movel
1239 bras frcunf_rnd
1240frcunf_dbl:
1241 movel
1242 bras frcunf_rnd
1243frcunf_fpcr:
1244 bfextu FPCR_MODE(%a6){
1245frcunf_rnd:
1246 bsrl unf_sub |get correct result based on
1247| ;round precision/mode. This
1248| ;sets FPSR_CC correctly
1249 bfclr WBTEMP_SGN(%a6){
1250 beqs frcfpn
1251 bsetb
1252 bra frcfpn
1253
1254|
1255| Write the result to the user's fpn. All results must be HUGE to be
1256| written; otherwise the results would have overflowed or underflowed.
1257| If the rounding precision is single or double, the ovf_res routine
1258| is needed to correctly supply the max value.
1259|
1260frcfpnr:
1261 movew CMDREG1B(%a6),%d0
1262 btstl
1263 beqs frcfpn_fpcr
1264 btstl
1265 bnes frcfpn_dbl
1266 movel
1267 bras frcfpn_rnd
1268frcfpn_dbl:
1269 movel
1270 bras frcfpn_rnd
1271frcfpn_fpcr:
1272 bfextu FPCR_MODE(%a6){
1273 tstb %d0
1274 beqs frcfpn |if extended, write what you got
1275frcfpn_rnd:
1276 bclrb
1277 sne WBTEMP_SGN(%a6)
1278 bsrl ovf_res |get correct result based on
1279| ;round precision/mode. This
1280| ;sets FPSR_CC correctly
1281 bfclr WBTEMP_SGN(%a6){
1282 beqs frcfpn_clr
1283 bsetb
1284frcfpn_clr:
1285 orl
1286|
1287| Perform the write.
1288|
1289frcfpn:
1290 bfextu CMDREG1B(%a6){
1291 cmpib
1292 bles frc0123 |check if dest is fp0-fp3
1293 movel
1294 subl %d0,%d1
1295 clrl %d0
1296 bsetl %d1,%d0
1297 fmovemx WBTEMP(%a6),%d0
1298 rts
1299frc0123:
1300 cmpib
1301 beqs frc0_dst
1302 cmpib
1303 beqs frc1_dst
1304 cmpib
1305 beqs frc2_dst
1306frc3_dst:
1307 movel WBTEMP_EX(%a6),USER_FP3(%a6)
1308 movel WBTEMP_HI(%a6),USER_FP3+4(%a6)
1309 movel WBTEMP_LO(%a6),USER_FP3+8(%a6)
1310 rts
1311frc2_dst:
1312 movel WBTEMP_EX(%a6),USER_FP2(%a6)
1313 movel WBTEMP_HI(%a6),USER_FP2+4(%a6)
1314 movel WBTEMP_LO(%a6),USER_FP2+8(%a6)
1315 rts
1316frc1_dst:
1317 movel WBTEMP_EX(%a6),USER_FP1(%a6)
1318 movel WBTEMP_HI(%a6),USER_FP1+4(%a6)
1319 movel WBTEMP_LO(%a6),USER_FP1+8(%a6)
1320 rts
1321frc0_dst:
1322 movel WBTEMP_EX(%a6),USER_FP0(%a6)
1323 movel WBTEMP_HI(%a6),USER_FP0+4(%a6)
1324 movel WBTEMP_LO(%a6),USER_FP0+8(%a6)
1325 rts
1326
1327|
1328| Write etemp to fpn.
1329| A check is made on enabled and signalled snan exceptions,
1330| and the destination is not overwritten if this condition exists.
1331| This code is designed to make fmoveins of unsupported data types
1332| faster.
1333|
1334wr_etemp:
1335 btstb
1336 beqs fmoveinc |enabled, force restore
1337 btstb
1338 beqs fmoveinc |the dest
1339 movel ETEMP_EX(%a6),FPTEMP_EX(%a6) |set up fptemp sign for
1340| ;snan handler
1341 tstb ETEMP(%a6) |check for negative
1342 blts snan_neg
1343 rts
1344snan_neg:
1345 orl
1346 rts
1347fmoveinc:
1348 clrw NMNEXC(%a6)
1349 bclrb
1350 moveb STAG(%a6),%d0 |check if stag is inf
1351 andib
1352 cmpib
1353 bnes fminc_cnan
1354 orl
1355 tstw LOCAL_EX(%a0) |check sign
1356 bges fminc_con
1357 orl
1358 bra fminc_con
1359fminc_cnan:
1360 cmpib
1361 bnes fminc_czero
1362 orl
1363 movel ETEMP_EX(%a6),FPTEMP_EX(%a6) |set up fptemp sign for
1364| ;snan handler
1365 tstw LOCAL_EX(%a0) |check sign
1366 bges fminc_con
1367 orl
1368 bra fminc_con
1369fminc_czero:
1370 cmpib
1371 bnes fminc_con
1372 orl
1373 tstw LOCAL_EX(%a0) |check sign
1374 bges fminc_con
1375 orl
1376fminc_con:
1377 bfextu CMDREG1B(%a6){
1378 cmpib
1379 bles fp0123 |check if dest is fp0-fp3
1380 movel
1381 subl %d0,%d1
1382 clrl %d0
1383 bsetl %d1,%d0
1384 fmovemx ETEMP(%a6),%d0
1385 rts
1386
1387fp0123:
1388 cmpib
1389 beqs fp0_dst
1390 cmpib
1391 beqs fp1_dst
1392 cmpib
1393 beqs fp2_dst
1394fp3_dst:
1395 movel ETEMP_EX(%a6),USER_FP3(%a6)
1396 movel ETEMP_HI(%a6),USER_FP3+4(%a6)
1397 movel ETEMP_LO(%a6),USER_FP3+8(%a6)
1398 rts
1399fp2_dst:
1400 movel ETEMP_EX(%a6),USER_FP2(%a6)
1401 movel ETEMP_HI(%a6),USER_FP2+4(%a6)
1402 movel ETEMP_LO(%a6),USER_FP2+8(%a6)
1403 rts
1404fp1_dst:
1405 movel ETEMP_EX(%a6),USER_FP1(%a6)
1406 movel ETEMP_HI(%a6),USER_FP1+4(%a6)
1407 movel ETEMP_LO(%a6),USER_FP1+8(%a6)
1408 rts
1409fp0_dst:
1410 movel ETEMP_EX(%a6),USER_FP0(%a6)
1411 movel ETEMP_HI(%a6),USER_FP0+4(%a6)
1412 movel ETEMP_LO(%a6),USER_FP0+8(%a6)
1413 rts
1414
1415opclass3:
1416 st CU_ONLY(%a6)
1417 movew CMDREG1B(%a6),%d0 |check if packed moveout
1418 andiw
1419 cmpiw
1420 beq pack_out |else it is norm or denorm
1421 bra mv_out
1422
1423
1424|
1425| MOVE OUT
1426|
1427
1428mv_tbl:
1429 .long li
1430 .long sgp
1431 .long xp
1432 .long mvout_end |should never be taken
1433 .long wi
1434 .long dp
1435 .long bi
1436 .long mvout_end |should never be taken
1437mv_out:
1438 bfextu CMDREG1B(%a6){
1439 leal mv_tbl,%a0
1440 movel %a0@(%d1:l:4),%a0
1441 jmp (%a0)
1442
1443|
1444| This exit is for move-out to memory. The aunfl bit is
1445| set if the result is inex and unfl is signalled.
1446|
1447mvout_end:
1448 btstb
1449 beqs no_aufl
1450 btstb
1451 beqs no_aufl
1452 bsetb
1453no_aufl:
1454 clrw NMNEXC(%a6)
1455 bclrb
1456 fmovel
1457|
1458| Return ETEMP to extended format from internal extended format so
1459| that gen_except will have a correctly signed value for ovfl/unfl
1460| handlers.
1461|
1462 bfclr ETEMP_SGN(%a6){
1463 beqs mvout_con
1464 bsetb
1465mvout_con:
1466 rts
1467|
1468| This exit is for move-out to int register. The aunfl bit is
1469| not set in any case for this move.
1470|
1471mvouti_end:
1472 clrw NMNEXC(%a6)
1473 bclrb
1474 fmovel
1475|
1476| Return ETEMP to extended format from internal extended format so
1477| that gen_except will have a correctly signed value for ovfl/unfl
1478| handlers.
1479|
1480 bfclr ETEMP_SGN(%a6){
1481 beqs mvouti_con
1482 bsetb
1483mvouti_con:
1484 rts
1485|
1486| li is used to handle a long integer source specifier
1487|
1488
1489li:
1490 moveql
1491
1492 btstb
1493 bne int_dnrm |if so, branch
1494
1495 fmovemx ETEMP(%a6),%fp0-%fp0
1496 fcmpd
1497| 41dfffffffc00000 in dbl prec = 401d0000fffffffe00000000 in ext prec
1498 fbge lo_plrg
1499 fcmpd
1500| c1e0000000000000 in dbl prec = c01e00008000000000000000 in ext prec
1501 fble lo_nlrg
1502|
1503| at this point, the answer is between the largest pos and neg values
1504|
1505 movel USER_FPCR(%a6),%d1 |use user's rounding mode
1506 andil
1507 fmovel %d1,%fpcr
1508 fmovel %fp0,L_SCR1(%a6) |let the 040 perform conversion
1509 fmovel %fpsr,%d1
1510 orl %d1,USER_FPSR(%a6) |capture inex2/ainex if set
1511 bra int_wrt
1512
1513
1514lo_plrg:
1515 movel
1516 fbeq int_wrt |exact answer
1517 fcmpd
1518| 41dfffffffe00000 in dbl prec = 401d0000ffffffff00000000 in ext prec
1519 fbge int_operr |set operr
1520 bra int_inx |set inexact
1521
1522lo_nlrg:
1523 movel
1524 fbeq int_wrt |exact answer
1525 fcmpd
1526| c1e0000000100000 in dbl prec = c01e00008000000080000000 in ext prec
1527 fblt int_operr |set operr
1528 bra int_inx |set inexact
1529
1530|
1531| wi is used to handle a word integer source specifier
1532|
1533
1534wi:
1535 moveql
1536
1537 btstb
1538 bne int_dnrm |branch if so
1539
1540 fmovemx ETEMP(%a6),%fp0-%fp0
1541 fcmps
1542| 46fffe00 in sgl prec = 400d0000fffe000000000000 in ext prec
1543 fbge wo_plrg
1544 fcmps
1545| c7000000 in sgl prec = c00e00008000000000000000 in ext prec
1546 fble wo_nlrg
1547
1548|
1549| at this point, the answer is between the largest pos and neg values
1550|
1551 movel USER_FPCR(%a6),%d1 |use user's rounding mode
1552 andil
1553 fmovel %d1,%fpcr
1554 fmovew %fp0,L_SCR1(%a6) |let the 040 perform conversion
1555 fmovel %fpsr,%d1
1556 orl %d1,USER_FPSR(%a6) |capture inex2/ainex if set
1557 bra int_wrt
1558
1559wo_plrg:
1560 movew
1561 fbeq int_wrt |exact answer
1562 fcmps
1563| 46ffff00 in sgl prec = 400d0000ffff000000000000 in ext prec
1564 fbge int_operr |set operr
1565 bra int_inx |set inexact
1566
1567wo_nlrg:
1568 movew
1569 fbeq int_wrt |exact answer
1570 fcmps
1571| c7000080 in sgl prec = c00e00008000800000000000 in ext prec
1572 fblt int_operr |set operr
1573 bra int_inx |set inexact
1574
1575|
1576| bi is used to handle a byte integer source specifier
1577|
1578
1579bi:
1580 moveql
1581
1582 btstb
1583 bne int_dnrm |branch if so
1584
1585 fmovemx ETEMP(%a6),%fp0-%fp0
1586 fcmps
1587| 42fe0000 in sgl prec = 40050000fe00000000000000 in ext prec
1588 fbge by_plrg
1589 fcmps
1590| c3000000 in sgl prec = c00600008000000000000000 in ext prec
1591 fble by_nlrg
1592
1593|
1594| at this point, the answer is between the largest pos and neg values
1595|
1596 movel USER_FPCR(%a6),%d1 |use user's rounding mode
1597 andil
1598 fmovel %d1,%fpcr
1599 fmoveb %fp0,L_SCR1(%a6) |let the 040 perform conversion
1600 fmovel %fpsr,%d1
1601 orl %d1,USER_FPSR(%a6) |capture inex2/ainex if set
1602 bra int_wrt
1603
1604by_plrg:
1605 moveb
1606 fbeq int_wrt |exact answer
1607 fcmps
1608| 42ff0000 in sgl prec = 40050000ff00000000000000 in ext prec
1609 fbge int_operr |set operr
1610 bra int_inx |set inexact
1611
1612by_nlrg:
1613 moveb
1614 fbeq int_wrt |exact answer
1615 fcmps
1616| c3008000 in sgl prec = c00600008080000000000000 in ext prec
1617 fblt int_operr |set operr
1618 bra int_inx |set inexact
1619
1620|
1621| Common integer routines
1622|
1623| int_drnrm---account for possible nonzero result for round up with positive
1624| operand and round down for negative answer. In the first case (result = 1)
1625| byte-width (store in d0) of result must be honored. In the second case,
1626| -1 in L_SCR1(a6) will cover all contingencies (FMOVE.B/W/L out).
1627
1628int_dnrm:
1629 movel
1630 bfextu FPCR_MODE(%a6){
1631 cmpb
1632 bmis int_inx | if RN or RZ, done
1633 bnes int_rp | if RP, continue below
1634 tstw ETEMP(%a6) | RM: store -1 in L_SCR1 if src is negative
1635 bpls int_inx | otherwise result is 0
1636 movel
1637 bras int_inx
1638int_rp:
1639 tstw ETEMP(%a6) | RP: store +1 of proper width in L_SCR1 if
1640| ; source is greater than 0
1641 bmis int_inx | otherwise, result is 0
1642 lea L_SCR1(%a6),%a1 | a1 is address of L_SCR1
1643 addal %d0,%a1 | offset by destination width -1
1644 subal
1645 bsetb
1646int_inx:
1647 oril
1648 bras int_wrt
1649int_operr:
1650 fmovemx %fp0-%fp0,FPTEMP(%a6) |FPTEMP must contain the extended
1651| ;precision source that needs to be
1652| ;converted to integer this is required
1653| ;if the operr exception is enabled.
1654| ;set operr/aiop (no inex2 on int ovfl)
1655
1656 oril
1657| ;fall through to perform int_wrt
1658int_wrt:
1659 movel EXC_EA(%a6),%a1 |load destination address
1660 tstl %a1 |check to see if it is a dest register
1661 beqs wrt_dn |write data register
1662 lea L_SCR1(%a6),%a0 |point to supervisor source address
1663 bsrl mem_write
1664 bra mvouti_end
1665
1666wrt_dn:
1667 movel %d0,-(%sp) |d0 currently contains the size to write
1668 bsrl get_fline |get_fline returns Dn in d0
1669 andiw
1670 movel (%sp)+,%d1 |get size
1671 cmpil
1672 beqs sz_long
1673 cmpil
1674 bnes sz_con
1675 orl
1676 bras sz_con
1677sz_long:
1678 orl
1679sz_con:
1680 movel %d0,%d1 |reg_dest expects size:reg in d1
1681 bsrl reg_dest |load proper data register
1682 bra mvouti_end
1683xp:
1684 lea ETEMP(%a6),%a0
1685 bclrb
1686 sne LOCAL_SGN(%a0)
1687 btstb
1688 bne xdnrm
1689 clrl %d0
1690 bras do_fp |do normal case
1691sgp:
1692 lea ETEMP(%a6),%a0
1693 bclrb
1694 sne LOCAL_SGN(%a0)
1695 btstb
1696 bne sp_catas |branch if so
1697 movew LOCAL_EX(%a0),%d0
1698 lea sp_bnds,%a1
1699 cmpw (%a1),%d0
1700 blt sp_under
1701 cmpw 2(%a1),%d0
1702 bgt sp_over
1703 movel
1704 bras do_fp |do normal case
1705dp:
1706 lea ETEMP(%a6),%a0
1707 bclrb
1708 sne LOCAL_SGN(%a0)
1709
1710 btstb
1711 bne dp_catas |branch if so
1712
1713 movew LOCAL_EX(%a0),%d0
1714 lea dp_bnds,%a1
1715
1716 cmpw (%a1),%d0
1717 blt dp_under
1718 cmpw 2(%a1),%d0
1719 bgt dp_over
1720
1721 movel
1722| ;fall through to do_fp
1723|
1724do_fp:
1725 bfextu FPCR_MODE(%a6){
1726 swap %d0 |rnd prec in upper word
1727 addl %d0,%d1 |d1 has PREC/MODE info
1728
1729 clrl %d0 |clear g,r,s
1730
1731 bsrl round |round
1732
1733 movel %a0,%a1
1734 movel EXC_EA(%a6),%a0
1735
1736 bfextu CMDREG1B(%a6){
1737| ;at this point only the dest
1738| ;formats sgl, dbl, ext are
1739| ;possible
1740 cmpb
1741 bgts ddbl |double=5, extended=2, single=1
1742 bnes dsgl
1743| ;fall through to dext
1744dext:
1745 bsrl dest_ext
1746 bra mvout_end
1747dsgl:
1748 bsrl dest_sgl
1749 bra mvout_end
1750ddbl:
1751 bsrl dest_dbl
1752 bra mvout_end
1753
1754|
1755| Handle possible denorm or catastrophic underflow cases here
1756|
1757xdnrm:
1758 bsr set_xop |initialize WBTEMP
1759 bsetb
1760
1761 movel %a0,%a1
1762 movel EXC_EA(%a6),%a0 |a0 has the destination pointer
1763 bsrl dest_ext |store to memory
1764 bsetb
1765 bra mvout_end
1766
1767sp_under:
1768 bsetb
1769
1770 cmpw 4(%a1),%d0
1771 blts sp_catas |catastrophic underflow case
1772
1773 movel
1774 movel
1775 bsrl dpspdnrm |expects d1 to have the proper
1776| ;denorm threshold
1777 bsrl dest_sgl |stores value to destination
1778 bsetb
1779 bra mvout_end |exit
1780
1781dp_under:
1782 bsetb
1783
1784 cmpw 4(%a1),%d0
1785 blts dp_catas |catastrophic underflow case
1786
1787 movel
1788 movel
1789 bsrl dpspdnrm |expects d1 to have proper
1790| ;denorm threshold
1791| ;expects d0 to have round precision
1792 bsrl dest_dbl |store value to destination
1793 bsetb
1794 bra mvout_end |exit
1795
1796|
1797| Handle catastrophic underflow cases here
1798|
1799sp_catas:
1800| Temp fix for z bit set in unf_sub
1801 movel USER_FPSR(%a6),-(%a7)
1802
1803 movel
1804
1805 bsrl unf_sub |a0 points to result
1806
1807 movel (%a7)+,USER_FPSR(%a6)
1808
1809 movel
1810 subw %d0,LOCAL_EX(%a0) |account for difference between
1811| ;denorm/norm bias
1812
1813 movel %a0,%a1 |a1 has the operand input
1814 movel EXC_EA(%a6),%a0 |a0 has the destination pointer
1815
1816 bsrl dest_sgl |store the result
1817 oril
1818 bra mvout_end
1819
1820dp_catas:
1821| Temp fix for z bit set in unf_sub
1822 movel USER_FPSR(%a6),-(%a7)
1823
1824 movel
1825 bsrl unf_sub |a0 points to result
1826
1827 movel (%a7)+,USER_FPSR(%a6)
1828
1829 movel
1830 subw %d0,LOCAL_EX(%a0) |account for difference between
1831| ;denorm/norm bias
1832
1833 movel %a0,%a1 |a1 has the operand input
1834 movel EXC_EA(%a6),%a0 |a0 has the destination pointer
1835
1836 bsrl dest_dbl |store the result
1837 oril
1838 bra mvout_end
1839
1840|
1841| Handle catastrophic overflow cases here
1842|
1843sp_over:
1844| Temp fix for z bit set in unf_sub
1845 movel USER_FPSR(%a6),-(%a7)
1846
1847 movel
1848 leal FP_SCR1(%a6),%a0 |use FP_SCR1 for creating result
1849 movel ETEMP_EX(%a6),(%a0)
1850 movel ETEMP_HI(%a6),4(%a0)
1851 movel ETEMP_LO(%a6),8(%a0)
1852 bsrl ovf_res
1853
1854 movel (%a7)+,USER_FPSR(%a6)
1855
1856 movel %a0,%a1
1857 movel EXC_EA(%a6),%a0
1858 bsrl dest_sgl
1859 orl
1860 bra mvout_end
1861
1862dp_over:
1863| Temp fix for z bit set in ovf_res
1864 movel USER_FPSR(%a6),-(%a7)
1865
1866 movel
1867 leal FP_SCR1(%a6),%a0 |use FP_SCR1 for creating result
1868 movel ETEMP_EX(%a6),(%a0)
1869 movel ETEMP_HI(%a6),4(%a0)
1870 movel ETEMP_LO(%a6),8(%a0)
1871 bsrl ovf_res
1872
1873 movel (%a7)+,USER_FPSR(%a6)
1874
1875 movel %a0,%a1
1876 movel EXC_EA(%a6),%a0
1877 bsrl dest_dbl
1878 orl
1879 bra mvout_end
1880
1881|
1882| DPSPDNRM
1883|
1884| This subroutine takes an extended normalized number and denormalizes
1885| it to the given round precision. This subroutine also decrements
1886| the input operand's exponent by 1 to account for the fact that
1887| dest_sgl or dest_dbl expects a normalized number's bias.
1888|
1889| Input: a0 points to a normalized number in internal extended format
1890| d0 is the round precision (=1 for sgl; =2 for dbl)
1891| d1 is the single precision or double precision
1892| denorm threshold
1893|
1894| Output: (In the format for dest_sgl or dest_dbl)
1895| a0 points to the destination
1896| a1 points to the operand
1897|
1898| Exceptions: Reports inexact 2 exception by setting USER_FPSR bits
1899|
1900dpspdnrm:
1901 movel %d0,-(%a7) |save round precision
1902 clrl %d0 |clear initial g,r,s
1903 bsrl dnrm_lp |careful with d0, it's needed by round
1904
1905 bfextu FPCR_MODE(%a6){
1906 swap %d1
1907 movew 2(%a7),%d1 |set rounding precision
1908 swap %d1 |at this point d1 has PREC/MODE info
1909 bsrl round |round result, sets the inex bit in
1910| ;USER_FPSR if needed
1911
1912 movew
1913 subw %d0,LOCAL_EX(%a0) |account for difference in denorm
1914| ;vs norm bias
1915
1916 movel %a0,%a1 |a1 has the operand input
1917 movel EXC_EA(%a6),%a0 |a0 has the destination pointer
1918 addw
1919 rts
1920|
1921| SET_XOP initialized WBTEMP with the value pointed to by a0
1922| input: a0 points to input operand in the internal extended format
1923|
1924set_xop:
1925 movel LOCAL_EX(%a0),WBTEMP_EX(%a6)
1926 movel LOCAL_HI(%a0),WBTEMP_HI(%a6)
1927 movel LOCAL_LO(%a0),WBTEMP_LO(%a6)
1928 bfclr WBTEMP_SGN(%a6){
1929 beqs sxop
1930 bsetb
1931sxop:
1932 bfclr STAG(%a6){
1933 rts
1934|
1935| P_MOVE
1936|
1937p_movet:
1938 .long p_move
1939 .long p_movez
1940 .long p_movei
1941 .long p_moven
1942 .long p_move
1943p_regd:
1944 .long p_dyd0
1945 .long p_dyd1
1946 .long p_dyd2
1947 .long p_dyd3
1948 .long p_dyd4
1949 .long p_dyd5
1950 .long p_dyd6
1951 .long p_dyd7
1952
1953pack_out:
1954 leal p_movet,%a0 |load jmp table address
1955 movew STAG(%a6),%d0 |get source tag
1956 bfextu %d0{
1957 movel (%a0,%d0.w*4),%a0 |load a0 with routine label for tag
1958 jmp (%a0) |go to the routine
1959
1960p_write:
1961 movel
1962 movel EXC_EA(%a6),%a1 |get the destination address
1963 bsr mem_write |write the user's destination
1964 moveb
1965
1966|
1967| Also note that the dtag must be set to norm here - this is because
1968| the 040 uses the dtag to execute the correct microcode.
1969|
1970 bfclr DTAG(%a6){
1971
1972 rts
1973
1974| Notes on handling of special case (zero, inf, and nan) inputs:
1975| 1. Operr is not signalled if the k-factor is greater than 18.
1976| 2. Per the manual, status bits are not set.
1977|
1978
1979p_move:
1980 movew CMDREG1B(%a6),%d0
1981 btstl
1982 beqs statick |if clear, k-factor is static
1983dynamick:
1984 bfextu %d0{
1985 lea p_regd,%a0
1986 movel %a0@(%d0:l:4),%a0
1987 jmp (%a0)
1988statick:
1989 andiw
1990 bfexts %d0{
1991 leal ETEMP(%a6),%a0 |a0 will point to the packed decimal
1992 bsrl bindec |perform the convert; data at a6
1993 leal FP_SCR1(%a6),%a0 |load a0 with result address
1994 bral p_write
1995p_movez:
1996 leal ETEMP(%a6),%a0 |a0 will point to the packed decimal
1997 clrw 2(%a0) |clear lower word of exp
1998 clrl 4(%a0) |load second lword of ZERO
1999 clrl 8(%a0) |load third lword of ZERO
2000 bra p_write |go write results
2001p_movei:
2002 fmovel
2003 leal ETEMP(%a6),%a0 |a0 will point to the packed decimal
2004 clrw 2(%a0) |clear lower word of exp
2005 bra p_write |go write the result
2006p_moven:
2007 leal ETEMP(%a6),%a0 |a0 will point to the packed decimal
2008 clrw 2(%a0) |clear lower word of exp
2009 bra p_write |go write the result
2010
2011|
2012| Routines to read the dynamic k-factor from Dn.
2013|
2014p_dyd0:
2015 movel USER_D0(%a6),%d0
2016 bras statick
2017p_dyd1:
2018 movel USER_D1(%a6),%d0
2019 bras statick
2020p_dyd2:
2021 movel %d2,%d0
2022 bras statick
2023p_dyd3:
2024 movel %d3,%d0
2025 bras statick
2026p_dyd4:
2027 movel %d4,%d0
2028 bras statick
2029p_dyd5:
2030 movel %d5,%d0
2031 bras statick
2032p_dyd6:
2033 movel %d6,%d0
2034 bra statick
2035p_dyd7:
2036 movel %d7,%d0
2037 bra statick
2038
2039 |end
2040