linux/arch/m68k/ifpsp060/src/fpsp.S
<<
>>
Prefs
   1~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   2MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
   3M68000 Hi-Performance Microprocessor Division
   4M68060 Software Package
   5Production Release P1.00 -- October 10, 1994
   6
   7M68060 Software Package Copyright © 1993, 1994 Motorola Inc.  All rights reserved.
   8
   9THE SOFTWARE is provided on an "AS IS" basis and without warranty.
  10To the maximum extent permitted by applicable law,
  11MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
  12INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
  13and any warranty against infringement with regard to the SOFTWARE
  14(INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials.
  15
  16To the maximum extent permitted by applicable law,
  17IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
  18(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
  19BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
  20ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
  21Motorola assumes no responsibility for the maintenance and support of the SOFTWARE.
  22
  23You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE
  24so long as this entire notice is retained without alteration in any modified and/or
  25redistributed versions, and that such modified versions are clearly identified as such.
  26No licenses are granted by implication, estoppel or otherwise under any patents
  27or trademarks of Motorola, Inc.
  28~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  29#
  30# freal.s:
  31#       This file is appended to the top of the 060FPSP package
  32# and contains the entry points into the package. The user, in
  33# effect, branches to one of the branch table entries located
  34# after _060FPSP_TABLE.
  35#       Also, subroutine stubs exist in this file (_fpsp_done for
  36# example) that are referenced by the FPSP package itself in order
  37# to call a given routine. The stub routine actually performs the
  38# callout. The FPSP code does a "bsr" to the stub routine. This
  39# extra layer of hierarchy adds a slight performance penalty but
  40# it makes the FPSP code easier to read and more maintainable.
  41#
  42
  43set     _off_bsun,      0x00                    # byte offsets of callout slots, relative to _060FPSP_TABLE-0x80; this one is read by _real_bsun
  44set     _off_snan,      0x04                    # read by _real_snan
  45set     _off_operr,     0x08                    # read by _real_operr
  46set     _off_ovfl,      0x0c                    # read by _real_ovfl
  47set     _off_unfl,      0x10                    # read by _real_unfl
  48set     _off_dz,        0x14                    # read by _real_dz
  49set     _off_inex,      0x18                    # read by _real_inex
  50set     _off_fline,     0x1c                    # read by _real_fline
  51set     _off_fpu_dis,   0x20                    # read by _real_fpu_disabled
  52set     _off_trap,      0x24                    # read by _real_trap
  53set     _off_trace,     0x28                    # read by _real_trace
  54set     _off_access,    0x2c                    # read by _real_access
  55set     _off_done,      0x30                    # read by _fpsp_done
  56
  57set     _off_imr,       0x40                    # read by _imem_read (0x34-0x3f have no equate in this list)
  58set     _off_dmr,       0x44                    # read by _dmem_read
  59set     _off_dmw,       0x48                    # read by _dmem_write
  60set     _off_irw,       0x4c                    # read by _imem_read_word
  61set     _off_irl,       0x50                    # read by _imem_read_long
  62set     _off_drb,       0x54                    # read by _dmem_read_byte
  63set     _off_drw,       0x58                    # read by _dmem_read_word
  64set     _off_drl,       0x5c                    # read by _dmem_read_long
  65set     _off_dwb,       0x60                    # read by _dmem_write_byte
  66set     _off_dww,       0x64                    # read by _dmem_write_word
  67set     _off_dwl,       0x68                    # read by _dmem_write_long
  68
  69_060FPSP_TABLE:                                 # start of the package entry-point branch table
  70
  71###############################################################
  72
  73# Here's the table of ENTRY POINTS for those linking the package.
  74        bra.l           _fpsp_snan              # entry at _060FPSP_TABLE+0x00
  75        short           0x0000                  # filler word: bra.l (6 bytes) + short (2 bytes) = 8 bytes per entry
  76        bra.l           _fpsp_operr             # entry at +0x08
  77        short           0x0000
  78        bra.l           _fpsp_ovfl              # entry at +0x10
  79        short           0x0000
  80        bra.l           _fpsp_unfl              # entry at +0x18
  81        short           0x0000
  82        bra.l           _fpsp_dz                # entry at +0x20
  83        short           0x0000
  84        bra.l           _fpsp_inex              # entry at +0x28
  85        short           0x0000
  86        bra.l           _fpsp_fline             # entry at +0x30
  87        short           0x0000
  88        bra.l           _fpsp_unsupp            # entry at +0x38
  89        short           0x0000
  90        bra.l           _fpsp_effadd            # entry at +0x40
  91        short           0x0000
  92
  93        space           56                      # presumably reserves 56 bytes so the 9 entries (72 bytes) fill 0x80 bytes -- confirm directive semantics
  94
  95###############################################################
  96        global          _fpsp_done              # callout stub: jump via the _off_done slot; d0 and sp are unchanged on arrival
  97_fpsp_done:
  98        mov.l           %d0,-(%sp)              # save d0 so it can serve as scratch
  99        mov.l           (_060FPSP_TABLE-0x80+_off_done,%pc),%d0 # d0 = longword stored in the _off_done slot
 100        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 101        mov.l           0x4(%sp),%d0            # restore d0 from the longword saved above
 102        rtd             &0x4                    # pop target into pc, then release the saved-d0 longword
 103
 104        global          _real_ovfl              # callout stub: jump via the _off_ovfl slot; d0 and sp are unchanged on arrival
 105_real_ovfl:
 106        mov.l           %d0,-(%sp)              # save d0 so it can serve as scratch
 107        mov.l           (_060FPSP_TABLE-0x80+_off_ovfl,%pc),%d0 # d0 = longword stored in the _off_ovfl slot
 108        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 109        mov.l           0x4(%sp),%d0            # restore d0 from the longword saved above
 110        rtd             &0x4                    # pop target into pc, then release the saved-d0 longword
 111
 112        global          _real_unfl              # callout stub: jump via the _off_unfl slot; d0 and sp are unchanged on arrival
 113_real_unfl:
 114        mov.l           %d0,-(%sp)              # save d0 so it can serve as scratch
 115        mov.l           (_060FPSP_TABLE-0x80+_off_unfl,%pc),%d0 # d0 = longword stored in the _off_unfl slot
 116        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 117        mov.l           0x4(%sp),%d0            # restore d0 from the longword saved above
 118        rtd             &0x4                    # pop target into pc, then release the saved-d0 longword
 119
 120        global          _real_inex              # callout stub: jump via the _off_inex slot; d0 and sp are unchanged on arrival
 121_real_inex:
 122        mov.l           %d0,-(%sp)              # save d0 so it can serve as scratch
 123        mov.l           (_060FPSP_TABLE-0x80+_off_inex,%pc),%d0 # d0 = longword stored in the _off_inex slot
 124        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 125        mov.l           0x4(%sp),%d0            # restore d0 from the longword saved above
 126        rtd             &0x4                    # pop target into pc, then release the saved-d0 longword
 127
 128        global          _real_bsun              # callout stub: jump via the _off_bsun slot; d0 and sp are unchanged on arrival
 129_real_bsun:
 130        mov.l           %d0,-(%sp)              # save d0 so it can serve as scratch
 131        mov.l           (_060FPSP_TABLE-0x80+_off_bsun,%pc),%d0 # d0 = longword stored in the _off_bsun slot
 132        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 133        mov.l           0x4(%sp),%d0            # restore d0 from the longword saved above
 134        rtd             &0x4                    # pop target into pc, then release the saved-d0 longword
 135
 136        global          _real_operr             # callout stub: jump via the _off_operr slot; d0 and sp are unchanged on arrival
 137_real_operr:
 138        mov.l           %d0,-(%sp)              # save d0 so it can serve as scratch
 139        mov.l           (_060FPSP_TABLE-0x80+_off_operr,%pc),%d0 # d0 = longword stored in the _off_operr slot
 140        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 141        mov.l           0x4(%sp),%d0            # restore d0 from the longword saved above
 142        rtd             &0x4                    # pop target into pc, then release the saved-d0 longword
 143
 144        global          _real_snan              # callout stub: jump via the _off_snan slot; d0 and sp are unchanged on arrival
 145_real_snan:
 146        mov.l           %d0,-(%sp)              # save d0 so it can serve as scratch
 147        mov.l           (_060FPSP_TABLE-0x80+_off_snan,%pc),%d0 # d0 = longword stored in the _off_snan slot
 148        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 149        mov.l           0x4(%sp),%d0            # restore d0 from the longword saved above
 150        rtd             &0x4                    # pop target into pc, then release the saved-d0 longword
 151
 152        global          _real_dz                # callout stub: jump via the _off_dz slot; d0 and sp are unchanged on arrival
 153_real_dz:
 154        mov.l           %d0,-(%sp)              # save d0 so it can serve as scratch
 155        mov.l           (_060FPSP_TABLE-0x80+_off_dz,%pc),%d0 # d0 = longword stored in the _off_dz slot
 156        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 157        mov.l           0x4(%sp),%d0            # restore d0 from the longword saved above
 158        rtd             &0x4                    # pop target into pc, then release the saved-d0 longword
 159
 160        global          _real_fline             # callout stub: jump via the _off_fline slot; d0 and sp are unchanged on arrival
 161_real_fline:
 162        mov.l           %d0,-(%sp)              # save d0 so it can serve as scratch
 163        mov.l           (_060FPSP_TABLE-0x80+_off_fline,%pc),%d0 # d0 = longword stored in the _off_fline slot
 164        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 165        mov.l           0x4(%sp),%d0            # restore d0 from the longword saved above
 166        rtd             &0x4                    # pop target into pc, then release the saved-d0 longword
 167
 168        global          _real_fpu_disabled      # callout stub: jump via the _off_fpu_dis slot; d0 and sp are unchanged on arrival
 169_real_fpu_disabled:
 170        mov.l           %d0,-(%sp)              # save d0 so it can serve as scratch
 171        mov.l           (_060FPSP_TABLE-0x80+_off_fpu_dis,%pc),%d0 # d0 = longword stored in the _off_fpu_dis slot
 172        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 173        mov.l           0x4(%sp),%d0            # restore d0 from the longword saved above
 174        rtd             &0x4                    # pop target into pc, then release the saved-d0 longword
 175
 176        global          _real_trap              # callout stub: jump via the _off_trap slot; d0 and sp are unchanged on arrival
 177_real_trap:
 178        mov.l           %d0,-(%sp)              # save d0 so it can serve as scratch
 179        mov.l           (_060FPSP_TABLE-0x80+_off_trap,%pc),%d0 # d0 = longword stored in the _off_trap slot
 180        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 181        mov.l           0x4(%sp),%d0            # restore d0 from the longword saved above
 182        rtd             &0x4                    # pop target into pc, then release the saved-d0 longword
 183
 184        global          _real_trace             # callout stub: jump via the _off_trace slot; d0 and sp are unchanged on arrival
 185_real_trace:
 186        mov.l           %d0,-(%sp)              # save d0 so it can serve as scratch
 187        mov.l           (_060FPSP_TABLE-0x80+_off_trace,%pc),%d0 # d0 = longword stored in the _off_trace slot
 188        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 189        mov.l           0x4(%sp),%d0            # restore d0 from the longword saved above
 190        rtd             &0x4                    # pop target into pc, then release the saved-d0 longword
 191
 192        global          _real_access            # callout stub: jump via the _off_access slot; d0 and sp are unchanged on arrival
 193_real_access:
 194        mov.l           %d0,-(%sp)              # save d0 so it can serve as scratch
 195        mov.l           (_060FPSP_TABLE-0x80+_off_access,%pc),%d0 # d0 = longword stored in the _off_access slot
 196        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 197        mov.l           0x4(%sp),%d0            # restore d0 from the longword saved above
 198        rtd             &0x4                    # pop target into pc, then release the saved-d0 longword
 199
 200#######################################
 201
 202        global          _imem_read              # callout stub: jump via the _off_imr slot; d0 and sp are unchanged on arrival
 203_imem_read:
 204        mov.l           %d0,-(%sp)              # save d0 so it can serve as scratch
 205        mov.l           (_060FPSP_TABLE-0x80+_off_imr,%pc),%d0 # d0 = longword stored in the _off_imr slot
 206        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 207        mov.l           0x4(%sp),%d0            # restore d0 from the longword saved above
 208        rtd             &0x4                    # pop target into pc, then release the saved-d0 longword
 209
 210        global          _dmem_read              # callout stub: jump via the _off_dmr slot; d0 and sp are unchanged on arrival
 211_dmem_read:
 212        mov.l           %d0,-(%sp)              # save d0 so it can serve as scratch
 213        mov.l           (_060FPSP_TABLE-0x80+_off_dmr,%pc),%d0 # d0 = longword stored in the _off_dmr slot
 214        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 215        mov.l           0x4(%sp),%d0            # restore d0 from the longword saved above
 216        rtd             &0x4                    # pop target into pc, then release the saved-d0 longword
 217
 218        global          _dmem_write             # callout stub: jump via the _off_dmw slot; d0 and sp are unchanged on arrival
 219_dmem_write:
 220        mov.l           %d0,-(%sp)              # save d0 so it can serve as scratch
 221        mov.l           (_060FPSP_TABLE-0x80+_off_dmw,%pc),%d0 # d0 = longword stored in the _off_dmw slot
 222        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 223        mov.l           0x4(%sp),%d0            # restore d0 from the longword saved above
 224        rtd             &0x4                    # pop target into pc, then release the saved-d0 longword
 225
 226        global          _imem_read_word         # callout stub: jump via the _off_irw slot; d0 and sp are unchanged on arrival
 227_imem_read_word:
 228        mov.l           %d0,-(%sp)              # save d0 so it can serve as scratch
 229        mov.l           (_060FPSP_TABLE-0x80+_off_irw,%pc),%d0 # d0 = longword stored in the _off_irw slot
 230        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 231        mov.l           0x4(%sp),%d0            # restore d0 from the longword saved above
 232        rtd             &0x4                    # pop target into pc, then release the saved-d0 longword
 233
 234        global          _imem_read_long         # callout stub: jump via the _off_irl slot; d0 and sp are unchanged on arrival
 235_imem_read_long:
 236        mov.l           %d0,-(%sp)              # save d0 so it can serve as scratch
 237        mov.l           (_060FPSP_TABLE-0x80+_off_irl,%pc),%d0 # d0 = longword stored in the _off_irl slot
 238        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 239        mov.l           0x4(%sp),%d0            # restore d0 from the longword saved above
 240        rtd             &0x4                    # pop target into pc, then release the saved-d0 longword
 241
 242        global          _dmem_read_byte         # callout stub: jump via the _off_drb slot; d0 and sp are unchanged on arrival
 243_dmem_read_byte:
 244        mov.l           %d0,-(%sp)              # save d0 so it can serve as scratch
 245        mov.l           (_060FPSP_TABLE-0x80+_off_drb,%pc),%d0 # d0 = longword stored in the _off_drb slot
 246        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 247        mov.l           0x4(%sp),%d0            # restore d0 from the longword saved above
 248        rtd             &0x4                    # pop target into pc, then release the saved-d0 longword
 249
 250        global          _dmem_read_word         # callout stub: jump via the _off_drw slot; d0 and sp are unchanged on arrival
 251_dmem_read_word:
 252        mov.l           %d0,-(%sp)              # save d0 so it can serve as scratch
 253        mov.l           (_060FPSP_TABLE-0x80+_off_drw,%pc),%d0 # d0 = longword stored in the _off_drw slot
 254        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 255        mov.l           0x4(%sp),%d0            # restore d0 from the longword saved above
 256        rtd             &0x4                    # pop target into pc, then release the saved-d0 longword
 257
 258        global          _dmem_read_long         # callout stub: jump via the _off_drl slot; d0 and sp are unchanged on arrival
 259_dmem_read_long:
 260        mov.l           %d0,-(%sp)              # save d0 so it can serve as scratch
 261        mov.l           (_060FPSP_TABLE-0x80+_off_drl,%pc),%d0 # d0 = longword stored in the _off_drl slot
 262        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 263        mov.l           0x4(%sp),%d0            # restore d0 from the longword saved above
 264        rtd             &0x4                    # pop target into pc, then release the saved-d0 longword
 265
 266        global          _dmem_write_byte        # callout stub: jump via the _off_dwb slot; d0 and sp are unchanged on arrival
 267_dmem_write_byte:
 268        mov.l           %d0,-(%sp)              # save d0 so it can serve as scratch
 269        mov.l           (_060FPSP_TABLE-0x80+_off_dwb,%pc),%d0 # d0 = longword stored in the _off_dwb slot
 270        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 271        mov.l           0x4(%sp),%d0            # restore d0 from the longword saved above
 272        rtd             &0x4                    # pop target into pc, then release the saved-d0 longword
 273
 274        global          _dmem_write_word        # callout stub: jump via the _off_dww slot; d0 and sp are unchanged on arrival
 275_dmem_write_word:
 276        mov.l           %d0,-(%sp)              # save d0 so it can serve as scratch
 277        mov.l           (_060FPSP_TABLE-0x80+_off_dww,%pc),%d0 # d0 = longword stored in the _off_dww slot
 278        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 279        mov.l           0x4(%sp),%d0            # restore d0 from the longword saved above
 280        rtd             &0x4                    # pop target into pc, then release the saved-d0 longword
 281
 282        global          _dmem_write_long        # callout stub: jump via the _off_dwl slot; d0 and sp are unchanged on arrival
 283_dmem_write_long:
 284        mov.l           %d0,-(%sp)              # save d0 so it can serve as scratch
 285        mov.l           (_060FPSP_TABLE-0x80+_off_dwl,%pc),%d0 # d0 = longword stored in the _off_dwl slot
 286        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 287        mov.l           0x4(%sp),%d0            # restore d0 from the longword saved above
 288        rtd             &0x4                    # pop target into pc, then release the saved-d0 longword
 289
 290#
 291# This file contains a set of define statements for constants
 292# in order to promote readability within the corecode itself.
 293#
 294
 295set LOCAL_SIZE,         192                     # stack frame size(bytes); passed to link.w in _fpsp_ovfl
 296set LV,                 -LOCAL_SIZE             # a6-relative offset of the lowest byte of the local frame
 297
 298set EXC_SR,             0x4                     # stacked status register (a6-relative, just above the saved a6)
 299set EXC_PC,             0x6                     # stacked pc
 300set EXC_VOFF,           0xa                     # stacked vector offset
 301set EXC_EA,             0xc                     # stacked <ea>
 302
 303set EXC_FP,             0x0                     # frame pointer (where link.w leaves the old a6)
 304
 305set EXC_AREGS,          -68                     # offset of all address regs (8 longwords: -68..-37)
 306set EXC_DREGS,          -100                    # offset of all data regs (8 longwords: -100..-69)
 307set EXC_FPREGS,         -36                     # offset of all fp regs (3 x 12 bytes: -36..-1)
 308
 309set EXC_A7,             EXC_AREGS+(7*4)         # offset of saved a7
 310set OLD_A7,             EXC_AREGS+(6*4)         # extra copy of saved a7; NOTE(review): same value as EXC_A6 below
 311set EXC_A6,             EXC_AREGS+(6*4)         # offset of saved a6
 312set EXC_A5,             EXC_AREGS+(5*4)         # offset of saved a5
 313set EXC_A4,             EXC_AREGS+(4*4)         # offset of saved a4
 314set EXC_A3,             EXC_AREGS+(3*4)         # offset of saved a3
 315set EXC_A2,             EXC_AREGS+(2*4)         # offset of saved a2
 316set EXC_A1,             EXC_AREGS+(1*4)         # offset of saved a1
 317set EXC_A0,             EXC_AREGS+(0*4)         # offset of saved a0
 318set EXC_D7,             EXC_DREGS+(7*4)         # offset of saved d7
 319set EXC_D6,             EXC_DREGS+(6*4)         # offset of saved d6
 320set EXC_D5,             EXC_DREGS+(5*4)         # offset of saved d5
 321set EXC_D4,             EXC_DREGS+(4*4)         # offset of saved d4
 322set EXC_D3,             EXC_DREGS+(3*4)         # offset of saved d3
 323set EXC_D2,             EXC_DREGS+(2*4)         # offset of saved d2
 324set EXC_D1,             EXC_DREGS+(1*4)         # offset of saved d1
 325set EXC_D0,             EXC_DREGS+(0*4)         # offset of saved d0
 326
 327set EXC_FP0,            EXC_FPREGS+(0*12)       # offset of saved fp0
 328set EXC_FP1,            EXC_FPREGS+(1*12)       # offset of saved fp1
 329set EXC_FP2,            EXC_FPREGS+(2*12)       # offset of saved fp2 (not used)
 330
 331set FP_SCR1,            LV+80                   # fp scratch 1 (12-byte slot, ends right below EXC_DREGS)
 332set FP_SCR1_EX,         FP_SCR1+0               # +0 within the slot (same offset as FTEMP_EX)
 333set FP_SCR1_SGN,        FP_SCR1+2               # +2 within the slot (same offset as FTEMP_SGN)
 334set FP_SCR1_HI,         FP_SCR1+4               # +4 within the slot (same offset as FTEMP_HI)
 335set FP_SCR1_LO,         FP_SCR1+8               # +8 within the slot (same offset as FTEMP_LO)
 336
 337set FP_SCR0,            LV+68                   # fp scratch 0 (12-byte slot)
 338set FP_SCR0_EX,         FP_SCR0+0               # +0 within the slot
 339set FP_SCR0_SGN,        FP_SCR0+2               # +2 within the slot
 340set FP_SCR0_HI,         FP_SCR0+4               # +4 within the slot
 341set FP_SCR0_LO,         FP_SCR0+8               # +8 within the slot
 342
 343set FP_DST,             LV+56                   # fp destination operand (12-byte slot)
 344set FP_DST_EX,          FP_DST+0                # +0 within the slot
 345set FP_DST_SGN,         FP_DST+2                # +2 within the slot
 346set FP_DST_HI,          FP_DST+4                # +4 within the slot
 347set FP_DST_LO,          FP_DST+8                # +8 within the slot
 348
 349set FP_SRC,             LV+44                   # fp source operand; _fpsp_ovfl does its fsave to this slot
 350set FP_SRC_EX,          FP_SRC+0                # +0 within the slot
 351set FP_SRC_SGN,         FP_SRC+2                # +2 within the slot
 352set FP_SRC_HI,          FP_SRC+4                # +4 within the slot
 353set FP_SRC_LO,          FP_SRC+8                # +8 within the slot
 354
 355set USER_FPIAR,         LV+40                   # FP instr address register
 356
 357set USER_FPSR,          LV+36                   # FP status register
 358set FPSR_CC,            USER_FPSR+0             # FPSR condition codes
 359set FPSR_QBYTE,         USER_FPSR+1             # FPSR quotient byte
 360set FPSR_EXCEPT,        USER_FPSR+2             # FPSR exception status byte
 361set FPSR_AEXCEPT,       USER_FPSR+3             # FPSR accrued exception byte
 362
 363set USER_FPCR,          LV+32                   # FP control register; fpcr/fpsr/fpiar are saved by one fmovm.l starting here
 364set FPCR_ENABLE,        USER_FPCR+2             # FPCR exception enable
 365set FPCR_MODE,          USER_FPCR+3             # FPCR rounding mode control
 366
 367set L_SCR3,             LV+28                   # integer scratch 3
 368set L_SCR2,             LV+24                   # integer scratch 2
 369set L_SCR1,             LV+20                   # integer scratch 1
 370
 371set STORE_FLG,          LV+19                   # flag: operand store (ie. not fcmp/ftst)
 372
 373set EXC_TEMP2,          LV+24                   # temporary space; NOTE(review): same offset as L_SCR2
 374set EXC_TEMP,           LV+16                   # temporary space
 375
 376set DTAG,               LV+15                   # destination operand type
 377set STAG,               LV+14                   # source operand type
 378
 379set SPCOND_FLG,         LV+10                   # flag: special case (see below)
 380
 381set EXC_CC,             LV+8                    # saved condition codes
 382set EXC_EXTWPTR,        LV+4                    # saved current PC (active)
 383set EXC_EXTWORD,        LV+2                    # saved extension word
 384set EXC_CMDREG,         LV+2                    # saved extension word (alias of EXC_EXTWORD)
 385set EXC_OPWORD,         LV+0                    # saved operation word; _fpsp_ovfl stores a longword here, which also fills LV+2
 386
 387################################
 388
 389# Helpful macros
 390
 391set FTEMP,              0                       # offsets within an extended precision value saved in memory
 392set FTEMP_EX,           0                       # +0
 393set FTEMP_SGN,          2                       # +2
 394set FTEMP_HI,           4                       # +4 (presumably the upper mantissa longword -- by name)
 395set FTEMP_LO,           8                       # +8 (presumably the lower mantissa longword -- by name)
 396set FTEMP_GRS,          12                      # +12 (presumably guard/round/sticky -- by name; TODO confirm)
 397
 398set LOCAL,              0                       # offsets within an extended precision value saved in memory
 399set LOCAL_EX,           0                       # +0
 400set LOCAL_SGN,          2                       # +2
 401set LOCAL_HI,           4                       # +4 (same offsets as the FTEMP_* set)
 402set LOCAL_LO,           8                       # +8
 403set LOCAL_GRS,          12                      # +12
 404
 405set DST,                0                       # offsets within an extended precision value saved in memory
 406set DST_EX,             0                       # +0
 407set DST_HI,             4                       # +4
 408set DST_LO,             8                       # +8 (no _SGN/_GRS equates for DST)
 409
 410set SRC,                0                       # offsets within an extended precision value saved in memory
 411set SRC_EX,             0                       # +0
 412set SRC_HI,             4                       # +4
 413set SRC_LO,             8                       # +8 (no _SGN/_GRS equates for SRC)
 414
 415set SGL_LO,             0x3f81                  # min sgl prec exponent (= EXT_BIAS - SGL_BIAS + 1)
 416set SGL_HI,             0x407e                  # max sgl prec exponent (= EXT_BIAS + SGL_BIAS)
 417set DBL_LO,             0x3c01                  # min dbl prec exponent (= EXT_BIAS - DBL_BIAS + 1)
 418set DBL_HI,             0x43fe                  # max dbl prec exponent (= EXT_BIAS + DBL_BIAS)
 419set EXT_LO,             0x0                     # min ext prec exponent
 420set EXT_HI,             0x7ffe                  # max ext prec exponent
 421
 422set EXT_BIAS,           0x3fff                  # extended precision bias
 423set SGL_BIAS,           0x007f                  # single precision bias
 424set DBL_BIAS,           0x03ff                  # double precision bias
 425
 426set NORM,               0x00                    # operand type for STAG/DTAG
 427set ZERO,               0x01                    # operand type for STAG/DTAG
 428set INF,                0x02                    # operand type for STAG/DTAG
 429set QNAN,               0x03                    # operand type for STAG/DTAG
 430set DENORM,             0x04                    # operand type for STAG/DTAG
 431set SNAN,               0x05                    # operand type for STAG/DTAG
 432set UNNORM,             0x06                    # operand type for STAG/DTAG; _fpsp_ovfl compares set_tag_x's d0 result against it
 433
 434##################
 435# FPSR/FPCR bits #
 436##################
 437set neg_bit,            0x3                     # negative result (bit number within a byte; cf. neg_bmask 0x08)
 438set z_bit,              0x2                     # zero result
 439set inf_bit,            0x1                     # infinite result
 440set nan_bit,            0x0                     # NAN result
 441
 442set q_sn_bit,           0x7                     # sign bit of quotient byte
 443
 444set bsun_bit,           7                       # branch on unordered (bit numbers within a byte; cf. bsun_mask 0x8000)
 445set snan_bit,           6                       # signalling NAN
 446set operr_bit,          5                       # operand error
 447set ovfl_bit,           4                       # overflow
 448set unfl_bit,           3                       # underflow
 449set dz_bit,             2                       # divide by zero
 450set inex2_bit,          1                       # inexact result 2
 451set inex1_bit,          0                       # inexact result 1
 452
 453set aiop_bit,           7                       # accrued invalid (illegal) operation bit; cf. aiop_mask 0x80
 454set aovfl_bit,          6                       # accrued overflow bit
 455set aunfl_bit,          5                       # accrued underflow bit
 456set adz_bit,            4                       # accrued dz bit
 457set ainex_bit,          3                       # accrued inexact bit
 458
 459#############################
 460# FPSR individual bit masks #
 461#############################
 462set neg_mask,           0x08000000              # negative bit mask (lw = longword; bit 27)
 463set inf_mask,           0x02000000              # infinity bit mask (lw; bit 25)
 464set z_mask,             0x04000000              # zero bit mask (lw; bit 26)
 465set nan_mask,           0x01000000              # nan bit mask (lw; bit 24)
 466
 467set neg_bmask,          0x08                    # negative bit mask (byte) = neg_mask >> 24
 468set inf_bmask,          0x02                    # infinity bit mask (byte) = inf_mask >> 24
 469set z_bmask,            0x04                    # zero bit mask (byte) = z_mask >> 24
 470set nan_bmask,          0x01                    # nan bit mask (byte) = nan_mask >> 24
 471
 472set bsun_mask,          0x00008000              # bsun exception mask
 473set snan_mask,          0x00004000              # snan exception mask
 474set operr_mask,         0x00002000              # operr exception mask
 475set ovfl_mask,          0x00001000              # overflow exception mask
 476set unfl_mask,          0x00000800              # underflow exception mask
 477set dz_mask,            0x00000400              # dz exception mask
 478set inex2_mask,         0x00000200              # inex2 exception mask
 479set inex1_mask,         0x00000100              # inex1 exception mask
 480
 481set aiop_mask,          0x00000080              # accrued illegal operation
 482set aovfl_mask,         0x00000040              # accrued overflow
 483set aunfl_mask,         0x00000020              # accrued underflow
 484set adz_mask,           0x00000010              # accrued divide by zero
 485set ainex_mask,         0x00000008              # accrued inexact
 486
 487######################################
 488# FPSR combinations used in the FPSP #
 489######################################
 490set dzinf_mask,         inf_mask+dz_mask+adz_mask               # = 0x02000410
 491set opnan_mask,         nan_mask+operr_mask+aiop_mask           # = 0x01002080
 492set nzi_mask,           0x01ffffff              # AND mask: clears N, Z, and I (bits 27-25, plus bits 31-28); keeps NAN and all lower bits
 493set unfinx_mask,        unfl_mask+inex2_mask+aunfl_mask+ainex_mask # = 0x00000a28
 494set unf2inx_mask,       unfl_mask+inex2_mask+ainex_mask         # = 0x00000a08 (unfinx_mask without aunfl)
 495set ovfinx_mask,        ovfl_mask+inex2_mask+aovfl_mask+ainex_mask # = 0x00001248
 496set inx1a_mask,         inex1_mask+ainex_mask                   # = 0x00000108
 497set inx2a_mask,         inex2_mask+ainex_mask                   # = 0x00000208
 498set snaniop_mask,       nan_mask+snan_mask+aiop_mask            # = 0x01004080
 499set snaniop2_mask,      snan_mask+aiop_mask                     # = 0x00004080 (snaniop_mask without the nan cc bit)
 500set naniop_mask,        nan_mask+aiop_mask                      # = 0x01000080
 501set neginf_mask,        neg_mask+inf_mask                       # = 0x0a000000
 502set infaiop_mask,       inf_mask+aiop_mask                      # = 0x02000080
 503set negz_mask,          neg_mask+z_mask                         # = 0x0c000000
 504set opaop_mask,         operr_mask+aiop_mask                    # = 0x00002080
 505set unfl_inx_mask,      unfl_mask+aunfl_mask+ainex_mask         # = 0x00000828 (note: no inex2 bit)
 506set ovfl_inx_mask,      ovfl_mask+aovfl_mask+ainex_mask         # = 0x00001048 (note: no inex2 bit)
 507
 508#########
 509# misc. #
 510#########
 511set rnd_stky_bit,       29                      # stky bit pos in longword
 512
 513set sign_bit,           0x7                     # sign bit
 514set signan_bit,         0x6                     # signalling nan bit
 515
 516set sgl_thresh,         0x3f81                  # minimum sgl exponent (same value as SGL_LO)
 517set dbl_thresh,         0x3c01                  # minimum dbl exponent (same value as DBL_LO)
 518
 519set x_mode,             0x0                     # extended precision
 520set s_mode,             0x4                     # single precision
 521set d_mode,             0x8                     # double precision
 522
 523set rn_mode,            0x0                     # round-to-nearest
 524set rz_mode,            0x1                     # round-to-zero
 525set rm_mode,            0x2                     # round-to-minus-infinity
 526set rp_mode,            0x3                     # round-to-plus-infinity
 527
 528set mantissalen,        64                      # length of mantissa in bits
 529
 530set BYTE,               1                       # len(byte) == 1 byte
 531set WORD,               2                       # len(word) == 2 bytes
 532set LONG,               4                       # len(longword) == 4 bytes
 533
 534set BSUN_VEC,           0xc0                    # bsun    vector offset (vector 48 * 4)
 535set INEX_VEC,           0xc4                    # inexact vector offset (vector 49 * 4)
 536set DZ_VEC,             0xc8                    # dz      vector offset (vector 50 * 4)
 537set UNFL_VEC,           0xcc                    # unfl    vector offset (vector 51 * 4)
 538set OPERR_VEC,          0xd0                    # operr   vector offset (vector 52 * 4)
 539set OVFL_VEC,           0xd4                    # ovfl    vector offset (vector 53 * 4)
 540set SNAN_VEC,           0xd8                    # snan    vector offset (vector 54 * 4)
 541
 542###########################
 543# SPecial CONDition FLaGs #
 544###########################
 545set ftrapcc_flg,        0x01                    # flag bit: ftrapcc exception
 546set fbsun_flg,          0x02                    # flag bit: bsun exception
 547set mia7_flg,           0x04                    # flag bit: (a7)+ <ea>
 548set mda7_flg,           0x08                    # flag bit: -(a7) <ea>
 549set fmovm_flg,          0x40                    # flag bit: fmovm instruction (no matching *_bit equate in this list)
 550set immed_flg,          0x80                    # flag bit: &<data> <ea>
 551
 552set ftrapcc_bit,        0x0                     # bit number of ftrapcc_flg (1 << 0 == 0x01)
 553set fbsun_bit,          0x1                     # bit number of fbsun_flg   (1 << 1 == 0x02)
 554set mia7_bit,           0x2                     # bit number of mia7_flg    (1 << 2 == 0x04)
 555set mda7_bit,           0x3                     # bit number of mda7_flg    (1 << 3 == 0x08)
 556set immed_bit,          0x7                     # bit number of immed_flg   (1 << 7 == 0x80)
 557
 558##################################
 559# TRANSCENDENTAL "LAST-OP" FLAGS #
 560##################################
 561set FMUL_OP,            0x0                     # fmul instr performed last (the four codes are consecutive, 0..3)
 562set FDIV_OP,            0x1                     # fdiv instr performed last
 563set FADD_OP,            0x2                     # fadd instr performed last
 564set FMOV_OP,            0x3                     # fmov instr performed last
 565
 566#############
 567# CONSTANTS #
 568#############
 569T1:     long            0x40C62D38,0xD3D64634   # 16381 LOG2 LEAD (two longwords; decodes as an IEEE double of about 11354.4)
 570T2:     long            0x3D6F90AE,0xB1E75CC7   # 16381 LOG2 TRAIL (two longwords; small correction term to T1)
 571
 572PI:     long            0x40000000,0xC90FDAA2,0x2168C235,0x00000000 # pi: exponent word 0x4000, 64-bit mantissa, then one zero longword
 573PIBY2:  long            0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000 # pi/2: same mantissa as PI, exponent word 0x3FFF
 574
 575TWOBYPI:
 576        long            0x3FE45F30,0x6DC9C883   # 2/pi as two longwords (IEEE double 0x3FE45F306DC9C883)
 577
 578#########################################################################
 579# XDEF **************************************************************** #
 580#       _fpsp_ovfl(): 060FPSP entry point for FP Overflow exception.    #
 581#                                                                       #
 582#       This handler should be the first code executed upon taking the  #
 583#       FP Overflow exception in an operating system.                   #
 584#                                                                       #
 585# XREF **************************************************************** #
 586#       _imem_read_long() - read instruction longword                   #
 587#       fix_skewed_ops() - adjust src operand in fsave frame            #
 588#       set_tag_x() - determine optype of src/dst operands              #
 589#       store_fpreg() - store opclass 0 or 2 result to FP regfile       #
 590#       unnorm_fix() - change UNNORM operands to NORM or ZERO           #
 591#       load_fpn2() - load dst operand from FP regfile                  #
 592#       fout() - emulate an opclass 3 instruction                       #
 593#       tbl_unsupp - addr of emulation routine table for opclass 0,2    #
 594#       _fpsp_done() - "callout" for 060FPSP exit (all work done!)      #
 595#       _real_ovfl() - "callout" for Overflow exception enabled code    #
 596#       _real_inex() - "callout" for Inexact exception enabled code     #
 597#       _real_trace() - "callout" for Trace exception code              #
 598#                                                                       #
 599# INPUT *************************************************************** #
 600#       - The system stack contains the FP Ovfl exception stack frame   #
 601#       - The fsave frame contains the source operand                   #
 602#                                                                       #
 603# OUTPUT ************************************************************** #
 604#       Overflow Exception enabled:                                     #
 605#       - The system stack is unchanged                                 #
 606#       - The fsave frame contains the adjusted src op for opclass 0,2  #
 607#       Overflow Exception disabled:                                    #
 608#       - The system stack is unchanged                                 #
 609#       - The "exception present" flag in the fsave frame is cleared    #
 610#                                                                       #
 611# ALGORITHM *********************************************************** #
 612#       On the 060, if an FP overflow is present as the result of any   #
 613# instruction, the 060 will take an overflow exception whether the      #
 614# exception is enabled or disabled in the FPCR. For the disabled case,  #
 615# This handler emulates the instruction to determine what the correct   #
 616# default result should be for the operation. This default result is    #
 617# then stored in either the FP regfile, data regfile, or memory.        #
 618# Finally, the handler exits through the "callout" _fpsp_done()         #
 619# denoting that no exceptional conditions exist within the machine.     #
 620#       If the exception is enabled, then this handler must create the  #
 621# exceptional operand and place it in the fsave state frame, and store  #
 622# the default result (only if the instruction is opclass 3). For        #
 623# exceptions enabled, this handler must exit through the "callout"      #
 624# _real_ovfl() so that the operating system enabled overflow handler    #
 625# can handle this case.                                                 #
 626#       Two other conditions exist. First, if overflow was disabled     #
 627# but the inexact exception was enabled, this handler must exit         #
 628# through the "callout" _real_inex() regardless of whether the result   #
 629# was inexact.                                                          #
 630#       Also, in the case of an opclass three instruction where         #
 631# overflow was disabled and the trace exception was enabled, this       #
 632# handler must exit through the "callout" _real_trace().                #
 633#                                                                       #
 634#########################################################################
 635
 636        global          _fpsp_ovfl
 637_fpsp_ovfl:
 638
 639#$#     sub.l           &24,%sp                 # make room for src/dst
 640
 641        link.w          %a6,&-LOCAL_SIZE        # init stack frame
 642
 643        fsave           FP_SRC(%a6)             # grab the "busy" frame
 644
 645        movm.l          &0x0303,EXC_DREGS(%a6)  # save d0-d1/a0-a1
 646        fmovm.l         %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
 647        fmovm.x         &0xc0,EXC_FPREGS(%a6)   # save fp0-fp1 on stack
 648
 649# the FPIAR holds the "current PC" of the faulting instruction
 650        mov.l           USER_FPIAR(%a6),EXC_EXTWPTR(%a6) # start at the saved FPIAR
 651        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
 652        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
 653        bsr.l           _imem_read_long         # fetch the instruction words
 654        mov.l           %d0,EXC_OPWORD(%a6)     # store OPWORD and EXTWORD
 655
 656##############################################################################
 657
 658        btst            &0x5,EXC_CMDREG(%a6)    # is instr an fmove out?
 659        bne.w           fovfl_out               # yes; handled at fovfl_out
 660
 661
 662        lea             FP_SRC(%a6),%a0         # pass: ptr to src op
 663        bsr.l           fix_skewed_ops          # fix src op
 664
 665# since, I believe, only NORMs and DENORMs can come through here,
 666# maybe we can avoid the subroutine call.
 667        lea             FP_SRC(%a6),%a0         # pass: ptr to src op
 668        bsr.l           set_tag_x               # tag the operand type
 669        mov.b           %d0,STAG(%a6)           # maybe NORM,DENORM
 670
 671# bit five of the fp extension word separates the monadic and dyadic operations
 672# that can pass through fpsp_ovfl(). remember that fcmp, ftst, and fsincos
 673# will never take this exception.
 674        btst            &0x5,1+EXC_CMDREG(%a6)  # is operation monadic or dyadic?
 675        beq.b           fovfl_extract           # monadic
 676
 677        bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
 678        bsr.l           load_fpn2               # load dst into FP_DST
 679
 680        lea             FP_DST(%a6),%a0         # pass: ptr to dst op
 681        bsr.l           set_tag_x               # tag the operand type
 682        cmpi.b          %d0,&UNNORM             # is operand an UNNORM?
 683        bne.b           fovfl_op2_done          # no
 684        bsr.l           unnorm_fix              # yes; convert to NORM,DENORM,or ZERO
 685fovfl_op2_done:
 686        mov.b           %d0,DTAG(%a6)           # save dst optype tag
 687
 688fovfl_extract:
 689
 690#$#     mov.l           FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
 691#$#     mov.l           FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
 692#$#     mov.l           FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
 693#$#     mov.l           FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
 694#$#     mov.l           FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
 695#$#     mov.l           FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
 696
 697        clr.l           %d0
 698        mov.b           FPCR_MODE(%a6),%d0      # pass rnd prec/mode
 699
 700        mov.b           1+EXC_CMDREG(%a6),%d1   # fetch second byte of cmd reg
 701        andi.w          &0x007f,%d1             # extract extension (d1.w is the table index)
 702
 703        andi.l          &0x00ff01ff,USER_FPSR(%a6) # keep only bits 23-16 and 8-0 of saved FPSR
 704
 705        fmov.l          &0x0,%fpcr              # zero current control regs
 706        fmov.l          &0x0,%fpsr
 707
 708        lea             FP_SRC(%a6),%a0         # pass: ptr to src op
 709        lea             FP_DST(%a6),%a1         # pass: ptr to dst op
 710
 711# maybe we can make these entry points ONLY the OVFL entry points of each routine.
 712        mov.l           (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine offset from table
 713        jsr             (tbl_unsupp.l,%pc,%d1.l*1) # call routine at tbl_unsupp + offset
 714
 715# the operation has been emulated. the result is in fp0.
 716# the EXOP, if an exception occurred, is in fp1.
 717# we must save the default result regardless of whether
 718# traps are enabled or disabled.
 719        bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst reg field again
 720        bsr.l           store_fpreg             # store result to FP regfile
 721
 722# the exceptional possibilities we have left ourselves with are ONLY overflow
 723# and inexact. and, the inexact is such that overflow occurred and was disabled
 724# but inexact was enabled.
 725        btst            &ovfl_bit,FPCR_ENABLE(%a6) # is overflow enabled?
 726        bne.b           fovfl_ovfl_on           # yes
 727
 728        btst            &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
 729        bne.b           fovfl_inex_on           # yes
 730
 731        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
 732        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
 733        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
 734
 735        unlk            %a6
 736#$#     add.l           &24,%sp
 737        bra.l           _fpsp_done              # neither enabled; normal exit
 738
 739# overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
 740# in fp1. now, simply jump to _real_ovfl()!
 741fovfl_ovfl_on:
 742        fmovm.x         &0x40,FP_SRC(%a6)       # save EXOP (fp1) to stack
 743
 744        mov.w           &0xe005,2+FP_SRC(%a6)   # save exc status (0xe005 on this path)
 745
 746        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
 747        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
 748        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
 749
 750        frestore        FP_SRC(%a6)             # do this after fmovm,other f<op>s!
 751
 752        unlk            %a6                     # drop local frame before leaving
 753
 754        bra.l           _real_ovfl              # exit via the overflow "callout"
 755
 756# overflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
 757# we must jump to real_inex().
 758fovfl_inex_on:
 759
 760        fmovm.x         &0x40,FP_SRC(%a6)       # save EXOP (fp1) to stack
 761
 762        mov.b           &0xc4,1+EXC_VOFF(%a6)   # vector offset = 0xc4
 763        mov.w           &0xe001,2+FP_SRC(%a6)   # save exc status (0xe001 on this path)
 764
 765        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
 766        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
 767        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
 768
 769        frestore        FP_SRC(%a6)             # do this after fmovm,other f<op>s!
 770
 771        unlk            %a6                     # drop local frame before leaving
 772
 773        bra.l           _real_inex              # exit via the inexact "callout"
 774
 775########################################################################
 776fovfl_out:
 777
 778
 779#$#     mov.l           FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
 780#$#     mov.l           FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
 781#$#     mov.l           FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
 782
 783# the src operand is definitely a NORM(!), so tag it as such
 784        mov.b           &NORM,STAG(%a6)         # set src optype tag
 785
 786        clr.l           %d0
 787        mov.b           FPCR_MODE(%a6),%d0      # pass rnd prec/mode
 788
 789        and.l           &0xffff00ff,USER_FPSR(%a6) # clear only bits 15-8 of saved FPSR
 790
 791        fmov.l          &0x0,%fpcr              # zero current control regs
 792        fmov.l          &0x0,%fpsr
 793
 794        lea             FP_SRC(%a6),%a0         # pass ptr to src operand
 795
 796        bsr.l           fout                    # emulate the opclass 3 instruction
 797
 798        btst            &ovfl_bit,FPCR_ENABLE(%a6) # is overflow enabled?
 799        bne.w           fovfl_ovfl_on           # yes
 800
 801        btst            &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
 802        bne.w           fovfl_inex_on           # yes
 803
 804        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
 805        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
 806        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
 807
 808        unlk            %a6
 809#$#     add.l           &24,%sp
 810
 811        btst            &0x7,(%sp)              # is trace on?
 812        beq.l           _fpsp_done              # no
 813
 814        fmov.l          %fpiar,0x8(%sp)         # "Current PC" is in FPIAR
 815        mov.w           &0x2024,0x6(%sp)        # stk fmt = 0x2; voff = 0x024
 816        bra.l           _real_trace             # exit via the trace "callout"
 817
 818#########################################################################
 819# XDEF **************************************************************** #
 820#       _fpsp_unfl(): 060FPSP entry point for FP Underflow exception.   #
 821#                                                                       #
 822#       This handler should be the first code executed upon taking the  #
 823#       FP Underflow exception in an operating system.                  #
 824#                                                                       #
 825# XREF **************************************************************** #
 826#       _imem_read_long() - read instruction longword                   #
 827#       fix_skewed_ops() - adjust src operand in fsave frame            #
 828#       set_tag_x() - determine optype of src/dst operands              #
 829#       store_fpreg() - store opclass 0 or 2 result to FP regfile       #
 830#       unnorm_fix() - change UNNORM operands to NORM or ZERO           #
 831#       load_fpn2() - load dst operand from FP regfile                  #
 832#       fout() - emulate an opclass 3 instruction                       #
 833#       tbl_unsupp - add of table of emulation routines for opclass 0,2 #
 834#       _fpsp_done() - "callout" for 060FPSP exit (all work done!)      #
 835#       _real_unfl() - "callout" for Underflow exception enabled code   #
 836#       _real_inex() - "callout" for Inexact exception enabled code     #
 837#       _real_trace() - "callout" for Trace exception code              #
 838#                                                                       #
 839# INPUT *************************************************************** #
 840#       - The system stack contains the FP Unfl exception stack frame   #
 841#       - The fsave frame contains the source operand                   #
 842#                                                                       #
 843# OUTPUT ************************************************************** #
 844#       Underflow Exception enabled:                                    #
 845#       - The system stack is unchanged                                 #
 846#       - The fsave frame contains the adjusted src op for opclass 0,2  #
 847#       Underflow Exception disabled:                                   #
 848#       - The system stack is unchanged                                 #
 849#       - The "exception present" flag in the fsave frame is cleared    #
 850#                                                                       #
 851# ALGORITHM *********************************************************** #
 852#       On the 060, if an FP underflow is present as the result of any  #
 853# instruction, the 060 will take an underflow exception whether the     #
 854# exception is enabled or disabled in the FPCR. For the disabled case,  #
 855# This handler emulates the instruction to determine what the correct   #
 856# default result should be for the operation. This default result is    #
 857# then stored in either the FP regfile, data regfile, or memory.        #
 858# Finally, the handler exits through the "callout" _fpsp_done()         #
 859# denoting that no exceptional conditions exist within the machine.     #
 860#       If the exception is enabled, then this handler must create the  #
 861# exceptional operand and place it in the fsave state frame, and store  #
 862# the default result (only if the instruction is opclass 3). For        #
 863# exceptions enabled, this handler must exit through the "callout"      #
 864# _real_unfl() so that the operating system enabled underflow handler   #
 865# can handle this case.                                                 #
 866#       Two other conditions exist. First, if underflow was disabled    #
 867# but the inexact exception was enabled and the result was inexact,     #
 868# this handler must exit through the "callout" _real_inex() instead     #
 869# of through _fpsp_done().                                              #
 870#       Also, in the case of an opclass three instruction where         #
 871# underflow was disabled and the trace exception was enabled, this      #
 872# handler must exit through the "callout" _real_trace().                #
 873#                                                                       #
 874#########################################################################
 875
 876        global          _fpsp_unfl
 877_fpsp_unfl:
 878
 879#$#     sub.l           &24,%sp                 # make room for src/dst
 880
 881        link.w          %a6,&-LOCAL_SIZE        # init stack frame
 882
 883        fsave           FP_SRC(%a6)             # grab the "busy" frame
 884
 885        movm.l          &0x0303,EXC_DREGS(%a6)  # save d0-d1/a0-a1
 886        fmovm.l         %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
 887        fmovm.x         &0xc0,EXC_FPREGS(%a6)   # save fp0-fp1 on stack
 888
 889# the FPIAR holds the "current PC" of the faulting instruction
 890        mov.l           USER_FPIAR(%a6),EXC_EXTWPTR(%a6) # start at the saved FPIAR
 891        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
 892        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
 893        bsr.l           _imem_read_long         # fetch the instruction words
 894        mov.l           %d0,EXC_OPWORD(%a6)     # store OPWORD and EXTWORD
 895
 896##############################################################################
 897
 898        btst            &0x5,EXC_CMDREG(%a6)    # is instr an fmove out?
 899        bne.w           funfl_out               # yes; handled at funfl_out
 900
 901
 902        lea             FP_SRC(%a6),%a0         # pass: ptr to src op
 903        bsr.l           fix_skewed_ops          # fix src op
 904
 905        lea             FP_SRC(%a6),%a0         # pass: ptr to src op
 906        bsr.l           set_tag_x               # tag the operand type
 907        mov.b           %d0,STAG(%a6)           # maybe NORM,DENORM
 908
 909# bit five of the fp ext word separates the monadic and dyadic operations
 910# that can pass through fpsp_unfl(). remember that fcmp, and ftst
 911# will never take this exception.
 912        btst            &0x5,1+EXC_CMDREG(%a6)  # is op monadic or dyadic?
 913        beq.b           funfl_extract           # monadic
 914
 915# now, what's left that's not dyadic is fsincos. we can distinguish it
 916# from all dyadics by the '0110xxx' pattern
 917        btst            &0x4,1+EXC_CMDREG(%a6)  # is op an fsincos?
 918        bne.b           funfl_extract           # yes
 919
 920        bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
 921        bsr.l           load_fpn2               # load dst into FP_DST
 922
 923        lea             FP_DST(%a6),%a0         # pass: ptr to dst op
 924        bsr.l           set_tag_x               # tag the operand type
 925        cmpi.b          %d0,&UNNORM             # is operand an UNNORM?
 926        bne.b           funfl_op2_done          # no
 927        bsr.l           unnorm_fix              # yes; convert to NORM,DENORM,or ZERO
 928funfl_op2_done:
 929        mov.b           %d0,DTAG(%a6)           # save dst optype tag
 930
 931funfl_extract:
 932
 933#$#     mov.l           FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
 934#$#     mov.l           FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
 935#$#     mov.l           FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
 936#$#     mov.l           FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
 937#$#     mov.l           FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
 938#$#     mov.l           FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
 939
 940        clr.l           %d0
 941        mov.b           FPCR_MODE(%a6),%d0      # pass rnd prec/mode
 942
 943        mov.b           1+EXC_CMDREG(%a6),%d1   # fetch second byte of cmd reg
 944        andi.w          &0x007f,%d1             # extract extension (d1.w is the table index)
 945
 946        andi.l          &0x00ff01ff,USER_FPSR(%a6) # keep only bits 23-16 and 8-0 of saved FPSR
 947
 948        fmov.l          &0x0,%fpcr              # zero current control regs
 949        fmov.l          &0x0,%fpsr
 950
 951        lea             FP_SRC(%a6),%a0         # pass: ptr to src op
 952        lea             FP_DST(%a6),%a1         # pass: ptr to dst op
 953
 954# maybe we can make these entry points ONLY the UNFL entry points of each routine.
 955        mov.l           (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine offset from table
 956        jsr             (tbl_unsupp.l,%pc,%d1.l*1) # call routine at tbl_unsupp + offset
 957
 958        bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst reg field again
 959        bsr.l           store_fpreg             # store result to FP regfile
 960
 961# The `060 FPU multiplier hardware is such that if the result of a
 962# multiply operation is the smallest possible normalized number
 963# (0x00000000_80000000_00000000), then the machine will take an
 964# underflow exception. Since this is incorrect, we need to check
 965# if our emulation, after re-doing the operation, decided that
 966# no underflow was called for. We do these checks only in
 967# funfl_{unfl,inex}_on() because w/ both exceptions disabled, this
 968# special case will simply exit gracefully with the correct result.
 969
 970# the exceptional possibilities we have left ourselves with are ONLY underflow
 971# and inexact. and, the inexact is such that underflow occurred and was disabled
 972# but inexact was enabled.
 973        btst            &unfl_bit,FPCR_ENABLE(%a6) # is underflow enabled?
 974        bne.b           funfl_unfl_on           # yes
 975
 976funfl_chkinex:
 977        btst            &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
 978        bne.b           funfl_inex_on           # yes
 979
 980funfl_exit:
 981        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
 982        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
 983        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
 984
 985        unlk            %a6
 986#$#     add.l           &24,%sp
 987        bra.l           _fpsp_done              # nothing to report; normal exit
 988
 989# underflow is enabled AND underflow, of course, occurred. so, we have the EXOP
 990# in fp1 (don't forget to save fp0). what to do now?
 991# well, we simply have to get to go to _real_unfl()!
 992funfl_unfl_on:
 993
 994# The `060 FPU multiplier hardware is such that if the result of a
 995# multiply operation is the smallest possible normalized number
 996# (0x00000000_80000000_00000000), then the machine will take an
 997# underflow exception. Since this is incorrect, we check here to see
 998# if our emulation, after re-doing the operation, decided that
 999# no underflow was called for.
1000        btst            &unfl_bit,FPSR_EXCEPT(%a6) # did emulation flag underflow?
1001        beq.w           funfl_chkinex           # no; go check inexact instead
1002
1003funfl_unfl_on2:
1004        fmovm.x         &0x40,FP_SRC(%a6)       # save EXOP (fp1) to stack
1005
1006        mov.w           &0xe003,2+FP_SRC(%a6)   # save exc status (0xe003 on this path)
1007
1008        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
1009        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1010        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1011
1012        frestore        FP_SRC(%a6)             # do this after fmovm,other f<op>s!
1013
1014        unlk            %a6                     # drop local frame before leaving
1015
1016        bra.l           _real_unfl              # exit via the underflow "callout"
1017
1018# underflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
1019# we must jump to real_inex().
1020funfl_inex_on:
1021
1022# The `060 FPU multiplier hardware is such that if the result of a
1023# multiply operation is the smallest possible normalized number
1024# (0x00000000_80000000_00000000), then the machine will take an
1025# underflow exception.
1026# But, whether bogus or not, if inexact is enabled AND it occurred,
1027# then we have to branch to real_inex.
1028
1029        btst            &inex2_bit,FPSR_EXCEPT(%a6) # did emulation flag inexact?
1030        beq.w           funfl_exit              # no; take the normal exit
1031
1032funfl_inex_on2:
1033
1034        fmovm.x         &0x40,FP_SRC(%a6)       # save EXOP to stack
1035
1036        mov.b           &0xc4,1+EXC_VOFF(%a6)   # vector offset = 0xc4
1037        mov.w           &0xe001,2+FP_SRC(%a6)   # save exc status (0xe001 on this path)
1038
1039        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
1040        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1041        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1042
1043        frestore        FP_SRC(%a6)             # do this after fmovm,other f<op>s!
1044
1045        unlk            %a6                     # drop local frame before leaving
1046
1047        bra.l           _real_inex              # exit via the inexact "callout"
1048
1049#######################################################################
1050funfl_out:
1051
1052
1053#$#     mov.l           FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
1054#$#     mov.l           FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
1055#$#     mov.l           FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
1056
1057# the src operand is definitely a NORM(!), so tag it as such
1058        mov.b           &NORM,STAG(%a6)         # set src optype tag
1059
1060        clr.l           %d0
1061        mov.b           FPCR_MODE(%a6),%d0      # pass rnd prec/mode
1062
1063        and.l           &0xffff00ff,USER_FPSR(%a6) # clear only bits 15-8 of saved FPSR
1064
1065        fmov.l          &0x0,%fpcr              # zero current control regs
1066        fmov.l          &0x0,%fpsr
1067
1068        lea             FP_SRC(%a6),%a0         # pass ptr to src operand
1069
1070        bsr.l           fout                    # emulate the opclass 3 instruction
1071
1072        btst            &unfl_bit,FPCR_ENABLE(%a6) # is underflow enabled?
1073        bne.w           funfl_unfl_on2          # yes; enters past the FPSR_EXCEPT recheck
1074
1075        btst            &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
1076        bne.w           funfl_inex_on2          # yes; enters past the FPSR_EXCEPT recheck
1077
1078        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
1079        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1080        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1081
1082        unlk            %a6
1083#$#     add.l           &24,%sp
1084
1085        btst            &0x7,(%sp)              # is trace on?
1086        beq.l           _fpsp_done              # no
1087
1088        fmov.l          %fpiar,0x8(%sp)         # "Current PC" is in FPIAR
1089        mov.w           &0x2024,0x6(%sp)        # stk fmt = 0x2; voff = 0x024
1090        bra.l           _real_trace             # exit via the trace "callout"
1091
1092#########################################################################
1093# XDEF **************************************************************** #
1094#       _fpsp_unsupp(): 060FPSP entry point for FP "Unimplemented       #
1095#                       Data Type" exception.                           #
1096#                                                                       #
1097#       This handler should be the first code executed upon taking the  #
1098#       FP Unimplemented Data Type exception in an operating system.    #
1099#                                                                       #
1100# XREF **************************************************************** #
1101#       _imem_read_{word,long}() - read instruction word/longword       #
1102#       fix_skewed_ops() - adjust src operand in fsave frame            #
1103#       set_tag_x() - determine optype of src/dst operands              #
1104#       store_fpreg() - store opclass 0 or 2 result to FP regfile       #
1105#       unnorm_fix() - change UNNORM operands to NORM or ZERO           #
1106#       load_fpn2() - load dst operand from FP regfile                  #
1107#       load_fpn1() - load src operand from FP regfile                  #
1108#       fout() - emulate an opclass 3 instruction                       #
1109#       tbl_unsupp - add of table of emulation routines for opclass 0,2 #
1110#       _real_inex() - "callout" to operating system inexact handler    #
1111#       _fpsp_done() - "callout" for exit; work all done                #
1112#       _real_trace() - "callout" for Trace enabled exception           #
1113#       funimp_skew() - adjust fsave src ops to "incorrect" value       #
1114#       _real_snan() - "callout" for SNAN exception                     #
1115#       _real_operr() - "callout" for OPERR exception                   #
1116#       _real_ovfl() - "callout" for OVFL exception                     #
1117#       _real_unfl() - "callout" for UNFL exception                     #
1118#       get_packed() - fetch packed operand from memory                 #
1119#                                                                       #
1120# INPUT *************************************************************** #
1121#       - The system stack contains the "Unimp Data Type" stk frame     #
1122#       - The fsave frame contains the src op (for UNNORM/DENORM)       #
1123#                                                                       #
1124# OUTPUT ************************************************************** #
1125#       If Inexact exception (opclass 3):                               #
1126#       - The system stack is changed to an Inexact exception stk frame #
1127#       If SNAN exception (opclass 3):                                  #
1128#       - The system stack is changed to an SNAN exception stk frame    #
1129#       If OPERR exception (opclass 3):                                 #
1130#       - The system stack is changed to an OPERR exception stk frame   #
1131#       If OVFL exception (opclass 3):                                  #
1132#       - The system stack is changed to an OVFL exception stk frame    #
1133#       If UNFL exception (opclass 3):                                  #
1134#       - The system stack is changed to an UNFL exception stack frame  #
1135#       If Trace exception enabled:                                     #
1136#       - The system stack is changed to a Trace exception stack frame  #
1137#       Else: (normal case)                                             #
1138#       - Correct result has been stored as appropriate                 #
1139#                                                                       #
1140# ALGORITHM *********************************************************** #
1141#       Two main instruction types can enter here: (1) DENORM or UNNORM #
1142# unimplemented data types. These can be either opclass 0,2 or 3        #
1143# instructions, and (2) PACKED unimplemented data format instructions   #
1144# also of opclasses 0,2, or 3.                                          #
1145#       For UNNORM/DENORM opclass 0 and 2, the handler fetches the src  #
1146# operand from the fsave state frame and the dst operand (if dyadic)    #
1147# from the FP register file. The instruction is then emulated by        #
1148# choosing an emulation routine from a table of routines indexed by     #
1149# instruction type. Once the instruction has been emulated and result   #
1150# saved, then we check to see if any enabled exceptions resulted from   #
1151# instruction emulation. If none, then we exit through the "callout"    #
1152# _fpsp_done(). If there is an enabled FP exception, then we insert     #
1153# this exception into the FPU in the fsave state frame and then exit    #
1154# through _fpsp_done().                                                 #
1155#       PACKED opclass 0 and 2 is similar in how the instruction is     #
1156# emulated and exceptions handled. The differences occur in how the     #
1157# handler loads the packed op (by calling get_packed() routine) and     #
1158# by the fact that a Trace exception could be pending for PACKED ops.   #
1159# If a Trace exception is pending, then the current exception stack     #
1160# frame is changed to a Trace exception stack frame and an exit is      #
1161# made through _real_trace().                                           #
1162#       For UNNORM/DENORM opclass 3, the actual move out to memory is   #
1163# performed by calling the routine fout(). If no exception should occur #
1164# as the result of emulation, then an exit either occurs through        #
1165# _fpsp_done() or through _real_trace() if a Trace exception is pending #
1166# (a Trace stack frame must be created here, too). If an FP exception   #
1167# should occur, then we must create an exception stack frame of that    #
1168# type and jump to either _real_snan(), _real_operr(), _real_inex(),    #
1169# _real_unfl(), or _real_ovfl() as appropriate. PACKED opclass 3        #
1170# emulation is performed in a similar manner.                           #
1171#                                                                       #
1172#########################################################################
1173
1174#
1175# (1) DENORM and UNNORM (unimplemented) data types:
1176#
1177#                               post-instruction
1178#                               *****************
1179#                               *      EA       *
1180#        pre-instruction        *               *
1181#       *****************       *****************
1182#       * 0x0 *  0x0dc  *       * 0x3 *  0x0dc  *
1183#       *****************       *****************
1184#       *     Next      *       *     Next      *
1185#       *      PC       *       *      PC       *
1186#       *****************       *****************
1187#       *      SR       *       *      SR       *
1188#       *****************       *****************
1189#
1190# (2) PACKED format (unsupported) opclasses two and three:
1191#       *****************
1192#       *      EA       *
1193#       *               *
1194#       *****************
1195#       * 0x2 *  0x0dc  *
1196#       *****************
1197#       *     Next      *
1198#       *      PC       *
1199#       *****************
1200#       *      SR       *
1201#       *****************
1202#
1203        global          _fpsp_unsupp
1204_fpsp_unsupp:
1205
1206        link.w          %a6,&-LOCAL_SIZE        # init stack frame
1207
1208        fsave           FP_SRC(%a6)             # save fp state
1209
1210        movm.l          &0x0303,EXC_DREGS(%a6)  # save d0-d1/a0-a1
1211        fmovm.l         %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
1212        fmovm.x         &0xc0,EXC_FPREGS(%a6)   # save fp0-fp1 on stack
1213
1214        btst            &0x5,EXC_SR(%a6)        # user or supervisor mode?
1215        bne.b           fu_s
1216fu_u:
1217        mov.l           %usp,%a0                # fetch user stack pointer
1218        mov.l           %a0,EXC_A7(%a6)         # save on stack
1219        bra.b           fu_cont
1220# if the exception is an opclass zero or two unimplemented data type
1221# exception, then the a7' calculated here is wrong since it doesn't
1222# stack an ea. however, we don't need an a7' for this case anyways.
1223fu_s:
1224        lea             0x4+EXC_EA(%a6),%a0     # load old a7'
1225        mov.l           %a0,EXC_A7(%a6)         # save on stack
1226
1227fu_cont:
1228
1229# the FPIAR holds the "current PC" of the faulting instruction
1230# the FPIAR should be set correctly for ALL exceptions passing through
1231# this point.
1232        mov.l           USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
1233        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
1234        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
1235        bsr.l           _imem_read_long         # fetch the instruction words
1236        mov.l           %d0,EXC_OPWORD(%a6)     # store OPWORD and EXTWORD
1237
1238############################
1239
1240        clr.b           SPCOND_FLG(%a6)         # clear special condition flag
1241
1242# Separate opclass three (fpn-to-mem) ops since they have a different
1243# stack frame and protocol.
1244        btst            &0x5,EXC_CMDREG(%a6)    # is it an fmove out?
1245        bne.w           fu_out                  # yes
1246
1247# Separate packed opclass two instructions.
1248        bfextu          EXC_CMDREG(%a6){&0:&6},%d0
1249        cmpi.b          %d0,&0x13
1250        beq.w           fu_in_pack
1251
1252
1253# I'm not sure at this point what FPSR bits are valid for this instruction.
1254# so, since the emulation routines re-create them anyways, zero exception field
1255        andi.l          &0x00ff00ff,USER_FPSR(%a6) # zero exception field
1256
1257        fmov.l          &0x0,%fpcr              # zero current control regs
1258        fmov.l          &0x0,%fpsr
1259
1260# Opclass two w/ memory-to-fpn operation will have an incorrect extended
1261# precision format if the src format was single or double and the
1262# source data type was an INF, NAN, DENORM, or UNNORM
1263        lea             FP_SRC(%a6),%a0         # pass ptr to input
1264        bsr.l           fix_skewed_ops
1265
1266# we don't know whether the src operand or the dst operand (or both) is the
1267# UNNORM or DENORM. call the function that tags the operand type. if the
1268# input is an UNNORM, then convert it to a NORM, DENORM, or ZERO.
1269        lea             FP_SRC(%a6),%a0         # pass: ptr to src op
1270        bsr.l           set_tag_x               # tag the operand type
1271        cmpi.b          %d0,&UNNORM             # is operand an UNNORM?
1272        bne.b           fu_op2                  # no
1273        bsr.l           unnorm_fix              # yes; convert to NORM,DENORM,or ZERO
1274
# fu_op2: save the source operand tag and, for dyadic operations, fetch and
# tag the destination operand.
#   in : d0 = source optype tag as left by set_tag_x/unnorm_fix
#   out: STAG(a6) = source tag; dyadic only: DTAG(a6) = destination tag
# Monadic operations branch to fu_extract; dyadic ones fall through into it.
1275fu_op2:
1276        mov.b           %d0,STAG(%a6)           # save src optype tag
1277
1278        bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1279
1280# bit five of the fp extension word separates the monadic and dyadic operations
1281# at this point
1282        btst            &0x5,1+EXC_CMDREG(%a6)  # is operation monadic or dyadic?
1283        beq.b           fu_extract              # monadic
1284        cmpi.b          1+EXC_CMDREG(%a6),&0x3a # is operation an ftst?
1285        beq.b           fu_extract              # yes, so it's monadic, too
1286
# d0 still holds the 3-bit destination register field extracted above;
# presumably load_fpn2 takes it as the register number -- confirm in load_fpn2.
1287        bsr.l           load_fpn2               # load dst into FP_DST
1288
1289        lea             FP_DST(%a6),%a0         # pass: ptr to dst op
1290        bsr.l           set_tag_x               # tag the operand type
1291        cmpi.b          %d0,&UNNORM             # is operand an UNNORM?
1292        bne.b           fu_op2_done             # no
1293        bsr.l           unnorm_fix              # yes; convert to NORM,DENORM,or ZERO
1294fu_op2_done:
1295        mov.b           %d0,DTAG(%a6)           # save dst optype tag
1296
# fu_extract: set up the arguments and dispatch to the emulation routine
# selected by the 7-bit extension field of the command word.
#   d0 = rounding mode/precision byte from FPCR_MODE, zero-extended
#   d1 = extension field, used as a longword index into tbl_unsupp
#   a0 = ptr to FP_SRC, a1 = ptr to FP_DST
# The fetched table entry is added to the address of tbl_unsupp itself by
# the jsr, i.e. entries are offsets relative to the table base.
1297fu_extract:
1298        clr.l           %d0
1299        mov.b           FPCR_MODE(%a6),%d0      # fetch rnd mode/prec
1300
1301        bfextu          1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
1302
1303        lea             FP_SRC(%a6),%a0
1304        lea             FP_DST(%a6),%a1
1305
1306        mov.l           (tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine offset
1307        jsr             (tbl_unsupp.l,%pc,%d1.l*1)
1308
1309#
1310# Exceptions in order of precedence:
1311#       BSUN    : none
1312#       SNAN    : all dyadic ops
1313#       OPERR   : fsqrt(-NORM)
1314#       OVFL    : all except ftst,fcmp
1315#       UNFL    : all except ftst,fcmp
1316#       DZ      : fdiv
1317#       INEX2   : all except ftst,fcmp
1318#       INEX1   : none (packed doesn't go through here)
1319#
1320
1321# we determine the highest priority exception(if any) set by the
1322# emulation routine that has also been enabled by the user.
# the mov.b sets the condition codes: a non-zero enable byte goes to
# fu_in_ena with that byte in d0, otherwise execution falls into fu_in_cont.
1323        mov.b           FPCR_ENABLE(%a6),%d0    # fetch exceptions enabled
1324        bne.b           fu_in_ena               # some are enabled
1325
# fu_in_cont: normal completion path (no enabled exception to report).
# Stores the result through store_fpreg unless extension bits 3-5 are all
# set (fcmp/ftst), then falls into fu_in_exit, which restores fp0/fp1, the
# fp control registers and d0-d1/a0-a1, unlinks the frame and leaves
# through _fpsp_done.
1326fu_in_cont:
1327# fcmp and ftst do not store any result.
1328        mov.b           1+EXC_CMDREG(%a6),%d0   # fetch extension
1329        andi.b          &0x38,%d0               # extract bits 3-5
1330        cmpi.b          %d0,&0x38               # is instr fcmp or ftst?
1331        beq.b           fu_in_exit              # yes
1332
1333        bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1334        bsr.l           store_fpreg             # store the result
1335
1336fu_in_exit:
1337
1338        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1339        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1340        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1341
1342        unlk            %a6
1343
1344        bra.l           _fpsp_done
1345
# fu_in_ena: at least one exception is enabled.
#   in : d0 = FPCR_ENABLE byte
# The enable byte is masked with the exceptions actually raised
# (FPSR_EXCEPT); bfffo then scans bits 24-31 of d0 (the low byte) for the
# first set bit. If one is found, d0 holds its bit-field offset and control
# goes to fu_in_exc; if none is found the Z flag is set and execution falls
# through to the disabled-overflow check below.
1346fu_in_ena:
1347        and.b           FPSR_EXCEPT(%a6),%d0    # keep only ones enabled
1348        bfffo           %d0{&24:&8},%d0         # find highest priority exception
1349        bne.b           fu_in_exc               # there is at least one set
1350
1351#
1352# No exceptions occurred that were also enabled. Now:
1353#
1354#       if (OVFL && ovfl_disabled && inexact_enabled) {
1355#           branch to _real_inex() (even if the result was exact!);
1356#       } else {
1357#           save the result in the proper fp reg (unless the op is fcmp or ftst);
1358#           return;
1359#       }
1360#
# NOTE: this code does not branch to _real_inex() itself; fu_in_exc_ovfl
# selects tbl_except index 3 (fsave status 0xe005) and exits through
# fu_in_exc_exit.
1361        btst            &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1362        beq.b           fu_in_cont              # no
1363
1364fu_in_ovflchk:
1365        btst            &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1366        beq.b           fu_in_cont              # no
1367        bra.w           fu_in_exc_ovfl          # go insert overflow frame
1368
1369#
1370# An exception occurred and that exception was enabled:
1371#
1372#       shift enabled exception field into lo byte of d0;
1373#       if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
1374#           ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
1375#               /*
1376#                * this is the case where we must call _real_inex() now or else
1377#                * there will be no other way to pass it the exceptional operand
1378#                */
1379#               call _real_inex();
1380#       } else {
1381#               restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
1382#       }
1383#
# in : d0 = bfffo result (24-31) identifying the highest-priority exception
#      that is both enabled and set.
# out: d0 = index into tbl_except. An index of 6 (INEX2) is replaced by
#      4 (fu_in_exc_unfl) or 3 (fu_in_exc_ovfl) when underflow or overflow
#      is also flagged in FPSR_EXCEPT; underflow is checked first.
1384fu_in_exc:
1385        subi.l          &24,%d0                 # fix offset to be 0-7
1386        cmpi.b          %d0,&0x6                # is exception INEX? (6)
1387        bne.b           fu_in_exc_exit          # no
1388
1389# the enabled exception was inexact
1390        btst            &unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
1391        bne.w           fu_in_exc_unfl          # yes
1392        btst            &ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
1393        bne.w           fu_in_exc_ovfl          # yes
1394
1395# here, we insert the correct fsave status value into the fsave frame for the
1396# corresponding exception. the operand in the fsave frame should be the original
1397# src operand.
#   in : d0 = index (0-7) into tbl_except
# d0 is pushed around the funimp_skew call so the index survives it. The
# selected status word is written at offset 2 of the frame image in FP_SRC,
# the saved registers are restored, and the frame is handed to the FPU with
# frestore before leaving through _fpsp_done.
1398fu_in_exc_exit:
1399        mov.l           %d0,-(%sp)              # save d0
1400        bsr.l           funimp_skew             # skew sgl or dbl inputs
1401        mov.l           (%sp)+,%d0              # restore d0
1402
1403        mov.w           (tbl_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # create exc status
1404
1405        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1406        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1407        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1408
1409        frestore        FP_SRC(%a6)             # restore src op
1410
1411        unlk            %a6
1412
1413        bra.l           _fpsp_done
1414
# tbl_except: fsave status words indexed by exception number (0-7), read by
# fu_in_exc_exit. The index order is the same one tbl_fu_out documents:
# BSUN, SNAN, OPERR, OVFL, UNFL, DZ, INEX2, INEX1.
1415tbl_except:
1416        short           0xe000,0xe006,0xe004,0xe005
1417        short           0xe003,0xe002,0xe001,0xe001
1418
# fu_in_exc_unfl / fu_in_exc_ovfl: override the exception index in d0 so
# that fu_in_exc_exit picks the underflow (4) or overflow (3) entry of
# tbl_except. Only the low word of d0 is written; the table lookup indexes
# with d0.w.
1419fu_in_exc_unfl:
1420        mov.w           &0x4,%d0
1421        bra.b           fu_in_exc_exit
1422fu_in_exc_ovfl:
1423        mov.w           &0x03,%d0
1424        bra.b           fu_in_exc_exit
1425
1426# If the input operand to this operation was opclass two and a single
1427# or double precision denorm, inf, or nan, the operand needs to be
1428# "corrected" in order to have the proper equivalent extended precision
1429# number.
#
#   in : a0 = ptr to the operand (LOCAL_EX/LOCAL_HI/LOCAL_LO layout)
#        EXC_CMDREG(a6) = command word; its top six bits select the case
#   out: operand at (a0) rewritten in place when a skewed value is found
#   clobbers: d0 (plus whatever norm uses -- see norm)
#
# The 6-bit field is opclass (3 bits) followed by source format (3 bits):
# 0x11 = opclass 2 with format 1 (sgl), 0x15 = opclass 2 with format 5 (dbl).
# Every other combination returns without touching the operand.
1430        global          fix_skewed_ops
1431fix_skewed_ops:
1432        bfextu          EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt
1433        cmpi.b          %d0,&0x11               # is class = 2 & fmt = sgl?
1434        beq.b           fso_sgl                 # yes
1435        cmpi.b          %d0,&0x15               # is class = 2 & fmt = dbl?
1436        beq.b           fso_dbl                 # yes
1437        rts                                     # no
1438
# single precision: 0x3f80 (= 0x3fff - 0x7f) marks a skewed zero/denorm and
# 0x407f (= 0x3f80 + 0xff) marks a skewed INF/NAN.
1439fso_sgl:
1440        mov.w           LOCAL_EX(%a0),%d0       # fetch src exponent
1441        andi.w          &0x7fff,%d0             # strip sign
1442        cmpi.w          %d0,&0x3f80             # is |exp| == $3f80?
1443        beq.b           fso_sgl_dnrm_zero       # yes
1444        cmpi.w          %d0,&0x407f             # no; is |exp| == $407f?
1445        beq.b           fso_infnan              # yes
1446        rts                                     # no
1447
# only LOCAL_HI is examined here; LOCAL_LO is not tested on the sgl path.
1448fso_sgl_dnrm_zero:
1449        andi.l          &0x7fffffff,LOCAL_HI(%a0) # clear j-bit
1450        beq.b           fso_zero                # it's a skewed zero
1451fso_sgl_dnrm:
1452# here, we count on norm not to alter a0...
# norm is expected to return the shift amount in d0 (see norm); the new
# exponent is 0x3f81 minus that amount, merged under the preserved sign bit.
1453        bsr.l           norm                    # normalize mantissa
1454        neg.w           %d0                     # -shft amt
1455        addi.w          &0x3f81,%d0             # adjust new exponent
1456        andi.w          &0x8000,LOCAL_EX(%a0)   # clear old exponent
1457        or.w            %d0,LOCAL_EX(%a0)       # insert new exponent
1458        rts
1459
# shared by the sgl and dbl paths: keep the sign, zero the exponent.
1460fso_zero:
1461        andi.w          &0x8000,LOCAL_EX(%a0)   # clear bogus exponent
1462        rts
1463
# shared by the sgl and dbl paths: clear the top mantissa bit (byte access to
# the first byte of LOCAL_HI) and force the exponent to 0x7fff, keeping the
# sign bit.
1464fso_infnan:
1465        andi.b          &0x7f,LOCAL_HI(%a0)     # clear j-bit
1466        ori.w           &0x7fff,LOCAL_EX(%a0)   # make exponent = $7fff
1467        rts
1468
# double precision: 0x3c00 (= 0x3fff - 0x3ff) marks a skewed zero/denorm and
# 0x43ff (= 0x3c00 + 0x7ff) marks a skewed INF/NAN.
1469fso_dbl:
1470        mov.w           LOCAL_EX(%a0),%d0       # fetch src exponent
1471        andi.w          &0x7fff,%d0             # strip sign
1472        cmpi.w          %d0,&0x3c00             # is |exp| == $3c00?
1473        beq.b           fso_dbl_dnrm_zero       # yes
1474        cmpi.w          %d0,&0x43ff             # no; is |exp| == $43ff?
1475        beq.b           fso_infnan              # yes
1476        rts                                     # no
1477
# a dbl mantissa spans both longwords, so LOCAL_LO must also be zero for
# the operand to count as a zero.
1478fso_dbl_dnrm_zero:
1479        andi.l          &0x7fffffff,LOCAL_HI(%a0) # clear j-bit
1480        bne.b           fso_dbl_dnrm            # it's a skewed denorm
1481        tst.l           LOCAL_LO(%a0)           # is it a zero?
1482        beq.b           fso_zero                # yes
1483fso_dbl_dnrm:
1484# here, we count on norm not to alter a0...
1485        bsr.l           norm                    # normalize mantissa
1486        neg.w           %d0                     # -shft amt
1487        addi.w          &0x3c01,%d0             # adjust new exponent
1488        andi.w          &0x8000,LOCAL_EX(%a0)   # clear old exponent
1489        or.w            %d0,LOCAL_EX(%a0)       # insert new exponent
1490        rts
1491
1492#################################################################
1493
1494# fmove out took an unimplemented data type exception.
1495# the src operand is in FP_SRC. Call _fout() to write out the result and
1496# to determine which exceptions, if any, to take.
#
# Flow: packed destinations (format field 3 or 7) are diverted to
# fu_out_pack. Otherwise the source is tagged (DENORM when the exponent is
# zero, else whatever unnorm_fix returns in d0), fout is called with
# d0 = rounding mode/precision and a0 = ptr to FP_SRC, and the enabled
# exceptions are checked afterwards.
1497fu_out:
1498
1499# Separate packed move outs from the UNNORM and DENORM move outs.
1500        bfextu          EXC_CMDREG(%a6){&3:&3},%d0
1501        cmpi.b          %d0,&0x3
1502        beq.w           fu_out_pack
1503        cmpi.b          %d0,&0x7
1504        beq.w           fu_out_pack
1505
1506
1507# I'm not sure at this point what FPSR bits are valid for this instruction.
1508# so, since the emulation routines re-create them anyways, zero exception field.
1509# fmove out doesn't affect ccodes.
# (mask 0xffff00ff clears only bits 8-15; the opclass-two path uses
# 0x00ff00ff, which also clears the top byte.)
1510        and.l           &0xffff00ff,USER_FPSR(%a6) # zero exception field
1511
1512        fmov.l          &0x0,%fpcr              # zero current control regs
1513        fmov.l          &0x0,%fpsr
1514
1515# the src can ONLY be a DENORM or an UNNORM! so, don't make any big subroutine
1516# call here. just figure out what it is...
1517        mov.w           FP_SRC_EX(%a6),%d0      # get exponent
1518        andi.w          &0x7fff,%d0             # strip sign
1519        beq.b           fu_out_denorm           # it's a DENORM
1520
1521        lea             FP_SRC(%a6),%a0
1522        bsr.l           unnorm_fix              # yes; fix it
1523
1524        mov.b           %d0,STAG(%a6)
1525
1526        bra.b           fu_out_cont
1527fu_out_denorm:
1528        mov.b           &DENORM,STAG(%a6)
1529fu_out_cont:
1530
1531        clr.l           %d0
1532        mov.b           FPCR_MODE(%a6),%d0      # fetch rnd mode/prec
1533
1534        lea             FP_SRC(%a6),%a0         # pass ptr to src operand
1535
# keep a copy of the caller's a6 (the longword the link frame holds at (a6))
# in EXC_A6; fu_out_done and fu_unfl copy it back after fout returns.
1536        mov.l           (%a6),EXC_A6(%a6)       # in case a6 changes
1537        bsr.l           fout                    # call fmove out routine
1538
1539# Exceptions in order of precedence:
1540#       BSUN    : none
1541#       SNAN    : none
1542#       OPERR   : fmove.{b,w,l} out of large UNNORM
1543#       OVFL    : fmove.{s,d}
1544#       UNFL    : fmove.{s,d,x}
1545#       DZ      : none
1546#       INEX2   : all
1547#       INEX1   : none (packed doesn't travel through here)
1548
1549# determine the highest priority exception(if any) set by the
1550# emulation routine that has also been enabled by the user.
1551        mov.b           FPCR_ENABLE(%a6),%d0    # fetch exceptions enabled
1552        bne.w           fu_out_ena              # some are enabled
1553
# fu_out_done: normal completion of an fmove out. Writes the saved a6 copy
# back into the link frame, updates the user stack pointer when the
# exception came from user mode, restores the saved registers, unlinks, and
# exits through _fpsp_done, or through fu_out_trace when bit 7 of the
# first byte of the stacked SR (the trace test) is set.
1554fu_out_done:
1555
1556        mov.l           EXC_A6(%a6),(%a6)       # in case a6 changed
1557
1558# on extended precision opclass three instructions using pre-decrement or
1559# post-increment addressing mode, the address register is not updated. if the
1560# address register was the stack pointer used from user mode, then let's update
1561# it here. if it was used from supervisor mode, then we have to handle this
1562# as a special case.
1563        btst            &0x5,EXC_SR(%a6)        # user or supervisor?
1564        bne.b           fu_out_done_s           # supervisor
1565
1566        mov.l           EXC_A7(%a6),%a0         # restore a7
1567        mov.l           %a0,%usp
1568
1569fu_out_done_cont:
1570        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1571        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1572        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1573
1574        unlk            %a6
1575
1576        btst            &0x7,(%sp)              # is trace on?
1577        bne.b           fu_out_trace            # yes
1578
1579        bra.l           _fpsp_done
1580
1581# is the ea mode pre-decrement of the stack pointer from supervisor mode?
1582# ("fmov.x fpm,-(a7)") if so, the exception frame is moved 0xc bytes lower
# on the stack and the result is written where the frame used to start.
# any other supervisor-mode case rejoins the common exit at fu_out_done_cont.
1583fu_out_done_s:
1584        cmpi.b          SPCOND_FLG(%a6),&mda7_flg
1585        bne.b           fu_out_done_cont
1586
1587# the extended precision result is still in fp0. but, we need to save it
1588# somewhere on the stack until we can copy it to its final resting place.
1589# here, we're counting on the top of the stack to be the old place-holders
1590# for fp0/fp1 which have already been restored. that way, we can write
1591# over those destinations with the shifted stack frame.
1592        fmovm.x         &0x80,FP_SRC(%a6)       # put answer on stack
1593
1594        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1595        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1596        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1597
# a6 is reloaded by hand instead of with unlk, so sp still points at the
# bottom of the local area; every offset below is therefore sp-relative
# with LOCAL_SIZE added.
1598        mov.l           (%a6),%a6               # restore frame pointer
1599
# move the two longwords at EXC_SR and 2+EXC_PC down by 0xc bytes. the
# <ea> longword is not copied here (fu_unfl_s, by contrast, copies it).
1600        mov.l           LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
1601        mov.l           LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
1602
1603# now, copy the result to the proper place on the stack
1604        mov.l           LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
1605        mov.l           LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
1606        mov.l           LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
1607
# drop the local area; sp must end up on the relocated frame, since the
# trace test below reads (sp) as the stacked SR.
1608        add.l           &LOCAL_SIZE-0x8,%sp
1609
1610        btst            &0x7,(%sp)              # is trace on?
1611        bne.b           fu_out_trace            # yes
1612
1613        bra.l           _fpsp_done
1614
# fu_out_ena: some exception is enabled for the fmove out.
#   in : d0 = FPCR_ENABLE byte
# Masks the enables with FPSR_EXCEPT and lets bfffo pick the first set bit
# of the low byte. If one is found, fu_out_exc dispatches on it. Otherwise
# the only remaining special case is a disabled overflow with inexact
# enabled, which is routed to fu_inex; everything else finishes normally.
1615fu_out_ena:
1616        and.b           FPSR_EXCEPT(%a6),%d0    # keep only ones enabled
1617        bfffo           %d0{&24:&8},%d0         # find highest priority exception
1618        bne.b           fu_out_exc              # there is at least one set
1619
1620# no exceptions were set.
1621# if a disabled overflow occurred and inexact was enabled but the result
1622# was exact, then a branch to _real_inex() is made.
1623        btst            &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1624        beq.w           fu_out_done             # no
1625
1626fu_out_ovflchk:
1627        btst            &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1628        beq.w           fu_out_done             # no
1629        bra.w           fu_inex                 # yes
1630
1631#
1632# The fp move out that took the "Unimplemented Data Type" exception was
1633# being traced. Since the stack frames are similar, get the "current" PC
1634# from FPIAR and put it in the trace stack frame then jump to _real_trace().
1635#
1636#                 UNSUPP FRAME             TRACE FRAME
1637#               *****************       *****************
1638#               *      EA       *       *    Current    *
1639#               *               *       *      PC       *
1640#               *****************       *****************
1641#               * 0x3 *  0x0dc  *       * 0x2 *  0x024  *
1642#               *****************       *****************
1643#               *     Next      *       *     Next      *
1644#               *      PC       *       *      PC       *
1645#               *****************       *****************
1646#               *      SR       *       *      SR       *
1647#               *****************       *****************
1648#
# entered after the local frame has been removed, so sp points at the
# exception frame: the word at 0x6(sp) is the format/vector-offset word and
# the longword at 0x8(sp) is the EA slot, which becomes the "Current PC".
1649fu_out_trace:
1650        mov.w           &0x2024,0x6(%sp)        # format 0x2, vector offset 0x024
1651        fmov.l          %fpiar,0x8(%sp)         # "Current PC" = FPIAR
1652        bra.l           _real_trace
1653
1654# an exception occurred and that exception was enabled.
#   in : d0 = bfffo result (24-31) for the highest-priority enabled exception
# d0 is reduced to a table index, the 16-bit entry is fetched from
# tbl_fu_out, and the jmp adds it to the table address, so each entry is a
# handler offset relative to tbl_fu_out.
1655fu_out_exc:
1656        subi.l          &24,%d0                 # fix offset to be 0-7
1657
1658# we don't mess with the existing fsave frame. just re-insert it and
1659# jump to the "_real_{}()" handler...
1660        mov.w           (tbl_fu_out.b,%pc,%d0.w*2),%d0
1661        jmp             (tbl_fu_out.b,%pc,%d0.w*1)
1662
# the swbeg operand matches the eight entries that follow. entries marked
# "can't happen" hold offset 0, i.e. they point back at the table itself.
1663        swbeg           &0x8
1664tbl_fu_out:
1665        short           tbl_fu_out      - tbl_fu_out    # BSUN can't happen
1666        short           tbl_fu_out      - tbl_fu_out    # SNAN can't happen
1667        short           fu_operr        - tbl_fu_out    # OPERR
1668        short           fu_ovfl         - tbl_fu_out    # OVFL
1669        short           fu_unfl         - tbl_fu_out    # UNFL
1670        short           tbl_fu_out      - tbl_fu_out    # DZ can't happen
1671        short           fu_inex         - tbl_fu_out    # INEX2
1672        short           tbl_fu_out      - tbl_fu_out    # INEX1 won't make it here
1673
1674# for snan and operr, the src op is still in FP_SRC so just frestore it.
1675# (fu_ovfl, fu_unfl and fu_inex first store the EXOP from fp1 into FP_SRC.)
#
# fu_snan: restore the saved registers, rewrite the stacked format/vector
# word to 0x30d8, set fsave status 0xe006 in the FP_SRC frame image,
# frestore it and continue in _real_snan. tbl_fu_out has no entry for this
# label; any user lies outside this part of the file.
1676fu_snan:
1677        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1678        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1679        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1680
1681        mov.w           &0x30d8,EXC_VOFF(%a6)   # vector offset = 0xd8
1682        mov.w           &0xe006,2+FP_SRC(%a6)
1683
1684        frestore        FP_SRC(%a6)
1685
1686        unlk            %a6
1687
1688
1689        bra.l           _real_snan
1690
# fu_operr: enabled operand error on an fmove out. FP_SRC still holds the
# source operand, so only the frame bookkeeping changes: stacked
# format/vector word = 0x30d0, fsave status = 0xe004. The frame image is
# frestored and control passes to _real_operr.
1691fu_operr:
1692        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1693        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1694        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1695
1696        mov.w           &0x30d0,EXC_VOFF(%a6)   # vector offset = 0xd0
1697        mov.w           &0xe004,2+FP_SRC(%a6)
1698
1699        frestore        FP_SRC(%a6)
1700
1701        unlk            %a6
1702
1703
1704        bra.l           _real_operr
1705
# fu_ovfl: enabled overflow on an fmove out. The EXOP (register mask 0x40,
# i.e. the second register of the 0xc0 fp0/fp1 pair) is written into FP_SRC
# before fp0/fp1 are restored, because the restore would overwrite it. The
# frame is then marked with format/vector word 0x30d4 and fsave status
# 0xe005, frestored, and control passes to _real_ovfl.
1706fu_ovfl:
1707        fmovm.x         &0x40,FP_SRC(%a6)       # save EXOP to the stack
1708
1709        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1710        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1711        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1712
1713        mov.w           &0x30d4,EXC_VOFF(%a6)   # vector offset = 0xd4
1714        mov.w           &0xe005,2+FP_SRC(%a6)
1715
1716        frestore        FP_SRC(%a6)             # restore EXOP
1717
1718        unlk            %a6
1719
1720        bra.l           _real_ovfl
1721
1722# underflow can happen for extended precision. extended precision opclass
1723# three instruction exceptions don't update the stack pointer. so, if the
1724# exception occurred from user mode, then simply update a7 and exit normally.
1725# if the exception occurred from supervisor mode, check if the <ea> mode was
# -(a7) (see fu_unfl_s); only that case needs the exception frame shifted.
1726fu_unfl:
1727        mov.l           EXC_A6(%a6),(%a6)       # restore a6
1728
1729        btst            &0x5,EXC_SR(%a6)        # user or supervisor?
1730        bne.w           fu_unfl_s               # supervisor
1731
1732        mov.l           EXC_A7(%a6),%a0         # restore a7 whether we need
1733        mov.l           %a0,%usp                # to or not...
1734
# common underflow exit: store the EXOP into FP_SRC before fp0/fp1 are
# restored, mark the frame (format/vector word 0x30cc, fsave status 0xe003),
# frestore it and continue in _real_unfl.
1735fu_unfl_cont:
1736        fmovm.x         &0x40,FP_SRC(%a6)       # save EXOP to the stack
1737
1738        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1739        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1740        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1741
1742        mov.w           &0x30cc,EXC_VOFF(%a6)   # vector offset = 0xcc
1743        mov.w           &0xe003,2+FP_SRC(%a6)
1744
1745        frestore        FP_SRC(%a6)             # restore EXOP
1746
1747        unlk            %a6
1748
1749        bra.l           _real_unfl
1750
# fu_unfl_s: enabled underflow taken from supervisor mode. Unless the <ea>
# mode was -(sp), this rejoins fu_unfl_cont. For -(sp) the result (fp0) and
# the EXOP (fp1) are parked in FP_SRC and FP_DST, the EXOP frame is
# frestored from FP_DST, the exception frame (SR, PC and EA longwords) is
# moved 0xc bytes lower, and the result is copied to where the frame used to
# start before continuing in _real_unfl.
1751fu_unfl_s:
1752        cmpi.b          SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?
1753        bne.b           fu_unfl_cont
1754
1755# the extended precision result is still in fp0. but, we need to save it
1756# somewhere on the stack until we can copy it to its final resting place
1757# (where the exc frame is currently). make sure it's not at the top of the
1758# frame or it will get overwritten when the exc stack frame is shifted "down".
1759        fmovm.x         &0x80,FP_SRC(%a6)       # put answer on stack
1760        fmovm.x         &0x40,FP_DST(%a6)       # put EXOP on stack
1761
1762        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1763        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1764        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1765
# the fsave status goes into the FP_DST image (where the EXOP was stored),
# not FP_SRC, because FP_SRC holds the result that is copied out below.
1766        mov.w           &0x30cc,EXC_VOFF(%a6)   # vector offset = 0xcc
1767        mov.w           &0xe003,2+FP_DST(%a6)
1768
1769        frestore        FP_DST(%a6)             # restore EXOP
1770
# a6 is reloaded by hand instead of with unlk, so the offsets below are
# sp-relative with LOCAL_SIZE added.
1771        mov.l           (%a6),%a6               # restore frame pointer
1772
1773        mov.l           LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
1774        mov.l           LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
1775        mov.l           LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
1776
1777# now, copy the result to the proper place on the stack
1778        mov.l           LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
1779        mov.l           LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
1780        mov.l           LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
1781
1782        add.l           &LOCAL_SIZE-0x8,%sp     # drop the local area
1783
1784        bra.l           _real_unfl
1785
1786# fmove in and out enter here.
# fu_inex: hand an inexact exception to _real_inex. The EXOP is stored into
# FP_SRC before fp0/fp1 are restored; the frame is marked with format/vector
# word 0x30c4 and fsave status 0xe001 and then frestored. Within this part
# of the file it is reached from fu_out_ovflchk and through tbl_fu_out.
1787fu_inex:
1788        fmovm.x         &0x40,FP_SRC(%a6)       # save EXOP to the stack
1789
1790        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1791        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1792        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1793
1794        mov.w           &0x30c4,EXC_VOFF(%a6)   # vector offset = 0xc4
1795        mov.w           &0xe001,2+FP_SRC(%a6)
1796
1797        frestore        FP_SRC(%a6)             # restore EXOP
1798
1799        unlk            %a6
1800
1801
1802        bra.l           _real_inex
1803
1804#########################################################################
1805#########################################################################
# fu_in_pack: opclass-two instruction with a packed source operand (reached
# when the top six bits of the command word equal 0x13). Same flow as the
# non-packed path, except that the source is obtained with get_packed and
# its tag is stored without an UNNORM check; the destination operand of a
# dyadic operation is still checked and fixed.
1806fu_in_pack:
1807
1808
1809# I'm not sure at this point what FPSR bits are valid for this instruction.
1810# so, since the emulation routines re-create them anyways, zero exception field
1811        andi.l          &0x0ff00ff,USER_FPSR(%a6) # zero exception field
1812
1813        fmov.l          &0x0,%fpcr              # zero current control regs
1814        fmov.l          &0x0,%fpsr
1815
1816        bsr.l           get_packed              # fetch packed src operand
1817
1818        lea             FP_SRC(%a6),%a0         # pass ptr to src
1819        bsr.l           set_tag_x               # set src optype tag
1820
1821        mov.b           %d0,STAG(%a6)           # save src optype tag
1822
1823        bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1824
1825# bit five of the fp extension word separates the monadic and dyadic operations
1826# at this point
1827        btst            &0x5,1+EXC_CMDREG(%a6)  # is operation monadic or dyadic?
1828        beq.b           fu_extract_p            # monadic
1829        cmpi.b          1+EXC_CMDREG(%a6),&0x3a # is operation an ftst?
1830        beq.b           fu_extract_p            # yes, so it's monadic, too
1831
1832        bsr.l           load_fpn2               # load dst into FP_DST
1833
1834        lea             FP_DST(%a6),%a0         # pass: ptr to dst op
1835        bsr.l           set_tag_x               # tag the operand type
1836        cmpi.b          %d0,&UNNORM             # is operand an UNNORM?
1837        bne.b           fu_op2_done_p           # no
1838        bsr.l           unnorm_fix              # yes; convert to NORM,DENORM,or ZERO
1839fu_op2_done_p:
1840        mov.b           %d0,DTAG(%a6)           # save dst optype tag
1841
# fu_extract_p: packed-source counterpart of fu_extract. Sets
# d0 = rounding mode/precision byte (zero-extended), d1 = 7-bit extension
# field, a0 = ptr to FP_SRC, a1 = ptr to FP_DST, then calls the emulation
# routine through tbl_unsupp (entries are offsets from the table base).
1842fu_extract_p:
1843        clr.l           %d0
1844        mov.b           FPCR_MODE(%a6),%d0      # fetch rnd mode/prec
1845
1846        bfextu          1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
1847
1848        lea             FP_SRC(%a6),%a0
1849        lea             FP_DST(%a6),%a1
1850
1851        mov.l           (tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine offset
1852        jsr             (tbl_unsupp.l,%pc,%d1.l*1)
1853
1854#
1855# Exceptions in order of precedence:
1856#       BSUN    : none
1857#       SNAN    : all dyadic ops
1858#       OPERR   : fsqrt(-NORM)
1859#       OVFL    : all except ftst,fcmp
1860#       UNFL    : all except ftst,fcmp
1861#       DZ      : fdiv
1862#       INEX2   : all except ftst,fcmp
1863#       INEX1   : all
1864#
1865
1866# we determine the highest priority exception(if any) set by the
1867# emulation routine that has also been enabled by the user.
# a non-zero enable byte goes to fu_in_ena_p with that byte in d0;
# otherwise execution falls into fu_in_cont_p.
1868        mov.b           FPCR_ENABLE(%a6),%d0    # fetch exceptions enabled
1869        bne.w           fu_in_ena_p             # some are enabled
1870
# fu_in_cont_p: normal completion of a packed opclass-two instruction.
# Stores the result unless the opcode is fcmp/ftst, then (fu_in_exit_p)
# updates the user stack pointer for user-mode exceptions or defers to
# fu_in_exit_s_p for supervisor mode, restores the saved registers, unlinks
# and exits through _fpsp_done, or through fu_trace_p when the trace test on
# the stacked SR succeeds.
1871fu_in_cont_p:
1872# fcmp and ftst do not store any result.
1873        mov.b           1+EXC_CMDREG(%a6),%d0   # fetch extension
1874        andi.b          &0x38,%d0               # extract bits 3-5
1875        cmpi.b          %d0,&0x38               # is instr fcmp or ftst?
1876        beq.b           fu_in_exit_p            # yes
1877
1878        bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1879        bsr.l           store_fpreg             # store the result
1880
1881fu_in_exit_p:
1882
1883        btst            &0x5,EXC_SR(%a6)        # user or supervisor?
1884        bne.w           fu_in_exit_s_p          # supervisor
1885
1886        mov.l           EXC_A7(%a6),%a0         # update user a7
1887        mov.l           %a0,%usp
1888
1889fu_in_exit_cont_p:
1890        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1891        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1892        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1893
1894        unlk            %a6                     # unravel stack frame
1895
1896        btst            &0x7,(%sp)              # is trace on?
1897        bne.w           fu_trace_p              # yes
1898
1899        bra.l           _fpsp_done              # exit to os
1900
1901# the exception occurred in supervisor mode. check to see if the
1902# addressing mode was (a7)+. if so, we'll need to shift the
1903# stack frame "up".
# when the mia7 bit is clear this rejoins fu_in_exit_cont_p unchanged.
1904fu_in_exit_s_p:
1905        btst            &mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+
1906        beq.b           fu_in_exit_cont_p       # no
1907
1908        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1909        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1910        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1911
1912        unlk            %a6                     # unravel stack frame
1913
1914# shift the stack frame "up". we don't really care about the <ea> field.
# the first two longwords of the frame (offsets 0x0 and 0x4) are copied
# 0xc bytes higher, upper one first so the copies cannot clobber their
# source, and sp is advanced by 0xc.
1915        mov.l           0x4(%sp),0x10(%sp)
1916        mov.l           0x0(%sp),0xc(%sp)
1917        add.l           &0xc,%sp
1918
1919        btst            &0x7,(%sp)              # is trace on?
1920        bne.w           fu_trace_p              # yes
1921
1922        bra.l           _fpsp_done              # exit to os
1923
# fu_in_ena_p: packed-source counterpart of fu_in_ena.
#   in : d0 = FPCR_ENABLE byte
# Masks the enables with FPSR_EXCEPT; bfffo picks the first set bit of the
# low byte and fu_in_exc_p handles it. With no enabled exception pending,
# only a disabled overflow with inexact enabled is special.
1924fu_in_ena_p:
1925        and.b           FPSR_EXCEPT(%a6),%d0    # keep only ones enabled & set
1926        bfffo           %d0{&24:&8},%d0         # find highest priority exception
1927        bne.b           fu_in_exc_p             # at least one was set
1928
1929#
1930# No exceptions occurred that were also enabled. Now:
1931#
1932#       if (OVFL && ovfl_disabled && inexact_enabled) {
1933#           branch to _real_inex() (even if the result was exact!);
1934#       } else {
1935#           save the result in the proper fp reg (unless the op is fcmp or ftst);
1936#           return;
1937#       }
1938#
# NOTE: this code does not branch to _real_inex() itself; fu_in_exc_ovfl_p
# selects tbl_except_p index 3 (fsave status 0xe005) and exits through
# fu_in_exc_exit_p.
1939        btst            &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1940        beq.w           fu_in_cont_p            # no
1941
1942fu_in_ovflchk_p:
1943        btst            &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1944        beq.w           fu_in_cont_p            # no
1945        bra.w           fu_in_exc_ovfl_p        # do _real_inex() now
1946
1947#
1948# An exception occurred and that exception was enabled:
1949#
1950#       shift enabled exception field into lo byte of d0;
1951#       if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
1952#           ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
1953#               /*
1954#                * this is the case where we must call _real_inex() now or else
1955#                * there will be no other way to pass it the exceptional operand
1956#                */
1957#               call _real_inex();
1958#       } else {
1959#               restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
1960#       }
1961#
# in : d0 = bfffo result (24-31) for the highest-priority enabled exception
# out: d0 = index into tbl_except_p. Unlike the non-packed fu_in_exc, both
#      6 (INEX2) and 7 (INEX1) count as inexact here, hence the blt test;
#      either is replaced by 4 or 3 when underflow or overflow is also
#      flagged in FPSR_EXCEPT.
1962fu_in_exc_p:
1963        subi.l          &24,%d0                 # fix offset to be 0-7
1964        cmpi.b          %d0,&0x6                # is exception INEX? (6 or 7)
1965        blt.b           fu_in_exc_exit_p        # no
1966
1967# the enabled exception was inexact
1968        btst            &unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
1969        bne.w           fu_in_exc_unfl_p        # yes
1970        btst            &ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
1971        bne.w           fu_in_exc_ovfl_p        # yes
1972
1973# here, we insert the correct fsave status value into the fsave frame for the
1974# corresponding exception. the operand in the fsave frame should be the original
1975# src operand.
1976# as a reminder for future predicted pain and agony, we are passing in fsave the
1977# "non-skewed" operand for cases of sgl and dbl src INFs,NANs, and DENORMs.
1978# this is INCORRECT for enabled SNAN which would give to the user the skewed SNAN!!!
#
#   in : d0 = index (0-7) into tbl_except_p
# user mode: the user stack pointer is updated from EXC_A7 first.
# supervisor mode: fu_in_exc_exit_s_p decides whether the frame must be
# shifted; if not, it comes back to fu_in_exc_exit_cont_p.
1979fu_in_exc_exit_p:
1980        btst            &0x5,EXC_SR(%a6)        # user or supervisor?
1981        bne.w           fu_in_exc_exit_s_p      # supervisor
1982
1983        mov.l           EXC_A7(%a6),%a0         # update user a7
1984        mov.l           %a0,%usp
1985
1986fu_in_exc_exit_cont_p:
1987        mov.w           (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6) # create exc status
1988
1989        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1990        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1991        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1992
1993        frestore        FP_SRC(%a6)             # restore src op
1994
1995        unlk            %a6
1996
1997        btst            &0x7,(%sp)              # is trace enabled?
1998        bne.w           fu_trace_p              # yes
1999
2000        bra.l           _fpsp_done
2001
# tbl_except_p: eight 16-bit status words, indexed by %d0 (0-7) in the
# fu_in_exc_exit paths and written to 2+FP_SRC(%a6) before the frestore.
# The exception names below come from other code in this file: the same
# values are stored as "fsave status" for SNAN (0xe006), OPERR (0xe004)
# and INEX (0xe001), and the two stubs select 0xe005 / 0xe003.
2002tbl_except_p:
2003        short           0xe000,0xe006,0xe004,0xe005 # idx 0-3: (0), SNAN, OPERR, OVFL
2004        short           0xe003,0xe002,0xe001,0xe001 # idx 4-7: UNFL, (5), INEX, INEX
2005
# enabled INEX coincided with a disabled overflow: use index 3 (0xe005)
2006fu_in_exc_ovfl_p:
2007        mov.w           &0x3,%d0
2008        bra.w           fu_in_exc_exit_p
2009
# enabled INEX coincided with a disabled underflow: use index 4 (0xe003)
2010fu_in_exc_unfl_p:
2011        mov.w           &0x4,%d0
2012        bra.w           fu_in_exc_exit_p
2013
# fu_in_exc_exit_s_p: supervisor-mode variant of fu_in_exc_exit_p.
# If mia7_bit is clear in SPCOND_FLG the common exit is used. If it is set
# (presumably the <ea> was (a7)+ -- TODO confirm against where the flag is
# set) the exception frame is moved 12 bytes up the stack after the normal
# restore sequence, dropping the <ea> field.
2014fu_in_exc_exit_s_p:
2015        btst            &mia7_bit,SPCOND_FLG(%a6)
2016        beq.b           fu_in_exc_exit_cont_p
2017
2018        mov.w           (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6) # status word for index %d0 -> fsave frame
2019
2020        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
2021        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2022        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2023
2024        frestore        FP_SRC(%a6)             # restore src op
2025
2026        unlk            %a6                     # unravel stack frame
2027
2028# shift stack frame "up". who cares about <ea> field.
2029        mov.l           0x4(%sp),0x10(%sp)      # long at 0x4 -> 0x10 (copied first; higher address)
2030        mov.l           0x0(%sp),0xc(%sp)       # long at 0x0 -> 0xc
2031        add.l           &0xc,%sp                # sp now addresses the moved frame
2032
2033        btst            &0x7,(%sp)              # is trace on?
2034        bne.b           fu_trace_p              # yes
2035
2036        bra.l           _fpsp_done              # exit to os
2037
2038#
2039# The opclass two PACKED instruction that took an "Unimplemented Data Type"
2040# exception was being traced. Make the "current" PC the FPIAR and put it in the
2041# trace stack frame then jump to _real_trace().
2042#
2043#                 UNSUPP FRAME             TRACE FRAME
2044#               *****************       *****************
2045#               *      EA       *       *    Current    *
2046#               *               *       *      PC       *
2047#               *****************       *****************
2048#               * 0x2 * 0x0dc   *       * 0x2 *  0x024  *
2049#               *****************       *****************
2050#               *     Next      *       *     Next      *
2051#               *      PC       *       *      PC       *
2052#               *****************       *****************
2053#               *      SR       *       *      SR       *
2054#               *****************       *****************
# fu_trace_p: rewrite the frame on the stack into the TRACE FRAME layout
# drawn above, then hand off to _real_trace. Reached after unlk, so the
# offsets are relative to the exception frame at (%sp).
2055fu_trace_p:
2056        mov.w           &0x2024,0x6(%sp)        # format 0x2, vector offset 0x024
2057        fmov.l          %fpiar,0x8(%sp)         # "Current PC" field <- FPIAR
2058
2059        bra.l           _real_trace
2060
2061#########################################################
2062#########################################################
# fu_out_pack: emulate a packed-format move out.
# Steps: clear the FPSR exception field, load the source register picked by
# the 3-bit field in EXC_CMDREG, tag it (fixing UNNORMs), call fout, then
# see whether any exception enable bits are set.
2063fu_out_pack:
2064
2065
2066# I'm not sure at this point what FPSR bits are valid for this instruction.
2067# so, since the emulation routines re-create them anyways, zero exception field.
2068# fmove out doesn't affect ccodes.
2069        and.l           &0xffff00ff,USER_FPSR(%a6) # zero exception field
2070
2071        fmov.l          &0x0,%fpcr              # zero current control regs
2072        fmov.l          &0x0,%fpsr
2073
2074        bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # 3-bit register number
2075        bsr.l           load_fpn1               # presumably loads that reg into FP_SRC -- TODO confirm
2076
2077# unlike other opclass 3, unimplemented data type exceptions, packed must be
2078# able to detect all operand types.
2079        lea             FP_SRC(%a6),%a0
2080        bsr.l           set_tag_x               # tag the operand type
2081        cmpi.b          %d0,&UNNORM             # is operand an UNNORM?
2082        bne.b           fu_op2_p                # no
2083        bsr.l           unnorm_fix              # yes; convert to NORM,DENORM,or ZERO
2084
2085fu_op2_p:
2086        mov.b           %d0,STAG(%a6)           # save src optype tag
2087
2088        clr.l           %d0
2089        mov.b           FPCR_MODE(%a6),%d0      # fetch rnd mode/prec
2090
2091        lea             FP_SRC(%a6),%a0         # pass ptr to src operand
2092
2093        mov.l           (%a6),EXC_A6(%a6)       # in case a6 changes
2094        bsr.l           fout                    # call fmove out routine
2095
2096# Exceptions in order of precedence:
2097#       BSUN    : no
2098#       SNAN    : yes
2099#       OPERR   : if ((k_factor > +17) || (dec. exp exceeds 3 digits))
2100#       OVFL    : no
2101#       UNFL    : no
2102#       DZ      : no
2103#       INEX2   : yes
2104#       INEX1   : no
2105
2106# determine the highest priority exception(if any) set by the
2107# emulation routine that has also been enabled by the user.
2108        mov.b           FPCR_ENABLE(%a6),%d0    # fetch exceptions enabled
2109        bne.w           fu_out_ena_p            # some are enabled
# none enabled: fall through to fu_out_exit_p
2110
# fu_out_exit_p: normal exit for packed move out (no enabled exception).
# EXC_A6 is written back to the saved-a6 slot at (%a6) so the later unlk
# picks it up; fu_out_pack stored it there "in case a6 changes".
2111fu_out_exit_p:
2112        mov.l           EXC_A6(%a6),(%a6)       # restore a6
2113
2114        btst            &0x5,EXC_SR(%a6)        # user or supervisor?
2115        bne.b           fu_out_exit_s_p         # supervisor
2116
2117        mov.l           EXC_A7(%a6),%a0         # update user a7
2118        mov.l           %a0,%usp
2119
2120fu_out_exit_cont_p:
2121        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
2122        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2123        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2124
2125        unlk            %a6                     # unravel stack frame
2126
2127        btst            &0x7,(%sp)              # is trace on?
2128        bne.w           fu_trace_p              # yes
2129
2130        bra.l           _fpsp_done              # exit to os
2131
2132# the exception occurred in supervisor mode. check to see if the
2133# addressing mode was -(a7). if so, we'll need to shift the
2134# stack frame "down".
2135fu_out_exit_s_p:
2136        btst            &mda7_bit,SPCOND_FLG(%a6) # was ea mode -(a7)
2137        beq.b           fu_out_exit_cont_p      # no
2138
2139        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
2140        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2141        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2142
# no unlk here: %sp keeps pointing at the bottom of the local frame, so
# everything below is addressed as LOCAL_SIZE+<a6-relative offset>(%sp).
2143        mov.l           (%a6),%a6               # restore frame pointer
2144
# move the SR and PC fields of the exception frame 12 bytes lower
2145        mov.l           LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2146        mov.l           LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2147
2148# now, copy the result to the proper place on the stack
2149        mov.l           LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
2150        mov.l           LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
2151        mov.l           LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
2152
# NOTE(review): LOCAL_SIZE-0x8 equals LOCAL_SIZE+EXC_SR-0xc only if EXC_SR
# is 0x4; the equate is outside this view -- confirm.
2153        add.l           &LOCAL_SIZE-0x8,%sp
2154
2155        btst            &0x7,(%sp)              # is trace on?
2156        bne.w           fu_trace_p              # yes
2157
2158        bra.l           _fpsp_done              # exit to os
2159
# fu_out_ena_p: at least one exception enable bit is set.
# In:  %d0.b = FPCR_ENABLE byte.
# bfffo scans bits 24-31 of %d0 and leaves the offset of the first set bit
# in %d0; the beq below is taken when the masked byte was zero. Dispatch
# then compares the offset with 0x1a: below -> SNAN (fall through),
# equal -> OPERR, above -> INEX.
2160fu_out_ena_p:
2161        and.b           FPSR_EXCEPT(%a6),%d0    # keep only ones enabled
2162        bfffo           %d0{&24:&8},%d0         # find highest priority exception
2163        beq.w           fu_out_exit_p           # none both enabled and set
2164
2165        mov.l           EXC_A6(%a6),(%a6)       # restore a6
2166
2167# an exception occurred and that exception was enabled.
2168# the only exception possible on packed move out are INEX, OPERR, and SNAN.
2169fu_out_exc_p:
2170        cmpi.b          %d0,&0x1a
2171        bgt.w           fu_inex_p2
2172        beq.w           fu_operr_p
2173
# fu_snan_p: enabled SNAN on packed move out.
# User mode updates %usp and joins fu_snan. Supervisor mode joins fu_snan
# too unless SPCOND_FLG equals mda7_flg; in that case the frame is moved
# here and control goes to _real_snan.
2174fu_snan_p:
2175        btst            &0x5,EXC_SR(%a6)        # user or supervisor?
2176        bne.b           fu_snan_s_p             # supervisor
2177
2178        mov.l           EXC_A7(%a6),%a0         # update user a7
2179        mov.l           %a0,%usp
2180        bra.w           fu_snan
2181
2182fu_snan_s_p:
2183        cmpi.b          SPCOND_FLG(%a6),&mda7_flg
2184        bne.w           fu_snan
2185
2186# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2187# the strategy is to move the exception frame "down" 12 bytes. then, we
2188# can store the default result where the exception frame was.
2189        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
2190        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2191        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2192
2193        mov.w           &0x30d8,EXC_VOFF(%a6)   # vector offset = 0xd8
2194        mov.w           &0xe006,2+FP_SRC(%a6)   # set fsave status
2195
2196        frestore        FP_SRC(%a6)             # restore src operand
2197
2198        mov.l           (%a6),%a6               # restore frame pointer
2199
# %sp still points at the bottom of the local frame (no unlk was done)
2200        mov.l           LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2201        mov.l           LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2202        mov.l           LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2203
2204# now, we copy the default result to its proper location
2205        mov.l           LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2206        mov.l           LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2207        mov.l           LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2208
2209        add.l           &LOCAL_SIZE-0x8,%sp
2210
2211
2212        bra.l           _real_snan
2213
# fu_operr_p: enabled OPERR on packed move out.
# User mode updates %usp and joins fu_operr. Supervisor mode joins fu_operr
# too unless SPCOND_FLG equals mda7_flg; in that case the frame is moved
# here and control goes to _real_operr.
2214fu_operr_p:
2215        btst            &0x5,EXC_SR(%a6)        # user or supervisor?
2216        bne.w           fu_operr_p_s            # supervisor
2217
2218        mov.l           EXC_A7(%a6),%a0         # update user a7
2219        mov.l           %a0,%usp
2220        bra.w           fu_operr
2221
2222fu_operr_p_s:
2223        cmpi.b          SPCOND_FLG(%a6),&mda7_flg
2224        bne.w           fu_operr
2225
2226# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2227# the strategy is to move the exception frame "down" 12 bytes. then, we
2228# can store the default result where the exception frame was.
2229        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
2230        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2231        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2232
2233        mov.w           &0x30d0,EXC_VOFF(%a6)   # vector offset = 0xd0
2234        mov.w           &0xe004,2+FP_SRC(%a6)   # set fsave status
2235
2236        frestore        FP_SRC(%a6)             # restore src operand
2237
2238        mov.l           (%a6),%a6               # restore frame pointer
2239
# %sp still points at the bottom of the local frame (no unlk was done)
2240        mov.l           LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2241        mov.l           LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2242        mov.l           LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2243
2244# now, we copy the default result to its proper location
2245        mov.l           LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2246        mov.l           LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2247        mov.l           LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2248
2249        add.l           &LOCAL_SIZE-0x8,%sp
2250
2251
2252        bra.l           _real_operr
2253
# fu_inex_p2: enabled INEX on packed move out.
# User mode updates %usp and joins fu_inex. Supervisor mode joins fu_inex
# too unless SPCOND_FLG equals mda7_flg; in that case the frame is moved
# here and control goes to _real_inex.
2254fu_inex_p2:
2255        btst            &0x5,EXC_SR(%a6)        # user or supervisor?
2256        bne.w           fu_inex_s_p2            # supervisor
2257
2258        mov.l           EXC_A7(%a6),%a0         # update user a7
2259        mov.l           %a0,%usp
2260        bra.w           fu_inex
2261
2262fu_inex_s_p2:
2263        cmpi.b          SPCOND_FLG(%a6),&mda7_flg
2264        bne.w           fu_inex
2265
2266# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2267# the strategy is to move the exception frame "down" 12 bytes. then, we
2268# can store the default result where the exception frame was.
2269        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
2270        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2271        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2272
2273        mov.w           &0x30c4,EXC_VOFF(%a6)   # vector offset = 0xc4
2274        mov.w           &0xe001,2+FP_SRC(%a6)   # set fsave status
2275
2276        frestore        FP_SRC(%a6)             # restore src operand
2277
2278        mov.l           (%a6),%a6               # restore frame pointer
2279
# %sp still points at the bottom of the local frame (no unlk was done)
2280        mov.l           LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2281        mov.l           LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2282        mov.l           LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2283
2284# now, we copy the default result to its proper location
2285        mov.l           LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2286        mov.l           LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2287        mov.l           LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2288
2289        add.l           &LOCAL_SIZE-0x8,%sp
2290
2291
2292        bra.l           _real_inex
2293
2294#########################################################################
2295
2296#
2297# if we're stuffing a source operand back into an fsave frame then we
2298# have to make sure that for single or double source operands that the
2299# format stuffed is as weird as the hardware usually makes it.
2300#
# funimp_skew: rewrite FP_SRC in place for sgl/dbl sources with small exponents.
# In:   %a6 = local frame; EXC_EXTWORD and FP_SRC are read through it.
# Out:  FP_SRC_EX / FP_SRC_HI (and whatever dnrm_lp modifies) may change.
# Uses: %d0, %d1; the dbl path also loads %a0 and calls dnrm_lp.
# Any source specifier other than 1 (sgl) or 5 (dbl) returns untouched.
2301        global          funimp_skew
2302funimp_skew:
2303        bfextu          EXC_EXTWORD(%a6){&3:&3},%d0 # extract src specifier
2304        cmpi.b          %d0,&0x1                # was src sgl?
2305        beq.b           funimp_skew_sgl         # yes
2306        cmpi.b          %d0,&0x5                # was src dbl?
2307        beq.b           funimp_skew_dbl         # yes
2308        rts
2309
# sgl: only exponents in 1..0x3f80 are rewritten
2310funimp_skew_sgl:
2311        mov.w           FP_SRC_EX(%a6),%d0      # fetch DENORM exponent
2312        andi.w          &0x7fff,%d0             # strip sign
2313        beq.b           funimp_skew_sgl_not     # exponent is zero: leave as is
2314        cmpi.w          %d0,&0x3f80
2315        bgt.b           funimp_skew_sgl_not     # exponent > 0x3f80: leave as is
2316        neg.w           %d0                     # make exponent negative
2317        addi.w          &0x3f81,%d0             # find amt to shift
2318        mov.l           FP_SRC_HI(%a6),%d1      # fetch DENORM hi(man)
2319        lsr.l           %d0,%d1                 # shift it
2320        bset            &31,%d1                 # set j-bit
2321        mov.l           %d1,FP_SRC_HI(%a6)      # insert new hi(man)
2322        andi.w          &0x8000,FP_SRC_EX(%a6)  # clear old exponent
2323        ori.w           &0x3f80,FP_SRC_EX(%a6)  # insert new "skewed" exponent
2324funimp_skew_sgl_not:
2325        rts
2326
# dbl: only exponents in 1..0x3c00 are rewritten
2327funimp_skew_dbl:
2328        mov.w           FP_SRC_EX(%a6),%d0      # fetch DENORM exponent
2329        andi.w          &0x7fff,%d0             # strip sign
2330        beq.b           funimp_skew_dbl_not     # exponent is zero: leave as is
2331        cmpi.w          %d0,&0x3c00
2332        bgt.b           funimp_skew_dbl_not     # exponent > 0x3c00: leave as is
2333
2334        tst.b           FP_SRC_EX(%a6)          # make "internal format"
2335        smi.b           0x2+FP_SRC(%a6)         # byte at +2 = 0xff if negative, else 0x00
2336        mov.w           %d0,FP_SRC_EX(%a6)      # insert exponent with cleared sign
2337        clr.l           %d0                     # clear g,r,s
2338        lea             FP_SRC(%a6),%a0         # pass ptr to src op
2339        mov.w           &0x3c01,%d1             # pass denorm threshold
2340        bsr.l           dnrm_lp                 # denorm it
2341        mov.w           &0x3c00,%d0             # new exponent
2342        tst.b           0x2+FP_SRC(%a6)         # is sign set?
2343        beq.b           fss_dbl_denorm_done     # no
2344        bset            &15,%d0                 # set sign
2345fss_dbl_denorm_done:
2346        bset            &0x7,FP_SRC_HI(%a6)     # set j-bit
2347        mov.w           %d0,FP_SRC_EX(%a6)      # insert new exponent
2348funimp_skew_dbl_not:
2349        rts
2350
2351#########################################################################
# _mem_write2: write 12 bytes, or capture them in the local frame.
# In:   %a0 = pointer to the 12 source bytes, %a6 = local frame.
# If bit 5 of EXC_SR is clear (tested elsewhere in this file as "user"),
# control transfers to _dmem_write with the arguments unchanged.
# Otherwise the three longwords are copied to FP_DST_EX/HI/LO and %d1 is
# cleared before returning (presumably the "no error" status, as callers of
# _imem_read test %d1 for failure -- TODO confirm).
2352        global          _mem_write2
2353_mem_write2:
2354        btst            &0x5,EXC_SR(%a6)        # user or supervisor?
2355        beq.l           _dmem_write             # user: let _dmem_write handle it
2356        mov.l           0x0(%a0),FP_DST_EX(%a6)
2357        mov.l           0x4(%a0),FP_DST_HI(%a6)
2358        mov.l           0x8(%a0),FP_DST_LO(%a6)
2359        clr.l           %d1
2360        rts
2361
2362#########################################################################
2363# XDEF **************************************************************** #
2364#       _fpsp_effadd(): 060FPSP entry point for FP "Unimplemented       #
2365#                       effective address" exception.                   #
2366#                                                                       #
2367#       This handler should be the first code executed upon taking the  #
2368#       FP Unimplemented Effective Address exception in an operating    #
2369#       system.                                                         #
2370#                                                                       #
2371# XREF **************************************************************** #
2372#       _imem_read_long() - read instruction longword                   #
2373#       fix_skewed_ops() - adjust src operand in fsave frame            #
2374#       set_tag_x() - determine optype of src/dst operands              #
2375#       store_fpreg() - store opclass 0 or 2 result to FP regfile       #
2376#       unnorm_fix() - change UNNORM operands to NORM or ZERO           #
2377#       load_fpn2() - load dst operand from FP regfile                  #
2378#       tbl_unsupp - add of table of emulation routines for opclass 0,2 #
2379#       decbin() - convert packed data to FP binary data                #
2380#       _real_fpu_disabled() - "callout" for "FPU disabled" exception   #
2381#       _real_access() - "callout" for access error exception           #
2382#       _mem_read() - read extended immediate operand from memory       #
2383#       _fpsp_done() - "callout" for exit; work all done                #
2384#       _real_trace() - "callout" for Trace enabled exception           #
2385#       fmovm_dynamic() - emulate dynamic fmovm instruction             #
2386#       fmovm_ctrl() - emulate fmovm control instruction                #
2387#                                                                       #
2388# INPUT *************************************************************** #
2389#       - The system stack contains the "Unimplemented <ea>" stk frame  #
2390#                                                                       #
2391# OUTPUT ************************************************************** #
2392#       If access error:                                                #
2393#       - The system stack is changed to an access error stack frame    #
2394#       If FPU disabled:                                                #
2395#       - The system stack is changed to an FPU disabled stack frame    #
2396#       If Trace exception enabled:                                     #
2397#       - The system stack is changed to a Trace exception stack frame  #
2398#       Else: (normal case)                                             #
2399#       - None (correct result has been stored as appropriate)          #
2400#                                                                       #
2401# ALGORITHM *********************************************************** #
2402#       This exception handles 3 types of operations:                   #
2403# (1) FP Instructions using extended precision or packed immediate      #
2404#     addressing mode.                                                  #
2405# (2) The "fmovm.x" instruction w/ dynamic register specification.      #
2406# (3) The "fmovm.l" instruction w/ 2 or 3 control registers.            #
2407#                                                                       #
2408#       For immediate data operations, the data is read in w/ a         #
2409# _mem_read() "callout", converted to FP binary (if packed), and used   #
2410# as the source operand to the instruction specified by the instruction #
2411# word. If no FP exception should be reported as a result of the        #
2412# emulation, then the result is stored to the destination register and  #
2413# the handler exits through _fpsp_done(). If an enabled exc has been    #
2414# signalled as a result of emulation, then an fsave state frame         #
2415# corresponding to the FP exception type must be entered into the 060   #
2416# FPU before exiting. In either the enabled or disabled cases, we       #
2417# must also check if a Trace exception is pending, in which case, we    #
2418# must create a Trace exception stack frame from the current exception  #
2419# stack frame. If no Trace is pending, we simply exit through           #
2420# _fpsp_done().                                                         #
2421#       For "fmovm.x", call the routine fmovm_dynamic() which will      #
2422# decode and emulate the instruction. No FP exceptions can be pending   #
2423# as a result of this operation emulation. A Trace exception can be     #
2424# pending, though, which means the current stack frame must be changed  #
2425# to a Trace stack frame and an exit made through _real_trace().        #
2426# For the case of "fmovm.x Dn,-(a7)", where the offending instruction   #
2427# was executed from supervisor mode, this handler must store the FP     #
2428# register file values to the system stack by itself since              #
2429# fmovm_dynamic() can't handle this. A normal exit is made through      #
2430# _fpsp_done().                                                         #
2431#       For "fmovm.l", fmovm_ctrl() is used to emulate the instruction. #
2432# Again, a Trace exception may be pending and an exit made through      #
2433# _real_trace(). Else, a normal exit is made through _fpsp_done().      #
2434#                                                                       #
2435#       Before any of the above is attempted, it must be checked to     #
2436# see if the FPU is disabled. Since the "Unimp <ea>" exception is taken #
2437# before the "FPU disabled" exception, but the "FPU disabled" exception #
2438# has higher priority, we check the disabled bit in the PCR. If set,    #
2439# then we must create an 8 word "FPU disabled" exception stack frame    #
2440# from the current 4 word exception stack frame. This includes          #
2441# reproducing the effective address of the instruction to put on the    #
2442# new stack frame.                                                      #
2443#                                                                       #
2444#       In the process of all emulation work, if a _mem_read()          #
2445# "callout" returns a failing result indicating an access error, then   #
2446# we must create an access error stack frame from the current stack     #
2447# frame. This information includes a faulting address and a fault-      #
2448# status-longword. These are created within this handler.               #
2449#                                                                       #
2450#########################################################################
2451
2452        global          _fpsp_effadd
2453_fpsp_effadd:
2454
2455# This exception type takes priority over the "Line F Emulator"
2456# exception. Therefore, the FPU could be disabled when entering here.
2457# So, we must check to see if it's disabled and handle that case separately.
2458        mov.l           %d0,-(%sp)              # save d0
2459        movc            %pcr,%d0                # load proc cr
2460        btst            &0x1,%d0                # is FPU disabled?
2461        bne.w           iea_disabled            # yes (saved d0 is still on the stack)
2462        mov.l           (%sp)+,%d0              # restore d0
2463
2464        link            %a6,&-LOCAL_SIZE        # init stack frame
2465
2466        movm.l          &0x0303,EXC_DREGS(%a6)  # save d0-d1/a0-a1
2467        fmovm.l         %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
2468        fmovm.x         &0xc0,EXC_FPREGS(%a6)   # save fp0-fp1 on stack
2469
2470# PC of instruction that took the exception is the PC in the frame
2471        mov.l           EXC_PC(%a6),EXC_EXTWPTR(%a6)
2472
2473        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
2474        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
2475        bsr.l           _imem_read_long         # fetch the instruction words
2476        mov.l           %d0,EXC_OPWORD(%a6)     # store OPWORD and EXTWORD
2477
2478#########################################################################
2479
# the word tested is the low half of the longword just stored (the EXTWORD);
# its bit 15 being set selects the fmovem path.
2480        tst.w           %d0                     # is operation fmovem?
2481        bmi.w           iea_fmovm               # yes
2482
2483#
2484# here, we will have:
2485#       fabs    fdabs   fsabs           facos           fmod
2486#       fadd    fdadd   fsadd           fasin           frem
2487#       fcmp                            fatan           fscale
2488#       fdiv    fddiv   fsdiv           fatanh          fsin
2489#       fint                            fcos            fsincos
2490#       fintrz                          fcosh           fsinh
2491#       fmove   fdmove  fsmove          fetox           ftan
2492#       fmul    fdmul   fsmul           fetoxm1         ftanh
2493#       fneg    fdneg   fsneg           fgetexp         ftentox
2494#       fsgldiv                         fgetman         ftwotox
2495#       fsglmul                         flog10
2496#       fsqrt                           flog2
2497#       fsub    fdsub   fssub           flogn
2498#       ftst                            flognp1
2499# which can all use f<op>.{x,p}
2500# so, now it's immediate data extended precision AND PACKED FORMAT!
2501#
# iea_op: immediate-operand FP operation, extended or packed source.
# In:  %d0 = OPWORD:EXTWORD longword fetched at entry.
# The mask clears bits 31-24 and 15-8 of USER_FPSR. Bits 15-8 are the
# exception field; bits 31-24 are presumably the condition codes -- TODO
# confirm. Bit 10 of %d0 then selects the packed path.
2502iea_op:
2503        andi.l          &0x00ff00ff,USER_FPSR(%a6)
2504
2505        btst            &0xa,%d0                # is src fmt x or p?
2506        bne.b           iea_op_pack             # packed
2507
2508
2509        mov.l           EXC_EXTWPTR(%a6),%a0    # pass: ptr to #<data>
2510        lea             FP_SRC(%a6),%a1         # pass: ptr to super addr
2511        mov.l           &0xc,%d0                # pass: 12 bytes
2512        bsr.l           _imem_read              # read extended immediate
2513
2514        tst.l           %d1                     # did ifetch fail?
2515        bne.w           iea_iacc                # yes
2516
2517        bra.b           iea_op_setsrc
2518
# iea_op_pack: read a 12-byte packed immediate into FP_SRC and, unless it is
# an INF/NAN or a zero, convert it to extended precision with decbin.
2519iea_op_pack:
2520
2521        mov.l           EXC_EXTWPTR(%a6),%a0    # pass: ptr to #<data>
2522        lea             FP_SRC(%a6),%a1         # pass: ptr to super dst
2523        mov.l           &0xc,%d0                # pass: 12 bytes
2524        bsr.l           _imem_read              # read packed operand
2525
2526        tst.l           %d1                     # did ifetch fail?
2527        bne.w           iea_iacc                # yes
2528
2529# The packed operand is an INF or a NAN if the exponent field is all ones.
2530        bfextu          FP_SRC(%a6){&1:&15},%d0 # get exp
2531        cmpi.w          %d0,&0x7fff             # INF or NAN?
2532        beq.b           iea_op_setsrc           # operand is an INF or NAN
2533
2534# The packed operand is a zero if the mantissa is all zero, else it's
2535# a normal packed op.
2536        mov.b           3+FP_SRC(%a6),%d0       # get byte 4
2537        andi.b          &0x0f,%d0               # clear all but last nybble
2538        bne.b           iea_op_gp_not_spec      # not a zero
2539        tst.l           FP_SRC_HI(%a6)          # is lw 2 zero?
2540        bne.b           iea_op_gp_not_spec      # not a zero
2541        tst.l           FP_SRC_LO(%a6)          # is lw 3 zero?
2542        beq.b           iea_op_setsrc           # operand is a ZERO
2543iea_op_gp_not_spec:
2544        lea             FP_SRC(%a6),%a0         # pass: ptr to packed op
2545        bsr.l           decbin                  # convert to extended
2546        fmovm.x         &0x80,FP_SRC(%a6)       # make this the srcop
# falls through to iea_op_setsrc
2547
# iea_op_setsrc: step past the 12-byte immediate, tag the source operand,
# and load/tag a destination operand when the command byte calls for one.
# STORE_FLG is cleared here and set only for ftst/fcmp, which store nothing.
2548iea_op_setsrc:
2549        addi.l          &0xc,EXC_EXTWPTR(%a6)   # update extension word pointer
2550
2551# FP_SRC now holds the src operand.
2552        lea             FP_SRC(%a6),%a0         # pass: ptr to src op
2553        bsr.l           set_tag_x               # tag the operand type
2554        mov.b           %d0,STAG(%a6)           # could be ANYTHING!!!
2555        cmpi.b          %d0,&UNNORM             # is operand an UNNORM?
2556        bne.b           iea_op_getdst           # no
2557        bsr.l           unnorm_fix              # yes; convert to NORM/DENORM/ZERO
2558        mov.b           %d0,STAG(%a6)           # set new optype tag
2559iea_op_getdst:
2560        clr.b           STORE_FLG(%a6)          # clear "store result" boolean
2561
2562        btst            &0x5,1+EXC_CMDREG(%a6)  # is operation monadic or dyadic?
2563        beq.b           iea_op_extract          # monadic
2564        btst            &0x4,1+EXC_CMDREG(%a6)  # is operation fsincos,ftst,fcmp?
2565        bne.b           iea_op_spec             # yes
2566
2567iea_op_loaddst:
2568        bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
2569        bsr.l           load_fpn2               # load dst operand
2570
2571        lea             FP_DST(%a6),%a0         # pass: ptr to dst op
2572        bsr.l           set_tag_x               # tag the operand type
2573        mov.b           %d0,DTAG(%a6)           # could be ANYTHING!!!
2574        cmpi.b          %d0,&UNNORM             # is operand an UNNORM?
2575        bne.b           iea_op_extract          # no
2576        bsr.l           unnorm_fix              # yes; convert to NORM/DENORM/ZERO
2577        mov.b           %d0,DTAG(%a6)           # set new optype tag
2578        bra.b           iea_op_extract
2579
2580# the operation is fsincos, ftst, or fcmp. only fcmp is dyadic
2581iea_op_spec:
2582        btst            &0x3,1+EXC_CMDREG(%a6)  # is operation fsincos?
2583        beq.b           iea_op_extract          # yes
2584# now, we're left with ftst and fcmp. so, first let's tag them so that they don't
2585# store a result. then, only fcmp will branch back and pick up a dst operand.
2586        st              STORE_FLG(%a6)          # don't store a final result
2587        btst            &0x1,1+EXC_CMDREG(%a6)  # is operation fcmp?
2588        beq.b           iea_op_loaddst          # yes
# ftst: fall through to iea_op_extract without loading a dst operand
2589
# iea_op_extract: call the emulation routine for this operation.
# Passed to the routine: %d0 = FPCR_MODE byte (zero-extended), %a0 = &FP_SRC,
# %a1 = &FP_DST. The low 7 bits of the command byte index tbl_unsupp, whose
# longword entries are offsets from tbl_unsupp itself (the fetched entry
# is used as the index register in the jsr).
2590iea_op_extract:
2591        clr.l           %d0
2592        mov.b           FPCR_MODE(%a6),%d0      # pass: rnd mode,prec
2593
2594        mov.b           1+EXC_CMDREG(%a6),%d1
2595        andi.w          &0x007f,%d1             # extract extension
2596
2597        fmov.l          &0x0,%fpcr              # zero current control regs
2598        fmov.l          &0x0,%fpsr
2599
2600        lea             FP_SRC(%a6),%a0         # pass: ptr to src op
2601        lea             FP_DST(%a6),%a1         # pass: ptr to dst op
2602
2603        mov.l           (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
2604        jsr             (tbl_unsupp.l,%pc,%d1.l*1)
2605
2606#
2607# Exceptions in order of precedence:
2608#       BSUN    : none
2609#       SNAN    : all operations
2610#       OPERR   : all reg-reg or mem-reg operations that can normally operr
2611#       OVFL    : same as OPERR
2612#       UNFL    : same as OPERR
2613#       DZ      : same as OPERR
2614#       INEX2   : same as OPERR
2615#       INEX1   : all packed immediate operations
2616#
2617
2618# we determine the highest priority exception(if any) set by the
2619# emulation routine that has also been enabled by the user.
2620        mov.b           FPCR_ENABLE(%a6),%d0    # fetch exceptions enabled
2621        bne.b           iea_op_ena              # some are enabled
# none enabled: fall through to iea_op_save
2622
2623# now, we save the result, unless, of course, the operation was ftst or fcmp.
2624# these don't save results.
# iea_op_save / iea_op_store / iea_op_exit1: normal completion.
# The result is stored to the destination register unless STORE_FLG is set.
# FPIAR is then set to the faulting instruction's PC and the frame PC is
# advanced to EXC_EXTWPTR before the registers are restored.
2625iea_op_save:
2626        tst.b           STORE_FLG(%a6)          # does this op store a result?
2627        bne.b           iea_op_exit1            # exit with no frestore
2628
2629iea_op_store:
2630        bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
2631        bsr.l           store_fpreg             # store the result
2632
2633iea_op_exit1:
2634        mov.l           EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2635        mov.l           EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2636
2637        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
2638        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2639        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2640
2641        unlk            %a6                     # unravel the frame
2642
2643        btst            &0x7,(%sp)              # is trace on?
2644        bne.w           iea_op_trace            # yes
2645
2646        bra.l           _fpsp_done              # exit to os
2647
2648iea_op_ena:
2649        and.b           FPSR_EXCEPT(%a6),%d0    # keep only ones enabled and set
2650        bfffo           %d0{&24:&8},%d0         # find highest priority exception
2651        bne.b           iea_op_exc              # at least one was set
2652
2653# no enabled exception occurred. now, did a disabled, exact overflow occur with
2654# inexact enabled? if so, then we have to stuff an overflow frame into the FPU.
2655        btst            &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
2656        beq.b           iea_op_save             # no
2657
2658iea_op_ovfl:
2659        btst            &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
2660        beq.b           iea_op_store            # no
2661        bra.b           iea_op_exc_ovfl         # yes
2662
2663# an enabled exception occurred. we have to insert the exception type back into
2664# the machine.
2665iea_op_exc:
2666        subi.l          &24,%d0                 # fix offset to be 0-7
2667        cmpi.b          %d0,&0x6                # is exception INEX?
2668        bne.b           iea_op_exc_force        # no
2669
2670# the enabled exception was inexact. so, if it occurs with an overflow
2671# or underflow that was disabled, then we have to force an overflow or
2672# underflow frame.
2673        btst            &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
2674        bne.b           iea_op_exc_ovfl         # yes
2675        btst            &unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
2676        bne.b           iea_op_exc_unfl         # yes
2677
2678iea_op_exc_force:
2679        mov.w           (tbl_iea_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # table word for offset d0 into the frame
2680        bra.b           iea_op_exit2            # exit with frestore
2681
2682tbl_iea_except:
2683        short           0xe002, 0xe006, 0xe004, 0xe005 # entries for offsets 0-3
2684        short           0xe003, 0xe002, 0xe001, 0xe001 # entries for offsets 4-7
2685
2686iea_op_exc_ovfl:
2687        mov.w           &0xe005,2+FP_SRC(%a6)   # same word as table offset 3
2688        bra.b           iea_op_exit2            # exit with frestore
2689
2690iea_op_exc_unfl:
2691        mov.w           &0xe003,2+FP_SRC(%a6)   # same word as table offset 4
2692
2693iea_op_exit2:
2694        mov.l           EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2695        mov.l           EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2696
2697        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
2698        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2699        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2700
2701        frestore        FP_SRC(%a6)             # restore exceptional state
2702
2703        unlk            %a6                     # unravel the frame
2704
2705        btst            &0x7,(%sp)              # is trace on?
2706        bne.b           iea_op_trace            # yes
2707
2708        bra.l           _fpsp_done              # exit to os
2709
2710#
2711# The opclass two instruction that took an "Unimplemented Effective Address"
2712# exception was being traced. Make the "current" PC the FPIAR and put it in
2713# the trace stack frame, then jump to _real_trace().
2714#
2715#                UNIMP EA FRAME            TRACE FRAME
2716#               *****************       *****************
2717#               * 0x0 *  0x0f0  *       *    Current    *
2718#               *****************       *      PC       *
2719#               *    Current    *       *****************
2720#               *      PC       *       * 0x2 *  0x024  *
2721#               *****************       *****************
2722#               *      SR       *       *     Next      *
2723#               *****************       *      PC       *
2724#                                       *****************
2725#                                       *      SR       *
2726#                                       *****************
2727iea_op_trace:
2728        mov.l           (%sp),-(%sp)            # shift stack frame "down"
2729        mov.w           0x8(%sp),0x4(%sp)       # move low word of the stacked PC down as well
2730        mov.w           &0x2024,0x6(%sp)        # stk fmt = 0x2; voff = 0x024
2731        fmov.l          %fpiar,0x8(%sp)         # "Current PC" is in FPIAR
2732
2733        bra.l           _real_trace             # exit through the trace handler
2734
2735#########################################################################
2736iea_fmovm:
2737        btst            &14,%d0                 # ctrl or data reg
2738        beq.w           iea_fmovm_ctrl          # bit clear: ctrl reg form
2739
2740iea_fmovm_data:
2741
2742        btst            &0x5,EXC_SR(%a6)        # user or supervisor mode
2743        bne.b           iea_fmovm_data_s        # supervisor
2744
2745iea_fmovm_data_u:
2746        mov.l           %usp,%a0                # fetch user stack pointer
2747        mov.l           %a0,EXC_A7(%a6)         # store current a7
2748        bsr.l           fmovm_dynamic           # do dynamic fmovm
2749        mov.l           EXC_A7(%a6),%a0         # load possibly new a7
2750        mov.l           %a0,%usp                # update usp
2751        bra.w           iea_fmovm_exit
2752
2753iea_fmovm_data_s:
2754        clr.b           SPCOND_FLG(%a6)         # clear flag; tested after fmovm_dynamic returns
2755        lea             0x2+EXC_VOFF(%a6),%a0   # address just past the voff word
2756        mov.l           %a0,EXC_A7(%a6)         # store it as the current a7
2757        bsr.l           fmovm_dynamic           # do dynamic fmovm
2758
2759        cmpi.b          SPCOND_FLG(%a6),&mda7_flg # flag == mda7_flg?
2760        beq.w           iea_fmovm_data_predec   # yes: handle predecrement case
2761        cmpi.b          SPCOND_FLG(%a6),&mia7_flg # flag == mia7_flg?
2762        bne.w           iea_fmovm_exit          # no: normal exit; yes: fall into postinc case
2763
2764# right now, d0 = the size.
2765# the data has been fetched from the supervisor stack, but we have not
2766# incremented the stack pointer by the appropriate number of bytes.
2767# do it here.
2768iea_fmovm_data_postinc:
2769        btst            &0x7,EXC_SR(%a6)        # is trace on?
2770        bne.b           iea_fmovm_data_pi_trace # yes
2771
2772        mov.w           EXC_SR(%a6),(EXC_SR,%a6,%d0) # copy SR to frame relocated by d0 bytes
2773        mov.l           EXC_EXTWPTR(%a6),(EXC_PC,%a6,%d0) # Next PC into relocated frame
2774        mov.w           &0x00f0,(EXC_VOFF,%a6,%d0) # format/voff word of relocated frame
2775
2776        lea             (EXC_SR,%a6,%d0),%a0    # a0 = address of relocated frame
2777        mov.l           %a0,EXC_SR(%a6)         # stash that address in the old frame slot
2778
2779        fmovm.x         EXC_FP0(%a6),&0xc0      # restore fp0-fp1
2780        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2781        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2782
2783        unlk            %a6                     # unravel the frame
2784        mov.l           (%sp)+,%sp              # load sp from the longword now on top of the stack
2785        bra.l           _fpsp_done              # exit to os
2786
2787iea_fmovm_data_pi_trace:
2788        mov.w           EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0) # SR of relocated trace frame
2789        mov.l           EXC_EXTWPTR(%a6),(EXC_PC-0x4,%a6,%d0) # Next PC
2790        mov.w           &0x2024,(EXC_VOFF-0x4,%a6,%d0) # stk fmt = 0x2; voff = 0x024
2791        mov.l           EXC_PC(%a6),(EXC_VOFF+0x2-0x4,%a6,%d0) # Current PC follows the voff word
2792
2793        lea             (EXC_SR-0x4,%a6,%d0),%a0 # a0 = address of relocated trace frame
2794        mov.l           %a0,EXC_SR(%a6)         # stash that address in the old frame slot
2795
2796        fmovm.x         EXC_FP0(%a6),&0xc0      # restore fp0-fp1
2797        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2798        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2799
2800        unlk            %a6                     # unravel the frame
2801        mov.l           (%sp)+,%sp              # load sp from the longword now on top of the stack
2802        bra.l           _real_trace             # exit through the trace handler
2803
2804# right now, d0 = size and d1 = the strg.
2805iea_fmovm_data_predec:
2806        mov.b           %d1,EXC_VOFF(%a6)       # store strg
2807        mov.b           %d0,0x1+EXC_VOFF(%a6)   # store size
2808
2809        fmovm.x         EXC_FP0(%a6),&0xc0      # restore fp0-fp1
2810        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2811        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2812
2813        mov.l           (%a6),-(%sp)            # push the saved a6 held at (a6)
2814        mov.l           %d0,-(%sp)              # save d0
2815        mov.l           %d1,-(%sp)              # save d1
2816        mov.l           EXC_EXTWPTR(%a6),-(%sp) # make a copy of Next PC
2817
2818        clr.l           %d0                     # zero d0 before the byte load below
2819        mov.b           0x1+EXC_VOFF(%a6),%d0   # fetch size
2820        neg.l           %d0                     # get negative of size
2821
2822        btst            &0x7,EXC_SR(%a6)        # is trace enabled?
2823        beq.b           iea_fmovm_data_p2       # no
2824
2825        mov.w           EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0) # SR of relocated trace frame
2826        mov.l           EXC_PC(%a6),(EXC_VOFF-0x2,%a6,%d0) # Current PC follows the voff word
2827        mov.l           (%sp)+,(EXC_PC-0x4,%a6,%d0) # pop Next PC copy into the frame
2828        mov.w           &0x2024,(EXC_VOFF-0x4,%a6,%d0) # stk fmt = 0x2; voff = 0x024
2829
2830        pea             (%a6,%d0)               # create final sp
2831        bra.b           iea_fmovm_data_p3
2832
2833iea_fmovm_data_p2:
2834        mov.w           EXC_SR(%a6),(EXC_SR,%a6,%d0) # SR of relocated frame
2835        mov.l           (%sp)+,(EXC_PC,%a6,%d0) # pop Next PC copy into the frame
2836        mov.w           &0x00f0,(EXC_VOFF,%a6,%d0) # format/voff word of relocated frame
2837
2838        pea             (0x4,%a6,%d0)           # create final sp
2839
2840iea_fmovm_data_p3:
2841        clr.l           %d1                     # zero d1 before the byte load below
2842        mov.b           EXC_VOFF(%a6),%d1       # fetch strg
2843
2844        tst.b           %d1                     # test top bit of strg
2845        bpl.b           fm_1                    # clear: skip this register
2846        fmovm.x         &0x80,(0x4+0x8,%a6,%d0) # store the register selected by mask 0x80
2847        addi.l          &0xc,%d0                # advance offset by 0xc for the next store
2848fm_1:
2849        lsl.b           &0x1,%d1                # shift next strg bit into the top position
2850        bpl.b           fm_2                    # clear: skip this register
2851        fmovm.x         &0x40,(0x4+0x8,%a6,%d0) # store the register selected by mask 0x40
2852        addi.l          &0xc,%d0                # advance offset
2853fm_2:
2854        lsl.b           &0x1,%d1                # next strg bit
2855        bpl.b           fm_3                    # clear: skip this register
2856        fmovm.x         &0x20,(0x4+0x8,%a6,%d0) # store the register selected by mask 0x20
2857        addi.l          &0xc,%d0                # advance offset
2858fm_3:
2859        lsl.b           &0x1,%d1                # next strg bit
2860        bpl.b           fm_4                    # clear: skip this register
2861        fmovm.x         &0x10,(0x4+0x8,%a6,%d0) # store the register selected by mask 0x10
2862        addi.l          &0xc,%d0                # advance offset
2863fm_4:
2864        lsl.b           &0x1,%d1                # next strg bit
2865        bpl.b           fm_5                    # clear: skip this register
2866        fmovm.x         &0x08,(0x4+0x8,%a6,%d0) # store the register selected by mask 0x08
2867        addi.l          &0xc,%d0                # advance offset
2868fm_5:
2869        lsl.b           &0x1,%d1                # next strg bit
2870        bpl.b           fm_6                    # clear: skip this register
2871        fmovm.x         &0x04,(0x4+0x8,%a6,%d0) # store the register selected by mask 0x04
2872        addi.l          &0xc,%d0                # advance offset
2873fm_6:
2874        lsl.b           &0x1,%d1                # next strg bit
2875        bpl.b           fm_7                    # clear: skip this register
2876        fmovm.x         &0x02,(0x4+0x8,%a6,%d0) # store the register selected by mask 0x02
2877        addi.l          &0xc,%d0                # advance offset
2878fm_7:
2879        lsl.b           &0x1,%d1                # last strg bit
2880        bpl.b           fm_end                  # clear: nothing more to store
2881        fmovm.x         &0x01,(0x4+0x8,%a6,%d0) # store the register selected by mask 0x01
2882fm_end:
2883        mov.l           0x4(%sp),%d1            # restore d1 saved above
2884        mov.l           0x8(%sp),%d0            # restore d0 saved above
2885        mov.l           0xc(%sp),%a6            # restore a6 from the copy pushed above
2886        mov.l           (%sp)+,%sp              # switch to the final sp created by pea
2887
2888        btst            &0x7,(%sp)              # is trace enabled?
2889        beq.l           _fpsp_done              # no: exit to os
2890        bra.l           _real_trace             # yes: exit through the trace handler
2891
2892#########################################################################
2893iea_fmovm_ctrl:
2894
2895        bsr.l           fmovm_ctrl              # load ctrl regs
2896
2897iea_fmovm_exit:
2898        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
2899        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2900        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2901
2902        btst            &0x7,EXC_SR(%a6)        # is trace on?
2903        bne.b           iea_fmovm_trace         # yes
2904
2905        mov.l           EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2906
2907        unlk            %a6                     # unravel the frame
2908
2909        bra.l           _fpsp_done              # exit to os
2910
2911#
2912# The control reg instruction that took an "Unimplemented Effective Address"
2913# exception was being traced. The "Current PC" for the trace frame is the
2914# PC stacked for Unimp EA. The "Next PC" is in EXC_EXTWPTR.
2915# After fixing the stack frame, jump to _real_trace().
2916#
2917#                UNIMP EA FRAME            TRACE FRAME
2918#               *****************       *****************
2919#               * 0x0 *  0x0f0  *       *    Current    *
2920#               *****************       *      PC       *
2921#               *    Current    *       *****************
2922#               *      PC       *       * 0x2 *  0x024  *
2923#               *****************       *****************
2924#               *      SR       *       *     Next      *
2925#               *****************       *      PC       *
2926#                                       *****************
2927#                                       *      SR       *
2928#                                       *****************
2929# this isn't a pretty solution, but it works:
2930# -restore a6 (not with unlk)
2931# -shift stack frame down over where old a6 used to be
2932# -add LOCAL_SIZE to stack pointer
2933iea_fmovm_trace:
2934        mov.l           (%a6),%a6               # restore frame pointer
2935        mov.w           EXC_SR+LOCAL_SIZE(%sp),0x0+LOCAL_SIZE(%sp) # SR to start of trace frame
2936        mov.l           EXC_PC+LOCAL_SIZE(%sp),0x8+LOCAL_SIZE(%sp) # stacked PC becomes "Current PC"
2937        mov.l           EXC_EXTWPTR+LOCAL_SIZE(%sp),0x2+LOCAL_SIZE(%sp) # EXC_EXTWPTR becomes "Next PC"
2938        mov.w           &0x2024,0x6+LOCAL_SIZE(%sp) # stk fmt = 0x2; voff = 0x024
2939        add.l           &LOCAL_SIZE,%sp         # clear stack frame
2940
2941        bra.l           _real_trace             # exit through the trace handler
2942
2943#########################################################################
2944# The FPU is disabled and so we should really have taken the "Line
2945# F Emulator" exception. So, here we create an 8-word stack frame
2946# from our 4-word stack frame. This means we must calculate the length
2947# of the faulting instruction to get the "next PC". This is trivial for
2948# immediate operands but requires some extra work for fmovm dynamic
2949# which can use most addressing modes.
2950iea_disabled:
2951        mov.l           (%sp)+,%d0              # restore d0
2952
2953        link            %a6,&-LOCAL_SIZE        # init stack frame
2954
2955        movm.l          &0x0303,EXC_DREGS(%a6)  # save d0-d1/a0-a1
2956
2957# PC of instruction that took the exception is the PC in the frame
2958        mov.l           EXC_PC(%a6),EXC_EXTWPTR(%a6)
2959        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
2960        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
2961        bsr.l           _imem_read_long         # fetch the instruction words
2962        mov.l           %d0,EXC_OPWORD(%a6)     # store OPWORD and EXTWORD
2963
2964        tst.w           %d0                     # is instr fmovm?
2965        bmi.b           iea_dis_fmovm           # yes
2966# instruction is using an extended precision immediate operand. Therefore,
2967# the total instruction length is 16 bytes.
2968iea_dis_immed:
2969        mov.l           &0x10,%d0               # 16 bytes of instruction
2970        bra.b           iea_dis_cont
2971iea_dis_fmovm:
2972        btst            &0xe,%d0                # is instr fmovm ctrl
2973        bne.b           iea_dis_fmovm_data      # no
2974# the instruction is a fmovm.l with 2 or 3 registers.
2975        bfextu          %d0{&19:&3},%d1         # extract 3-bit ctrl reg select field
2976        mov.l           &0xc,%d0                # start with 12 bytes of instruction
2977        cmpi.b          %d1,&0x7                # move all regs?
2978        bne.b           iea_dis_cont            # no: length stays 12
2979        addq.l          &0x4,%d0                # yes: 16 bytes of instruction
2980        bra.b           iea_dis_cont
2981# the instruction is an fmovm.x dynamic which can use many addressing
2982# modes and thus can have several different total instruction lengths.
2983# call fmovm_calc_ea which will go through the ea calc process and,
2984# as a by-product, will tell us how long the instruction is.
2985iea_dis_fmovm_data:
2986        clr.l           %d0
2987        bsr.l           fmovm_calc_ea
2988        mov.l           EXC_EXTWPTR(%a6),%d0    # d0 = instruction ptr after ea calc
2989        sub.l           EXC_PC(%a6),%d0         # d0 = instruction length in bytes
2990iea_dis_cont:
2991        mov.w           %d0,EXC_VOFF(%a6)       # store stack shift value
2992
2993        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2994
2995        unlk            %a6                     # unravel the frame
2996
2997# here, we actually create the 8-word frame from the 4-word frame,
2998# with the "next PC" as additional info.
2999# the <ea> field is left undefined.
3000        subq.l          &0x8,%sp                # make room for new stack
3001        mov.l           %d0,-(%sp)              # save d0
3002        mov.w           0xc(%sp),0x4(%sp)       # move SR
3003        mov.l           0xe(%sp),0x6(%sp)       # move Current PC
3004        clr.l           %d0                     # zero d0 before the word load below
3005        mov.w           0x12(%sp),%d0           # fetch instruction length stored in the old voff word
3006        mov.l           0x6(%sp),0x10(%sp)      # move Current PC
3007        add.l           %d0,0x6(%sp)            # make Next PC
3008        mov.w           &0x402c,0xa(%sp)        # insert offset,frame format
3009        mov.l           (%sp)+,%d0              # restore d0
3010
3011        bra.l           _real_fpu_disabled      # exit through the FPU-disabled handler
3012
3013##########
3014
3015iea_iacc:
3016        movc            %pcr,%d0                # read the PCR control register
3017        btst            &0x1,%d0                # PCR bit 1 -- presumably the FPU-disable bit; confirm in 68060 manual
3018        bne.b           iea_iacc_cont           # bit set: skip the FP register restore
3019        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3020        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
3021iea_iacc_cont:
3022        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
3023
3024        unlk            %a6                     # unravel the frame
3025
3026        subq.w          &0x8,%sp                # make stack frame bigger
3027        mov.l           0x8(%sp),(%sp)          # store SR,hi(PC)
3028        mov.w           0xc(%sp),0x4(%sp)       # store lo(PC)
3029        mov.w           &0x4008,0x6(%sp)        # store voff
3030        mov.l           0x2(%sp),0x8(%sp)       # store ea (copy of the PC just moved)
3031        mov.l           &0x09428001,0xc(%sp)    # store fslw
3032
3033iea_acc_done:
3034        btst            &0x5,(%sp)              # user or supervisor mode?
3035        beq.b           iea_acc_done2           # user
3036        bset            &0x2,0xd(%sp)           # set supervisor TM bit
3037
3038iea_acc_done2:
3039        bra.l           _real_access            # exit through the access error handler
3040
3041iea_dacc:
3042        lea             -LOCAL_SIZE(%a6),%sp    # reset sp to the base of the local frame
3043
3044        movc            %pcr,%d1                # read the PCR control register
3045        btst            &0x1,%d1                # PCR bit 1 -- presumably the FPU-disable bit; confirm in 68060 manual
3046        bne.b           iea_dacc_cont           # bit set: skip the FP register restore
3047        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
3048        fmovm.l         LOCAL_SIZE+USER_FPCR(%sp),%fpcr,%fpsr,%fpiar # restore ctrl regs
3049iea_dacc_cont:
3050        mov.l           (%a6),%a6               # restore frame pointer
3051
3052        mov.l           0x4+LOCAL_SIZE(%sp),-0x8+0x4+LOCAL_SIZE(%sp) # store SR,hi(PC)
3053        mov.w           0x8+LOCAL_SIZE(%sp),-0x8+0x8+LOCAL_SIZE(%sp) # store lo(PC)
3054        mov.w           &0x4008,-0x8+0xa+LOCAL_SIZE(%sp) # store voff
3055        mov.l           %a0,-0x8+0xc+LOCAL_SIZE(%sp) # store ea (passed in a0)
3056        mov.w           %d0,-0x8+0x10+LOCAL_SIZE(%sp) # store upper word of fslw (passed in d0)
3057        mov.w           &0x0001,-0x8+0x12+LOCAL_SIZE(%sp) # store lower word of fslw
3058
3059        movm.l          LOCAL_SIZE+EXC_DREGS(%sp),&0x0303 # restore d0-d1/a0-a1
3060        add.w           &LOCAL_SIZE-0x4,%sp     # sp -> start of the enlarged frame
3061
3062        bra.b           iea_acc_done            # set TM bit if needed, then exit
3063
3064#########################################################################
3065# XDEF **************************************************************** #
3066#       _fpsp_operr(): 060FPSP entry point for FP Operr exception.      #
3067#                                                                       #
3068#       This handler should be the first code executed upon taking the  #
3069#       FP Operand Error exception in an operating system.              #
3070#                                                                       #
3071# XREF **************************************************************** #
3072#       _imem_read_long() - read instruction longword                   #
3073#       fix_skewed_ops() - adjust src operand in fsave frame            #
3074#       _real_operr() - "callout" to operating system operr handler     #
3075#       _dmem_write_{byte,word,long}() - store data to mem (opclass 3)  #
3076#       store_dreg_{b,w,l}() - store data to data regfile (opclass 3)   #
3077#       facc_out_{b,w,l}() - store to memory took access error (opcl 3) #
3078#                                                                       #
3079# INPUT *************************************************************** #
3080#       - The system stack contains the FP Operr exception frame        #
3081#       - The fsave frame contains the source operand                   #
3082#                                                                       #
3083# OUTPUT ************************************************************** #
3084#       No access error:                                                #
3085#       - The system stack is unchanged                                 #
3086#       - The fsave frame contains the adjusted src op for opclass 0,2  #
3087#                                                                       #
3088# ALGORITHM *********************************************************** #
3089#       In a system where the FP Operr exception is enabled, the goal   #
3090# is to get to the handler specified at _real_operr(). But, on the 060, #
3091# for opclass zero and two instruction taking this exception, the       #
3092# input operand in the fsave frame may be incorrect for some cases      #
3093# and needs to be corrected. This handler calls fix_skewed_ops() to     #
3094# do just this and then exits through _real_operr().                    #
3095#       For opclass 3 instructions, the 060 doesn't store the default   #
3096# operr result out to memory or data register file as it should.        #
3097# This code must emulate the move out before finally exiting through    #
3098# _real_operr(). The move out, if to memory, is performed using         #
3099# _mem_write() "callout" routines that may return a failing result.     #
3100# In this special case, the handler must exit through facc_out()        #
3101# which creates an access error stack frame from the current operr      #
3102# stack frame.                                                          #
3103#                                                                       #
3104#########################################################################
3105
3106        global          _fpsp_operr
3107_fpsp_operr:
3108
3109        link.w          %a6,&-LOCAL_SIZE        # init stack frame
3110
3111        fsave           FP_SRC(%a6)             # grab the "busy" frame
3112
3113        movm.l          &0x0303,EXC_DREGS(%a6)  # save d0-d1/a0-a1
3114        fmovm.l         %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3115        fmovm.x         &0xc0,EXC_FPREGS(%a6)   # save fp0-fp1 on stack
3116
3117# the FPIAR holds the "current PC" of the faulting instruction
3118        mov.l           USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3119
3120        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
3121        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
3122        bsr.l           _imem_read_long         # fetch the instruction words
3123        mov.l           %d0,EXC_OPWORD(%a6)     # store the fetched instruction longword
3124
3125##############################################################################
3126
3127        btst            &13,%d0                 # is instr an fmove out?
3128        bne.b           foperr_out              # fmove out
3129
3130
3131# here, we simply see if the operand in the fsave frame needs to be "unskewed".
3132# this would be the case for opclass two operations with a source infinity or
3133# denorm operand in the sgl or dbl format. NANs also become skewed, but can't
3134# cause an operr so we don't need to check for them here.
3135        lea             FP_SRC(%a6),%a0         # pass: ptr to src op
3136        bsr.l           fix_skewed_ops          # fix src op
3137
3138foperr_exit:
3139        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
3140        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3141        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
3142
3143        frestore        FP_SRC(%a6)             # reload the fsave frame grabbed on entry
3144
3145        unlk            %a6                     # unravel the frame
3146        bra.l           _real_operr             # exit through the operr handler
3147
3148########################################################################
3149
3150#
3151# the hardware does not save the default result to memory on enabled
3152# operand error exceptions. we do this here before passing control to
3153# the user operand error handler.
3154#
3155# byte, word, and long destination format operations can pass
3156# through here. we simply need to test the sign of the src
3157# operand and save the appropriate minimum or maximum integer value
3158# to the effective address as pointed to by the stacked effective address.
3159#
3160# although packed opclass three operations can take operand error
3161# exceptions, they won't pass through here since they are caught
3162# first by the unsupported data format exception handler. that handler
3163# sends them directly to _real_operr() if necessary.
3164#
3165foperr_out:
3166
3167        mov.w           FP_SRC_EX(%a6),%d1      # fetch exponent
3168        andi.w          &0x7fff,%d1             # strip the sign bit
3169        cmpi.w          %d1,&0x7fff             # is exponent all ones?
3170        bne.b           foperr_out_not_qnan     # no
3171# the operand is either an infinity or a QNAN.
3172        tst.l           FP_SRC_LO(%a6)          # is lo(mantissa) non-zero?
3173        bne.b           foperr_out_qnan         # yes: QNAN
3174        mov.l           FP_SRC_HI(%a6),%d1      # fetch hi(mantissa)
3175        andi.l          &0x7fffffff,%d1         # ignore its top bit
3176        beq.b           foperr_out_not_qnan     # rest is zero: infinity
3177foperr_out_qnan:
3178        mov.l           FP_SRC_HI(%a6),L_SCR1(%a6) # default result = hi(mantissa) of the QNAN
3179        bra.b           foperr_out_jmp
3180
3181foperr_out_not_qnan:
3182        mov.l           &0x7fffffff,%d1         # default result = 0x7fffffff
3183        tst.b           FP_SRC_EX(%a6)          # is src operand negative?
3184        bpl.b           foperr_out_not_qnan2    # no
3185        addq.l          &0x1,%d1                # yes: default result = 0x80000000
3186foperr_out_not_qnan2:
3187        mov.l           %d1,L_SCR1(%a6)         # save default result
3188
3189foperr_out_jmp:
3190        bfextu          %d0{&19:&3},%d0         # extract dst format field
3191        mov.b           1+EXC_OPWORD(%a6),%d1   # extract <ea> mode,reg
3192        mov.w           (tbl_operr.b,%pc,%d0.w*2),%a0 # fetch offset for this dst format
3193        jmp             (tbl_operr.b,%pc,%a0)   # jump to tbl_operr + offset
3194
3195tbl_operr:
3196        short           foperr_out_l - tbl_operr # long word integer
3197        short           tbl_operr    - tbl_operr # sgl prec shouldn't happen
3198        short           tbl_operr    - tbl_operr # ext prec shouldn't happen
3199        short           foperr_exit  - tbl_operr # packed won't enter here
3200        short           foperr_out_w - tbl_operr # word integer
3201        short           tbl_operr    - tbl_operr # dbl prec shouldn't happen
3202        short           foperr_out_b - tbl_operr # byte integer
3203        short           tbl_operr    - tbl_operr # packed won't enter here
3204
3205foperr_out_b:
3206        mov.b           L_SCR1(%a6),%d0         # load default result (first byte of L_SCR1)
3207        cmpi.b          %d1,&0x7                # is <ea> mode a data reg?
3208        ble.b           foperr_out_b_save_dn    # yes
3209        mov.l           EXC_EA(%a6),%a0         # pass: <ea> of default result
3210        bsr.l           _dmem_write_byte        # write the default result
3211
3212        tst.l           %d1                     # did dstore fail?
3213        bne.l           facc_out_b              # yes
3214
3215        bra.w           foperr_exit
3216foperr_out_b_save_dn:
3217        andi.w          &0x0007,%d1             # keep only the register number
3218        bsr.l           store_dreg_b            # store result to regfile
3219        bra.w           foperr_exit
3220
3221foperr_out_w:
3222        mov.w           L_SCR1(%a6),%d0         # load default result (first word of L_SCR1)
3223        cmpi.b          %d1,&0x7                # is <ea> mode a data reg?
3224        ble.b           foperr_out_w_save_dn    # yes
3225        mov.l           EXC_EA(%a6),%a0         # pass: <ea> of default result
3226        bsr.l           _dmem_write_word        # write the default result
3227
3228        tst.l           %d1                     # did dstore fail?
3229        bne.l           facc_out_w              # yes
3230
3231        bra.w           foperr_exit
3232foperr_out_w_save_dn:
3233        andi.w          &0x0007,%d1             # keep only the register number
3234        bsr.l           store_dreg_w            # store result to regfile
3235        bra.w           foperr_exit
3236
3237foperr_out_l:
3238        mov.l           L_SCR1(%a6),%d0         # load default result (all of L_SCR1)
3239        cmpi.b          %d1,&0x7                # is <ea> mode a data reg?
3240        ble.b           foperr_out_l_save_dn    # yes
3241        mov.l           EXC_EA(%a6),%a0         # pass: <ea> of default result
3242        bsr.l           _dmem_write_long        # write the default result
3243
3244        tst.l           %d1                     # did dstore fail?
3245        bne.l           facc_out_l              # yes
3246
3247        bra.w           foperr_exit
3248foperr_out_l_save_dn:
3249        andi.w          &0x0007,%d1             # keep only the register number
3250        bsr.l           store_dreg_l            # store result to regfile
3251        bra.w           foperr_exit
3252
3253#########################################################################
3254# XDEF **************************************************************** #
3255#       _fpsp_snan(): 060FPSP entry point for FP SNAN exception.        #
3256#                                                                       #
3257#       This handler should be the first code executed upon taking the  #
3258#       FP Signalling NAN exception in an operating system.             #
3259#                                                                       #
3260# XREF **************************************************************** #
3261#       _imem_read_long() - read instruction longword                   #
3262#       fix_skewed_ops() - adjust src operand in fsave frame            #
3263#       _real_snan() - "callout" to operating system SNAN handler       #
3264#       _dmem_write_{byte,word,long}() - store data to mem (opclass 3)  #
3265#       store_dreg_{b,w,l}() - store data to data regfile (opclass 3)   #
3266#       facc_out_{b,w,l,d,x}() - store to mem took acc error (opcl 3)   #
3267#       _calc_ea_fout() - fix An if <ea> is -() or ()+; also get <ea>   #
3268#                                                                       #
3269# INPUT *************************************************************** #
3270#       - The system stack contains the FP SNAN exception frame         #
3271#       - The fsave frame contains the source operand                   #
3272#                                                                       #
3273# OUTPUT ************************************************************** #
3274#       No access error:                                                #
3275#       - The system stack is unchanged                                 #
3276#       - The fsave frame contains the adjusted src op for opclass 0,2  #
3277#                                                                       #
3278# ALGORITHM *********************************************************** #
3279#       In a system where the FP SNAN exception is enabled, the goal    #
3280# is to get to the handler specified at _real_snan(). But, on the 060,  #
3281# for opclass zero and two instructions taking this exception, the      #
3282# input operand in the fsave frame may be incorrect for some cases      #
3283# and needs to be corrected. This handler calls fix_skewed_ops() to     #
3284# do just this and then exits through _real_snan().                     #
3285#       For opclass 3 instructions, the 060 doesn't store the default   #
3286# SNAN result out to memory or data register file as it should.         #
3287# This code must emulate the move out before finally exiting through    #
3288# _real_snan(). The move out, if to memory, is performed using          #
3289# _mem_write() "callout" routines that may return a failing result.     #
3290# In this special case, the handler must exit through facc_out()        #
3291# which creates an access error stack frame from the current SNAN       #
3292# stack frame.                                                          #
3293#       For the case of an extended precision opclass 3 instruction,    #
3294# if the effective addressing mode was -() or ()+, then the address     #
3295# register must get updated by calling _calc_ea_fout(). If the <ea>     #
3296# was -(a7) from supervisor mode, then the exception frame currently    #
3297# on the system stack must be carefully moved "down" to make room       #
3298# for the operand being moved.                                          #
3299#                                                                       #
3300#########################################################################
3301
3302        global          _fpsp_snan
3303_fpsp_snan:                             # entry: FP SNAN exception frame is on the system stack
3304
3305        link.w          %a6,&-LOCAL_SIZE        # init stack frame
3306
3307        fsave           FP_SRC(%a6)             # grab the "busy" frame
3308
3309        movm.l          &0x0303,EXC_DREGS(%a6)  # save d0-d1/a0-a1
3310        fmovm.l         %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3311        fmovm.x         &0xc0,EXC_FPREGS(%a6)   # save fp0-fp1 on stack
3312
3313# the FPIAR holds the "current PC" of the faulting instruction
3314        mov.l           USER_FPIAR(%a6),EXC_EXTWPTR(%a6) # seed instruction ptr from saved FPIAR
3315
3316        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
3317        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
3318        bsr.l           _imem_read_long         # fetch the instruction words
3319        mov.l           %d0,EXC_OPWORD(%a6)     # stash the fetched longword
3320
3321##############################################################################
3322
3323        btst            &13,%d0                 # is instr an fmove out?
3324        bne.w           fsnan_out               # fmove out
3325
3326
3327# here, we simply see if the operand in the fsave frame needs to be "unskewed".
3328# this would be the case for opclass two operations with a source infinity or
3329# denorm operand in the sgl or dbl format. NANs also become skewed and must be
3330# fixed here.
3331        lea             FP_SRC(%a6),%a0         # pass: ptr to src op
3332        bsr.l           fix_skewed_ops          # fix src op
3333
3334fsnan_exit:                                 # common exit; also reached from the fsnan_out_* routines
3335        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
3336        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3337        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
3338
3339        frestore        FP_SRC(%a6)             # reload the fsave frame from FP_SRC
3340
3341        unlk            %a6                     # tear down stack frame
3342        bra.l           _real_snan              # exit to the OS SNAN handler "callout"
3343
3344########################################################################
3345
3346#
3347# the hardware does not save the default result to memory on enabled
3348# snan exceptions. we do this here before passing control to
3349# the user snan handler.
3350#
3351# byte, word, long, and packed destination format operations can pass
3352# through here. since packed format operations already were handled by
3353# fpsp_unsupp(), then we need to do nothing else for them here.
3354# for byte, word, and long, we take the upper byte/word/longword of the
3355# src operand's mantissa (FP_SRC_HI), set the SNAN bit in it, and store it
3356# to the stacked effective address or to the destination data register.
3357#
3358fsnan_out:                                  # in: d0 = instruction longword fetched by _fpsp_snan
3359
3360        bfextu          %d0{&19:&3},%d0         # extract dst format field
3361        mov.b           1+EXC_OPWORD(%a6),%d1   # extract <ea> mode,reg
3362        mov.w           (tbl_snan.b,%pc,%d0.w*2),%a0 # fetch routine offset for this format
3363        jmp             (tbl_snan.b,%pc,%a0)    # dispatch; d1 = <ea> mode,reg for the routine
3364
3365tbl_snan:                                   # word offsets from tbl_snan, indexed by dst format
3366        short           fsnan_out_l - tbl_snan # long word integer
3367        short           fsnan_out_s - tbl_snan # sgl prec shouldn't happen
3368        short           fsnan_out_x - tbl_snan # ext prec shouldn't happen
3369        short           tbl_snan    - tbl_snan # packed needs no help (offset 0 = tbl_snan itself)
3370        short           fsnan_out_w - tbl_snan # word integer
3371        short           fsnan_out_d - tbl_snan # dbl prec shouldn't happen
3372        short           fsnan_out_b - tbl_snan # byte integer
3373        short           tbl_snan    - tbl_snan # packed needs no help (offset 0 = tbl_snan itself)
3374
3375fsnan_out_b:                            # byte dst; in: d1 = <ea> mode,reg
3376        mov.b           FP_SRC_HI(%a6),%d0      # load upper byte of SNAN
3377        bset            &6,%d0                  # set SNAN bit
3378        cmpi.b          %d1,&0x7                # is <ea> mode a data reg?
3379        ble.b           fsnan_out_b_dn          # yes
3380        mov.l           EXC_EA(%a6),%a0         # pass: <ea> of default result
3381        bsr.l           _dmem_write_byte        # write the default result
3382
3383        tst.l           %d1                     # did dstore fail?
3384        bne.l           facc_out_b              # yes
3385
3386        bra.w           fsnan_exit              # no; restore state and exit
3387fsnan_out_b_dn:                         # destination is a data register
3388        andi.w          &0x0007,%d1             # keep only the low 3 bits (reg field)
3389        bsr.l           store_dreg_b            # store result to regfile
3390        bra.w           fsnan_exit              # restore state and exit
3391
3392fsnan_out_w:                            # word dst; in: d1 = <ea> mode,reg
3393        mov.w           FP_SRC_HI(%a6),%d0      # load upper word of SNAN
3394        bset            &14,%d0                 # set SNAN bit
3395        cmpi.b          %d1,&0x7                # is <ea> mode a data reg?
3396        ble.b           fsnan_out_w_dn          # yes
3397        mov.l           EXC_EA(%a6),%a0         # pass: <ea> of default result
3398        bsr.l           _dmem_write_word        # write the default result
3399
3400        tst.l           %d1                     # did dstore fail?
3401        bne.l           facc_out_w              # yes
3402
3403        bra.w           fsnan_exit              # no; restore state and exit
3404fsnan_out_w_dn:                         # destination is a data register
3405        andi.w          &0x0007,%d1             # keep only the low 3 bits (reg field)
3406        bsr.l           store_dreg_w            # store result to regfile
3407        bra.w           fsnan_exit              # restore state and exit
3408
3409fsnan_out_l:                            # long dst; in: d1 = <ea> mode,reg
3410        mov.l           FP_SRC_HI(%a6),%d0      # load upper longword of SNAN
3411        bset            &30,%d0                 # set SNAN bit
3412        cmpi.b          %d1,&0x7                # is <ea> mode a data reg?
3413        ble.b           fsnan_out_l_dn          # yes
3414        mov.l           EXC_EA(%a6),%a0         # pass: <ea> of default result
3415        bsr.l           _dmem_write_long        # write the default result
3416
3417        tst.l           %d1                     # did dstore fail?
3418        bne.l           facc_out_l              # yes
3419
3420        bra.w           fsnan_exit              # no; restore state and exit
3421fsnan_out_l_dn:                         # destination is a data register
3422        andi.w          &0x0007,%d1             # keep only the low 3 bits (reg field)
3423        bsr.l           store_dreg_l            # store result to regfile
3424        bra.w           fsnan_exit              # restore state and exit
3425
3426fsnan_out_s:                            # sgl dst; in: d1 = <ea> mode,reg
3427        cmpi.b          %d1,&0x7                # is <ea> mode a data reg?
3428        ble.b           fsnan_out_d_dn          # yes (label name says "d" but it builds a sgl result)
3429        mov.l           FP_SRC_EX(%a6),%d0      # fetch SNAN sign
3430        andi.l          &0x80000000,%d0         # keep sign
3431        ori.l           &0x7fc00000,%d0         # insert new exponent,SNAN bit
3432        mov.l           FP_SRC_HI(%a6),%d1      # load mantissa
3433        lsr.l           &0x8,%d1                # shift mantissa for sgl
3434        or.l            %d1,%d0                 # create sgl SNAN
3435        mov.l           EXC_EA(%a6),%a0         # pass: <ea> of default result
3436        bsr.l           _dmem_write_long        # write the default result
3437
3438        tst.l           %d1                     # did dstore fail?
3439        bne.l           facc_out_l              # yes
3440
3441        bra.w           fsnan_exit              # no; restore state and exit
3442fsnan_out_d_dn:                         # sgl result to a data register (only reached from fsnan_out_s)
3443        mov.l           FP_SRC_EX(%a6),%d0      # fetch SNAN sign
3444        andi.l          &0x80000000,%d0         # keep sign
3445        ori.l           &0x7fc00000,%d0         # insert new exponent,SNAN bit
3446        mov.l           %d1,-(%sp)              # save <ea> mode,reg; d1 is needed as scratch
3447        mov.l           FP_SRC_HI(%a6),%d1      # load mantissa
3448        lsr.l           &0x8,%d1                # shift mantissa for sgl
3449        or.l            %d1,%d0                 # create sgl SNAN
3450        mov.l           (%sp)+,%d1              # recover <ea> mode,reg
3451        andi.w          &0x0007,%d1             # keep only the low 3 bits (reg field)
3452        bsr.l           store_dreg_l            # store result to regfile
3453        bra.w           fsnan_exit              # restore state and exit
3454
3455fsnan_out_d:                            # dbl dst; builds the 8-byte result in FP_SCR0
3456        mov.l           FP_SRC_EX(%a6),%d0      # fetch SNAN sign
3457        andi.l          &0x80000000,%d0         # keep sign
3458        ori.l           &0x7ff80000,%d0         # insert new exponent,SNAN bit
3459        mov.l           FP_SRC_HI(%a6),%d1      # load hi mantissa
3460        mov.l           %d0,FP_SCR0_EX(%a6)     # store to temp space
3461        mov.l           &11,%d0                 # load shift amt
3462        lsr.l           %d0,%d1                 # hi mantissa >> 11
3463        or.l            %d1,FP_SCR0_EX(%a6)     # create dbl hi
3464        mov.l           FP_SRC_HI(%a6),%d1      # load hi mantissa
3465        andi.l          &0x000007ff,%d1         # keep the 11 bits shifted out above
3466        ror.l           %d0,%d1                 # rotate them into the top 11 bits
3467        mov.l           %d1,FP_SCR0_HI(%a6)     # store to temp space
3468        mov.l           FP_SRC_LO(%a6),%d1      # load lo mantissa
3469        lsr.l           %d0,%d1                 # lo mantissa >> 11
3470        or.l            %d1,FP_SCR0_HI(%a6)     # create dbl lo
3471        lea             FP_SCR0(%a6),%a0        # pass: ptr to operand
3472        mov.l           EXC_EA(%a6),%a1         # pass: dst addr
3473        movq.l          &0x8,%d0                # pass: size of 8 bytes
3474        bsr.l           _dmem_write             # write the default result
3475
3476        tst.l           %d1                     # did dstore fail?
3477        bne.l           facc_out_d              # yes
3478
3479        bra.w           fsnan_exit              # no; restore state and exit
3480
3481# for extended precision, if the addressing mode is pre-decrement or
3482# post-increment, then the address register did not get updated.
3483# in addition, for pre-decrement, the stacked <ea> is incorrect.
3484fsnan_out_x:                            # ext dst; builds the 12-byte result in FP_SCR0
3485        clr.b           SPCOND_FLG(%a6)         # clear special case flag
3486
3487        mov.w           FP_SRC_EX(%a6),FP_SCR0_EX(%a6) # copy sign,exponent word
3488        clr.w           2+FP_SCR0(%a6)          # zero the word following sign,exponent
3489        mov.l           FP_SRC_HI(%a6),%d0      # load hi mantissa
3490        bset            &30,%d0                 # set SNAN bit
3491        mov.l           %d0,FP_SCR0_HI(%a6)     # store hi mantissa
3492        mov.l           FP_SRC_LO(%a6),FP_SCR0_LO(%a6) # copy lo mantissa unchanged
3493
3494        btst            &0x5,EXC_SR(%a6)        # supervisor mode exception?
3495        bne.b           fsnan_out_x_s           # yes
3496
3497        mov.l           %usp,%a0                # fetch user stack pointer
3498        mov.l           %a0,EXC_A7(%a6)         # save on stack for calc_ea()
3499        mov.l           (%a6),EXC_A6(%a6)       # copy the a6 value saved by link for calc_ea()
3500
3501        bsr.l           _calc_ea_fout           # find the correct ea,update An
3502        mov.l           %a0,%a1                 # pass: dst addr
3503        mov.l           %a0,EXC_EA(%a6)         # stack correct <ea>
3504
3505        mov.l           EXC_A7(%a6),%a0         # fetch the (possibly updated) a7 value
3506        mov.l           %a0,%usp                # restore user stack pointer
3507        mov.l           EXC_A6(%a6),(%a6)       # write a6 value back where unlk will reload it
3508
3509fsnan_out_x_save:                       # in: a1 = dst addr
3510        lea             FP_SCR0(%a6),%a0        # pass: ptr to operand
3511        movq.l          &0xc,%d0                # pass: size of extended
3512        bsr.l           _dmem_write             # write the default result
3513
3514        tst.l           %d1                     # did dstore fail?
3515        bne.l           facc_out_x              # yes
3516
3517        bra.w           fsnan_exit              # no; restore state and exit
3518
3519fsnan_out_x_s:                          # supervisor mode: usp is not touched
3520        mov.l           (%a6),EXC_A6(%a6)       # copy the a6 value saved by link for calc_ea()
3521
3522        bsr.l           _calc_ea_fout           # find the correct ea,update An
3523        mov.l           %a0,%a1                 # pass: dst addr
3524        mov.l           %a0,EXC_EA(%a6)         # stack correct <ea>
3525
3526        mov.l           EXC_A6(%a6),(%a6)       # write a6 value back where unlk will reload it
3527
3528        cmpi.b          SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
3529        bne.b           fsnan_out_x_save        # no
3530
3531# the operation was "fmove.x SNAN,-(a7)" from supervisor mode.
3532        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
3533        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3534        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
3535
3536        frestore        FP_SRC(%a6)             # reload the fsave frame from FP_SRC
3537
3538        mov.l           EXC_A6(%a6),%a6         # restore frame pointer
3539
3540        mov.l           LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) # copy frame longword 0xc bytes lower
3541        mov.l           LOCAL_SIZE+EXC_PC+0x2(%sp),LOCAL_SIZE+EXC_PC+0x2-0xc(%sp) # copy frame longword 0xc bytes lower
3542        mov.l           LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp) # copy frame longword 0xc bytes lower
3543
3544        mov.l           LOCAL_SIZE+FP_SCR0_EX(%sp),LOCAL_SIZE+EXC_SR(%sp) # operand longword 0 into vacated frame slot
3545        mov.l           LOCAL_SIZE+FP_SCR0_HI(%sp),LOCAL_SIZE+EXC_PC+0x2(%sp) # operand longword 1 into vacated frame slot
3546        mov.l           LOCAL_SIZE+FP_SCR0_LO(%sp),LOCAL_SIZE+EXC_EA(%sp) # operand longword 2 into vacated frame slot
3547
3548        add.l           &LOCAL_SIZE-0x8,%sp     # release local frame (no unlk on this path)
3549
3550        bra.l           _real_snan              # exit to the OS SNAN handler "callout"
3551
3552#########################################################################
3553# XDEF **************************************************************** #
3554#       _fpsp_inex(): 060FPSP entry point for FP Inexact exception.     #
3555#                                                                       #
3556#       This handler should be the first code executed upon taking the  #
3557#       FP Inexact exception in an operating system.                    #
3558#                                                                       #
3559# XREF **************************************************************** #
3560#       _imem_read_long() - read instruction longword                   #
3561#       fix_skewed_ops() - adjust src operand in fsave frame            #
3562#       set_tag_x() - determine optype of src/dst operands              #
3563#       store_fpreg() - store opclass 0 or 2 result to FP regfile       #
3564#       unnorm_fix() - change UNNORM operands to NORM or ZERO           #
3565#       load_fpn2() - load dst operand from FP regfile                  #
3566#       smovcr() - emulate an "fmovcr" instruction                      #
3567#       fout() - emulate an opclass 3 instruction                       #
3568#       tbl_unsupp - addr of table of emulation routines (opclass 0,2)  #
3569#       _real_inex() - "callout" to operating system inexact handler    #
3570#                                                                       #
3571# INPUT *************************************************************** #
3572#       - The system stack contains the FP Inexact exception frame      #
3573#       - The fsave frame contains the source operand                   #
3574#                                                                       #
3575# OUTPUT ************************************************************** #
3576#       - The system stack is unchanged                                 #
3577#       - The fsave frame contains the adjusted src op for opclass 0,2  #
3578#                                                                       #
3579# ALGORITHM *********************************************************** #
3580#       In a system where the FP Inexact exception is enabled, the goal #
3581# is to get to the handler specified at _real_inex(). But, on the 060,  #
3582# for opclass zero and two instructions taking this exception, the      #
3583# hardware doesn't store the correct result to the destination FP       #
3584# register as did the '040 and '881/2. This handler must emulate the    #
3585# instruction in order to get this value and then store it to the       #
3586# correct register before calling _real_inex().                         #
3587#       For opclass 3 instructions, the 060 doesn't store the default   #
3588# inexact result out to memory or data register file as it should.      #
3589# This code must emulate the move out by calling fout() before finally  #
3590# exiting through _real_inex().                                         #
3591#                                                                       #
3592#########################################################################
3593
3594        global          _fpsp_inex
3595_fpsp_inex:                             # entry: FP Inexact exception frame is on the system stack
3596
3597        link.w          %a6,&-LOCAL_SIZE        # init stack frame
3598
3599        fsave           FP_SRC(%a6)             # grab the "busy" frame
3600
3601        movm.l          &0x0303,EXC_DREGS(%a6)  # save d0-d1/a0-a1
3602        fmovm.l         %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3603        fmovm.x         &0xc0,EXC_FPREGS(%a6)   # save fp0-fp1 on stack
3604
3605# the FPIAR holds the "current PC" of the faulting instruction
3606        mov.l           USER_FPIAR(%a6),EXC_EXTWPTR(%a6) # seed instruction ptr from saved FPIAR
3607
3608        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
3609        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
3610        bsr.l           _imem_read_long         # fetch the instruction words
3611        mov.l           %d0,EXC_OPWORD(%a6)     # stash the fetched longword
3612
3613##############################################################################
3614
3615        btst            &13,%d0                 # is instr an fmove out?
3616        bne.w           finex_out               # fmove out
3617
3618
3619# the hardware, for "fabs" and "fneg" w/ a long source format, puts the
3620# longword integer directly into the upper longword of the mantissa along
3621# w/ an exponent value of 0x401e. we convert this to extended precision here.
3622        bfextu          %d0{&19:&3},%d0         # fetch instr size
3623        bne.b           finex_cont              # instr size is not long
3624        cmpi.w          FP_SRC_EX(%a6),&0x401e  # is exponent 0x401e?
3625        bne.b           finex_cont              # no
3626        fmov.l          &0x0,%fpcr              # zero FPCR for the conversion below
3627        fmov.l          FP_SRC_HI(%a6),%fp0     # load integer src
3628        fmov.x          %fp0,FP_SRC(%a6)        # store integer as extended precision
3629        mov.w           &0xe001,0x2+FP_SRC(%a6) # NOTE(review): sets word after exponent to 0xe001; meaning not shown here
3630
3631finex_cont:
3632        lea             FP_SRC(%a6),%a0         # pass: ptr to src op
3633        bsr.l           fix_skewed_ops          # fix src op
3634
3635# Here, we zero the ccode and exception byte field since we're going to
3636# emulate the whole instruction. Notice, though, that we don't kill the
3637# INEX1 bit. This is because a packed op has long since been converted
3638# to extended before arriving here. Therefore, we need to retain the
3639# INEX1 bit from when the operand was first converted.
3640        andi.l          &0x00ff01ff,USER_FPSR(%a6) # zero all but accrued field
3641
3642        fmov.l          &0x0,%fpcr              # zero current control regs
3643        fmov.l          &0x0,%fpsr              # (FPSR is zeroed as well)
3644
3645        bfextu          EXC_EXTWORD(%a6){&0:&6},%d1 # extract upper 6 of cmdreg
3646        cmpi.b          %d1,&0x17               # is op an fmovecr?
3647        beq.w           finex_fmovcr            # yes
3648
3649        lea             FP_SRC(%a6),%a0         # pass: ptr to src op
3650        bsr.l           set_tag_x               # tag the operand type
3651        mov.b           %d0,STAG(%a6)           # maybe NORM,DENORM
3652
3653# bits four and five of the fp extension word separate the monadic and dyadic
3654# operations that can pass through fpsp_inex(). remember that fcmp and ftst
3655# will never take this exception, but fsincos will.
3656        btst            &0x5,1+EXC_CMDREG(%a6)  # is operation monadic or dyadic?
3657        beq.b           finex_extract           # monadic
3658
3659        btst            &0x4,1+EXC_CMDREG(%a6)  # is operation an fsincos?
3660        bne.b           finex_extract           # yes
3661
3662        bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
3663        bsr.l           load_fpn2               # load dst into FP_DST
3664
3665        lea             FP_DST(%a6),%a0         # pass: ptr to dst op
3666        bsr.l           set_tag_x               # tag the operand type
3667        cmpi.b          %d0,&UNNORM             # is operand an UNNORM?
3668        bne.b           finex_op2_done          # no
3669        bsr.l           unnorm_fix              # yes; convert to NORM,DENORM,or ZERO
3670finex_op2_done:
3671        mov.b           %d0,DTAG(%a6)           # save dst optype tag
3672
3673finex_extract:
3674        clr.l           %d0                     # clear upper bytes before the byte load
3675        mov.b           FPCR_MODE(%a6),%d0      # pass rnd prec/mode
3676
3677        mov.b           1+EXC_CMDREG(%a6),%d1   # fetch low byte of cmdreg
3678        andi.w          &0x007f,%d1             # extract extension
3679
3680        lea             FP_SRC(%a6),%a0         # pass: ptr to src op
3681        lea             FP_DST(%a6),%a1         # pass: ptr to dst op
3682
3683        mov.l           (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
3684        jsr             (tbl_unsupp.l,%pc,%d1.l*1) # call the emulation routine
3685
3686# the operation has been emulated. the result is in fp0.
3687finex_save:
3688        bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst reg field of cmdreg
3689        bsr.l           store_fpreg             # store result to FP regfile
3690
3691finex_exit:                                 # common exit; also reached from finex_out
3692        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
3693        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3694        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
3695
3696        frestore        FP_SRC(%a6)             # reload the fsave frame from FP_SRC
3697
3698        unlk            %a6                     # tear down stack frame
3699        bra.l           _real_inex              # exit to the OS inexact handler "callout"
3700
3701finex_fmovcr:                           # op is an fmovecr: emulate via smovcr()
3702        clr.l           %d0                     # clear upper bytes before the byte load
3703        mov.b           FPCR_MODE(%a6),%d0      # pass rnd prec,mode
3704        mov.b           1+EXC_CMDREG(%a6),%d1   # fetch low byte of cmdreg
3705        andi.l          &0x0000007f,%d1         # pass rom offset
3706        bsr.l           smovcr                  # emulate the "fmovcr"
3707        bra.b           finex_save              # store the result and exit
3708
3709########################################################################
3710
3711#
3712# the hardware does not save the default result to memory on enabled
3713# inexact exceptions. we do this here before passing control to
3714# the user inexact handler.
3715#
3716# byte, word, and long destination format operations can pass
3717# through here. so can double and single precision.
3718# although packed opclass three operations can take inexact
3719# exceptions, they won't pass through here since they are caught
3720# first by the unsupported data format exception handler. that handler
3721# sends them directly to _real_inex() if necessary.
3722#
3723finex_out:                              # opclass 3 (fmove out): emulate the store via fout()
3724
3725        mov.b           &NORM,STAG(%a6)         # src is a NORM
3726
3727        clr.l           %d0                     # clear upper bytes before the byte load
3728        mov.b           FPCR_MODE(%a6),%d0      # pass rnd prec,mode
3729
3730        andi.l          &0xffff00ff,USER_FPSR(%a6) # zero exception field
3731
3732        lea             FP_SRC(%a6),%a0         # pass ptr to src operand
3733
3734        bsr.l           fout                    # store the default result
3735
3736        bra.b           finex_exit              # restore state and exit via _real_inex()
3737
3738#########################################################################
3739# XDEF **************************************************************** #
3740#       _fpsp_dz(): 060FPSP entry point for FP DZ exception.            #
3741#                                                                       #
3742#       This handler should be the first code executed upon taking      #
3743#       the FP DZ exception in an operating system.                     #
3744#                                                                       #
3745# XREF **************************************************************** #
3746#       _imem_read_long() - read instruction longword from memory       #
3747#       fix_skewed_ops() - adjust fsave operand                         #
3748#       _real_dz() - "callout" exit point from FP DZ handler            #
3749#                                                                       #
3750# INPUT *************************************************************** #
3751#       - The system stack contains the FP DZ exception stack.          #
3752#       - The fsave frame contains the source operand.                  #
3753#                                                                       #
3754# OUTPUT ************************************************************** #
3755#       - The system stack contains the FP DZ exception stack.          #
3756#       - The fsave frame contains the adjusted source operand.         #
3757#                                                                       #
3758# ALGORITHM *********************************************************** #
3759#       In a system where the DZ exception is enabled, the goal is to   #
3760# get to the handler specified at _real_dz(). But, on the 060, when the #
3761# exception is taken, the input operand in the fsave state frame may    #
3762# be incorrect for some cases and need to be adjusted. So, this package #
3763# adjusts the operand using fix_skewed_ops() and then branches to       #
3764# _real_dz().                                                           #
3765#                                                                       #
3766#########################################################################
3767
3768        global          _fpsp_dz
3769_fpsp_dz:                               # entry: FP DZ exception frame is on the system stack
3770
3771        link.w          %a6,&-LOCAL_SIZE        # init stack frame
3772
3773        fsave           FP_SRC(%a6)             # grab the "busy" frame
3774
3775        movm.l          &0x0303,EXC_DREGS(%a6)  # save d0-d1/a0-a1
3776        fmovm.l         %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3777        fmovm.x         &0xc0,EXC_FPREGS(%a6)   # save fp0-fp1 on stack
3778
3779# the FPIAR holds the "current PC" of the faulting instruction
3780        mov.l           USER_FPIAR(%a6),EXC_EXTWPTR(%a6) # seed instruction ptr from saved FPIAR
3781
3782        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
3783        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
3784        bsr.l           _imem_read_long         # fetch the instruction words
3785        mov.l           %d0,EXC_OPWORD(%a6)     # stash the fetched longword
3786
3787##############################################################################
3788
3789
3790# here, we simply see if the operand in the fsave frame needs to be "unskewed".
3791# this would be the case for opclass two operations with a source zero
3792# in the sgl or dbl format.
3793        lea             FP_SRC(%a6),%a0         # pass: ptr to src op
3794        bsr.l           fix_skewed_ops          # fix src op
3795
3796fdz_exit:
3797        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
3798        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3799        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
3800
3801        frestore        FP_SRC(%a6)             # reload the adjusted fsave frame
3802
3803        unlk            %a6                     # tear down stack frame
3804        bra.l           _real_dz                # exit through the _real_dz() "callout"
3805
3806#########################################################################
3807# XDEF **************************************************************** #
3808#       _fpsp_fline(): 060FPSP entry point for "Line F emulator" exc.   #
3809#                                                                       #
3810#       This handler should be the first code executed upon taking the  #
3811#       "Line F Emulator" exception in an operating system.             #
3812#                                                                       #
3813# XREF **************************************************************** #
3814#       _fpsp_unimp() - handle "FP Unimplemented" exceptions            #
3815#       _real_fpu_disabled() - handle "FPU disabled" exceptions         #
3816#       _real_fline() - handle "FLINE" exceptions                       #
3817#       _imem_read_long() - read instruction longword                   #
3818#                                                                       #
3819# INPUT *************************************************************** #
3820#       - The system stack contains a "Line F Emulator" exception       #
3821#         stack frame.                                                  #
3822#                                                                       #
3823# OUTPUT ************************************************************** #
3824#       - The system stack is unchanged                                 #
3825#                                                                       #
3826# ALGORITHM *********************************************************** #
3827#       When a "Line F Emulator" exception occurs, there are 3 possible #
3828# exception types, denoted by the exception stack frame format number:  #
3829#       (1) FPU unimplemented instruction (6 word stack frame)          #
3830#       (2) FPU disabled (8 word stack frame)                           #
3831#       (3) Line F (4 word stack frame)                                 #
3832#                                                                       #
3833#       This module determines which and forks the flow off to the      #
3834# appropriate "callout" (for "disabled" and "Line F") or to the         #
3835# correct emulation code (for "FPU unimplemented").                     #
3836#       This code also must check for "fmovecr" instructions w/ a       #
3837# non-zero <ea> field. These may get flagged as "Line F" but should     #
3838# really be flagged as "FPU Unimplemented". (This is a "feature" on     #
3839# the '060.)                                                            #
3840#                                                                       #
3841#########################################################################
3842
3843        global          _fpsp_fline
3844_fpsp_fline:                            # entry: "Line F Emulator" exception frame is on the system stack
3845
3846# check to see if this exception is a "FP Unimplemented Instruction"
3847# exception. if so, branch directly to that handler's entry point.
3848        cmpi.w          0x6(%sp),&0x202c        # compare the frame word at 0x6(sp) with 0x202c
3849        beq.l           _fpsp_unimp             # equal: FP unimplemented instruction
3850
3851# check to see if the FPU is disabled. if so, jump to the OS entry
3852# point for that condition.
3853        cmpi.w          0x6(%sp),&0x402c        # compare the frame word at 0x6(sp) with 0x402c
3854        beq.l           _real_fpu_disabled      # equal: FPU disabled
3855
3856# the exception was an "F-Line Illegal" exception. we check to see
3857# if the F-Line instruction is an "fmovecr" w/ a non-zero <ea>. if
3858# so, convert the F-Line exception stack frame to an FP Unimplemented
3859# Instruction exception stack frame else branch to the OS entry
3860# point for the F-Line exception handler.
3861        link.w          %a6,&-LOCAL_SIZE        # init stack frame
3862
3863        movm.l          &0x0303,EXC_DREGS(%a6)  # save d0-d1/a0-a1
3864
3865        mov.l           EXC_PC(%a6),EXC_EXTWPTR(%a6) # seed instruction ptr from stacked PC
3866        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
3867        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
3868        bsr.l           _imem_read_long         # fetch instruction words
3869
3870        bfextu          %d0{&0:&10},%d1         # is it an fmovecr?
3871        cmpi.w          %d1,&0x03c8             # upper 10 bits of the longword must be 0x3c8
3872        bne.b           fline_fline             # no
3873
3874        bfextu          %d0{&16:&6},%d1         # is it an fmovecr?
3875        cmpi.b          %d1,&0x17               # upper 6 bits of the low word must be 0x17
3876        bne.b           fline_fline             # no
3877
3878# it's an fmovecr w/ a non-zero <ea> that has entered through
3879# the F-Line Illegal exception.
3880# so, we need to convert the F-Line exception stack frame into an
3881# FP Unimplemented Instruction stack frame and jump to that entry
3882# point.
3883#
3884# but, if the FPU is disabled, then we need to jump to the FPU disabled
3885# entry point.
3886        movc            %pcr,%d0                # read the PCR
3887        btst            &0x1,%d0                # test PCR bit 1
3888        beq.b           fline_fmovcr            # bit clear: take the fmovecr conversion path
3889
3890        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
3891
3892        unlk            %a6                     # tear down stack frame
3893
3894        sub.l           &0x8,%sp                # make room for "Next PC", <ea>
3895        mov.w           0x8(%sp),(%sp)          # move the first frame word down 8 bytes
3896        mov.l           0xa(%sp),0x2(%sp)       # move "Current PC"
3897        mov.w           &0x402c,0x6(%sp)        # write 0x402c (the value tested at entry for "FPU disabled")
3898        mov.l           0x2(%sp),0xc(%sp)       # copy "Current PC" to 0xc(sp)
3899        addq.l          &0x4,0x2(%sp)           # set "Next PC"
3900
3901        bra.l           _real_fpu_disabled      # exit through the FPU-disabled "callout"
3902
3903fline_fmovcr:
3904        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
3905
3906        unlk            %a6
3907
3908        fmov.l          0x2(%sp),%fpiar         # set current PC
3909        addq.l          &0x4,0x2(%sp)           # set Next PC
3910
3911        mov.l           (%sp),-(%sp)
3912        mov.l           0x8(%sp),0x4(%sp)
3913        mov.b           &0x20,0x6(%sp)
3914
3915        bra.l           _fpsp_unimp
3916
3917fline_fline:
3918        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
3919
3920        unlk            %a6
3921
3922        bra.l           _real_fline
3923
3924#########################################################################
3925# XDEF **************************************************************** #
3926#       _fpsp_unimp(): 060FPSP entry point for FP "Unimplemented        #
3927#                      Instruction" exception.                          #
3928#                                                                       #
3929#       This handler should be the first code executed upon taking the  #
3930#       FP Unimplemented Instruction exception in an operating system.  #
3931#                                                                       #
3932# XREF **************************************************************** #
3933#       _imem_read_{word,long}() - read instruction word/longword       #
3934#       load_fop() - load src/dst ops from memory and/or FP regfile     #
3935#       store_fpreg() - store opclass 0 or 2 result to FP regfile       #
3936#       tbl_trans - addr of table of emulation routines for trnscndls   #
3937#       _real_access() - "callout" for access error exception           #
3938#       _fpsp_done() - "callout" for exit; work all done                #
3939#       _real_trace() - "callout" for Trace enabled exception           #
3940#       smovcr() - emulate "fmovecr" instruction                        #
3941#       funimp_skew() - adjust fsave src ops to "incorrect" value       #
3942#       _ftrapcc() - emulate an "ftrapcc" instruction                   #
3943#       _fdbcc() - emulate an "fdbcc" instruction                       #
3944#       _fscc() - emulate an "fscc" instruction                         #
3945#       _real_trap() - "callout" for Trap exception                     #
3946#       _real_bsun() - "callout" for enabled Bsun exception             #
3947#                                                                       #
3948# INPUT *************************************************************** #
3949#       - The system stack contains the "Unimplemented Instr" stk frame #
3950#                                                                       #
3951# OUTPUT ************************************************************** #
3952#       If access error:                                                #
3953#       - The system stack is changed to an access error stack frame    #
3954#       If Trace exception enabled:                                     #
3955#       - The system stack is changed to a Trace exception stack frame  #
3956#       Else: (normal case)                                             #
3957#       - Correct result has been stored as appropriate                 #
3958#                                                                       #
3959# ALGORITHM *********************************************************** #
3960#       There are two main cases of instructions that may enter here to #
3961# be emulated: (1) the FPgen instructions, most of which were also      #
3962# unimplemented on the 040, and (2) "ftrapcc", "fscc", and "fdbcc".     #
3963#       For the first set, this handler calls the routine load_fop()    #
3964# to load the source and destination (for dyadic) operands to be used   #
3965# for instruction emulation. The correct emulation routine is then      #
3966# chosen by decoding the instruction type and indexing into an          #
3967# emulation subroutine index table. After emulation returns, this       #
3968# handler checks to see if an exception should occur as a result of the #
3969# FP instruction emulation. If so, then an FP exception of the correct  #
3970# type is inserted into the FPU state frame using the "frestore"        #
3971# instruction before exiting through _fpsp_done(). In either the        #
3972# exceptional or non-exceptional cases, we must check to see if the     #
3973# Trace exception is enabled. If so, then we must create a Trace        #
3974# exception frame from the current exception frame and exit through     #
3975# _real_trace().                                                        #
3976#       For "fdbcc", "ftrapcc", and "fscc", the emulation subroutines   #
3977# _fdbcc(), _ftrapcc(), and _fscc() respectively are used. All three    #
3978# may flag that a BSUN exception should be taken. If so, then the       #
3979# current exception stack frame is converted into a BSUN exception      #
3980# stack frame and an exit is made through _real_bsun(). If the          #
3981# instruction was "ftrapcc" and a Trap exception should result, a Trap  #
3982# exception stack frame is created from the current frame and an exit   #
3983# is made through _real_trap(). If a Trace exception is pending, then   #
3984# a Trace exception frame is created from the current frame and a jump  #
3985# is made to _real_trace(). Finally, if none of these conditions exist, #
3986# then the handler exits through the callout _fpsp_done().              #
3987#                                                                       #
3988#       In any of the above scenarios, if a _mem_read() or _mem_write() #
3989# "callout" returns a failing value, then an access error stack frame   #
3990# is created from the current stack frame and an exit is made through   #
3991# _real_access().                                                       #
3992#                                                                       #
3993#########################################################################
3994
3995#
3996# FP UNIMPLEMENTED INSTRUCTION STACK FRAME:
3997#
3998#       *****************
3999#       *               * => <ea> of fp unimp instr.
4000#       -      EA       -
4001#       *               *
4002#       *****************
4003#       * 0x2 *  0x02c  * => frame format and vector offset(vector #11)
4004#       *****************
4005#       *               *
4006#       -    Next PC    - => PC of instr to execute after exc handling
4007#       *               *
4008#       *****************
4009#       *      SR       * => SR at the time the exception was taken
4010#       *****************
4011#
4012# Note: the !NULL bit does not get set in the fsave frame when the
4013# machine encounters an fp unimp exception. Therefore, it must be set
4014# before leaving this handler.
4015#
4016        global          _fpsp_unimp
4017_fpsp_unimp:                        # entry: save state, fetch opword, dispatch on TYPE
4018
4019        link.w          %a6,&-LOCAL_SIZE        # init stack frame
4020
4021        movm.l          &0x0303,EXC_DREGS(%a6)  # save d0-d1/a0-a1
4022        fmovm.l         %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
4023        fmovm.x         &0xc0,EXC_FPREGS(%a6)   # save fp0-fp1
4024
4025        btst            &0x5,EXC_SR(%a6)        # user mode exception?
4026        bne.b           funimp_s                # no; supervisor mode
4027
4028# save the value of the user stack pointer onto the stack frame
4029funimp_u:
4030        mov.l           %usp,%a0                # fetch user stack pointer
4031        mov.l           %a0,EXC_A7(%a6)         # store in stack frame
4032        bra.b           funimp_cont             # skip the supervisor-mode path
4033
4034# store the value of the supervisor stack pointer BEFORE the exc occurred.
4035# old_sp is address just above stacked effective address.
4036funimp_s:
4037        lea             4+EXC_EA(%a6),%a0       # load old a7'
4038        mov.l           %a0,EXC_A7(%a6)         # store a7'
4039        mov.l           %a0,OLD_A7(%a6)         # make a copy
4040
4041funimp_cont:
4042
4043# the FPIAR holds the "current PC" of the faulting instruction.
4044        mov.l           USER_FPIAR(%a6),EXC_EXTWPTR(%a6) # ext word ptr = current PC
4045
4046        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
4047        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
4048        bsr.l           _imem_read_long         # fetch the instruction words
4049        mov.l           %d0,EXC_OPWORD(%a6)     # save fetched longword; NOTE(review): d1 is not tested for ifetch failure here (funimp_fdbcc does test it) - confirm intended
4050
4051############################################################################
4052
4053        fmov.l          &0x0,%fpcr              # clear FPCR
4054        fmov.l          &0x0,%fpsr              # clear FPSR
4055
4056        clr.b           SPCOND_FLG(%a6)         # clear "special case" flag
4057
4058# Divide the fp instructions into 8 types based on the TYPE field in
4059# bits 6-8 of the opword (classes 6,7 are undefined).
4060# (for the '060, only two types can take this exception, so testing the
4061# low TYPE bit is enough: the full "bftst %d0{&7:&3}" test is not needed)
4062        btst            &22,%d0                 # d0 bit 22 = opword bit 6: type 0 or 1 ?
4063        bne.w           funimp_misc             # type 1; type 0 falls through
4064
4065#########################################
4066# TYPE == 0: General instructions       #
4067#########################################
4068funimp_gen:                         # TYPE 0: emulate via tbl_trans (or smovcr for fmovecr)
4069
4070        clr.b           STORE_FLG(%a6)          # clear "store result" flag
4071
4072# clear the ccode byte and exception status byte
4073        andi.l          &0x00ff00ff,USER_FPSR(%a6)
4074
4075        bfextu          %d0{&16:&6},%d1         # extract upper 6 of cmdreg
4076        cmpi.b          %d1,&0x17               # is op an fmovecr?
4077        beq.w           funimp_fmovcr           # yes
4078
4079funimp_gen_op:
4080        bsr.l           _load_fop               # load src/dst operands
4081
4082        clr.l           %d0                     # zero d0 before loading the mode byte
4083        mov.b           FPCR_MODE(%a6),%d0      # fetch rnd mode
4084
4085        mov.b           1+EXC_CMDREG(%a6),%d1   # fetch low byte of cmdreg
4086        andi.w          &0x003f,%d1             # extract extension bits
4087        lsl.w           &0x3,%d1                # shift left 3 bits (8 table slots per op)
4088        or.b            STAG(%a6),%d1           # insert src optag bits
4089
4090        lea             FP_DST(%a6),%a1         # pass dst ptr in a1
4091        lea             FP_SRC(%a6),%a0         # pass src ptr in a0
4092
4093        mov.w           (tbl_trans.w,%pc,%d1.w*2),%d1 # fetch routine offset from tbl_trans
4094        jsr             (tbl_trans.w,%pc,%d1.w*1) # emulate
4095
4096funimp_fsave:                       # emulation done: check for enabled exceptions, then store
4097        mov.b           FPCR_ENABLE(%a6),%d0    # fetch exceptions enabled
4098        bne.w           funimp_ena              # some are enabled
4099
4100funimp_store:
4101        bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # fetch 3-bit reg field for store_fpreg
4102        bsr.l           store_fpreg             # store result to fp regfile
4103
4104funimp_gen_exit:
4105        fmovm.x         EXC_FP0(%a6),&0xc0      # restore fp0-fp1
4106        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4107        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
4108
4109funimp_gen_exit_cmp:
4110        cmpi.b          SPCOND_FLG(%a6),&mia7_flg # was the ea mode (sp)+ ?
4111        beq.b           funimp_gen_exit_a7      # yes
4112
4113        cmpi.b          SPCOND_FLG(%a6),&mda7_flg # was the ea mode -(sp) ?
4114        beq.b           funimp_gen_exit_a7      # yes
4115
4116funimp_gen_exit_cont:
4117        unlk            %a6
4118
4119funimp_gen_exit_cont2:
4120        btst            &0x7,(%sp)              # is trace on?
4121        beq.l           _fpsp_done              # no
4122
4123# this catches a problem with the case where an exception will be re-inserted
4124# into the machine. the frestore has already been executed...so, the fmov.l
4125# alone of the control register would trigger an unwanted exception.
4126# until I feel like fixing this, we'll sidestep the exception.
4127        fsave           -(%sp)                  # push the FPU state frame first
4128        fmov.l          %fpiar,0x14(%sp)        # "Current PC" is in FPIAR
4129        frestore        (%sp)+                  # pop the FPU state frame again
4130        mov.w           &0x2024,0x6(%sp)        # stk fmt = 0x2; voff = 0x24
4131        bra.l           _real_trace             # exit through the trace callout
4132
4133funimp_gen_exit_a7:                 # ea mode was (sp)+ or -(sp): propagate the a7 update
4134        btst            &0x5,EXC_SR(%a6)        # supervisor or user mode?
4135        bne.b           funimp_gen_exit_a7_s    # supervisor
4136
4137        mov.l           %a0,-(%sp)              # save a0
4138        mov.l           EXC_A7(%a6),%a0         # fetch a7 value held in the frame
4139        mov.l           %a0,%usp                # write it to the user stack pointer
4140        mov.l           (%sp)+,%a0              # restore a0
4141        bra.b           funimp_gen_exit_cont    # rejoin the common exit
4142
4143# if the instruction was executed from supervisor mode and the addressing
4144# mode was (a7)+, then the stack frame for the rte must be shifted "up"
4145# "n" bytes where "n" is the size of the src operand type.
4146# f<op>.{b,w,l,s,d,x,p}
4147funimp_gen_exit_a7_s:
4148        mov.l           %d0,-(%sp)              # save d0
4149        mov.l           EXC_A7(%a6),%d0         # load new a7'
4150        sub.l           OLD_A7(%a6),%d0         # subtract old a7'
4151        mov.l           0x2+EXC_PC(%a6),(0x2+EXC_PC,%a6,%d0) # shift stack frame
4152        mov.l           EXC_SR(%a6),(EXC_SR,%a6,%d0) # shift stack frame
4153        mov.w           %d0,EXC_SR(%a6)         # store incr number
4154        mov.l           (%sp)+,%d0              # restore d0
4155
4156        unlk            %a6
4157
4158        add.w           (%sp),%sp               # add stored incr to sp: stack frame shifted
4159        bra.b           funimp_gen_exit_cont2   # a6 already unlinked; skip the unlk
4160
4161######################
4162# fmovecr.x #ccc,fpn #
4163######################
4164funimp_fmovcr:                      # fmovecr: call smovcr, then rejoin the general path
4165        clr.l           %d0                     # zero d0 before loading the mode byte
4166        mov.b           FPCR_MODE(%a6),%d0      # fetch rnd mode
4167        mov.b           1+EXC_CMDREG(%a6),%d1   # fetch low byte of cmdreg
4168        andi.l          &0x0000007f,%d1         # pass rom offset in d1
4169        bsr.l           smovcr                  # emulate the fmovecr
4170        bra.w           funimp_fsave            # continue with exception check/store
4171
4172#########################################################################
4173
4174#
4175# the user has enabled some exceptions. we figure not to see this too
4176# often so that's why it gets lower priority.
4177#
4178funimp_ena:                         # entered with d0.b = FPCR enable byte (from funimp_fsave)
4179
4180# was an exception set that was also enabled?
4181        and.b           FPSR_EXCEPT(%a6),%d0    # keep only ones enabled and set
4182        bfffo           %d0{&24:&8},%d0         # find highest priority exception
4183        bne.b           funimp_exc              # at least one was set
4184
4185# no exception that was enabled was set BUT if we got an exact overflow
4186# and overflow wasn't enabled but inexact was (yech!) then this is
4187# an inexact exception; otherwise, return to normal non-exception flow.
4188        btst            &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
4189        beq.w           funimp_store            # no; return to normal flow
4190
4191# the overflow w/ exact result happened but was inexact set in the FPCR?
4192funimp_ovfl:
4193        btst            &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
4194        beq.w           funimp_store            # no; return to normal flow
4195        bra.b           funimp_exc_ovfl         # yes
4196
4197# some exception happened that was actually enabled.
4198# we'll insert this new exception into the FPU and then return.
4199funimp_exc:
4200        subi.l          &24,%d0                 # fix offset to be 0-7 (bfffo gave 24-31)
4201        cmpi.b          %d0,&0x6                # is exception INEX?
4202        bne.b           funimp_exc_force        # no
4203
4204# the enabled exception was inexact. so, if it occurs with an overflow
4205# or underflow that was disabled, then we have to force an overflow or
4206# underflow frame. the eventual overflow or underflow handler will see that
4207# it's actually an inexact and act appropriately. this is the only easy
4208# way to have the EXOP available for the enabled inexact handler when
4209# a disabled overflow or underflow has also happened.
4210        btst            &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
4211        bne.b           funimp_exc_ovfl         # yes
4212        btst            &unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
4213        bne.b           funimp_exc_unfl         # yes
4214
4215# force the fsave exception status bits to signal an exception of the
4216# appropriate type. don't forget to "skew" the source operand in case we
4217# "unskewed" the one the hardware initially gave us.
4218funimp_exc_force:
4219        mov.l           %d0,-(%sp)              # save d0
4220        bsr.l           funimp_skew             # check for special case
4221        mov.l           (%sp)+,%d0              # restore d0
4222        mov.w           (tbl_funimp_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # set fsave status word
4223        bra.b           funimp_gen_exit2        # exit with frestore
4224
4225tbl_funimp_except:                  # fsave status words, indexed by exception offset 0-7
4226        short           0xe002, 0xe006, 0xe004, 0xe005
4227        short           0xe003, 0xe002, 0xe001, 0xe001
4228
4229# insert an overflow frame
4230funimp_exc_ovfl:
4231        bsr.l           funimp_skew             # check for special case
4232        mov.w           &0xe005,2+FP_SRC(%a6)   # same status word as table entry 3
4233        bra.b           funimp_gen_exit2
4234
4235# insert an underflow frame
4236funimp_exc_unfl:
4237        bsr.l           funimp_skew             # check for special case
4238        mov.w           &0xe003,2+FP_SRC(%a6)   # same status word as table entry 4
4239
4240# this is the general exit point for an enabled exception that will be
4241# restored into the machine for the instruction just emulated.
4242funimp_gen_exit2:
4243        fmovm.x         EXC_FP0(%a6),&0xc0      # restore fp0-fp1
4244        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4245        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
4246
4247        frestore        FP_SRC(%a6)             # insert exceptional status
4248
4249        bra.w           funimp_gen_exit_cmp     # rejoin exit path at the a7 check
4250
4251############################################################################
4252
4253#
4254# TYPE == 1: FDB<cc>, FS<cc>, FTRAP<cc>
4255#
4256# These instructions were implemented on the '881/2 and '040 in hardware but
4257# are emulated in software on the '060.
4258#
4259funimp_misc:                        # TYPE 1: pick fdb<cc>, fs<cc> or ftrap<cc> from the <ea> bits
4260        bfextu          %d0{&10:&3},%d1         # extract mode field
4261        cmpi.b          %d1,&0x1                # is it an fdb<cc>?
4262        beq.w           funimp_fdbcc            # yes
4263        cmpi.b          %d1,&0x7                # is the mode field 7?
4264        bne.w           funimp_fscc             # no; so it is an fs<cc>
4265        bfextu          %d0{&13:&3},%d1         # mode 7: extract the 3 bits after the mode field
4266        cmpi.b          %d1,&0x2                # is it an fs<cc>?
4267        blt.w           funimp_fscc             # yes (value < 2); else fall into ftrap<cc>
4268
4269#########################
4270# ftrap<cc>             #
4271# ftrap<cc>.w #<data>   #
4272# ftrap<cc>.l #<data>   #
4273#########################
4274funimp_ftrapcc:                     # emulate ftrap<cc>; falls through below if a trap is due
4275
4276        bsr.l           _ftrapcc                # FTRAP<cc>()
4277
4278        cmpi.b          SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring?
4279        beq.w           funimp_bsun             # yes
4280
4281        cmpi.b          SPCOND_FLG(%a6),&ftrapcc_flg # should a trap occur?
4282        bne.w           funimp_done             # no
4283
4284#        FP UNIMP FRAME            TRAP  FRAME
4285#       *****************       *****************
4286#       **    <EA>     **       **  Current PC **
4287#       *****************       *****************
4288#       * 0x2 *  0x02c  *       * 0x2 *  0x01c  *
4289#       *****************       *****************
4290#       **   Next PC   **       **   Next PC   **
4291#       *****************       *****************
4292#       *      SR       *       *      SR       *
4293#       *****************       *****************
4294#           (6 words)               (6 words)
4295#
4296# the ftrapcc instruction should take a trap. so, here we must create a
4297# trap stack frame from an unimplemented fp instruction stack frame and
4298# jump to the user supplied entry point for the trap exception
4299funimp_ftrapcc_tp:                  # rewrite the unimp frame in place as a trap frame
4300        mov.l           USER_FPIAR(%a6),EXC_EA(%a6) # Address = Current PC
4301        mov.w           &0x201c,EXC_VOFF(%a6)   # Fmt = 0x2; Vector Offset = 0x01c
4302
4303        fmovm.x         EXC_FP0(%a6),&0xc0      # restore fp0-fp1
4304        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4305        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
4306
4307        unlk            %a6
4308        bra.l           _real_trap              # exit through the trap callout
4309
4310#########################
4311# fdb<cc> Dn,<label>    #
4312#########################
4313funimp_fdbcc:                       # fetch the 16-bit displacement, then emulate fdb<cc>
4314
4315        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
4316        addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
4317        bsr.l           _imem_read_word         # read displacement
4318
4319        tst.l           %d1                     # did ifetch fail?
4320        bne.w           funimp_iacc             # yes
4321
4322        ext.l           %d0                     # sign extend displacement
4323
4324        bsr.l           _fdbcc                  # FDB<cc>()
4325
4326        cmpi.b          SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring?
4327        beq.w           funimp_bsun             # yes
4328
4329        bra.w           funimp_done             # branch to finish
4330
4331#################
4332# fs<cc>.b <ea> #
4333#################
4334funimp_fscc:                        # emulate fs<cc>, then fix up a7 if the <ea> changed it
4335
4336        bsr.l           _fscc                   # FS<cc>()
4337
4338# I am assuming here that an "fs<cc>.b -(An)" or "fs<cc>.b (An)+" instruction
4339# does not need to update "An" before taking a bsun exception.
4340        cmpi.b          SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring?
4341        beq.w           funimp_bsun             # yes
4342
4343        btst            &0x5,EXC_SR(%a6)        # no bsun; is it a user mode exception?
4344        bne.b           funimp_fscc_s           # no
4345
4346funimp_fscc_u:
4347        mov.l           EXC_A7(%a6),%a0         # yes; set new USP
4348        mov.l           %a0,%usp                # (written back unconditionally)
4349        bra.w           funimp_done             # branch to finish
4350
4351# remember, I'm assuming that post-increment is bogus...(it IS!!!)
4352# so, the least significant WORD of the stacked effective address got
4353# overwritten by the "fs<cc> -(An)". We must shift the stack frame "down"
4354# so that the rte will work correctly without destroying the result.
4355# even though the operation size is byte, the stack ptr is decr by 2.
4356#
4357# remember, also, this instruction may be traced.
4358funimp_fscc_s:                      # supervisor fs<cc>: shift frame down 2 bytes if -(a7) was used
4359        cmpi.b          SPCOND_FLG(%a6),&mda7_flg # was a7 modified?
4360        bne.w           funimp_done             # no
4361
4362        fmovm.x         EXC_FP0(%a6),&0xc0      # restore fp0-fp1
4363        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4364        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
4365
4366        unlk            %a6
4367
4368        btst            &0x7,(%sp)              # is trace enabled?
4369        bne.b           funimp_fscc_s_trace     # yes
4370
4371        subq.l          &0x2,%sp                # frame moves down by 2 bytes
4372        mov.l           0x2(%sp),(%sp)          # shift SR,hi(PC) "down"
4373        mov.l           0x6(%sp),0x4(%sp)       # shift lo(PC),voff "down"
4374        bra.l           _fpsp_done              # normal exit
4375
4376funimp_fscc_s_trace:
4377        subq.l          &0x2,%sp                # frame moves down by 2 bytes
4378        mov.l           0x2(%sp),(%sp)          # shift SR,hi(PC) "down"
4379        mov.w           0x6(%sp),0x4(%sp)       # shift lo(PC)
4380        mov.w           &0x2024,0x6(%sp)        # fmt/voff = $2024
4381        fmov.l          %fpiar,0x8(%sp)         # insert "current PC"
4382
4383        bra.l           _real_trace             # exit through the trace callout
4384
4385#
4386# The ftrap<cc>, fs<cc>, or fdb<cc> is to take an enabled bsun. we must convert
4387# the fp unimplemented instruction exception stack frame into a bsun stack frame,
4388# restore a bsun exception into the machine, and branch to the user
4389# supplied bsun hook.
4390#
4391#        FP UNIMP FRAME            BSUN FRAME
4392#       *****************       *****************
4393#       **    <EA>     **       * 0x0 * 0x0c0   *
4394#       *****************       *****************
4395#       * 0x2 *  0x02c  *       ** Current PC  **
4396#       *****************       *****************
4397#       **   Next PC   **       *      SR       *
4398#       *****************       *****************
4399#       *      SR       *           (4 words)
4400#       *****************
4401#           (6 words)
4402#
4403funimp_bsun:                        # turn the 6-word unimp frame into a 4-word bsun frame
4404        mov.w           &0x00c0,2+EXC_EA(%a6)   # Fmt = 0x0; Vector Offset = 0x0c0
4405        mov.l           USER_FPIAR(%a6),EXC_VOFF(%a6) # PC = Current PC
4406        mov.w           EXC_SR(%a6),2+EXC_PC(%a6) # shift SR "up"
4407
4408        mov.w           &0xe000,2+FP_SRC(%a6)   # bsun exception enabled
4409
4410        fmovm.x         EXC_FP0(%a6),&0xc0      # restore fp0-fp1
4411        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4412        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
4413
4414        frestore        FP_SRC(%a6)             # restore bsun exception
4415
4416        unlk            %a6
4417
4418        addq.l          &0x4,%sp                # erase sludge: drop the 4 bytes below the new frame
4419
4420        bra.l           _real_bsun              # branch to user bsun hook
4421
4422#
4423# all ftrapcc/fscc/fdbcc processing has been completed. unwind the stack frame
4424# and return.
4425#
4426# as usual, we have to check for trace mode being on here. since instructions
4427# modifying the supervisor stack frame don't pass through here, this is a
4428# relatively easy task.
4429#
4430funimp_done:                        # common exit for ftrap<cc>/fs<cc>/fdb<cc>
4431        fmovm.x         EXC_FP0(%a6),&0xc0      # restore fp0-fp1
4432        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4433        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
4434
4435        unlk            %a6
4436
4437        btst            &0x7,(%sp)              # is trace enabled?
4438        bne.b           funimp_trace            # yes
4439
4440        bra.l           _fpsp_done              # no; normal exit
4441
4442#        FP UNIMP FRAME           TRACE  FRAME
4443#       *****************       *****************
4444#       **    <EA>     **       **  Current PC **
4445#       *****************       *****************
4446#       * 0x2 *  0x02c  *       * 0x2 *  0x024  *
4447#       *****************       *****************
4448#       **   Next PC   **       **   Next PC   **
4449#       *****************       *****************
4450#       *      SR       *       *      SR       *
4451#       *****************       *****************
4452#           (6 words)               (6 words)
4453#
4454# the emulated ftrap<cc>/fdb<cc>/fs<cc> should take a trace trap. so, we create a
4455# trace stack frame from an unimplemented fp instruction stack frame and
4456# jump to the user supplied entry point for the trace exception
4457funimp_trace:
4458        fmov.l          %fpiar,0x8(%sp)         # current PC is in fpiar
4459        mov.b           &0x24,0x7(%sp)          # vector offset = 0x024
4460
4461        bra.l           _real_trace             # exit through the trace callout
4462
4463################################################################
4464
4465        global          tbl_trans
4466        swbeg           &0x1c0
4467tbl_trans:
4468        short           tbl_trans - tbl_trans   # $00-0 fmovecr all
4469        short           tbl_trans - tbl_trans   # $00-1 fmovecr all
4470        short           tbl_trans - tbl_trans   # $00-2 fmovecr all
4471        short           tbl_trans - tbl_trans   # $00-3 fmovecr all
4472        short           tbl_trans - tbl_trans   # $00-4 fmovecr all
4473        short           tbl_trans - tbl_trans   # $00-5 fmovecr all
4474        short           tbl_trans - tbl_trans   # $00-6 fmovecr all
4475        short           tbl_trans - tbl_trans   # $00-7 fmovecr all
4476
4477        short           tbl_trans - tbl_trans   # $01-0 fint norm
4478        short           tbl_trans - tbl_trans   # $01-1 fint zero
4479        short           tbl_trans - tbl_trans   # $01-2 fint inf
4480        short           tbl_trans - tbl_trans   # $01-3 fint qnan
4481        short           tbl_trans - tbl_trans   # $01-5 fint denorm
4482        short           tbl_trans - tbl_trans   # $01-4 fint snan
4483        short           tbl_trans - tbl_trans   # $01-6 fint unnorm
4484        short           tbl_trans - tbl_trans   # $01-7 ERROR
4485
4486        short           ssinh    - tbl_trans    # $02-0 fsinh norm
4487        short           src_zero - tbl_trans    # $02-1 fsinh zero
4488        short           src_inf  - tbl_trans    # $02-2 fsinh inf
4489        short           src_qnan - tbl_trans    # $02-3 fsinh qnan
4490        short           ssinhd   - tbl_trans    # $02-5 fsinh denorm
4491        short           src_snan - tbl_trans    # $02-4 fsinh snan
4492        short           tbl_trans - tbl_trans   # $02-6 fsinh unnorm
4493        short           tbl_trans - tbl_trans   # $02-7 ERROR
4494
4495        short           tbl_trans - tbl_trans   # $03-0 fintrz norm
4496        short           tbl_trans - tbl_trans   # $03-1 fintrz zero
4497        short           tbl_trans - tbl_trans   # $03-2 fintrz inf
4498        short           tbl_trans - tbl_trans   # $03-3 fintrz qnan
4499        short           tbl_trans - tbl_trans   # $03-5 fintrz denorm
4500        short           tbl_trans - tbl_trans   # $03-4 fintrz snan
4501        short           tbl_trans - tbl_trans   # $03-6 fintrz unnorm
4502        short           tbl_trans - tbl_trans   # $03-7 ERROR
4503
4504        short           tbl_trans - tbl_trans   # $04-0 fsqrt norm
4505        short           tbl_trans - tbl_trans   # $04-1 fsqrt zero
4506        short           tbl_trans - tbl_trans   # $04-2 fsqrt inf
4507        short           tbl_trans - tbl_trans   # $04-3 fsqrt qnan
4508        short           tbl_trans - tbl_trans   # $04-5 fsqrt denorm
4509        short           tbl_trans - tbl_trans   # $04-4 fsqrt snan
4510        short           tbl_trans - tbl_trans   # $04-6 fsqrt unnorm
4511        short           tbl_trans - tbl_trans   # $04-7 ERROR
4512
4513        short           tbl_trans - tbl_trans   # $05-0 ERROR
4514        short           tbl_trans - tbl_trans   # $05-1 ERROR
4515        short           tbl_trans - tbl_trans   # $05-2 ERROR
4516        short           tbl_trans - tbl_trans   # $05-3 ERROR
4517        short           tbl_trans - tbl_trans   # $05-4 ERROR
4518        short           tbl_trans - tbl_trans   # $05-5 ERROR
4519        short           tbl_trans - tbl_trans   # $05-6 ERROR
4520        short           tbl_trans - tbl_trans   # $05-7 ERROR
4521
4522        short           slognp1  - tbl_trans    # $06-0 flognp1 norm
4523        short           src_zero - tbl_trans    # $06-1 flognp1 zero
4524        short           sopr_inf - tbl_trans    # $06-2 flognp1 inf
4525        short           src_qnan - tbl_trans    # $06-3 flognp1 qnan
4526        short           slognp1d - tbl_trans    # $06-5 flognp1 denorm
4527        short           src_snan - tbl_trans    # $06-4 flognp1 snan
4528        short           tbl_trans - tbl_trans   # $06-6 flognp1 unnorm
4529        short           tbl_trans - tbl_trans   # $06-7 ERROR
4530
4531        short           tbl_trans - tbl_trans   # $07-0 ERROR
4532        short           tbl_trans - tbl_trans   # $07-1 ERROR
4533        short           tbl_trans - tbl_trans   # $07-2 ERROR
4534        short           tbl_trans - tbl_trans   # $07-3 ERROR
4535        short           tbl_trans - tbl_trans   # $07-4 ERROR
4536        short           tbl_trans - tbl_trans   # $07-5 ERROR
4537        short           tbl_trans - tbl_trans   # $07-6 ERROR
4538        short           tbl_trans - tbl_trans   # $07-7 ERROR
4539
4540        short           setoxm1  - tbl_trans    # $08-0 fetoxm1 norm
4541        short           src_zero - tbl_trans    # $08-1 fetoxm1 zero
4542        short           setoxm1i - tbl_trans    # $08-2 fetoxm1 inf
4543        short           src_qnan - tbl_trans    # $08-3 fetoxm1 qnan
4544        short           setoxm1d - tbl_trans    # $08-5 fetoxm1 denorm
4545        short           src_snan - tbl_trans    # $08-4 fetoxm1 snan
4546        short           tbl_trans - tbl_trans   # $08-6 fetoxm1 unnorm
4547        short           tbl_trans - tbl_trans   # $08-7 ERROR
4548
4549        short           stanh    - tbl_trans    # $09-0 ftanh norm
4550        short           src_zero - tbl_trans    # $09-1 ftanh zero
4551        short           src_one  - tbl_trans    # $09-2 ftanh inf
4552        short           src_qnan - tbl_trans    # $09-3 ftanh qnan
4553        short           stanhd   - tbl_trans    # $09-5 ftanh denorm
4554        short           src_snan - tbl_trans    # $09-4 ftanh snan
4555        short           tbl_trans - tbl_trans   # $09-6 ftanh unnorm
4556        short           tbl_trans - tbl_trans   # $09-7 ERROR
4557
4558        short           satan    - tbl_trans    # $0a-0 fatan norm
4559        short           src_zero - tbl_trans    # $0a-1 fatan zero
4560        short           spi_2    - tbl_trans    # $0a-2 fatan inf
4561        short           src_qnan - tbl_trans    # $0a-3 fatan qnan
4562        short           satand   - tbl_trans    # $0a-5 fatan denorm
4563        short           src_snan - tbl_trans    # $0a-4 fatan snan
4564        short           tbl_trans - tbl_trans   # $0a-6 fatan unnorm
4565        short           tbl_trans - tbl_trans   # $0a-7 ERROR
4566
4567        short           tbl_trans - tbl_trans   # $0b-0 ERROR
4568        short           tbl_trans - tbl_trans   # $0b-1 ERROR
4569        short           tbl_trans - tbl_trans   # $0b-2 ERROR
4570        short           tbl_trans - tbl_trans   # $0b-3 ERROR
4571        short           tbl_trans - tbl_trans   # $0b-4 ERROR
4572        short           tbl_trans - tbl_trans   # $0b-5 ERROR
4573        short           tbl_trans - tbl_trans   # $0b-6 ERROR
4574        short           tbl_trans - tbl_trans   # $0b-7 ERROR
4575
4576        short           sasin    - tbl_trans    # $0c-0 fasin norm
4577        short           src_zero - tbl_trans    # $0c-1 fasin zero
4578        short           t_operr  - tbl_trans    # $0c-2 fasin inf
4579        short           src_qnan - tbl_trans    # $0c-3 fasin qnan
4580        short           sasind   - tbl_trans    # $0c-5 fasin denorm
4581        short           src_snan - tbl_trans    # $0c-4 fasin snan
4582        short           tbl_trans - tbl_trans   # $0c-6 fasin unnorm
4583        short           tbl_trans - tbl_trans   # $0c-7 ERROR
4584
4585        short           satanh   - tbl_trans    # $0d-0 fatanh norm
4586        short           src_zero - tbl_trans    # $0d-1 fatanh zero
4587        short           t_operr  - tbl_trans    # $0d-2 fatanh inf
4588        short           src_qnan - tbl_trans    # $0d-3 fatanh qnan
4589        short           satanhd  - tbl_trans    # $0d-5 fatanh denorm
4590        short           src_snan - tbl_trans    # $0d-4 fatanh snan
4591        short           tbl_trans - tbl_trans   # $0d-6 fatanh unnorm
4592        short           tbl_trans - tbl_trans   # $0d-7 ERROR
4593
4594        short           ssin     - tbl_trans    # $0e-0 fsin norm
4595        short           src_zero - tbl_trans    # $0e-1 fsin zero
4596        short           t_operr  - tbl_trans    # $0e-2 fsin inf
4597        short           src_qnan - tbl_trans    # $0e-3 fsin qnan
4598        short           ssind    - tbl_trans    # $0e-5 fsin denorm
4599        short           src_snan - tbl_trans    # $0e-4 fsin snan
4600        short           tbl_trans - tbl_trans   # $0e-6 fsin unnorm
4601        short           tbl_trans - tbl_trans   # $0e-7 ERROR
4602
4603        short           stan     - tbl_trans    # $0f-0 ftan norm
4604        short           src_zero - tbl_trans    # $0f-1 ftan zero
4605        short           t_operr  - tbl_trans    # $0f-2 ftan inf
4606        short           src_qnan - tbl_trans    # $0f-3 ftan qnan
4607        short           stand    - tbl_trans    # $0f-5 ftan denorm
4608        short           src_snan - tbl_trans    # $0f-4 ftan snan
4609        short           tbl_trans - tbl_trans   # $0f-6 ftan unnorm
4610        short           tbl_trans - tbl_trans   # $0f-7 ERROR
4611
4612        short           setox    - tbl_trans    # $10-0 fetox norm
4613        short           ld_pone  - tbl_trans    # $10-1 fetox zero
4614        short           szr_inf  - tbl_trans    # $10-2 fetox inf
4615        short           src_qnan - tbl_trans    # $10-3 fetox qnan
4616        short           setoxd   - tbl_trans    # $10-5 fetox denorm
4617        short           src_snan - tbl_trans    # $10-4 fetox snan
4618        short           tbl_trans - tbl_trans   # $10-6 fetox unnorm
4619        short           tbl_trans - tbl_trans   # $10-7 ERROR
4620
4621        short           stwotox  - tbl_trans    # $11-0 ftwotox norm
4622        short           ld_pone  - tbl_trans    # $11-1 ftwotox zero
4623        short           szr_inf  - tbl_trans    # $11-2 ftwotox inf
4624        short           src_qnan - tbl_trans    # $11-3 ftwotox qnan
4625        short           stwotoxd - tbl_trans    # $11-5 ftwotox denorm
4626        short           src_snan - tbl_trans    # $11-4 ftwotox snan
4627        short           tbl_trans - tbl_trans   # $11-6 ftwotox unnorm
4628        short           tbl_trans - tbl_trans   # $11-7 ERROR
4629
4630        short           stentox  - tbl_trans    # $12-0 ftentox norm
4631        short           ld_pone  - tbl_trans    # $12-1 ftentox zero
4632        short           szr_inf  - tbl_trans    # $12-2 ftentox inf
4633        short           src_qnan - tbl_trans    # $12-3 ftentox qnan
4634        short           stentoxd - tbl_trans    # $12-5 ftentox denorm
4635        short           src_snan - tbl_trans    # $12-4 ftentox snan
4636        short           tbl_trans - tbl_trans   # $12-6 ftentox unnorm
4637        short           tbl_trans - tbl_trans   # $12-7 ERROR
4638
4639        short           tbl_trans - tbl_trans   # $13-0 ERROR
4640        short           tbl_trans - tbl_trans   # $13-1 ERROR
4641        short           tbl_trans - tbl_trans   # $13-2 ERROR
4642        short           tbl_trans - tbl_trans   # $13-3 ERROR
4643        short           tbl_trans - tbl_trans   # $13-4 ERROR
4644        short           tbl_trans - tbl_trans   # $13-5 ERROR
4645        short           tbl_trans - tbl_trans   # $13-6 ERROR
4646        short           tbl_trans - tbl_trans   # $13-7 ERROR
4647
4648        short           slogn    - tbl_trans    # $14-0 flogn norm
4649        short           t_dz2    - tbl_trans    # $14-1 flogn zero
4650        short           sopr_inf - tbl_trans    # $14-2 flogn inf
4651        short           src_qnan - tbl_trans    # $14-3 flogn qnan
4652        short           slognd   - tbl_trans    # $14-5 flogn denorm
4653        short           src_snan - tbl_trans    # $14-4 flogn snan
4654        short           tbl_trans - tbl_trans   # $14-6 flogn unnorm
4655        short           tbl_trans - tbl_trans   # $14-7 ERROR
4656
4657        short           slog10   - tbl_trans    # $15-0 flog10 norm
4658        short           t_dz2    - tbl_trans    # $15-1 flog10 zero
4659        short           sopr_inf - tbl_trans    # $15-2 flog10 inf
4660        short           src_qnan - tbl_trans    # $15-3 flog10 qnan
4661        short           slog10d  - tbl_trans    # $15-5 flog10 denorm
4662        short           src_snan - tbl_trans    # $15-4 flog10 snan
4663        short           tbl_trans - tbl_trans   # $15-6 flog10 unnorm
4664        short           tbl_trans - tbl_trans   # $15-7 ERROR
4665
4666        short           slog2    - tbl_trans    # $16-0 flog2 norm
4667        short           t_dz2    - tbl_trans    # $16-1 flog2 zero
4668        short           sopr_inf - tbl_trans    # $16-2 flog2 inf
4669        short           src_qnan - tbl_trans    # $16-3 flog2 qnan
4670        short           slog2d   - tbl_trans    # $16-5 flog2 denorm
4671        short           src_snan - tbl_trans    # $16-4 flog2 snan
4672        short           tbl_trans - tbl_trans   # $16-6 flog2 unnorm
4673        short           tbl_trans - tbl_trans   # $16-7 ERROR
4674
4675        short           tbl_trans - tbl_trans   # $17-0 ERROR
4676        short           tbl_trans - tbl_trans   # $17-1 ERROR
4677        short           tbl_trans - tbl_trans   # $17-2 ERROR
4678        short           tbl_trans - tbl_trans   # $17-3 ERROR
4679        short           tbl_trans - tbl_trans   # $17-4 ERROR
4680        short           tbl_trans - tbl_trans   # $17-5 ERROR
4681        short           tbl_trans - tbl_trans   # $17-6 ERROR
4682        short           tbl_trans - tbl_trans   # $17-7 ERROR
4683
4684        short           tbl_trans - tbl_trans   # $18-0 fabs norm
4685        short           tbl_trans - tbl_trans   # $18-1 fabs zero
4686        short           tbl_trans - tbl_trans   # $18-2 fabs inf
4687        short           tbl_trans - tbl_trans   # $18-3 fabs qnan
4688        short           tbl_trans - tbl_trans   # $18-5 fabs denorm
4689        short           tbl_trans - tbl_trans   # $18-4 fabs snan
4690        short           tbl_trans - tbl_trans   # $18-6 fabs unnorm
4691        short           tbl_trans - tbl_trans   # $18-7 ERROR
4692
4693        short           scosh    - tbl_trans    # $19-0 fcosh norm
4694        short           ld_pone  - tbl_trans    # $19-1 fcosh zero
4695        short           ld_pinf  - tbl_trans    # $19-2 fcosh inf
4696        short           src_qnan - tbl_trans    # $19-3 fcosh qnan
4697        short           scoshd   - tbl_trans    # $19-5 fcosh denorm
4698        short           src_snan - tbl_trans    # $19-4 fcosh snan
4699        short           tbl_trans - tbl_trans   # $19-6 fcosh unnorm
4700        short           tbl_trans - tbl_trans   # $19-7 ERROR
4701
4702        short           tbl_trans - tbl_trans   # $1a-0 fneg norm
4703        short           tbl_trans - tbl_trans   # $1a-1 fneg zero
4704        short           tbl_trans - tbl_trans   # $1a-2 fneg inf
4705        short           tbl_trans - tbl_trans   # $1a-3 fneg qnan
4706        short           tbl_trans - tbl_trans   # $1a-5 fneg denorm
4707        short           tbl_trans - tbl_trans   # $1a-4 fneg snan
4708        short           tbl_trans - tbl_trans   # $1a-6 fneg unnorm
4709        short           tbl_trans - tbl_trans   # $1a-7 ERROR
4710
4711        short           tbl_trans - tbl_trans   # $1b-0 ERROR
4712        short           tbl_trans - tbl_trans   # $1b-1 ERROR
4713        short           tbl_trans - tbl_trans   # $1b-2 ERROR
4714        short           tbl_trans - tbl_trans   # $1b-3 ERROR
4715        short           tbl_trans - tbl_trans   # $1b-4 ERROR
4716        short           tbl_trans - tbl_trans   # $1b-5 ERROR
4717        short           tbl_trans - tbl_trans   # $1b-6 ERROR
4718        short           tbl_trans - tbl_trans   # $1b-7 ERROR
4719
4720        short           sacos    - tbl_trans    # $1c-0 facos norm
4721        short           ld_ppi2  - tbl_trans    # $1c-1 facos zero
4722        short           t_operr  - tbl_trans    # $1c-2 facos inf
4723        short           src_qnan - tbl_trans    # $1c-3 facos qnan
4724        short           sacosd   - tbl_trans    # $1c-5 facos denorm
4725        short           src_snan - tbl_trans    # $1c-4 facos snan
4726        short           tbl_trans - tbl_trans   # $1c-6 facos unnorm
4727        short           tbl_trans - tbl_trans   # $1c-7 ERROR
4728
4729        short           scos     - tbl_trans    # $1d-0 fcos norm
4730        short           ld_pone  - tbl_trans    # $1d-1 fcos zero
4731        short           t_operr  - tbl_trans    # $1d-2 fcos inf
4732        short           src_qnan - tbl_trans    # $1d-3 fcos qnan
4733        short           scosd    - tbl_trans    # $1d-5 fcos denorm
4734        short           src_snan - tbl_trans    # $1d-4 fcos snan
4735        short           tbl_trans - tbl_trans   # $1d-6 fcos unnorm
4736        short           tbl_trans - tbl_trans   # $1d-7 ERROR
4737
4738        short           sgetexp  - tbl_trans    # $1e-0 fgetexp norm
4739        short           src_zero - tbl_trans    # $1e-1 fgetexp zero
4740        short           t_operr  - tbl_trans    # $1e-2 fgetexp inf
4741        short           src_qnan - tbl_trans    # $1e-3 fgetexp qnan
4742        short           sgetexpd - tbl_trans    # $1e-5 fgetexp denorm
4743        short           src_snan - tbl_trans    # $1e-4 fgetexp snan
4744        short           tbl_trans - tbl_trans   # $1e-6 fgetexp unnorm
4745        short           tbl_trans - tbl_trans   # $1e-7 ERROR
4746
4747        short           sgetman  - tbl_trans    # $1f-0 fgetman norm
4748        short           src_zero - tbl_trans    # $1f-1 fgetman zero
4749        short           t_operr  - tbl_trans    # $1f-2 fgetman inf
4750        short           src_qnan - tbl_trans    # $1f-3 fgetman qnan
4751        short           sgetmand - tbl_trans    # $1f-5 fgetman denorm
4752        short           src_snan - tbl_trans    # $1f-4 fgetman snan
4753        short           tbl_trans - tbl_trans   # $1f-6 fgetman unnorm
4754        short           tbl_trans - tbl_trans   # $1f-7 ERROR
4755
4756        short           tbl_trans - tbl_trans   # $20-0 fdiv norm
4757        short           tbl_trans - tbl_trans   # $20-1 fdiv zero
4758        short           tbl_trans - tbl_trans   # $20-2 fdiv inf
4759        short           tbl_trans - tbl_trans   # $20-3 fdiv qnan
4760        short           tbl_trans - tbl_trans   # $20-5 fdiv denorm
4761        short           tbl_trans - tbl_trans   # $20-4 fdiv snan
4762        short           tbl_trans - tbl_trans   # $20-6 fdiv unnorm
4763        short           tbl_trans - tbl_trans   # $20-7 ERROR
4764
4765        short           smod_snorm - tbl_trans  # $21-0 fmod norm
4766        short           smod_szero - tbl_trans  # $21-1 fmod zero
4767        short           smod_sinf - tbl_trans   # $21-2 fmod inf
4768        short           sop_sqnan - tbl_trans   # $21-3 fmod qnan
4769        short           smod_sdnrm - tbl_trans  # $21-5 fmod denorm
4770        short           sop_ssnan - tbl_trans   # $21-4 fmod snan
4771        short           tbl_trans - tbl_trans   # $21-6 fmod unnorm
4772        short           tbl_trans - tbl_trans   # $21-7 ERROR
4773
4774        short           tbl_trans - tbl_trans   # $22-0 fadd norm
4775        short           tbl_trans - tbl_trans   # $22-1 fadd zero
4776        short           tbl_trans - tbl_trans   # $22-2 fadd inf
4777        short           tbl_trans - tbl_trans   # $22-3 fadd qnan
4778        short           tbl_trans - tbl_trans   # $22-5 fadd denorm
4779        short           tbl_trans - tbl_trans   # $22-4 fadd snan
4780        short           tbl_trans - tbl_trans   # $22-6 fadd unnorm
4781        short           tbl_trans - tbl_trans   # $22-7 ERROR
4782
4783        short           tbl_trans - tbl_trans   # $23-0 fmul norm
4784        short           tbl_trans - tbl_trans   # $23-1 fmul zero
4785        short           tbl_trans - tbl_trans   # $23-2 fmul inf
4786        short           tbl_trans - tbl_trans   # $23-3 fmul qnan
4787        short           tbl_trans - tbl_trans   # $23-5 fmul denorm
4788        short           tbl_trans - tbl_trans   # $23-4 fmul snan
4789        short           tbl_trans - tbl_trans   # $23-6 fmul unnorm
4790        short           tbl_trans - tbl_trans   # $23-7 ERROR
4791
4792        short           tbl_trans - tbl_trans   # $24-0 fsgldiv norm
4793        short           tbl_trans - tbl_trans   # $24-1 fsgldiv zero
4794        short           tbl_trans - tbl_trans   # $24-2 fsgldiv inf
4795        short           tbl_trans - tbl_trans   # $24-3 fsgldiv qnan
4796        short           tbl_trans - tbl_trans   # $24-5 fsgldiv denorm
4797        short           tbl_trans - tbl_trans   # $24-4 fsgldiv snan
4798        short           tbl_trans - tbl_trans   # $24-6 fsgldiv unnorm
4799        short           tbl_trans - tbl_trans   # $24-7 ERROR
4800
4801        short           srem_snorm - tbl_trans  # $25-0 frem norm
4802        short           srem_szero - tbl_trans  # $25-1 frem zero
4803        short           srem_sinf - tbl_trans   # $25-2 frem inf
4804        short           sop_sqnan - tbl_trans   # $25-3 frem qnan
4805        short           srem_sdnrm - tbl_trans  # $25-5 frem denorm
4806        short           sop_ssnan - tbl_trans   # $25-4 frem snan
4807        short           tbl_trans - tbl_trans   # $25-6 frem unnorm
4808        short           tbl_trans - tbl_trans   # $25-7 ERROR
4809
4810        short           sscale_snorm - tbl_trans # $26-0 fscale norm
4811        short           sscale_szero - tbl_trans # $26-1 fscale zero
4812        short           sscale_sinf - tbl_trans # $26-2 fscale inf
4813        short           sop_sqnan - tbl_trans   # $26-3 fscale qnan
4814        short           sscale_sdnrm - tbl_trans # $26-5 fscale denorm
4815        short           sop_ssnan - tbl_trans   # $26-4 fscale snan
4816        short           tbl_trans - tbl_trans   # $26-6 fscale unnorm
4817        short           tbl_trans - tbl_trans   # $26-7 ERROR
4818
4819        short           tbl_trans - tbl_trans   # $27-0 fsglmul norm
4820        short           tbl_trans - tbl_trans   # $27-1 fsglmul zero
4821        short           tbl_trans - tbl_trans   # $27-2 fsglmul inf
4822        short           tbl_trans - tbl_trans   # $27-3 fsglmul qnan
4823        short           tbl_trans - tbl_trans   # $27-5 fsglmul denorm
4824        short           tbl_trans - tbl_trans   # $27-4 fsglmul snan
4825        short           tbl_trans - tbl_trans   # $27-6 fsglmul unnorm
4826        short           tbl_trans - tbl_trans   # $27-7 ERROR
4827
4828        short           tbl_trans - tbl_trans   # $28-0 fsub norm
4829        short           tbl_trans - tbl_trans   # $28-1 fsub zero
4830        short           tbl_trans - tbl_trans   # $28-2 fsub inf
4831        short           tbl_trans - tbl_trans   # $28-3 fsub qnan
4832        short           tbl_trans - tbl_trans   # $28-5 fsub denorm
4833        short           tbl_trans - tbl_trans   # $28-4 fsub snan
4834        short           tbl_trans - tbl_trans   # $28-6 fsub unnorm
4835        short           tbl_trans - tbl_trans   # $28-7 ERROR
4836
4837        short           tbl_trans - tbl_trans   # $29-0 ERROR
4838        short           tbl_trans - tbl_trans   # $29-1 ERROR
4839        short           tbl_trans - tbl_trans   # $29-2 ERROR
4840        short           tbl_trans - tbl_trans   # $29-3 ERROR
4841        short           tbl_trans - tbl_trans   # $29-4 ERROR
4842        short           tbl_trans - tbl_trans   # $29-5 ERROR
4843        short           tbl_trans - tbl_trans   # $29-6 ERROR
4844        short           tbl_trans - tbl_trans   # $29-7 ERROR
4845
4846        short           tbl_trans - tbl_trans   # $2a-0 ERROR
4847        short           tbl_trans - tbl_trans   # $2a-1 ERROR
4848        short           tbl_trans - tbl_trans   # $2a-2 ERROR
4849        short           tbl_trans - tbl_trans   # $2a-3 ERROR
4850        short           tbl_trans - tbl_trans   # $2a-4 ERROR
4851        short           tbl_trans - tbl_trans   # $2a-5 ERROR
4852        short           tbl_trans - tbl_trans   # $2a-6 ERROR
4853        short           tbl_trans - tbl_trans   # $2a-7 ERROR
4854
4855        short           tbl_trans - tbl_trans   # $2b-0 ERROR
4856        short           tbl_trans - tbl_trans   # $2b-1 ERROR
4857        short           tbl_trans - tbl_trans   # $2b-2 ERROR
4858        short           tbl_trans - tbl_trans   # $2b-3 ERROR
4859        short           tbl_trans - tbl_trans   # $2b-4 ERROR
4860        short           tbl_trans - tbl_trans   # $2b-5 ERROR
4861        short           tbl_trans - tbl_trans   # $2b-6 ERROR
4862        short           tbl_trans - tbl_trans   # $2b-7 ERROR
4863
4864        short           tbl_trans - tbl_trans   # $2c-0 ERROR
4865        short           tbl_trans - tbl_trans   # $2c-1 ERROR
4866        short           tbl_trans - tbl_trans   # $2c-2 ERROR
4867        short           tbl_trans - tbl_trans   # $2c-3 ERROR
4868        short           tbl_trans - tbl_trans   # $2c-4 ERROR
4869        short           tbl_trans - tbl_trans   # $2c-5 ERROR
4870        short           tbl_trans - tbl_trans   # $2c-6 ERROR
4871        short           tbl_trans - tbl_trans   # $2c-7 ERROR
4872
4873        short           tbl_trans - tbl_trans   # $2d-0 ERROR
4874        short           tbl_trans - tbl_trans   # $2d-1 ERROR
4875        short           tbl_trans - tbl_trans   # $2d-2 ERROR
4876        short           tbl_trans - tbl_trans   # $2d-3 ERROR
4877        short           tbl_trans - tbl_trans   # $2d-4 ERROR
4878        short           tbl_trans - tbl_trans   # $2d-5 ERROR
4879        short           tbl_trans - tbl_trans   # $2d-6 ERROR
4880        short           tbl_trans - tbl_trans   # $2d-7 ERROR
4881
4882        short           tbl_trans - tbl_trans   # $2e-0 ERROR
4883        short           tbl_trans - tbl_trans   # $2e-1 ERROR
4884        short           tbl_trans - tbl_trans   # $2e-2 ERROR
4885        short           tbl_trans - tbl_trans   # $2e-3 ERROR
4886        short           tbl_trans - tbl_trans   # $2e-4 ERROR
4887        short           tbl_trans - tbl_trans   # $2e-5 ERROR
4888        short           tbl_trans - tbl_trans   # $2e-6 ERROR
4889        short           tbl_trans - tbl_trans   # $2e-7 ERROR
4890
4891        short           tbl_trans - tbl_trans   # $2f-0 ERROR
4892        short           tbl_trans - tbl_trans   # $2f-1 ERROR
4893        short           tbl_trans - tbl_trans   # $2f-2 ERROR
4894        short           tbl_trans - tbl_trans   # $2f-3 ERROR
4895        short           tbl_trans - tbl_trans   # $2f-4 ERROR
4896        short           tbl_trans - tbl_trans   # $2f-5 ERROR
4897        short           tbl_trans - tbl_trans   # $2f-6 ERROR
4898        short           tbl_trans - tbl_trans   # $2f-7 ERROR
4899
4900        short           ssincos  - tbl_trans    # $30-0 fsincos norm
4901        short           ssincosz - tbl_trans    # $30-1 fsincos zero
4902        short           ssincosi - tbl_trans    # $30-2 fsincos inf
4903        short           ssincosqnan - tbl_trans # $30-3 fsincos qnan
4904        short           ssincosd - tbl_trans    # $30-5 fsincos denorm
4905        short           ssincossnan - tbl_trans # $30-4 fsincos snan
4906        short           tbl_trans - tbl_trans   # $30-6 fsincos unnorm
4907        short           tbl_trans - tbl_trans   # $30-7 ERROR
4908
4909        short           ssincos  - tbl_trans    # $31-0 fsincos norm
4910        short           ssincosz - tbl_trans    # $31-1 fsincos zero
4911        short           ssincosi - tbl_trans    # $31-2 fsincos inf
4912        short           ssincosqnan - tbl_trans # $31-3 fsincos qnan
4913        short           ssincosd - tbl_trans    # $31-5 fsincos denorm
4914        short           ssincossnan - tbl_trans # $31-4 fsincos snan
4915        short           tbl_trans - tbl_trans   # $31-6 fsincos unnorm
4916        short           tbl_trans - tbl_trans   # $31-7 ERROR
4917
4918        short           ssincos  - tbl_trans    # $32-0 fsincos norm
4919        short           ssincosz - tbl_trans    # $32-1 fsincos zero
4920        short           ssincosi - tbl_trans    # $32-2 fsincos inf
4921        short           ssincosqnan - tbl_trans # $32-3 fsincos qnan
4922        short           ssincosd - tbl_trans    # $32-5 fsincos denorm
4923        short           ssincossnan - tbl_trans # $32-4 fsincos snan
4924        short           tbl_trans - tbl_trans   # $32-6 fsincos unnorm
4925        short           tbl_trans - tbl_trans   # $32-7 ERROR
4926
4927        short           ssincos  - tbl_trans    # $33-0 fsincos norm
4928        short           ssincosz - tbl_trans    # $33-1 fsincos zero
4929        short           ssincosi - tbl_trans    # $33-2 fsincos inf
4930        short           ssincosqnan - tbl_trans # $33-3 fsincos qnan
4931        short           ssincosd - tbl_trans    # $33-5 fsincos denorm
4932        short           ssincossnan - tbl_trans # $33-4 fsincos snan
4933        short           tbl_trans - tbl_trans   # $33-6 fsincos unnorm
4934        short           tbl_trans - tbl_trans   # $33-7 ERROR
4935
4936        short           ssincos  - tbl_trans    # $34-0 fsincos norm
4937        short           ssincosz - tbl_trans    # $34-1 fsincos zero
4938        short           ssincosi - tbl_trans    # $34-2 fsincos inf
4939        short           ssincosqnan - tbl_trans # $34-3 fsincos qnan
4940        short           ssincosd - tbl_trans    # $34-5 fsincos denorm
4941        short           ssincossnan - tbl_trans # $34-4 fsincos snan
4942        short           tbl_trans - tbl_trans   # $34-6 fsincos unnorm
4943        short           tbl_trans - tbl_trans   # $34-7 ERROR
4944
4945        short           ssincos  - tbl_trans    # $35-0 fsincos norm
4946        short           ssincosz - tbl_trans    # $35-1 fsincos zero
4947        short           ssincosi - tbl_trans    # $35-2 fsincos inf
4948        short           ssincosqnan - tbl_trans # $35-3 fsincos qnan
4949        short           ssincosd - tbl_trans    # $35-5 fsincos denorm
4950        short           ssincossnan - tbl_trans # $35-4 fsincos snan
4951        short           tbl_trans - tbl_trans   # $35-6 fsincos unnorm
4952        short           tbl_trans - tbl_trans   # $35-7 ERROR
4953
4954        short           ssincos  - tbl_trans    # $36-0 fsincos norm
4955        short           ssincosz - tbl_trans    # $36-1 fsincos zero
4956        short           ssincosi - tbl_trans    # $36-2 fsincos inf
4957        short           ssincosqnan - tbl_trans # $36-3 fsincos qnan
4958        short           ssincosd - tbl_trans    # $36-5 fsincos denorm
4959        short           ssincossnan - tbl_trans # $36-4 fsincos snan
4960        short           tbl_trans - tbl_trans   # $36-6 fsincos unnorm
4961        short           tbl_trans - tbl_trans   # $36-7 ERROR
4962
4963        short           ssincos  - tbl_trans    # $37-0 fsincos norm
4964        short           ssincosz - tbl_trans    # $37-1 fsincos zero
4965        short           ssincosi - tbl_trans    # $37-2 fsincos inf
4966        short           ssincosqnan - tbl_trans # $37-3 fsincos qnan
4967        short           ssincosd - tbl_trans    # $37-5 fsincos denorm
4968        short           ssincossnan - tbl_trans # $37-4 fsincos snan
4969        short           tbl_trans - tbl_trans   # $37-6 fsincos unnorm
4970        short           tbl_trans - tbl_trans   # $37-7 ERROR
4971
4972##########
4973
4974# funimp_iacc: the instruction fetch access for the displacement word for the
4975# fdbcc emulation failed. Here, we create an access error frame
4976# from the current frame and branch to _real_access().
#
# Steps, as performed below:
#   1. Restore d0-d1/a0-a1, the FP control registers and fp0-fp1 from
#      the save areas addressed off a6.
#   2. Copy USER_FPIAR into EXC_PC (presumably the PC slot of the
#      exception frame -- confirm against the EXC_PC definition), then
#      unlink a6.
#   3. Grow the frame on the stack by one longword and fill in SR, PC,
#      voff, EA (a copy of the PC longword) and FSLW.
#   4. If the tested SR bit indicates supervisor mode, also set the TM
#      bit in the FSLW.
#
# NOTE(review): after the push below, the stores reach up to 0xf(%sp),
# i.e. 12 bytes of the incoming frame are overwritten. This assumes the
# incoming frame is at least 12 bytes long -- confirm against the path
# that branches here.
4977funimp_iacc:
4978        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
4979        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4980        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
4981
4982        mov.l           USER_FPIAR(%a6),EXC_PC(%a6) # store current PC (saved FPIAR value)
4983
4984        unlk            %a6
4985
# From here on all offsets are relative to the new, 4-bytes-lower %sp.
4986        mov.l           (%sp),-(%sp)            # store SR,hi(PC): copy top longword 4 bytes lower
4987        mov.w           0x8(%sp),0x4(%sp)       # store lo(PC): old lo(PC) now sits at 0x8(%sp)
# NOTE(review): 0x4008 presumably encodes stack frame format $4 with
# vector offset $008 -- confirm against the M68060 User's Manual.
4988        mov.w           &0x4008,0x6(%sp)        # store voff
4989        mov.l           0x2(%sp),0x8(%sp)       # store EA: the PC just assembled at 0x2(%sp)
4990        mov.l           &0x09428001,0xc(%sp)    # store FSLW
4991
4992        btst            &0x5,(%sp)              # user or supervisor mode? (bit 5 of first SR byte)
4993        beq.b           funimp_iacc_end         # user: bit clear, FSLW stays as stored
4994        bset            &0x2,0xd(%sp)           # set supervisor TM bit (bit 2 of FSLW's 2nd byte)
4995
4996funimp_iacc_end:
4997        bra.l           _real_access            # hand the rebuilt frame to _real_access
4998
4999#########################################################################
5000# ssin():     computes the sine of a normalized input                   #
5001# ssind():    computes the sine of a denormalized input                 #
5002# scos():     computes the cosine of a normalized input                 #
5003# scosd():    computes the cosine of a denormalized input               #
5004# ssincos():  computes the sine and cosine of a normalized input        #
5005# ssincosd(): computes the sine and cosine of a denormalized input      #
5006#                                                                       #
5007# INPUT *************************************************************** #
5008#       a0 = pointer to extended precision input                        #
5009#       d0 = round precision,mode                                       #
5010#                                                                       #
5011# OUTPUT ************************************************************** #
5012#       fp0 = sin(X) or cos(X)                                          #
5013#                                                                       #
5014#    For ssincos(X):                                                    #
5015#       fp0 = sin(X)                                                    #
5016#       fp1 = cos(X)                                                    #
5017#                                                                       #
5018# ACCURACY and MONOTONICITY ******************************************* #
5019#       The returned result is within 1 ulp in 64 significant bits,     #
5020#       i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
5021#       rounded to double precision. The result is provably monotonic   #
5022#       in double precision.                                            #
5023#                                                                       #
5024# ALGORITHM *********************************************************** #
5025#                                                                       #
5026#       SIN and COS:                                                    #
5027#       1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1.  #
5028#                                                                       #
5029#       2. If |X| >= 15Pi or |X| < 2**(-40), go to 7.                   #
5030#                                                                       #
5031#       3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let        #
5032#               k = N mod 4, so in particular, k = 0,1,2,or 3.          #
5033#               Overwrite k by k := k + AdjN.                           #
5034#                                                                       #
5035#       4. If k is even, go to 6.                                       #
5036#                                                                       #
5037#       5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j.                 #
5038#               Return sgn*cos(r) where cos(r) is approximated by an    #
5039#               even polynomial in r, 1 + r*r*(B1+s*(B2+ ... + s*B8)),  #
5040#               s = r*r.                                                #
5041#               Exit.                                                   #
5042#                                                                       #
5043#       6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r)  #
5044#               where sin(r) is approximated by an odd polynomial in r  #
5045#               r + r*s*(A1+s*(A2+ ... + s*A7)),        s = r*r.        #
5046#               Exit.                                                   #
5047#                                                                       #
5048#       7. If |X| > 1, go to 9.                                         #
5049#                                                                       #
5050#       8. (|X|<2**(-40)) If SIN is invoked, return X;                  #
5051#               otherwise return 1.                                     #
5052#                                                                       #
5053#       9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi,           #
5054#               go back to 3.                                           #
5055#                                                                       #
5056#       SINCOS:                                                         #
5057#       1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.                   #
5058#                                                                       #
5059#       2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let        #
5060#               k = N mod 4, so in particular, k = 0,1,2,or 3.          #
5061#                                                                       #
5062#       3. If k is even, go to 5.                                       #
5063#                                                                       #
5064#       4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), ie.  #
5065#               j1 exclusive or with the l.s.b. of k.                   #
5066#               sgn1 := (-1)**j1, sgn2 := (-1)**j2.                     #
5067#               SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where   #
5068#               sin(r) and cos(r) are computed as odd and even          #
5069#               polynomials in r, respectively. Exit                    #
5070#                                                                       #
5071#       5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1.                 #
5072#               SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where   #
5073#               sin(r) and cos(r) are computed as odd and even          #
5074#               polynomials in r, respectively. Exit                    #
5075#                                                                       #
5076#       6. If |X| > 1, go to 8.                                         #
5077#                                                                       #
5078#       7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit.              #
5079#                                                                       #
5080#       8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi,           #
5081#               go back to 2.                                           #
5082#                                                                       #
5083#########################################################################
5084
#--Polynomial coefficients for the sin(r) (A1..A7) and cos(r) (B1..B8)
#--kernels below. The code reads SINA3..SINA7 and COSB4..COSB8 with
#--double-precision (.d) operands (SINA3 and COSB4 are padded to four
#--longwords), SINA1, SINA2, COSB2 and COSB3 with extended-precision
#--(.x) operands, and COSB1 with a single-precision (.s) operand.
5085SINA7:  long            0xBD6AAA77,0xCCC994F5
5086SINA6:  long            0x3DE61209,0x7AAE8DA1
5087SINA5:  long            0xBE5AE645,0x2A118AE4
5088SINA4:  long            0x3EC71DE3,0xA5341531
5089SINA3:  long            0xBF2A01A0,0x1A018B59,0x00000000,0x00000000
5090SINA2:  long            0x3FF80000,0x88888888,0x888859AF,0x00000000
5091SINA1:  long            0xBFFC0000,0xAAAAAAAA,0xAAAAAA99,0x00000000
5092
5093COSB8:  long            0x3D2AC4D0,0xD6011EE3
5094COSB7:  long            0xBDA9396F,0x9F45AC19
5095COSB6:  long            0x3E21EED9,0x0612C972
5096COSB5:  long            0xBE927E4F,0xB79D9FCF
5097COSB4:  long            0x3EFA01A0,0x1A01D423,0x00000000,0x00000000
5098COSB3:  long            0xBFF50000,0xB60B60B6,0x0B61D438,0x00000000
5099COSB2:  long            0x3FFA0000,0xAAAAAAAA,0xAAAAAB5E
5100COSB1:  long            0xBF000000              # B1 = -1/2 in single precision
5101
#--Scratch-area aliases used by the sin/cos/sincos code. Several names
#--map to the same storage (INARG/X/RPRIME, POSNEG1/TWOTO63 and
#--ENDFLAG/INT), so writing one of them overwrites the others.
5102        set             INARG,FP_SCR0           # argument being reduced in SLOOP
5103
5104        set             X,FP_SCR0               # saved input; later R' (sin) or S' (cos)
5105#       set             XDCARE,X+2
5106        set             XFRAC,X+4
5107
5108        set             RPRIME,FP_SCR0          # R' = +/-R in the sincos paths
5109        set             SPRIME,FP_SCR1          # S' = +/-S in the sincos paths
5110
5111        set             POSNEG1,L_SCR1          # +/-1.0 in single precision
5112        set             TWOTO63,L_SCR1          # +/-2**63 in single precision (SWORK)
5113
5114        set             ENDFLAG,L_SCR2          # byte flag: 1 on the last SLOOP pass
5115        set             INT,L_SCR2              # integer quotient N
5116
5117        set             ADJN,L_SCR3             # 0 = sin, 1 = cos, 4 = sincos
5118
5119############################################
#--ssin: sine entry point. Records ADJN = 0 and joins the code shared
#--with scos at SINBGN.
5120        global          ssin
5121ssin:
5122        mov.l           &0,ADJN(%a6)            # ADJN = 0 selects sine
5123        bra.b           SINBGN
5124
5125############################################
#--scos: cosine entry point. Records ADJN = 1 and falls through into
#--SINBGN.
5126        global          scos
5127scos:
5128        mov.l           &1,ADJN(%a6)            # ADJN = 1 selects cosine
5129
5130############################################
#--SINBGN: common start for ssin/scos. Loads X from (a0) into fp0,
#--keeps a copy at X(a6), builds the "compact" form of |X| in d1
#--(exponent word in the upper half, top 16 mantissa bits in the lower
#--half, sign cleared) and dispatches on its magnitude:
#--    |X| <  2**(-40)          -> SINSM
#--    2**(-40) <= |X| < 15 PI  -> SINMAIN
#--    otherwise                -> SREDUCEX
5131SINBGN:
5132#--LOAD X, THEN CHECK IF |X| IS TOO SMALL OR TOO LARGE
5133
5134        fmov.x          (%a0),%fp0              # LOAD INPUT
5135        fmov.x          %fp0,X(%a6)             # save input at X
5136
5137# "COMPACTIFY" X
5138        mov.l           (%a0),%d1               # put exp in hi word
5139        mov.w           4(%a0),%d1              # fetch hi(man)
5140        and.l           &0x7FFFFFFF,%d1         # strip sign
5141
5142        cmpi.l          %d1,&0x3FD78000         # is |X| >= 2**(-40)?
5143        bge.b           SOK1                    # yes; go check the upper bound
5144        bra.w           SINSM                   # no; input is very small
5145
5146SOK1:
5147        cmp.l           %d1,&0x4004BC7E         # is |X| < 15 PI?
5148        blt.b           SINMAIN                 # yes; use the table-driven reduction
5149        bra.w           SREDUCEX                # no; input is very large
5150
5151#--THIS IS THE USUAL CASE, |X| <= 15 PI.
5152#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
#--N = X*(2/PI) converted to an integer is stored in INT and used to
#--index PITBL (defined elsewhere in the file). PITBL+0x200 is the N = 0
#--entry; entries are 16 bytes apart and are read as an
#--extended-precision leading piece Y1 followed by a single-precision
#--trailing piece Y2.
5153SINMAIN:
5154        fmov.x          %fp0,%fp1
5155        fmul.d          TWOBYPI(%pc),%fp1       # X*2/PI
5156
5157        lea             PITBL+0x200(%pc),%a1    # TABLE OF N*PI/2, N = -32,...,32
5158
5159        fmov.l          %fp1,INT(%a6)           # CONVERT TO INTEGER
5160
5161        mov.l           INT(%a6),%d1            # make a copy of N
5162        asl.l           &4,%d1                  # N *= 16
5163        add.l           %d1,%a1                 # tbl_addr = a1 + (N*16)
5164
5165# A1 IS THE ADDRESS OF N*PIBY2
5166# ...WHICH IS IN TWO PIECES Y1 & Y2
5167        fsub.x          (%a1)+,%fp0             # X-Y1
5168        fsub.s          (%a1),%fp0              # fp0 = R = (X-Y1)-Y2
5169
#--SINCONT: on entry fp0 = R and INT = N. Chooses the cosine polynomial
#--when N+ADJN is odd and falls through to the sine polynomial when it
#--is even. After the rotate, bit 1 of N+ADJN sits in bit 0 of d1, where
#--SINPOLY/COSPOLY pick it up as the sign selector.
5170SINCONT:
5171#--continuation from REDUCEX
5172
5173#--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED
5174        mov.l           INT(%a6),%d1
5175        add.l           ADJN(%a6),%d1           # d1 = N+ADJN; SEE IF IT IS ODD OR EVEN
5176        ror.l           &1,%d1                  # SUM WAS ODD IFF D1 IS NOW NEGATIVE
5177        cmp.l           %d1,&0
5178        blt.w           COSPOLY
5179
5180#--LET J BE THE LEAST SIG. BIT OF D1, LET SGN := (-1)**J.
5181#--THEN WE RETURN       SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY
5182#--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE
5183#--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS
5184#--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))])
5185#--WHERE T=S*S.
5186#--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION
5187#--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT.
#--ON ENTRY: FP0 = R, D1 = N+ADJN ROTATED RIGHT ONCE (SEE SINCONT),
#--D0 = VALUE TO RELOAD INTO FPCR BEFORE THE FINAL ADD.
#--FP2/FP3 ARE SAVED AND RESTORED; FP1, D1 AND X(A6) ARE OVERWRITTEN.
#--EXITS THROUGH t_inx2 (DEFINED ELSEWHERE IN THE FILE).
5188SINPOLY:
5189        fmovm.x         &0x0c,-(%sp)            # save fp2/fp3
5190
5191        fmov.x          %fp0,X(%a6)             # X IS R
5192        fmul.x          %fp0,%fp0               # FP0 IS S
5193
5194        fmov.d          SINA7(%pc),%fp3
5195        fmov.d          SINA6(%pc),%fp2
5196
5197        fmov.x          %fp0,%fp1
5198        fmul.x          %fp1,%fp1               # FP1 IS T
5199
5200        ror.l           &1,%d1
5201        and.l           &0x80000000,%d1
5202# ...LEAST SIG. BIT OF D1 IN SIGN POSITION
5203        eor.l           %d1,X(%a6)              # X IS NOW R'= SGN*R
5204
5205        fmul.x          %fp1,%fp3               # TA7
5206        fmul.x          %fp1,%fp2               # TA6
5207
5208        fadd.d          SINA5(%pc),%fp3         # A5+TA7
5209        fadd.d          SINA4(%pc),%fp2         # A4+TA6
5210
5211        fmul.x          %fp1,%fp3               # T(A5+TA7)
5212        fmul.x          %fp1,%fp2               # T(A4+TA6)
5213
5214        fadd.d          SINA3(%pc),%fp3         # A3+T(A5+TA7)
5215        fadd.x          SINA2(%pc),%fp2         # A2+T(A4+TA6)
5216
5217        fmul.x          %fp3,%fp1               # T(A3+T(A5+TA7))
5218
5219        fmul.x          %fp0,%fp2               # S(A2+T(A4+TA6))
5220        fadd.x          SINA1(%pc),%fp1         # A1+T(A3+T(A5+TA7))
5221        fmul.x          X(%a6),%fp0             # R'*S
5222
5223        fadd.x          %fp2,%fp1               # [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))]
5224
5225        fmul.x          %fp1,%fp0               # SIN(R')-R'
5226
5227        fmovm.x         (%sp)+,&0x30            # restore fp2/fp3
5228
5229        fmov.l          %d0,%fpcr               # restore users round mode,prec
5230        fadd.x          X(%a6),%fp0             # last inst - possible exception set
5231        bra             t_inx2
5232
5233#--LET J BE THE LEAST SIG. BIT OF D1, LET SGN := (-1)**J.
5234#--THEN WE RETURN       SGN*COS(R). SGN*COS(R) IS COMPUTED BY
5235#--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE
5236#--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS
5237#--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))])
5238#--WHERE T=S*S.
5239#--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION
5240#--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2
5241#--AND IS THEREFORE STORED AS SINGLE PRECISION.
#--ON ENTRY: FP0 = R, D1 = N+ADJN ROTATED RIGHT ONCE (SEE SINCONT),
#--D0 = VALUE TO RELOAD INTO FPCR BEFORE THE FINAL ADD.
#--FP2/FP3 ARE SAVED AND RESTORED; FP1, D1, X(A6) AND POSNEG1(A6) ARE
#--OVERWRITTEN. EXITS THROUGH t_inx2 (DEFINED ELSEWHERE IN THE FILE).
5242COSPOLY:
5243        fmovm.x         &0x0c,-(%sp)            # save fp2/fp3
5244
5245        fmul.x          %fp0,%fp0               # FP0 IS S
5246
5247        fmov.d          COSB8(%pc),%fp2
5248        fmov.d          COSB7(%pc),%fp3
5249
5250        fmov.x          %fp0,%fp1
5251        fmul.x          %fp1,%fp1               # FP1 IS T
5252
5253        fmov.x          %fp0,X(%a6)             # X IS S
5254        ror.l           &1,%d1
5255        and.l           &0x80000000,%d1
5256# ...LEAST SIG. BIT OF D1 IN SIGN POSITION
5257
5258        fmul.x          %fp1,%fp2               # TB8
5259
5260        eor.l           %d1,X(%a6)              # X IS NOW S'= SGN*S
5261        and.l           &0x80000000,%d1         # d1 is already masked; value unchanged
5262
5263        fmul.x          %fp1,%fp3               # TB7
5264
5265        or.l            &0x3F800000,%d1         # D1 IS SGN IN SINGLE
5266        mov.l           %d1,POSNEG1(%a6)
5267
5268        fadd.d          COSB6(%pc),%fp2         # B6+TB8
5269        fadd.d          COSB5(%pc),%fp3         # B5+TB7
5270
5271        fmul.x          %fp1,%fp2               # T(B6+TB8)
5272        fmul.x          %fp1,%fp3               # T(B5+TB7)
5273
5274        fadd.d          COSB4(%pc),%fp2         # B4+T(B6+TB8)
5275        fadd.x          COSB3(%pc),%fp3         # B3+T(B5+TB7)
5276
5277        fmul.x          %fp1,%fp2               # T(B4+T(B6+TB8))
5278        fmul.x          %fp3,%fp1               # T(B3+T(B5+TB7))
5279
5280        fadd.x          COSB2(%pc),%fp2         # B2+T(B4+T(B6+TB8))
5281        fadd.s          COSB1(%pc),%fp1         # B1+T(B3+T(B5+TB7))
5282
5283        fmul.x          %fp2,%fp0               # S(B2+T(B4+T(B6+TB8)))
5284
5285        fadd.x          %fp1,%fp0               # [B1+T(B3+T(B5+TB7))]+[S(B2+T(B4+T(B6+TB8)))]
5286
5287        fmul.x          X(%a6),%fp0             # S'*(...) = SGN*COS(R) - SGN
5288
5289        fmovm.x         (%sp)+,&0x30            # restore fp2/fp3
5290
5291        fmov.l          %d0,%fpcr               # restore users round mode,prec
5292        fadd.s          POSNEG1(%a6),%fp0       # last inst - possible exception set
5293        bra             t_inx2
5294
5295##############################################
5296
5297# SINe: Big OR Small?
5298#--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
5299#--IF |X| < 2**(-40), RETURN X OR 1.
#--NOTE(review): no branch to SINBORS is visible in this section; SINBGN
#--enters at SINSM directly. 0x3FFF8000 is the compact form of 1.0.
5300SINBORS:
5301        cmp.l           %d1,&0x3FFF8000
5302        bgt.l           SREDUCEX
5303
#--SINSM: tiny input. ADJN > 0 (cosine) returns 1, otherwise (sine)
#--the saved input X is returned.
5304SINSM:
5305        mov.l           ADJN(%a6),%d1
5306        cmp.l           %d1,&0
5307        bgt.b           COSTINY
5308
5309# here, the operation may underflow iff the precision is sgl or dbl.
5310# extended denorms are handled through another entry point.
5311SINTINY:
5312#       mov.w           &0x0000,XDCARE(%a6)     # JUST IN CASE
5313
5314        fmov.l          %d0,%fpcr               # restore users round mode,prec
5315        mov.b           &FMOV_OP,%d1            # last inst is MOVE
5316        fmov.x          X(%a6),%fp0             # last inst - possible exception set
5317        bra             t_catch
5318
#--COSTINY: fp0 = 1.0 plus the single-precision constant 0x80800000
#--(-2**(-126)), added under the user's FPCR so the final instruction
#--is inexact rather than an exact load of 1.0.
5319COSTINY:
5320        fmov.s          &0x3F800000,%fp0        # fp0 = 1.0
5321        fmov.l          %d0,%fpcr               # restore users round mode,prec
5322        fadd.s          &0x80800000,%fp0        # last inst - possible exception set
5323        bra             t_pinx2
5324
5325################################################
#--ssind: sine entry point for a denormalized input. All work is done
#--by t_extdnrm, which is defined elsewhere in the file.
5326        global          ssind
5327#--SIN(X) = X FOR DENORMALIZED X
5328ssind:
5329        bra             t_extdnrm
5330
5331############################################
#--scosd: cosine entry point for a denormalized input. Loads 1.0 into
#--fp0 and exits through t_pinx2 (defined elsewhere in the file).
5332        global          scosd
5333#--COS(X) = 1 FOR DENORMALIZED X
5334scosd:
5335        fmov.s          &0x3F800000,%fp0        # fp0 = 1.0
5336        bra             t_pinx2
5337
5338##################################################
5339
#--ssincos: simultaneous sine/cosine entry point. Same magnitude
#--classification as SINBGN, but dispatching to the SC* paths:
#--    |X| <  2**(-40)          -> SCSM
#--    2**(-40) <= |X| < 15 PI  -> SCMAIN
#--    otherwise                -> SREDUCEX
#--ADJN = 4 is what makes SRESTORE return to SCCONT instead of SINCONT.
5340        global          ssincos
5341ssincos:
5342#--SET ADJN TO 4
5343        mov.l           &4,ADJN(%a6)
5344
5345        fmov.x          (%a0),%fp0              # LOAD INPUT
5346        fmov.x          %fp0,X(%a6)             # save input at X
5347
5348        mov.l           (%a0),%d1               # exponent word into the upper half
5349        mov.w           4(%a0),%d1              # top 16 mantissa bits into the lower half
5350        and.l           &0x7FFFFFFF,%d1         # COMPACTIFY X
5351
5352        cmp.l           %d1,&0x3FD78000         # |X| >= 2**(-40)?
5353        bge.b           SCOK1                   # yes; go check the upper bound
5354        bra.w           SCSM                    # no; input is very small
5355
5356SCOK1:
5357        cmp.l           %d1,&0x4004BC7E         # |X| < 15 PI?
5358        blt.b           SCMAIN                  # yes; use the table-driven reduction
5359        bra.w           SREDUCEX                # no; input is very large
5360
5361
5362#--THIS IS THE USUAL CASE, |X| <= 15 PI.
5363#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
#--Same reduction as SINMAIN: N = X*(2/PI) converted to an integer is
#--stored in INT, and the PITBL entry for N (16 bytes per entry, N = 0
#--at PITBL+0x200) is subtracted in two pieces, leaving R in fp0.
5364SCMAIN:
5365        fmov.x          %fp0,%fp1
5366
5367        fmul.d          TWOBYPI(%pc),%fp1       # X*2/PI
5368
5369        lea             PITBL+0x200(%pc),%a1    # TABLE OF N*PI/2, N = -32,...,32
5370
5371        fmov.l          %fp1,INT(%a6)           # CONVERT TO INTEGER
5372
5373        mov.l           INT(%a6),%d1            # make a copy of N
5374        asl.l           &4,%d1                  # N *= 16
5375        add.l           %d1,%a1                 # ADDRESS OF N*PIBY2, IN Y1, Y2
5376
5377        fsub.x          (%a1)+,%fp0             # X-Y1
5378        fsub.s          (%a1),%fp0              # FP0 IS R = (X-Y1)-Y2
5379
#--SCCONT: on entry fp0 = R and INT = N. Branches to NEVEN when N is
#--even and falls through to SNODD when N is odd. After the rotate,
#--bit 1 of N sits in bit 0 of d1 for the sign logic in SNODD/NEVEN.
5380SCCONT:
5381#--continuation point from REDUCEX
5382
5383        mov.l           INT(%a6),%d1
5384        ror.l           &1,%d1
5385        cmp.l           %d1,&0                  # D1 < 0 IFF N IS ODD
5386        bge.w           NEVEN
5387
#--SNODD: sincos for odd N. With j1 = bit 1 of N:
#--    SIN(X) = sgn1*cos(r), sgn1 = (-1)**j1        -> left in fp0
#--    COS(X) = sgn2*sin(r), sgn2 = (-1)**(j1 EOR 1) -> fp1, sto_cos
#--ON ENTRY: FP0 = R, D1 = N ROTATED RIGHT ONCE (SEE SCCONT), D0 = VALUE
#--TO RELOAD INTO FPCR. sto_cos AND t_inx2 ARE DEFINED ELSEWHERE.
5388SNODD:
5389#--FP2 IS SAVED HERE; D2 IS SAVED AND RESTORED AROUND ITS USE BELOW.
5390        fmovm.x         &0x04,-(%sp)            # save fp2
5391
5392        fmov.x          %fp0,RPRIME(%a6)
5393        fmul.x          %fp0,%fp0               # FP0 IS S = R*R
5394        fmov.d          SINA7(%pc),%fp1         # A7
5395        fmov.d          COSB8(%pc),%fp2         # B8
5396        fmul.x          %fp0,%fp1               # SA7
5397        fmul.x          %fp0,%fp2               # SB8
5398
5399        mov.l           %d2,-(%sp)              # save d2
5400        mov.l           %d1,%d2
5401        ror.l           &1,%d2                  # bit 1 of N -> sign position of d2
5402        and.l           &0x80000000,%d2
5403        eor.l           %d1,%d2                 # EOR with d1, whose sign holds bit 0 of N
5404        and.l           &0x80000000,%d2         # d2 = sign of sgn2
5405
5406        fadd.d          SINA6(%pc),%fp1         # A6+SA7
5407        fadd.d          COSB7(%pc),%fp2         # B7+SB8
5408
5409        fmul.x          %fp0,%fp1               # S(A6+SA7)
5410        eor.l           %d2,RPRIME(%a6)         # RPRIME IS NOW R' = sgn2*R
5411        mov.l           (%sp)+,%d2              # restore d2
5412        fmul.x          %fp0,%fp2               # S(B7+SB8)
5413        ror.l           &1,%d1                  # bit 1 of N -> sign position of d1
5414        and.l           &0x80000000,%d1         # d1 = sign of sgn1
5415        mov.l           &0x3F800000,POSNEG1(%a6)
5416        eor.l           %d1,POSNEG1(%a6)        # POSNEG1 = sgn1 * 1.0 IN SINGLE
5417
5418        fadd.d          SINA5(%pc),%fp1         # A5+S(A6+SA7)
5419        fadd.d          COSB6(%pc),%fp2         # B6+S(B7+SB8)
5420
5421        fmul.x          %fp0,%fp1               # S(A5+S(A6+SA7))
5422        fmul.x          %fp0,%fp2               # S(B6+S(B7+SB8))
5423        fmov.x          %fp0,SPRIME(%a6)
5424
5425        fadd.d          SINA4(%pc),%fp1         # A4+S(A5+S(A6+SA7))
5426        eor.l           %d1,SPRIME(%a6)         # SPRIME IS NOW S' = sgn1*S
5427        fadd.d          COSB5(%pc),%fp2         # B5+S(B6+S(B7+SB8))
5428
5429        fmul.x          %fp0,%fp1               # S(A4+...)
5430        fmul.x          %fp0,%fp2               # S(B5+...)
5431
5432        fadd.d          SINA3(%pc),%fp1         # A3+S(A4+...)
5433        fadd.d          COSB4(%pc),%fp2         # B4+S(B5+...)
5434
5435        fmul.x          %fp0,%fp1               # S(A3+...)
5436        fmul.x          %fp0,%fp2               # S(B4+...)
5437
5438        fadd.x          SINA2(%pc),%fp1         # A2+S(A3+...)
5439        fadd.x          COSB3(%pc),%fp2         # B3+S(B4+...)
5440
5441        fmul.x          %fp0,%fp1               # S(A2+...)
5442        fmul.x          %fp0,%fp2               # S(B3+...)
5443
5444        fadd.x          SINA1(%pc),%fp1         # A1+S(A2+...)
5445        fadd.x          COSB2(%pc),%fp2         # B2+S(B3+...)
5446
5447        fmul.x          %fp0,%fp1               # S(A1+...)
5448        fmul.x          %fp2,%fp0               # S(B2+...)
5449
5450        fmul.x          RPRIME(%a6),%fp1        # R'S(A1+...)
5451        fadd.s          COSB1(%pc),%fp0         # B1+S(B2...)
5452        fmul.x          SPRIME(%a6),%fp0        # S'(B1+S(B2+...))
5453
5454        fmovm.x         (%sp)+,&0x20            # restore fp2
5455
5456        fmov.l          %d0,%fpcr               # restore users round mode,prec
5457        fadd.x          RPRIME(%a6),%fp1        # COS(X)
5458        bsr             sto_cos                 # store cosine result
5459        fadd.s          POSNEG1(%a6),%fp0       # SIN(X)
5460        bra             t_inx2
5461
#--NEVEN: sincos for even N. With j1 = bit 1 of N and sgn1 = (-1)**j1:
#--    SIN(X) = sgn1*sin(r)  -> left in fp0
#--    COS(X) = sgn1*cos(r)  -> fp1, handed to sto_cos
#--ON ENTRY: FP0 = R, D1 = N ROTATED RIGHT ONCE (SEE SCCONT), D0 = VALUE
#--TO RELOAD INTO FPCR. sto_cos AND t_inx2 ARE DEFINED ELSEWHERE.
5462NEVEN:
5463#--ONLY FP2 IS SAVED ON THIS PATH.
5464        fmovm.x         &0x04,-(%sp)            # save fp2
5465
5466        fmov.x          %fp0,RPRIME(%a6)
5467        fmul.x          %fp0,%fp0               # FP0 IS S = R*R
5468
5469        fmov.d          COSB8(%pc),%fp1         # B8
5470        fmov.d          SINA7(%pc),%fp2         # A7
5471
5472        fmul.x          %fp0,%fp1               # SB8
5473        fmov.x          %fp0,SPRIME(%a6)
5474        fmul.x          %fp0,%fp2               # SA7
5475
5476        ror.l           &1,%d1                  # bit 1 of N -> sign position of d1
5477        and.l           &0x80000000,%d1         # d1 = sign of sgn1
5478
5479        fadd.d          COSB7(%pc),%fp1         # B7+SB8
5480        fadd.d          SINA6(%pc),%fp2         # A6+SA7
5481
5482        eor.l           %d1,RPRIME(%a6)         # RPRIME IS NOW R' = sgn1*R
5483        eor.l           %d1,SPRIME(%a6)         # SPRIME IS NOW S' = sgn1*S
5484
5485        fmul.x          %fp0,%fp1               # S(B7+SB8)
5486
5487        or.l            &0x3F800000,%d1         # D1 IS sgn1 IN SINGLE
5488        mov.l           %d1,POSNEG1(%a6)
5489
5490        fmul.x          %fp0,%fp2               # S(A6+SA7)
5491
5492        fadd.d          COSB6(%pc),%fp1         # B6+S(B7+SB8)
5493        fadd.d          SINA5(%pc),%fp2         # A5+S(A6+SA7)
5494
5495        fmul.x          %fp0,%fp1               # S(B6+S(B7+SB8))
5496        fmul.x          %fp0,%fp2               # S(A5+S(A6+SA7))
5497
5498        fadd.d          COSB5(%pc),%fp1         # B5+S(B6+S(B7+SB8))
5499        fadd.d          SINA4(%pc),%fp2         # A4+S(A5+S(A6+SA7))
5500
5501        fmul.x          %fp0,%fp1               # S(B5+...)
5502        fmul.x          %fp0,%fp2               # S(A4+...)
5503
5504        fadd.d          COSB4(%pc),%fp1         # B4+S(B5+...)
5505        fadd.d          SINA3(%pc),%fp2         # A3+S(A4+...)
5506
5507        fmul.x          %fp0,%fp1               # S(B4+...)
5508        fmul.x          %fp0,%fp2               # S(A3+...)
5509
5510        fadd.x          COSB3(%pc),%fp1         # B3+S(B4+...)
5511        fadd.x          SINA2(%pc),%fp2         # A2+S(A3+...)
5512
5513        fmul.x          %fp0,%fp1               # S(B3+...)
5514        fmul.x          %fp0,%fp2               # S(A2+...)
5515
5516        fadd.x          COSB2(%pc),%fp1         # B2+S(B3+...)
5517        fadd.x          SINA1(%pc),%fp2         # A1+S(A2+...)
5518
5519        fmul.x          %fp0,%fp1               # S(B2+...)
5520        fmul.x          %fp2,%fp0               # S(A1+...)
5521
5522
5523        fadd.s          COSB1(%pc),%fp1         # B1+S(B2...)
5524        fmul.x          RPRIME(%a6),%fp0        # R'S(A1+...)
5525        fmul.x          SPRIME(%a6),%fp1        # S'(B1+S(B2+...))
5526
5527        fmovm.x         (%sp)+,&0x20            # restore fp2
5528
5529        fmov.l          %d0,%fpcr               # restore users round mode,prec
5530        fadd.s          POSNEG1(%a6),%fp1       # COS(X)
5531        bsr             sto_cos                 # store cosine result
5532        fadd.x          RPRIME(%a6),%fp0        # SIN(X)
5533        bra             t_inx2
5534
5535################################################
5536
#--SinCos: Big OR Small? Sends |X| > 1 (compact form above 0x3FFF8000)
#--to SREDUCEX, otherwise falls into SCSM.
#--NOTE(review): no branch to SCBORS is visible in this section; ssincos
#--enters at SCSM directly.
5537SCBORS:
5538        cmp.l           %d1,&0x3FFF8000
5539        bgt.w           SREDUCEX
5540
5541################################################
5542
#--SCSM: tiny input, SIN(X) = X and COS(X) = 1. The cosine is formed
#--as 1.0 minus the single-precision constant 0x00800000 (2**(-126))
#--under the user's FPCR and handed to sto_cos in fp1; the sine is the
#--saved input X, returned through t_catch. sto_cos and t_catch are
#--defined elsewhere in the file.
5543SCSM:
5544#       mov.w           &0x0000,XDCARE(%a6)
5545        fmov.s          &0x3F800000,%fp1        # fp1 = 1.0
5546
5547        fmov.l          %d0,%fpcr               # restore users round mode,prec
5548        fsub.s          &0x00800000,%fp1        # COS(X) = 1 - 2**(-126)
5549        bsr             sto_cos                 # store cosine result
5550        fmov.l          %fpcr,%d0               # d0 must have fpcr,too
5551        mov.b           &FMOV_OP,%d1            # last inst is MOVE
5552        fmov.x          X(%a6),%fp0             # SIN(X) = X
5553        bra             t_catch
5554
5555##############################################
5556
#--ssincosd: sincos entry point for a denormalized input. The cosine
#--(1.0) is handed to sto_cos in fp1 with d0 preserved on the stack
#--across the call; the sine is then produced by t_extdnrm. Both
#--routines are defined elsewhere in the file.
5557        global          ssincosd
5558#--SIN AND COS OF X FOR DENORMALIZED X
5559ssincosd:
5560        mov.l           %d0,-(%sp)              # save d0
5561        fmov.s          &0x3F800000,%fp1        # fp1 = 1.0
5562        bsr             sto_cos                 # store cosine result
5563        mov.l           (%sp)+,%d0              # restore d0
5564        bra             t_extdnrm
5565
5566############################################
5567
5568#--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
5569#--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
5570#--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
#--ON ENTRY: FP0 = X, D1 = COMPACT FORM OF |X|.
#--SAVES FP2-FP5 AND D2 ON THE STACK (RESTORED AT SRESTORE) AND STARTS
#--THE LOW PART OF THE REMAINDER, FP1, AT ZERO. FP_SCR0/FP_SCR1 ARE
#--USED AS SCRATCH FROM HERE ON.
5571SREDUCEX:
5572        fmovm.x         &0x3c,-(%sp)            # save {fp2-fp5}
5573        mov.l           %d2,-(%sp)              # save d2
5574        fmov.s          &0x00000000,%fp1        # fp1 = 0
5575
5576#--If compact form of abs(arg) in d1=$7ffeffff, argument is so large that
5577#--there is a danger of unwanted overflow in first LOOP iteration.  In this
5578#--case, reduce argument by one remainder step to make subsequent reduction
5579#--safe.
5580        cmp.l           %d1,&0x7ffeffff         # is arg dangerously large?
5581        bne.b           SLOOP                   # no
5582
5583# yes; create 2**16383*PI/2
5584        mov.w           &0x7ffe,FP_SCR0_EX(%a6)
5585        mov.l           &0xc90fdaa2,FP_SCR0_HI(%a6)
5586        clr.l           FP_SCR0_LO(%a6)
5587
5588# create low half of 2**16383*PI/2 at FP_SCR1
5589        mov.w           &0x7fdc,FP_SCR1_EX(%a6)
5590        mov.l           &0x85a308d3,FP_SCR1_HI(%a6)
5591        clr.l           FP_SCR1_LO(%a6)
5592
5593        ftest.x         %fp0                    # test sign of argument
5594        fblt.w          sred_neg                # negative arg: add the positive constants
5595
5596        or.b            &0x80,FP_SCR0_EX(%a6)   # positive arg: negate both constants
5597        or.b            &0x80,FP_SCR1_EX(%a6)
5598sred_neg:
5599        fadd.x          FP_SCR0(%a6),%fp0       # high part of reduction is exact
5600        fmov.x          %fp0,%fp1               # save high result in fp1
5601        fadd.x          FP_SCR1(%a6),%fp0       # low part of reduction
5602        fsub.x          %fp0,%fp1               # determine low component of result
5603        fadd.x          FP_SCR1(%a6),%fp1       # fp0/fp1 are reduced argument.
5604
5605#--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
5606#--integer quotient will be stored in N
5607#--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
#--Each pass takes the unbiased exponent K of the current remainder.
#--K <= 28 makes this the last pass (L = 0, ENDFLAG = 1); otherwise
#--L = K-27 and ENDFLAG = 0. a1 keeps the sign/exponent word so that
#--SWORK can recover the sign of the argument.
5608SLOOP:
5609        fmov.x          %fp0,INARG(%a6)         # +-2**K * F, 1 <= F < 2
5610        mov.w           INARG(%a6),%d1          # low word of d1 = sign/exponent word
5611        mov.l           %d1,%a1                 # save a copy of D1
5612        and.l           &0x00007FFF,%d1         # keep the biased exponent only
5613        sub.l           &0x00003FFF,%d1         # d1 = K
5614        cmp.l           %d1,&28
5615        ble.b           SLASTLOOP
5616SCONTLOOP:
5617        sub.l           &27,%d1                 # d1 = L := K-27
5618        mov.b           &0,ENDFLAG(%a6)
5619        bra.b           SWORK
5620SLASTLOOP:
5621        clr.l           %d1                     # d1 = L := 0
5622        mov.b           &1,ENDFLAG(%a6)
5623
#--SWORK: one reduction step. ON ENTRY: (FP0,FP1) = (R,r), D1 = L,
#--A1 = SIGN/EXPONENT WORD OF THE ARGUMENT, ENDFLAG SET BY SLOOP.
#--FP2-FP5, D2, FP_SCR0, FP_SCR1 AND TWOTO63 ARE USED AS SCRATCH.
#--FP2 HOLDS N ON EXIT TO SRESTORE.
5624SWORK:
5625#--FIND THE REMAINDER OF (R,r) W.R.T.   2**L * (PI/2). L IS SO CHOSEN
5626#--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
5627
5628#--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
5629#--2**L * (PIby2_1), 2**L * (PIby2_2)
5630
5631        mov.l           &0x00003FFE,%d2         # BIASED EXP OF 2/PI
5632        sub.l           %d1,%d2                 # BIASED EXP OF 2**(-L)*(2/PI)
5633
5634        mov.l           &0xA2F9836E,FP_SCR0_HI(%a6)
5635        mov.l           &0x4E44152A,FP_SCR0_LO(%a6)
5636        mov.w           %d2,FP_SCR0_EX(%a6)     # FP_SCR0 = 2**(-L)*(2/PI)
5637
5638        fmov.x          %fp0,%fp2
5639        fmul.x          FP_SCR0(%a6),%fp2       # fp2 = X * 2**(-L)*(2/PI)
5640
5641#--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
5642#--FLOATING POINT FORMAT, THE TWO FMOVE'S       FMOVE.L FP <--> N
5643#--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
5644#--(SIGN(INARG)*2**63   +       FP2) - SIGN(INARG)*2**63 WILL GIVE
5645#--US THE DESIRED VALUE IN FLOATING POINT.
5646        mov.l           %a1,%d2                 # sign/exponent word saved by SLOOP
5647        swap            %d2                     # move it to the upper half
5648        and.l           &0x80000000,%d2         # keep the sign bit only
5649        or.l            &0x5F000000,%d2         # d2 = SIGN(INARG)*2**63 IN SGL
5650        mov.l           %d2,TWOTO63(%a6)
5651        fadd.s          TWOTO63(%a6),%fp2       # THE FRACTIONAL PART OF FP2 IS ROUNDED
5652        fsub.s          TWOTO63(%a6),%fp2       # fp2 = N
5653#       fint.x          %fp2
5654
5655#--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
5656        mov.l           %d1,%d2                 # d2 = L
5657
5658        add.l           &0x00003FFF,%d2         # BIASED EXP OF 2**L * (PI/2)
5659        mov.w           %d2,FP_SCR0_EX(%a6)
5660        mov.l           &0xC90FDAA2,FP_SCR0_HI(%a6)
5661        clr.l           FP_SCR0_LO(%a6)         # FP_SCR0 = 2**(L) * Piby2_1
5662
5663        add.l           &0x00003FDD,%d1         # BIASED EXP OF 2**L * Piby2_2
5664        mov.w           %d1,FP_SCR1_EX(%a6)
5665        mov.l           &0x85A308D3,FP_SCR1_HI(%a6)
5666        clr.l           FP_SCR1_LO(%a6)         # FP_SCR1 = 2**(L) * Piby2_2
5667
5668        mov.b           ENDFLAG(%a6),%d1        # low byte of d1 = last-pass flag
5669
5670#--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
5671#--P2 = 2**(L) * Piby2_2
5672        fmov.x          %fp2,%fp4               # fp4 = N
5673        fmul.x          FP_SCR0(%a6),%fp4       # fp4 = W = N*P1
5674        fmov.x          %fp2,%fp5               # fp5 = N
5675        fmul.x          FP_SCR1(%a6),%fp5       # fp5 = w = N*P2
5676        fmov.x          %fp4,%fp3               # fp3 = W = N*P1
5677
5678#--we want P+p = W+w  but  |p| <= half ulp of P
5679#--Then, we need to compute  A := R-P   and  a := r-p
5680        fadd.x          %fp5,%fp3               # fp3 = P
5681        fsub.x          %fp3,%fp4               # fp4 = W-P
5682
5683        fsub.x          %fp3,%fp0               # fp0 = A := R - P
5684        fadd.x          %fp5,%fp4               # fp4 = p = (W-P)+w
5685
5686        fmov.x          %fp0,%fp3               # fp3 = A
5687        fsub.x          %fp4,%fp1               # fp1 = a := r - p
5688
5689#--Now we need to normalize (A,a) to  "new (R,r)" where R+r = A+a but
5690#--|r| <= half ulp of R.
5691        fadd.x          %fp1,%fp0               # fp0 = R := A+a
5692#--No need to calculate r if this is the last loop
5693        cmp.b           %d1,&0
5694        bgt.w           SRESTORE
5695
5696#--Need to calculate r
5697        fsub.x          %fp0,%fp3               # fp3 = A-R
5698        fadd.x          %fp3,%fp1               # fp1 = r := (A-R)+a
5699        bra.w           SLOOP
5700
#--SRESTORE: end of the general reduction. Stores the last quotient N
#--(fp2) in INT, restores the registers saved by SREDUCEX and resumes at
#--SINCONT for sin/cos (ADJN < 4) or at SCCONT for sincos (ADJN = 4),
#--with the reduced argument R in fp0.
5701SRESTORE:
5702        fmov.l          %fp2,INT(%a6)           # INT = N
5703        mov.l           (%sp)+,%d2              # restore d2
5704        fmovm.x         (%sp)+,&0x3c            # restore {fp2-fp5}
5705
5706        mov.l           ADJN(%a6),%d1
5707        cmp.l           %d1,&4
5708
5709        blt.w           SINCONT                 # ADJN is 0 or 1: sin/cos
5710        bra.w           SCCONT                  # ADJN is 4: sincos
5711
5712#########################################################################
5713# stan():  computes the tangent of a normalized input                   #
5714# stand(): computes the tangent of a denormalized input                 #
5715#                                                                       #
5716# INPUT *************************************************************** #
5717#       a0 = pointer to extended precision input                        #
5718#       d0 = round precision,mode                                       #
5719#                                                                       #
5720# OUTPUT ************************************************************** #
5721#       fp0 = tan(X)                                                    #
5722#                                                                       #
5723# ACCURACY and MONOTONICITY ******************************************* #
5724#       The returned result is within 3 ulp in 64 significant bits,     #
5725#       i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
5726#       rounded to double precision. The result is provably monotonic   #
5727#       in double precision.                                            #
5728#                                                                       #
5729# ALGORITHM *********************************************************** #
5730#                                                                       #
5731#       1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.                   #
5732#                                                                       #
5733#       2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let        #
5734#               k = N mod 2, so in particular, k = 0 or 1.              #
5735#                                                                       #
5736#       3. If k is odd, go to 5.                                        #
5737#                                                                       #
5738#       4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a  #
5739#               rational function U/V where                             #
5740#               U = r + r*s*(P1 + s*(P2 + s*P3)), and                   #
5741#               V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))),  s = r*r.      #
5742#               Exit.                                                   #
5743#                                                                       #
5744#       5. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by #
5745#               a rational function U/V where                           #
5746#               U = r + r*s*(P1 + s*(P2 + s*P3)), and                   #
5747#               V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r,       #
5748#               -Cot(r) = -V/U. Exit.                                   #
5749#                                                                       #
5750#       6. If |X| > 1, go to 8.                                         #
5751#                                                                       #
5752#       7. (|X|<2**(-40)) Tan(X) = X. Exit.                             #
5753#                                                                       #
5754#       8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back   #
5755#               to 2.                                                   #
5756#                                                                       #
5757#########################################################################
5758
#--COEFFICIENTS OF THE RATIONAL APPROXIMATION TAN(R) ~ U/V USED BY
#--TANCONT AND NODD, WHERE
#--     U = R + R*S*(P1 + S*(P2 + S*P3)),
#--     V = 1 + S*(Q1 + S*(Q2 + S*(Q3 + S*Q4))),  S = R*R.
#--TANQ4 AND TANP3 ARE TWO LONGS EACH AND ARE LOADED WITH fmov.d.
#--TANQ3 OCCUPIES FOUR LONGS BUT IS FETCHED WITH fadd.d, SO ONLY ITS
#--FIRST TWO LONGS ARE READ. TANP2, TANQ2, TANP1 AND TANQ1 ARE FETCHED
#--AS EXTENDED PRECISION (.x) OPERANDS.
5759TANQ4:
5760        long            0x3EA0B759,0xF50F8688
5761TANP3:
5762        long            0xBEF2BAA5,0xA8924F04
5763
5764TANQ3:
5765        long            0xBF346F59,0xB39BA65F,0x00000000,0x00000000
5766
5767TANP2:
5768        long            0x3FF60000,0xE073D3FC,0x199C4A00,0x00000000
5769
5770TANQ2:
5771        long            0x3FF90000,0xD23CD684,0x15D95FA1,0x00000000
5772
5773TANP1:
5774        long            0xBFFC0000,0x8895A6C5,0xFB423BCA,0x00000000
5775
5776TANQ1:
5777        long            0xBFFD0000,0xEEF57E0D,0xA84BC8CE,0x00000000
5778
#--INVTWOPI, TWOPI1 AND TWOPI2 ARE NOT REFERENCED BY THE stan CODE THAT
#--FOLLOWS IN THIS PART OF THE FILE. JUDGING BY NAME AND VALUE THEY ARE
#--1/(2*PI) AND A LEADING/TRAILING SPLIT OF 2*PI -- NOTE(review): CONFIRM
#--WHETHER ANYTHING ELSE IN THE FILE STILL USES THEM.
5779INVTWOPI:
5780        long            0x3FFC0000,0xA2F9836E,0x4E44152A,0x00000000
5781
5782TWOPI1:
5783        long            0x40010000,0xC90FDAA2,0x00000000,0x00000000
5784TWOPI2:
5785        long            0x3FDF0000,0x85A308D4,0x00000000,0x00000000
5786
5787#--N*PI/2, -32 <= N <= 32, IN A LEADING TERM IN EXT. AND TRAILING
5788#--TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT
5789#--MOST 69 BITS LONG.
#--LAYOUT: 65 ENTRIES OF 16 BYTES EACH (FOUR LONGS). THE FIRST THREE
#--LONGS ARE THE EXTENDED PRECISION LEADING TERM Y1, THE FOURTH LONG IS
#--THE SINGLE PRECISION TRAILING TERM Y2. THE FIRST ENTRY IS N = -32 AND
#--THE ALL-ZERO ENTRY FOR N = 0 SITS AT OFFSET 0x200 (32*16), WHICH IS
#--WHY TANMAIN FORMS ITS BASE ADDRESS AS PITBL+0x200 AND THEN ADDS N*16.
5790#       global          PITBL
5791PITBL:
5792        long            0xC0040000,0xC90FDAA2,0x2168C235,0x21800000
5793        long            0xC0040000,0xC2C75BCD,0x105D7C23,0xA0D00000
5794        long            0xC0040000,0xBC7EDCF7,0xFF523611,0xA1E80000
5795        long            0xC0040000,0xB6365E22,0xEE46F000,0x21480000
5796        long            0xC0040000,0xAFEDDF4D,0xDD3BA9EE,0xA1200000
5797        long            0xC0040000,0xA9A56078,0xCC3063DD,0x21FC0000
5798        long            0xC0040000,0xA35CE1A3,0xBB251DCB,0x21100000
5799        long            0xC0040000,0x9D1462CE,0xAA19D7B9,0xA1580000
5800        long            0xC0040000,0x96CBE3F9,0x990E91A8,0x21E00000
5801        long            0xC0040000,0x90836524,0x88034B96,0x20B00000
5802        long            0xC0040000,0x8A3AE64F,0x76F80584,0xA1880000
5803        long            0xC0040000,0x83F2677A,0x65ECBF73,0x21C40000
5804        long            0xC0030000,0xFB53D14A,0xA9C2F2C2,0x20000000
5805        long            0xC0030000,0xEEC2D3A0,0x87AC669F,0x21380000
5806        long            0xC0030000,0xE231D5F6,0x6595DA7B,0xA1300000
5807        long            0xC0030000,0xD5A0D84C,0x437F4E58,0x9FC00000
5808        long            0xC0030000,0xC90FDAA2,0x2168C235,0x21000000
5809        long            0xC0030000,0xBC7EDCF7,0xFF523611,0xA1680000
5810        long            0xC0030000,0xAFEDDF4D,0xDD3BA9EE,0xA0A00000
5811        long            0xC0030000,0xA35CE1A3,0xBB251DCB,0x20900000
5812        long            0xC0030000,0x96CBE3F9,0x990E91A8,0x21600000
5813        long            0xC0030000,0x8A3AE64F,0x76F80584,0xA1080000
5814        long            0xC0020000,0xFB53D14A,0xA9C2F2C2,0x1F800000
5815        long            0xC0020000,0xE231D5F6,0x6595DA7B,0xA0B00000
5816        long            0xC0020000,0xC90FDAA2,0x2168C235,0x20800000
5817        long            0xC0020000,0xAFEDDF4D,0xDD3BA9EE,0xA0200000
5818        long            0xC0020000,0x96CBE3F9,0x990E91A8,0x20E00000
5819        long            0xC0010000,0xFB53D14A,0xA9C2F2C2,0x1F000000
5820        long            0xC0010000,0xC90FDAA2,0x2168C235,0x20000000
5821        long            0xC0010000,0x96CBE3F9,0x990E91A8,0x20600000
5822        long            0xC0000000,0xC90FDAA2,0x2168C235,0x1F800000
5823        long            0xBFFF0000,0xC90FDAA2,0x2168C235,0x1F000000
5824        long            0x00000000,0x00000000,0x00000000,0x00000000
5825        long            0x3FFF0000,0xC90FDAA2,0x2168C235,0x9F000000
5826        long            0x40000000,0xC90FDAA2,0x2168C235,0x9F800000
5827        long            0x40010000,0x96CBE3F9,0x990E91A8,0xA0600000
5828        long            0x40010000,0xC90FDAA2,0x2168C235,0xA0000000
5829        long            0x40010000,0xFB53D14A,0xA9C2F2C2,0x9F000000
5830        long            0x40020000,0x96CBE3F9,0x990E91A8,0xA0E00000
5831        long            0x40020000,0xAFEDDF4D,0xDD3BA9EE,0x20200000
5832        long            0x40020000,0xC90FDAA2,0x2168C235,0xA0800000
5833        long            0x40020000,0xE231D5F6,0x6595DA7B,0x20B00000
5834        long            0x40020000,0xFB53D14A,0xA9C2F2C2,0x9F800000
5835        long            0x40030000,0x8A3AE64F,0x76F80584,0x21080000
5836        long            0x40030000,0x96CBE3F9,0x990E91A8,0xA1600000
5837        long            0x40030000,0xA35CE1A3,0xBB251DCB,0xA0900000
5838        long            0x40030000,0xAFEDDF4D,0xDD3BA9EE,0x20A00000
5839        long            0x40030000,0xBC7EDCF7,0xFF523611,0x21680000
5840        long            0x40030000,0xC90FDAA2,0x2168C235,0xA1000000
5841        long            0x40030000,0xD5A0D84C,0x437F4E58,0x1FC00000
5842        long            0x40030000,0xE231D5F6,0x6595DA7B,0x21300000
5843        long            0x40030000,0xEEC2D3A0,0x87AC669F,0xA1380000
5844        long            0x40030000,0xFB53D14A,0xA9C2F2C2,0xA0000000
5845        long            0x40040000,0x83F2677A,0x65ECBF73,0xA1C40000
5846        long            0x40040000,0x8A3AE64F,0x76F80584,0x21880000
5847        long            0x40040000,0x90836524,0x88034B96,0xA0B00000
5848        long            0x40040000,0x96CBE3F9,0x990E91A8,0xA1E00000
5849        long            0x40040000,0x9D1462CE,0xAA19D7B9,0x21580000
5850        long            0x40040000,0xA35CE1A3,0xBB251DCB,0xA1100000
5851        long            0x40040000,0xA9A56078,0xCC3063DD,0xA1FC0000
5852        long            0x40040000,0xAFEDDF4D,0xDD3BA9EE,0x21200000
5853        long            0x40040000,0xB6365E22,0xEE46F000,0xA1480000
5854        long            0x40040000,0xBC7EDCF7,0xFF523611,0x21E80000
5855        long            0x40040000,0xC2C75BCD,0x105D7C23,0x20D00000
5856        long            0x40040000,0xC90FDAA2,0x2168C235,0xA1800000
5857
#--SCRATCH LOCATIONS USED BY THE GENERAL ARGUMENT REDUCTION (REDUCEX).
#--ALL ARE ADDRESSED OFF %a6. NOTE THAT TWOTO63 AND INT ARE TWO NAMES
#--FOR THE SAME LONGWORD (L_SCR1): TWOTO63 IS USED INSIDE THE LOOP AND
#--INT ONLY AFTER THE LOOP HAS FINISHED (AT RESTORE).
5858        set             INARG,FP_SCR0
5859
5860        set             TWOTO63,L_SCR1
5861        set             INT,L_SCR1
5862        set             ENDFLAG,L_SCR2
5863
#--ENTRY POINT FOR TAN(X). a0 POINTS TO THE EXTENDED PRECISION INPUT AND
#--d0 HOLDS THE USER'S ROUNDING PRECISION/MODE. THIS STUB CLASSIFIES |X|
#--AND DISPATCHES:
#--     |X| <  2**(-40)          -> TANSM   (RESULT IS X)
#--     2**(-40) <= |X| < 15 PI  -> TANMAIN (TABLE-DRIVEN REDUCTION)
#--     |X| >= 15 PI             -> REDUCEX (GENERAL REDUCTION)
5864        global          stan
5865stan:
5866        fmov.x          (%a0),%fp0              # LOAD INPUT
5867
#--BUILD THE "COMPACT FORM" OF |X| IN d1: HIGH WORD = SIGN/EXPONENT WORD,
#--LOW WORD = UPPER 16 BITS OF THE MANTISSA, THEN CLEAR THE SIGN BIT.
#--THE COMPARISONS BELOW ARE PLAIN SIGNED INTEGER COMPARES ON THIS VALUE.
5868        mov.l           (%a0),%d1
5869        mov.w           4(%a0),%d1
5870        and.l           &0x7FFFFFFF,%d1
5871
5872        cmp.l           %d1,&0x3FD78000         # |X| >= 2**(-40)?
5873        bge.b           TANOK1
5874        bra.w           TANSM
5875TANOK1:
5876        cmp.l           %d1,&0x4004BC7E         # |X| < 15 PI?
5877        blt.b           TANMAIN
5878        bra.w           REDUCEX
5879
5880TANMAIN:
5881#--THIS IS THE USUAL CASE, |X| <= 15 PI.
5882#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
#--ON ENTRY FP0 = X. ON EXIT (FALLING INTO TANCONT) FP0 = R = X - N*PI/2
#--AND BIT 31 OF d1 IS SET IFF N IS ODD (ALL OTHER BITS OF d1 ARE ZERO).
5883        fmov.x          %fp0,%fp1
5884        fmul.d          TWOBYPI(%pc),%fp1       # X*2/PI
5885
5886        lea.l           PITBL+0x200(%pc),%a1    # TABLE OF N*PI/2, N = -32,...,32
#                                                     # (+0x200 = THE N = 0 ENTRY)
5887
5888        fmov.l          %fp1,%d1                # CONVERT TO INTEGER
5889
5890        asl.l           &4,%d1                  # N*16: EACH TABLE ENTRY IS 16 BYTES
5891        add.l           %d1,%a1                 # ADDRESS N*PIBY2 IN Y1, Y2
5892
5893        fsub.x          (%a1)+,%fp0             # X-Y1
5894
5895        fsub.s          (%a1),%fp0              # FP0 IS R = (X-Y1)-Y2
5896
#--d1 HOLDS N*16, SO BIT 0 OF N IS BIT 4 OF d1. ROTATING RIGHT BY 5 MOVES
#--THAT BIT INTO BIT 31, WHERE TANCONT TESTS IT AS A SIGN.
5897        ror.l           &5,%d1
5898        and.l           &0x80000000,%d1         # N WAS ODD IFF D1 < 0
5899
#--COMMON CONTINUATION AFTER ARGUMENT REDUCTION (REACHED FROM TANMAIN BY
#--FALL-THROUGH AND FROM RESTORE BY BRANCH).
#--ON ENTRY: FP0 = R, d1 NEGATIVE (BIT 31 SET) IFF N IS ODD, d0 = USER'S
#--ROUNDING PRECISION/MODE.
#--FP2 AND FP3 ARE SAVED ON THE STACK HERE AND RESTORED ON EITHER PATH.
#--EVEN N: COMPUTE TAN(R) = U/V BELOW. ODD N: GO TO NODD FOR -V/U.
5900TANCONT:
5901        fmovm.x         &0x0c,-(%sp)            # save fp2,fp3
5902
5903        cmp.l           %d1,&0
5904        blt.w           NODD
5905
#--THE P (NUMERATOR, IN FP2) AND Q (DENOMINATOR, IN FP3) POLYNOMIALS ARE
#--EVALUATED SIDE BY SIDE IN HORNER FORM. FP0 KEEPS R, FP1 HOLDS S.
5906        fmov.x          %fp0,%fp1
5907        fmul.x          %fp1,%fp1               # S = R*R
5908
5909        fmov.d          TANQ4(%pc),%fp3
5910        fmov.d          TANP3(%pc),%fp2
5911
5912        fmul.x          %fp1,%fp3               # SQ4
5913        fmul.x          %fp1,%fp2               # SP3
5914
5915        fadd.d          TANQ3(%pc),%fp3         # Q3+SQ4
5916        fadd.x          TANP2(%pc),%fp2         # P2+SP3
5917
5918        fmul.x          %fp1,%fp3               # S(Q3+SQ4)
5919        fmul.x          %fp1,%fp2               # S(P2+SP3)
5920
5921        fadd.x          TANQ2(%pc),%fp3         # Q2+S(Q3+SQ4)
5922        fadd.x          TANP1(%pc),%fp2         # P1+S(P2+SP3)
5923
5924        fmul.x          %fp1,%fp3               # S(Q2+S(Q3+SQ4))
5925        fmul.x          %fp1,%fp2               # S(P1+S(P2+SP3))
5926
5927        fadd.x          TANQ1(%pc),%fp3         # Q1+S(Q2+S(Q3+SQ4))
5928        fmul.x          %fp0,%fp2               # RS(P1+S(P2+SP3))
5929
5930        fmul.x          %fp3,%fp1               # S(Q1+S(Q2+S(Q3+SQ4)))
5931
5932        fadd.x          %fp2,%fp0               # FP0 = U = R+RS(P1+S(P2+SP3))
5933
5934        fadd.s          &0x3F800000,%fp1        # FP1 = V = 1+S(Q1+...)
5935
5936        fmovm.x         (%sp)+,&0x30            # restore fp2,fp3
5937
#--THE USER'S FPCR IS INSTALLED ONLY NOW SO THAT JUST THE FINAL DIVIDE IS
#--PERFORMED UNDER THE USER'S ROUNDING PRECISION/MODE.
5938        fmov.l          %d0,%fpcr               # restore users round mode,prec
5939        fdiv.x          %fp1,%fp0               # FP0 = U/V; last inst - possible exception set
5940        bra             t_inx2
5941
#--ODD N: TAN(X) = -COT(R) = -V/U. REACHED FROM TANCONT, SO FP2/FP3 ARE
#--ALREADY SAVED ON THE STACK. THE SAME POLYNOMIALS AS IN TANCONT ARE
#--EVALUATED, BUT WITH THE REGISTER ROLES SWAPPED: HERE FP1 KEEPS R AND
#--FP0 HOLDS S, SO THAT V ENDS UP IN FP0 (THE DIVIDEND) AND U IN FP1.
5942NODD:
5943        fmov.x          %fp0,%fp1
5944        fmul.x          %fp0,%fp0               # S = R*R
5945
5946        fmov.d          TANQ4(%pc),%fp3
5947        fmov.d          TANP3(%pc),%fp2
5948
5949        fmul.x          %fp0,%fp3               # SQ4
5950        fmul.x          %fp0,%fp2               # SP3
5951
5952        fadd.d          TANQ3(%pc),%fp3         # Q3+SQ4
5953        fadd.x          TANP2(%pc),%fp2         # P2+SP3
5954
5955        fmul.x          %fp0,%fp3               # S(Q3+SQ4)
5956        fmul.x          %fp0,%fp2               # S(P2+SP3)
5957
5958        fadd.x          TANQ2(%pc),%fp3         # Q2+S(Q3+SQ4)
5959        fadd.x          TANP1(%pc),%fp2         # P1+S(P2+SP3)
5960
5961        fmul.x          %fp0,%fp3               # S(Q2+S(Q3+SQ4))
5962        fmul.x          %fp0,%fp2               # S(P1+S(P2+SP3))
5963
5964        fadd.x          TANQ1(%pc),%fp3         # Q1+S(Q2+S(Q3+SQ4))
5965        fmul.x          %fp1,%fp2               # RS(P1+S(P2+SP3))
5966
5967        fmul.x          %fp3,%fp0               # S(Q1+S(Q2+S(Q3+SQ4)))
5968
5969        fadd.x          %fp2,%fp1               # FP1 = U = R+RS(P1+S(P2+SP3))
5970        fadd.s          &0x3F800000,%fp0        # FP0 = V = 1+S(Q1+...)
5971
5972        fmovm.x         (%sp)+,&0x30            # restore fp2,fp3
5973
#--NEGATE U IN MEMORY: PUSH IT IN EXTENDED FORMAT AND FLIP THE SIGN BIT
#--(BIT 31 OF THE FIRST LONGWORD). THE DIVIDE THEN POPS -U DIRECTLY.
5974        fmov.x          %fp1,-(%sp)
5975        eor.l           &0x80000000,(%sp)
5976
5977        fmov.l          %d0,%fpcr               # restore users round mode,prec
5978        fdiv.x          (%sp)+,%fp0             # FP0 = V/(-U); last inst - possible exception set
5979        bra             t_inx2
5980
#--NOTE: NO BRANCH TO TANBORS APPEARS IN THE stan CODE IN THIS PART OF
#--THE FILE; stan BRANCHES DIRECTLY TO TANSM AND TO REDUCEX.
5981TANBORS:
5982#--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
5983#--IF |X| < 2**(-40), RETURN X.
5984        cmp.l           %d1,&0x3FFF8000         # COMPACT FORM OF |X| > 1.0?
5985        bgt.b           REDUCEX
5986
#--TINY ARGUMENT: TAN(X) = X. X IS BOUNCED THROUGH THE STACK SO THAT THE
#--FINAL MOVE INTO FP0 IS EXECUTED UNDER THE USER'S FPCR (FROM d0).
#--d1 IS LOADED WITH FMOV_OP BEFORE BRANCHING TO t_catch.
5987TANSM:
5988        fmov.x          %fp0,-(%sp)
5989        fmov.l          %d0,%fpcr               # restore users round mode,prec
5990        mov.b           &FMOV_OP,%d1            # last inst is MOVE
5991        fmov.x          (%sp)+,%fp0             # last inst - possible exception set
5992        bra             t_catch
5993
5994        global          stand
5995#--TAN(X) = X FOR DENORMALIZED X
#--NO WORK IS DONE HERE; CONTROL PASSES STRAIGHT TO t_extdnrm.
5996stand:
5997        bra             t_extdnrm
5998
5999#--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
6000#--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
6001#--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
#--ON ENTRY: FP0 = X, d1 = COMPACT FORM OF |X| (SEE stan).
#--FP2-FP5 AND d2 ARE PUSHED HERE AND POPPED AGAIN AT RESTORE.
#--FP1 CARRIES THE LOW-ORDER PART r OF THE RUNNING REMAINDER (R,r) AND
#--STARTS OUT AS ZERO.
6002REDUCEX:
6003        fmovm.x         &0x3c,-(%sp)            # save {fp2-fp5}
6004        mov.l           %d2,-(%sp)              # save d2
6005        fmov.s          &0x00000000,%fp1        # fp1 = 0
6006
6007#--If compact form of abs(arg) in d1=$7ffeffff, argument is so large that
6008#--there is a danger of unwanted overflow in first LOOP iteration.  In this
6009#--case, reduce argument by one remainder step to make subsequent reduction
6010#--safe.
6011        cmp.l           %d1,&0x7ffeffff         # is arg dangerously large?
6012        bne.b           LOOP                    # no
6013
6014# yes; create 2**16383*PI/2
6015        mov.w           &0x7ffe,FP_SCR0_EX(%a6)
6016        mov.l           &0xc90fdaa2,FP_SCR0_HI(%a6)
6017        clr.l           FP_SCR0_LO(%a6)
6018
6019# create low half of 2**16383*PI/2 at FP_SCR1
6020        mov.w           &0x7fdc,FP_SCR1_EX(%a6)
6021        mov.l           &0x85a308d3,FP_SCR1_HI(%a6)
6022        clr.l           FP_SCR1_LO(%a6)
6023
#--GIVE BOTH CONSTANTS THE SIGN OPPOSITE TO THE ARGUMENT SO THAT THE
#--fadd INSTRUCTIONS BELOW MOVE THE ARGUMENT TOWARDS ZERO: THE CONSTANTS
#--STAY POSITIVE FOR A NEGATIVE ARGUMENT AND GET THEIR SIGN BIT SET
#--OTHERWISE.
6024        ftest.x         %fp0                    # test sign of argument
6025        fblt.w          red_neg
6026
6027        or.b            &0x80,FP_SCR0_EX(%a6)   # positive arg
6028        or.b            &0x80,FP_SCR1_EX(%a6)
6029red_neg:
6030        fadd.x          FP_SCR0(%a6),%fp0       # high part of reduction is exact
6031        fmov.x          %fp0,%fp1               # save high result in fp1
6032        fadd.x          FP_SCR1(%a6),%fp0       # low part of reduction
6033        fsub.x          %fp0,%fp1               # determine low component of result
6034        fadd.x          FP_SCR1(%a6),%fp1       # fp0/fp1 are reduced argument.
6035
6036#--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
6037#--integer quotient will be stored in N
6038#--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
#--EACH PASS REMOVES A MULTIPLE OF 2**L * (PI/2) FROM (R,r). L IS K-27
#--WHILE THE UNBIASED EXPONENT K OF R EXCEEDS 28; OTHERWISE L = 0 AND
#--ENDFLAG IS SET SO THAT THE PASS IS THE LAST ONE.
#--REGISTERS: d1 = K, THEN L, THEN ENDFLAG; d2 = SCRATCH; a1 = COPY OF THE
#--SIGN/EXPONENT WORD OF R; FP2 = N; FP3-FP5 = TEMPORARIES.
6039LOOP:
6040        fmov.x          %fp0,INARG(%a6)         # +-2**K * F, 1 <= F < 2
6041        mov.w           INARG(%a6),%d1
6042        mov.l           %d1,%a1                 # save a copy of D1
6043        and.l           &0x00007FFF,%d1
6044        sub.l           &0x00003FFF,%d1         # d1 = K
6045        cmp.l           %d1,&28
6046        ble.b           LASTLOOP
6047CONTLOOP:
6048        sub.l           &27,%d1                 # d1 = L := K-27
6049        mov.b           &0,ENDFLAG(%a6)
6050        bra.b           WORK
6051LASTLOOP:
6052        clr.l           %d1                     # d1 = L := 0
6053        mov.b           &1,ENDFLAG(%a6)
6054
6055WORK:
6056#--FIND THE REMAINDER OF (R,r) W.R.T.   2**L * (PI/2). L IS SO CHOSEN
6057#--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
6058
6059#--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
6060#--2**L * (PIby2_1), 2**L * (PIby2_2)
6061
6062        mov.l           &0x00003FFE,%d2         # BIASED EXP OF 2/PI
6063        sub.l           %d1,%d2                 # BIASED EXP OF 2**(-L)*(2/PI)
6064
6065        mov.l           &0xA2F9836E,FP_SCR0_HI(%a6)
6066        mov.l           &0x4E44152A,FP_SCR0_LO(%a6)
6067        mov.w           %d2,FP_SCR0_EX(%a6)     # FP_SCR0 = 2**(-L)*(2/PI)
6068
6069        fmov.x          %fp0,%fp2
6070        fmul.x          FP_SCR0(%a6),%fp2       # fp2 = X * 2**(-L)*(2/PI)
6071
6072#--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
6073#--FLOATING POINT FORMAT, THE TWO FMOVE'S       FMOVE.L FP <--> N
6074#--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
6075#--(SIGN(INARG)*2**63   +       FP2) - SIGN(INARG)*2**63 WILL GIVE
6076#--US THE DESIRED VALUE IN FLOATING POINT.
#--a1 HOLDS THE SIGN/EXPONENT WORD IN ITS LOW HALF; THE swap BRINGS THE
#--SIGN BIT UP TO BIT 31 SO IT CAN BE MERGED INTO THE SINGLE PRECISION
#--CONSTANT 0x5F000000 (2**63).
6077        mov.l           %a1,%d2
6078        swap            %d2
6079        and.l           &0x80000000,%d2
6080        or.l            &0x5F000000,%d2         # d2 = SIGN(INARG)*2**63 IN SGL
6081        mov.l           %d2,TWOTO63(%a6)
6082        fadd.s          TWOTO63(%a6),%fp2       # THE FRACTIONAL PART OF FP2 IS ROUNDED
6083        fsub.s          TWOTO63(%a6),%fp2       # fp2 = N
6084#       fintrz.x        %fp2,%fp2
6085
6086#--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
6087        mov.l           %d1,%d2                 # d2 = L
6088
6089        add.l           &0x00003FFF,%d2         # BIASED EXP OF 2**L * (PI/2)
6090        mov.w           %d2,FP_SCR0_EX(%a6)
6091        mov.l           &0xC90FDAA2,FP_SCR0_HI(%a6)
6092        clr.l           FP_SCR0_LO(%a6)         # FP_SCR0 = 2**(L) * Piby2_1
6093
6094        add.l           &0x00003FDD,%d1         # BIASED EXP OF 2**L * Piby2_2 (d1 NO LONGER L)
6095        mov.w           %d1,FP_SCR1_EX(%a6)
6096        mov.l           &0x85A308D3,FP_SCR1_HI(%a6)
6097        clr.l           FP_SCR1_LO(%a6)         # FP_SCR1 = 2**(L) * Piby2_2
6098
6099        mov.b           ENDFLAG(%a6),%d1        # LOW BYTE OF d1 = LAST-PASS FLAG
6100
6101#--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
6102#--P2 = 2**(L) * Piby2_2
6103        fmov.x          %fp2,%fp4               # fp4 = N
6104        fmul.x          FP_SCR0(%a6),%fp4       # fp4 = W = N*P1
6105        fmov.x          %fp2,%fp5               # fp5 = N
6106        fmul.x          FP_SCR1(%a6),%fp5       # fp5 = w = N*P2
6107        fmov.x          %fp4,%fp3               # fp3 = W = N*P1
6108
6109#--we want P+p = W+w  but  |p| <= half ulp of P
6110#--Then, we need to compute  A := R-P   and  a := r-p
6111        fadd.x          %fp5,%fp3               # fp3 = P
6112        fsub.x          %fp3,%fp4               # fp4 = W-P
6113
6114        fsub.x          %fp3,%fp0               # fp0 = A := R - P
6115        fadd.x          %fp5,%fp4               # fp4 = p = (W-P)+w
6116
6117        fmov.x          %fp0,%fp3               # fp3 = A
6118        fsub.x          %fp4,%fp1               # fp1 = a := r - p
6119
6120#--Now we need to normalize (A,a) to  "new (R,r)" where R+r = A+a but
6121#--|r| <= half ulp of R.
6122        fadd.x          %fp1,%fp0               # fp0 = R := A+a
6123#--No need to calculate r if this is the last loop
6124        cmp.b           %d1,&0
6125        bgt.w           RESTORE
6126
6127#--Need to calculate r
6128        fsub.x          %fp0,%fp3               # fp3 = A-R
6129        fadd.x          %fp3,%fp1               # fp1 = r := (A-R)+a
6130        bra.w           LOOP
6131
#--END OF THE GENERAL REDUCTION. FP0 = REDUCED ARGUMENT R, FP2 = N FROM
#--THE LAST PASS. N IS CONVERTED TO AN INTEGER BEFORE FP2 IS RESTORED,
#--THEN d2 AND FP2-FP5 (PUSHED AT REDUCEX) ARE POPPED IN REVERSE ORDER.
6132RESTORE:
6133        fmov.l          %fp2,INT(%a6)
6134        mov.l           (%sp)+,%d2              # restore d2
6135        fmovm.x         (%sp)+,&0x3c            # restore {fp2-fp5}
6136
#--ROTATE BIT 0 OF N INTO BIT 31 SO THAT d1 IS NEGATIVE IFF N IS ODD,
#--WHICH IS THE CONDITION TANCONT TESTS (cmp.l %d1,&0 / blt NODD).
6137        mov.l           INT(%a6),%d1
6138        ror.l           &1,%d1
6139
6140        bra.w           TANCONT
6141
6142#########################################################################
6143# satan():  computes the arctangent of a normalized number              #
6144# satand(): computes the arctangent of a denormalized number            #
6145#                                                                       #
6146# INPUT *************************************************************** #
6147#       a0 = pointer to extended precision input                        #
6148#       d0 = round precision,mode                                       #
6149#                                                                       #
6150# OUTPUT ************************************************************** #
6151#       fp0 = arctan(X)                                                 #
6152#                                                                       #
6153# ACCURACY and MONOTONICITY ******************************************* #
6154#       The returned result is within 2 ulps in 64 significant bit,     #
6155#       i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6156#       rounded to double precision. The result is provably monotonic   #
6157#       in double precision.                                            #
6158#                                                                       #
6159# ALGORITHM *********************************************************** #
6160#       Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5.               #
6161#                                                                       #
6162#       Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x.                    #
6163#               Note that k = -4, -3,..., or 3.                         #
6164#               Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5       #
6165#               significant bits of X with a bit-1 attached at the 6-th #
6166#               bit position. Define u to be u = (X-F) / (1 + X*F).     #
6167#                                                                       #
6168#       Step 3. Approximate arctan(u) by a polynomial poly.             #
6169#                                                                       #
6170#       Step 4. Return arctan(F) + poly, arctan(F) is fetched from a    #
6171#               table of values calculated beforehand. Exit.            #
6172#                                                                       #
6173#       Step 5. If |X| >= 16, go to Step 7.                             #
6174#                                                                       #
6175#       Step 6. Approximate arctan(X) by an odd polynomial in X. Exit.  #
6176#                                                                       #
6177#       Step 7. Define X' = -1/X. Approximate arctan(X') by an odd      #
6178#               polynomial in X'.                                       #
6179#               Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit.            #
6180#                                                                       #
6181#########################################################################
6182
#--CONSTANTS FOR satan. THE CODE THAT CONSUMES THEM LIES BEYOND THIS PART
#--OF THE FILE, SO THE GROUPING BELOW IS INFERRED FROM THE NAMES ONLY.
#--NOTE(review): PRESUMABLY ATANA*, ATANB* AND ATANC* ARE THE POLYNOMIAL
#--COEFFICIENTS FOR THE THREE APPROXIMATIONS NAMED IN THE ALGORITHM
#--HEADER (STEPS 3, 6 AND 7) -- CONFIRM AGAINST THE satan BODY.
#--EACH OF THESE IS STORED AS TWO LONGS (8 BYTES).
6183ATANA3: long            0xBFF6687E,0x314987D8
6184ATANA2: long            0x4002AC69,0x34A26DB3
6185ATANA1: long            0xBFC2476F,0x4E1DA28E
6186
6187ATANB6: long            0x3FB34444,0x7F876989
6188ATANB5: long            0xBFB744EE,0x7FAF45DB
6189ATANB4: long            0x3FBC71C6,0x46940220
6190ATANB3: long            0xBFC24924,0x921872F9
6191ATANB2: long            0x3FC99999,0x99998FA9
6192ATANB1: long            0xBFD55555,0x55555555
6193
6194ATANC5: long            0xBFB70BF3,0x98539E6A
6195ATANC4: long            0x3FBC7187,0x962D1D7D
6196ATANC3: long            0xBFC24924,0x827107B8
6197ATANC2: long            0x3FC99999,0x9996263E
6198ATANC1: long            0xBFD55555,0x55555536
6199
#--+PI/2 AND -PI/2 IN EXTENDED PRECISION (SAME MANTISSA AS THE N = +-1
#--LEADING TERMS IN PITBL).
6200PPIBY2: long            0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
6201NPIBY2: long            0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000
6202
#--EXTENDED PRECISION VALUES WITH BIASED EXPONENT 0x0001 AND ONLY THE
#--LEADING MANTISSA BIT SET, POSITIVE AND NEGATIVE.
6203PTINY:  long            0x00010000,0x80000000,0x00000000,0x00000000
6204NTINY:  long            0x80010000,0x80000000,0x00000000,0x00000000
6205
#--TABLE OF 128 EXTENDED PRECISION VALUES, 16 BYTES (FOUR LONGS) EACH,
#--ALL POSITIVE AND INCREASING. PER THE ALGORITHM HEADER THESE ARE THE
#--PRECOMPUTED ARCTAN(F) VALUES FOR F = 2**k * 1.xxxx1, k = -4,...,3
#--(8 EXPONENTS * 16 LEADING-BIT PATTERNS = 128 ENTRIES); THE FIRST ENTRY
#--MATCHES ARCTAN(2**-4 * 1.00001b) AND THE LAST ARCTAN(2**3 * 1.11111b).
#--NOTE(review): THE INDEXING CODE IS BEYOND THIS PART OF THE FILE --
#--CONFIRM THE ENTRY ORDER AGAINST THE satan BODY.
6206ATANTBL:
6207        long            0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000
6208        long            0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000
6209        long            0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000
6210        long            0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000
6211        long            0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000
6212        long            0x3FFB0000,0xAB98E943,0x62765619,0x00000000
6213        long            0x3FFB0000,0xB389E502,0xF9C59862,0x00000000
6214        long            0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000
6215        long            0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000
6216        long            0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000
6217        long            0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000
6218        long            0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000
6219        long            0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000
6220        long            0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000
6221        long            0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000
6222        long            0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000
6223        long            0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000
6224        long            0x3FFC0000,0x8B232A08,0x304282D8,0x00000000
6225        long            0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000
6226        long            0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000
6227        long            0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000
6228        long            0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000
6229        long            0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000
6230        long            0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000
6231        long            0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000
6232        long            0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000
6233        long            0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000
6234        long            0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000
6235        long            0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000
6236        long            0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000
6237        long            0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000
6238        long            0x3FFC0000,0xF7170A28,0xECC06666,0x00000000
6239        long            0x3FFD0000,0x812FD288,0x332DAD32,0x00000000
6240        long            0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000
6241        long            0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000
6242        long            0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000
6243        long            0x3FFD0000,0x9EB68949,0x3889A227,0x00000000
6244        long            0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000
6245        long            0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000
6246        long            0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000
6247        long            0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000
6248        long            0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000
6249        long            0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000
6250        long            0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000
6251        long            0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000
6252        long            0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000
6253        long            0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000
6254        long            0x3FFD0000,0xEA2D764F,0x64315989,0x00000000
6255        long            0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000
6256        long            0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000
6257        long            0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000
6258        long            0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000
6259        long            0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000
6260        long            0x3FFE0000,0x97731420,0x365E538C,0x00000000
6261        long            0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000
6262        long            0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000
6263        long            0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000
6264        long            0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000
6265        long            0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000
6266        long            0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000
6267        long            0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000
6268        long            0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000
6269        long            0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000
6270        long            0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000
6271        long            0x3FFE0000,0xCD000549,0xADEC7159,0x00000000
6272        long            0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000
6273        long            0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000
6274        long            0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000
6275        long            0x3FFE0000,0xE8771129,0xC4353259,0x00000000
6276        long            0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000
6277        long            0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000
6278        long            0x3FFE0000,0xF919039D,0x758B8D41,0x00000000
6279        long            0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000
6280        long            0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000
6281        long            0x3FFF0000,0x83889E35,0x49D108E1,0x00000000
6282        long            0x3FFF0000,0x859CFA76,0x511D724B,0x00000000
6283        long            0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000
6284        long            0x3FFF0000,0x89732FD1,0x9557641B,0x00000000
6285        long            0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000
6286        long            0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000
6287        long            0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000
6288        long            0x3FFF0000,0x922DA7D7,0x91888487,0x00000000
6289        long            0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000
6290        long            0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000
6291        long            0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000
6292        long            0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000
6293        long            0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000
6294        long            0x3FFF0000,0x9F100575,0x006CC571,0x00000000
6295        long            0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000
6296        long            0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000
6297        long            0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000
6298        long            0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000
6299        long            0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000
6300        long            0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000
6301        long            0x3FFF0000,0xA83A5153,0x0956168F,0x00000000
6302        long            0x3FFF0000,0xA93A2007,0x7539546E,0x00000000
6303        long            0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000
6304        long            0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000
6305        long            0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000
6306        long            0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000
6307        long            0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000
6308        long            0x3FFF0000,0xB1846515,0x0F71496A,0x00000000
6309        long            0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000
6310        long            0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000
6311        long            0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000
6312        long            0x3FFF0000,0xB525529D,0x562246BD,0x00000000
6313        long            0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000
6314        long            0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000
6315        long            0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000
6316        long            0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000
6317        long            0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000
6318        long            0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000
6319        long            0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000
6320        long            0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000
6321        long            0x3FFF0000,0xBB471285,0x7637E17D,0x00000000
6322        long            0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000
6323        long            0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000
6324        long            0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000
6325        long            0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000
6326        long            0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000
6327        long            0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000
6328        long            0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000
6329        long            0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000
6330        long            0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000
6331        long            0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000
6332        long            0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000
6333        long            0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000
6334        long            0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000
6335
#--SCRATCH-AREA ALIASES USED BY SATAN. BOTH ARE ADDRESSED OFF a6 BELOW.
6336        set             X,FP_SCR0               # scratch copy of the input; later overwritten with F (or X' = -1/X)
6337        set             XDCARE,X+2              # bytes 2-3 of X; not referenced in the code shown (presumably "don't care")
6338        set             XFRAC,X+4               # upper 32 bits of the fraction of X
6339        set             XFRACLO,X+8             # lower 32 bits of the fraction of X
6340
6341        set             ATANF,FP_SCR1           # first long of the ATAN(|F|) table entry (receives the sign of F)
6342        set             ATANFHI,ATANF+4         # second long of the table entry
6343        set             ATANFLO,ATANF+8         # third long of the table entry
6344
6345        global          satan
6346#--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
#--ON ENTRY a0 POINTS TO THE EXTENDED-PRECISION INPUT AND d0 HOLDS THE
#--VALUE EACH PATH LOADS INTO FPCR BEFORE ITS LAST OPERATION. THE RESULT
#--IS LEFT IN FP0. d1 IS BUILT AS THE COMPACT FORM OF |X| (HIGH WORD =
#--EXPONENT WITH THE SIGN CLEARED, LOW WORD = TOP 16 FRACTION BITS) SO
#--THE RANGE CHECKS BELOW ARE PLAIN INTEGER COMPARES:
#--     d1 <  0x3FFB8000  (|X| <  1/16)  -> ATANSM
#--     d1 >  0x4002FFFF  (|X| >= 16)    -> ATANBIG
#--     OTHERWISE                        -> ATANMAIN
6347satan:
6348        fmov.x          (%a0),%fp0              # LOAD INPUT
6349
6350        mov.l           (%a0),%d1               # high word of d1 = sign/exponent word of X
6351        mov.w           4(%a0),%d1              # low word of d1 = top 16 bits of the fraction
6352        fmov.x          %fp0,X(%a6)             # keep a scratch copy of X
6353        and.l           &0x7FFFFFFF,%d1         # clear the sign bit: d1 = compact |X|
6354
6355        cmp.l           %d1,&0x3FFB8000         # |X| >= 1/16?
6356        bge.b           ATANOK1
6357        bra.w           ATANSM                  # no: small-argument polynomial
6358
6359ATANOK1:
6360        cmp.l           %d1,&0x4002FFFF         # |X| < 16 ?
6361        ble.b           ATANMAIN
6362        bra.w           ATANBIG                 # no: use ATAN(-1/X)
6363
6364#--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE
6365#--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ).
6366#--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN
6367#--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE
6368#--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS
6369#--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR
6370#--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO
6371#--FETCH F AND SAVING OF REGISTERS CAN BE ALL HIDDEN UNDER THE
6372#--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL
6373#--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATES
6374#--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION
6375#--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION
6376#--WILL INVOLVE A VERY LONG POLYNOMIAL.
6377
6378#--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS
6379#--WE CHOSE F TO BE +-2^K * 1.BBBB1
6380#--THAT IS IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE
6381#--SIXTH BIT IS SET TO BE 1. SINCE K = -4, -3, ..., 3, THERE
6382#--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS
6383#-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|).
6384
6385ATANMAIN:
6386
6387        and.l           &0xF8000000,XFRAC(%a6)  # FIRST 5 BITS
6388        or.l            &0x04000000,XFRAC(%a6)  # SET 6-TH BIT TO 1
6389        mov.l           &0x00000000,XFRACLO(%a6) # LOCATION OF X IS NOW F
6390
6391        fmov.x          %fp0,%fp1               # FP1 IS X
6392        fmul.x          X(%a6),%fp1             # FP1 IS X*F, NOTE THAT X*F > 0
6393        fsub.x          X(%a6),%fp0             # FP0 IS X-F
6394        fadd.s          &0x3F800000,%fp1        # FP1 IS 1 + X*F
6395        fdiv.x          %fp1,%fp0               # FP0 IS U = (X-F)/(1+X*F)
6396
6397#--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|)
6398#--CREATE ATAN(F) AND STORE IT IN ATANF, AND
6399#--SAVE REGISTERS FP2.
#--INDEX ARITHMETIC: (EXPONENT - 0x3FFB) IS K+4 IN BITS 16 AND UP; THE
#--SHIFT BY 1 PLACES IT DIRECTLY ABOVE THE 4 FRACTION BITS (BITS 11-14),
#--GIVING A 7-BIT VALUE IN BITS 11-17. THE SHIFT BY 7 THEN LEAVES THAT
#--VALUE TIMES 16, I.E. A BYTE OFFSET INTO A TABLE OF 4-LONG ENTRIES.
#--ONLY THE FIRST 3 LONGS (12 BYTES) OF THE ENTRY ARE COPIED.
6400
6401        mov.l           %d2,-(%sp)              # SAVE d2 TEMPORARILY
6402        mov.l           %d1,%d2                 # THE EXP AND 16 BITS OF X
6403        and.l           &0x00007800,%d1         # 4 VARYING BITS OF F'S FRACTION
6404        and.l           &0x7FFF0000,%d2         # EXPONENT OF F
6405        sub.l           &0x3FFB0000,%d2         # K+4
6406        asr.l           &1,%d2                  # align K+4 just above the 4 fraction bits
6407        add.l           %d2,%d1                 # THE 7 BITS IDENTIFYING F
6408        asr.l           &7,%d1                  # INDEX INTO TBL OF ATAN(|F|)
6409        lea             ATANTBL(%pc),%a1
6410        add.l           %d1,%a1                 # ADDRESS OF ATAN(|F|)
6411        mov.l           (%a1)+,ATANF(%a6)
6412        mov.l           (%a1)+,ATANFHI(%a6)
6413        mov.l           (%a1)+,ATANFLO(%a6)     # ATANF IS NOW ATAN(|F|)
6414        mov.l           X(%a6),%d1              # LOAD SIGN AND EXPO. AGAIN
6415        and.l           &0x80000000,%d1         # SIGN(F)
6416        or.l            %d1,ATANF(%a6)          # ATANF IS NOW SIGN(F)*ATAN(|F|)
6417        mov.l           (%sp)+,%d2              # RESTORE d2
6418
6419#--THAT'S ALL I HAVE TO DO FOR NOW,
6420#--BUT ALAS, THE DIVIDE IS STILL CRANKING!
6421
6422#--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS
6423#--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U
6424#--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT.
6425#--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3))
6426#--WHAT WE HAVE HERE IS MERELY  A1 = A3, A2 = A1/A3, A3 = A2/A3.
6427#--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT
6428#--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED
6429
6430        fmovm.x         &0x04,-(%sp)            # save fp2
6431
6432        fmov.x          %fp0,%fp1
6433        fmul.x          %fp1,%fp1               # FP1 IS V = U*U
6434        fmov.d          ATANA3(%pc),%fp2
6435        fadd.x          %fp1,%fp2               # A3+V
6436        fmul.x          %fp1,%fp2               # V*(A3+V)
6437        fmul.x          %fp0,%fp1               # U*V
6438        fadd.d          ATANA2(%pc),%fp2        # A2+V*(A3+V)
6439        fmul.d          ATANA1(%pc),%fp1        # A1*U*V
6440        fmul.x          %fp2,%fp1               # A1*U*V*(A2+V*(A3+V))
6441        fadd.x          %fp1,%fp0               # ATAN(U), FP1 RELEASED
6442
6443        fmovm.x         (%sp)+,&0x20            # restore fp2
6444
6445        fmov.l          %d0,%fpcr               # restore users rnd mode,prec
6446        fadd.x          ATANF(%a6),%fp0         # ATAN(X)
6447        bra             t_inx2                  # exit via t_inx2 (defined elsewhere)
6448
#--NOTE(review): NOTHING IN THE CODE SHOWN HERE BRANCHES TO ATANBORS AND
#--THE CODE JUST ABOVE IT ENDS IN AN UNCONDITIONAL BRANCH, SO THIS ENTRY
#--LOOKS UNREACHABLE -- CONFIRM AGAINST THE REST OF THE FILE.
6449ATANBORS:
6450#--|X| IS IN d1 IN COMPACT FORM (THE COMPARE BELOW TESTS d1). FP1, d0 SAVED.
6451#--FP0 IS X AND |X| <= 1/16 OR |X| >= 16.
6452        cmp.l           %d1,&0x3FFF8000         # 0x3FFF8000 is 1.0 in compact form
6453        bgt.w           ATANBIG                 # I.E. |X| >= 16
6454
6455ATANSM:
6456#--|X| <= 1/16
6457#--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE
6458#--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6)))))
6459#--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6)] )
6460#--WHERE Y = X*X, AND Z = Y*Y.
6461
6462        cmp.l           %d1,&0x3FD78000         # |X| < 2^(-40)?
6463        blt.w           ATANTINY
6464
6465#--COMPUTE POLYNOMIAL
6466        fmovm.x         &0x0c,-(%sp)            # save fp2/fp3
6467
6468        fmul.x          %fp0,%fp0               # FP0 IS Y = X*X
6469
6470        fmov.x          %fp0,%fp1
6471        fmul.x          %fp1,%fp1               # FP1 IS Z = Y*Y
6472
6473        fmov.d          ATANB6(%pc),%fp2
6474        fmov.d          ATANB5(%pc),%fp3
6475
6476        fmul.x          %fp1,%fp2               # Z*B6
6477        fmul.x          %fp1,%fp3               # Z*B5
6478
6479        fadd.d          ATANB4(%pc),%fp2        # B4+Z*B6
6480        fadd.d          ATANB3(%pc),%fp3        # B3+Z*B5
6481
6482        fmul.x          %fp1,%fp2               # Z*(B4+Z*B6)
6483        fmul.x          %fp3,%fp1               # Z*(B3+Z*B5)
6484
6485        fadd.d          ATANB2(%pc),%fp2        # B2+Z*(B4+Z*B6)
6486        fadd.d          ATANB1(%pc),%fp1        # B1+Z*(B3+Z*B5)
6487
6488        fmul.x          %fp0,%fp2               # Y*(B2+Z*(B4+Z*B6))
6489        fmul.x          X(%a6),%fp0             # X*Y
6490
6491        fadd.x          %fp2,%fp1               # [B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]
6492
6493        fmul.x          %fp1,%fp0               # X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))])
6494
6495        fmovm.x         (%sp)+,&0x30            # restore fp2/fp3
6496
6497        fmov.l          %d0,%fpcr               # restore users rnd mode,prec
6498        fadd.x          X(%a6),%fp0             # ATAN(X) = X + X*Y*(...)
6499        bra             t_inx2                  # exit via t_inx2 (defined elsewhere)
6500
6501ATANTINY:
6502#--|X| < 2^(-40), ATAN(X) = X
#--THE RESULT IS PRODUCED BY A SINGLE MOVE OF THE SAVED INPUT, EXECUTED
#--AFTER FPCR HAS BEEN LOADED FROM d0. d1 IS SET TO FMOV_OP BEFORE THE
#--BRANCH TO t_catch (PRESUMABLY SO t_catch KNOWS THE LAST OPERATION --
#--CONFIRM AGAINST t_catch, WHICH IS DEFINED ELSEWHERE).
6503
6504        fmov.l          %d0,%fpcr               # restore users rnd mode,prec
6505        mov.b           &FMOV_OP,%d1            # last inst is MOVE
6506        fmov.x          X(%a6),%fp0             # last inst - possible exception set
6507
6508        bra             t_catch
6509
6510ATANBIG:
6511#--IF |X| > 2^(100), RETURN     SIGN(X)*(PI/2 - TINY). OTHERWISE,
6512#--RETURN SIGN(X)*PI/2 + ATAN(-1/X).
6513        cmp.l           %d1,&0x40638000         # |X| > 2^(100)?
6514        bgt.w           ATANHUGE
6515
6516#--APPROXIMATE ATAN(-1/X) BY
6517#--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X'
6518#--THIS CAN BE RE-WRITTEN AS
6519#--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y.
6520
6521        fmovm.x         &0x0c,-(%sp)            # save fp2/fp3
6522
6523        fmov.s          &0xBF800000,%fp1        # LOAD -1
6524        fdiv.x          %fp0,%fp1               # FP1 IS -1/X
6525
6526#--DIVIDE IS STILL CRANKING
6527
6528        fmov.x          %fp1,%fp0               # FP0 IS X'
6529        fmul.x          %fp0,%fp0               # FP0 IS Y = X'*X'
6530        fmov.x          %fp1,X(%a6)             # X IS REALLY X'
6531
6532        fmov.x          %fp0,%fp1
6533        fmul.x          %fp1,%fp1               # FP1 IS Z = Y*Y
6534
6535        fmov.d          ATANC5(%pc),%fp3
6536        fmov.d          ATANC4(%pc),%fp2
6537
6538        fmul.x          %fp1,%fp3               # Z*C5
6539        fmul.x          %fp1,%fp2               # Z*C4
6540
6541        fadd.d          ATANC3(%pc),%fp3        # C3+Z*C5
6542        fadd.d          ATANC2(%pc),%fp2        # C2+Z*C4
6543
6544        fmul.x          %fp3,%fp1               # Z*(C3+Z*C5), FP3 RELEASED
6545        fmul.x          %fp0,%fp2               # Y*(C2+Z*C4)
6546
6547        fadd.d          ATANC1(%pc),%fp1        # C1+Z*(C3+Z*C5)
6548        fmul.x          X(%a6),%fp0             # X'*Y
6549
6550        fadd.x          %fp2,%fp1               # [Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)]
6551
6552        fmul.x          %fp1,%fp0               # X'*Y*([C1+Z*(C3+Z*C5)]
6553#                                       ...     +[Y*(C2+Z*C4)])
6554        fadd.x          X(%a6),%fp0             # ATAN(X') = X' + X'*Y*(...)
6555
6556        fmovm.x         (%sp)+,&0x30            # restore fp2/fp3
6557
6558        fmov.l          %d0,%fpcr               # restore users rnd mode,prec
6559        tst.b           (%a0)                   # sign of the original input X
6560        bpl.b           pos_big
6561
6562neg_big:
6563        fadd.x          NPIBY2(%pc),%fp0        # X < 0: add NPIBY2 to ATAN(-1/X)
6564        bra             t_minx2
6565
6566pos_big:
6567        fadd.x          PPIBY2(%pc),%fp0        # X > 0: add PPIBY2 to ATAN(-1/X)
6568        bra             t_pinx2
6569
6570ATANHUGE:
6571#--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY
#--THE CONSTANT IS LOADED FIRST; FPCR IS THEN LOADED FROM d0 SO THAT ONLY
#--THE FINAL ADD OF THE TINY VALUE RUNS UNDER THE CALLER'S SETTINGS.
6572        tst.b           (%a0)                   # sign of the input X
6573        bpl.b           pos_huge
6574
6575neg_huge:
6576        fmov.x          NPIBY2(%pc),%fp0
6577        fmov.l          %d0,%fpcr               # load caller's rnd mode,prec before the last op
6578        fadd.x          PTINY(%pc),%fp0         # NPIBY2 + PTINY
6579        bra             t_minx2
6580
6581pos_huge:
6582        fmov.x          PPIBY2(%pc),%fp0
6583        fmov.l          %d0,%fpcr               # load caller's rnd mode,prec before the last op
6584        fadd.x          NTINY(%pc),%fp0         # PPIBY2 + NTINY
6585        bra             t_pinx2
6586
6587        global          satand
6588#--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT
#--NO COMPUTATION IS DONE HERE; CONTROL PASSES STRAIGHT TO t_extdnrm
#--(DEFINED ELSEWHERE IN THE FILE).
6589satand:
6590        bra             t_extdnrm
6591
6592#########################################################################
6593# sasin():  computes the inverse sine of a normalized input             #
6594# sasind(): computes the inverse sine of a denormalized input           #
6595#                                                                       #
6596# INPUT *************************************************************** #
6597#       a0 = pointer to extended precision input                        #
6598#       d0 = round precision,mode                                       #
6599#                                                                       #
6600# OUTPUT ************************************************************** #
6601#       fp0 = arcsin(X)                                                 #
6602#                                                                       #
6603# ACCURACY and MONOTONICITY ******************************************* #
6604#       The returned result is within 3 ulps in 64 significant bits,    #
6605#       i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6606#       rounded to double precision. The result is provably monotonic   #
6607#       in double precision.                                            #
6608#                                                                       #
6609# ALGORITHM *********************************************************** #
6610#                                                                       #
6611#       ASIN                                                            #
6612#       1. If |X| >= 1, go to 3.                                        #
6613#                                                                       #
6614#       2. (|X| < 1) Calculate asin(X) by                               #
6615#               z := sqrt( [1-X][1+X] )                                 #
6616#               asin(X) = atan( x / z ).                                #
6617#               Exit.                                                   #
6618#                                                                       #
6619#       3. If |X| > 1, go to 5.                                         #
6620#                                                                       #
6621#       4. (|X| = 1) sgn := sign(X), return asin(X) := sgn * Pi/2. Exit.#
6622#                                                                       #
6623#       5. (|X| > 1) Generate an invalid operation by 0 * infinity.     #
6624#               Exit.                                                   #
6625#                                                                       #
6626#########################################################################
6627
6628        global          sasin
#--ENTRY POINT FOR ASIN(X). a0 POINTS TO THE EXTENDED-PRECISION INPUT AND
#--d0 IS THE VALUE LOADED INTO FPCR BEFORE THE FINAL OPERATION. d1 IS
#--BUILT AS THE COMPACT FORM OF |X| AND USED TO DISPATCH:
#--     d1 >= 0x3FFF8000  (|X| >= 1)       -> ASINBIG
#--     d1 <  0x3FD78000  (|X| < 2^(-40))  -> ASINTINY
#--     OTHERWISE                          -> ASINMAIN
6629sasin:
6630        fmov.x          (%a0),%fp0              # LOAD INPUT
6631
6632        mov.l           (%a0),%d1               # high word of d1 = sign/exponent word of X
6633        mov.w           4(%a0),%d1              # low word of d1 = top 16 bits of the fraction
6634        and.l           &0x7FFFFFFF,%d1         # clear the sign bit: d1 = compact |X|
6635        cmp.l           %d1,&0x3FFF8000         # |X| >= 1?
6636        bge.b           ASINBIG
6637
6638# This catch is added here for the '060 QSP. Originally, the call to
6639# satan() would handle this case by causing the exception which would
6640# not be caught until gen_except(). Now, with the exceptions being
6641# detected inside of satan(), the exception would have been handled there
6642# instead of inside sasin() as expected.
6643        cmp.l           %d1,&0x3FD78000         # |X| < 2^(-40)?
6644        blt.w           ASINTINY
6645
6646#--THIS IS THE USUAL CASE, |X| < 1
6647#--ASIN(X) = ATAN( X / SQRT( (1-X)(1+X) ) )
#--THE QUOTIENT IS PUSHED ON THE STACK (12 BYTES) AND ITS ADDRESS IS PASSED
#--TO SATAN IN a0. d0 IS PASSED THROUGH UNCHANGED, SO SATAN LOADS THE
#--CALLER'S VALUE INTO FPCR BEFORE ITS OWN FINAL OPERATION.
6648
6649ASINMAIN:
6650        fmov.s          &0x3F800000,%fp1
6651        fsub.x          %fp0,%fp1               # 1-X
6652        fmovm.x         &0x4,-(%sp)             #  {fp2}
6653        fmov.s          &0x3F800000,%fp2
6654        fadd.x          %fp0,%fp2               # 1+X
6655        fmul.x          %fp2,%fp1               # (1+X)(1-X)
6656        fmovm.x         (%sp)+,&0x20            #  {fp2}
6657        fsqrt.x         %fp1                    # SQRT([1-X][1+X])
6658        fdiv.x          %fp1,%fp0               # X/SQRT([1-X][1+X])
6659        fmovm.x         &0x01,-(%sp)            # save X/SQRT(...)
6660        lea             (%sp),%a0               # pass ptr to X/SQRT(...)
6661        bsr             satan
6662        add.l           &0xc,%sp                # clear X/SQRT(...) from stack
6663        bra             t_inx2
6664
6665ASINBIG:
6666        fabs.x          %fp0                    # |X|
6667        fcmp.s          %fp0,&0x3F800000        # compare |X| with 1.0
6668        fbgt            t_operr                 # cause an operr exception
6669
6670#--|X| = 1, ASIN(X) = +- PI/2.
6671ASINONE:
6672        fmov.x          PIBY2(%pc),%fp0
6673        mov.l           (%a0),%d1
6674        and.l           &0x80000000,%d1         # SIGN BIT OF X
6675        or.l            &0x3F800000,%d1         # +-1 IN SGL FORMAT
6676        mov.l           %d1,-(%sp)              # push SIGN(X) IN SGL-FMT
6677        fmov.l          %d0,%fpcr               # load caller's rnd mode,prec before the last op
6678        fmul.s          (%sp)+,%fp0             # PIBY2 * (+-1); also pops the temporary
6679        bra             t_inx2
6680
6681#--|X| < 2^(-40), ASIN(X) = X
6682ASINTINY:
6683        fmov.l          %d0,%fpcr               # restore users rnd mode,prec
6684        mov.b           &FMOV_OP,%d1            # last inst is MOVE
6685        fmov.x          (%a0),%fp0              # last inst - possible exception
6686        bra             t_catch
6687
6688        global          sasind
6689#--ASIN(X) = X FOR DENORMALIZED X
#--NO COMPUTATION IS DONE HERE; CONTROL PASSES STRAIGHT TO t_extdnrm
#--(DEFINED ELSEWHERE IN THE FILE).
6690sasind:
6691        bra             t_extdnrm
6692
6693#########################################################################
6694# sacos():  computes the inverse cosine of a normalized input           #
6695# sacosd(): computes the inverse cosine of a denormalized input         #
6696#                                                                       #
6697# INPUT *************************************************************** #
6698#       a0 = pointer to extended precision input                        #
6699#       d0 = round precision,mode                                       #
6700#                                                                       #
6701# OUTPUT ************************************************************** #
6702#       fp0 = arccos(X)                                                 #
6703#                                                                       #
6704# ACCURACY and MONOTONICITY ******************************************* #
6705#       The returned result is within 3 ulps in 64 significant bits,    #
6706#       i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6707#       rounded to double precision. The result is provably monotonic   #
6708#       in double precision.                                            #
6709#                                                                       #
6710# ALGORITHM *********************************************************** #
6711#                                                                       #
6712#       ACOS                                                            #
6713#       1. If |X| >= 1, go to 3.                                        #
6714#                                                                       #
6715#       2. (|X| < 1) Calculate acos(X) by                               #
6716#               z := (1-X) / (1+X)                                      #
6717#               acos(X) = 2 * atan( sqrt(z) ).                          #
6718#               Exit.                                                   #
6719#                                                                       #
6720#       3. If |X| > 1, go to 5.                                         #
6721#                                                                       #
6722#       4. (|X| = 1) If X > 0, return 0. Otherwise, return Pi. Exit.    #
6723#                                                                       #
6724#       5. (|X| > 1) Generate an invalid operation by 0 * infinity.     #
6725#               Exit.                                                   #
6726#                                                                       #
6727#########################################################################
6728
6729        global          sacos
#--ENTRY POINT FOR ACOS(X). a0 POINTS TO THE EXTENDED-PRECISION INPUT AND
#--d0 IS THE VALUE LOADED INTO FPCR BEFORE THE FINAL OPERATION. d1 IS
#--BUILT AS THE COMPACT FORM OF |X|; d1 >= 0x3FFF8000 (|X| >= 1) GOES TO
#--ACOSBIG, EVERYTHING ELSE FALLS INTO ACOSMAIN.
6730sacos:
6731        fmov.x          (%a0),%fp0              # LOAD INPUT
6732
6733        mov.l           (%a0),%d1               # pack exp w/ upper 16 fraction
6734        mov.w           4(%a0),%d1
6735        and.l           &0x7FFFFFFF,%d1         # clear the sign bit: d1 = compact |X|
6736        cmp.l           %d1,&0x3FFF8000         # |X| >= 1?
6737        bge.b           ACOSBIG
6738
6739#--THIS IS THE USUAL CASE, |X| < 1
6740#--ACOS(X) = 2 * ATAN(  SQRT( (1-X)/(1+X) ) )
#--d0 IS SAVED ON THE STACK AND CLEARED BEFORE THE CALL, SO SATAN LOADS
#--ZERO INTO FPCR FOR ITS FINAL OPERATION. THE SAVED VALUE IS POPPED INTO
#--FPCR AFTERWARDS SO THAT ONLY THE DOUBLING BELOW RUNS UNDER THE
#--CALLER'S SETTINGS. STACK DURING THE CALL: 12-BYTE ARGUMENT ON TOP,
#--SAVED d0 BENEATH IT.
6741
6742ACOSMAIN:
6743        fmov.s          &0x3F800000,%fp1
6744        fadd.x          %fp0,%fp1               # 1+X
6745        fneg.x          %fp0                    # -X
6746        fadd.s          &0x3F800000,%fp0        # 1-X
6747        fdiv.x          %fp1,%fp0               # (1-X)/(1+X)
6748        fsqrt.x         %fp0                    # SQRT((1-X)/(1+X))
6749        mov.l           %d0,-(%sp)              # save original users fpcr
6750        clr.l           %d0                     # satan will load 0 into fpcr
6751        fmovm.x         &0x01,-(%sp)            # save SQRT(...) to stack
6752        lea             (%sp),%a0               # pass ptr to sqrt
6753        bsr             satan                   # ATAN(SQRT([1-X]/[1+X]))
6754        add.l           &0xc,%sp                # clear SQRT(...) from stack
6755
6756        fmov.l          (%sp)+,%fpcr            # restore users round prec,mode
6757        fadd.x          %fp0,%fp0               # 2 * ATAN( STUFF )
6758        bra             t_pinx2
6759
6760ACOSBIG:
6761        fabs.x          %fp0                    # |X|
6762        fcmp.s          %fp0,&0x3F800000        # compare |X| with 1.0
6763        fbgt            t_operr                 # cause an operr exception
6764
6765#--|X| = 1, ACOS(X) = 0 OR PI
6766        tst.b           (%a0)                   # is X positive or negative?
6767        bpl.b           ACOSP1
6768
6769#--X = -1
6770#Returns PI and inexact exception
6771ACOSM1:
6772        fmov.x          PI(%pc),%fp0            # load PI
6773        fmov.l          %d0,%fpcr               # load round mode,prec
6774        fadd.s          &0x00800000,%fp0        # add a small value
6775        bra             t_pinx2
6776
6777ACOSP1:
6778        bra             ld_pzero                # answer is positive zero
6779
6780        global          sacosd
6781#--ACOS(X) = PI/2 FOR DENORMALIZED X
#--FPCR IS LOADED FROM d0 FIRST, SO THE LOAD OF PIBY2 INTO FP0 IS THE LAST
#--OPERATION AND RUNS UNDER THE CALLER'S SETTINGS.
6782sacosd:
6783        fmov.l          %d0,%fpcr               # load user's rnd mode/prec
6784        fmov.x          PIBY2(%pc),%fp0         # result = PIBY2
6785        bra             t_pinx2
6786
6787#########################################################################
6788# setox():    computes the exponential for a normalized input           #
6789# setoxd():   computes the exponential for a denormalized input         #
6790# setoxm1():  computes the exponential minus 1 for a normalized input   #
6791# setoxm1d(): computes the exponential minus 1 for a denormalized input #
6792#                                                                       #
6793# INPUT *************************************************************** #
6794#       a0 = pointer to extended precision input                        #
6795#       d0 = round precision,mode                                       #
6796#                                                                       #
6797# OUTPUT ************************************************************** #
6798#       fp0 = exp(X) or exp(X)-1                                        #
6799#                                                                       #
6800# ACCURACY and MONOTONICITY ******************************************* #
6801#       The returned result is within 0.85 ulps in 64 significant bits, #
6802#       i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6803#       rounded to double precision. The result is provably monotonic   #
6804#       in double precision.                                            #
6805#                                                                       #
6806# ALGORITHM and IMPLEMENTATION **************************************** #
6807#                                                                       #
6808#       setoxd                                                          #
6809#       ------                                                          #
6810#       Step 1. Set ans := 1.0                                          #
6811#                                                                       #
6812#       Step 2. Return  ans := ans + sign(X)*2^(-126). Exit.            #
6813#       Notes:  This will always generate one exception -- inexact.     #
6814#                                                                       #
6815#                                                                       #
6816#       setox                                                           #
6817#       -----                                                           #
6818#                                                                       #
6819#       Step 1. Filter out extreme cases of input argument.             #
6820#               1.1     If |X| >= 2^(-65), go to Step 1.3.              #
6821#               1.2     Go to Step 7.                                   #
6822#               1.3     If |X| < 16380 log(2), go to Step 2.            #
6823#               1.4     Go to Step 8.                                   #
6824#       Notes:  The usual case should take the branches 1.1 -> 1.3 -> 2.#
6825#               To avoid the use of floating-point comparisons, a       #
6826#               compact representation of |X| is used. This format is a #
6827#               32-bit integer, the upper (more significant) 16 bits    #
6828#               are the sign and biased exponent field of |X|; the      #
6829#               lower 16 bits are the 16 most significant fraction      #
6830#               (including the explicit bit) bits of |X|. Consequently, #
6831#               the comparisons in Steps 1.1 and 1.3 can be performed   #
6832#               by integer comparison. Note also that the constant      #
6833#               16380 log(2) used in Step 1.3 is also in the compact    #
6834#               form. Thus taking the branch to Step 2 guarantees       #
6835#               |X| < 16380 log(2). There is no harm to have a small    #
6836#               number of cases where |X| is less than, but close to,   #
6837#               16380 log(2) and the branch to Step 8 is taken.         #
6838#                                                                       #
6839#       Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ).      #
6840#               2.1     Set AdjFlag := 0 (indicates the branch 1.3 -> 2 #
6841#                       was taken)                                      #
6842#               2.2     N := round-to-nearest-integer( X * 64/log2 ).   #
6843#               2.3     Calculate       J = N mod 64; so J = 0,1,2,..., #
6844#                       or 63.                                          #
6845#               2.4     Calculate       M = (N - J)/64; so N = 64M + J. #
6846#               2.5     Calculate the address of the stored value of    #
6847#                       2^(J/64).                                       #
6848#               2.6     Create the value Scale = 2^M.                   #
6849#       Notes:  The calculation in 2.2 is really performed by           #
6850#                       Z := X * constant                               #
6851#                       N := round-to-nearest-integer(Z)                #
6852#               where                                                   #
6853#                       constant := single-precision( 64/log 2 ).       #
6854#                                                                       #
6855#               Using a single-precision constant avoids memory         #
6856#               access. Another effect of using a single-precision      #
6857#               "constant" is that the calculated value Z is            #
6858#                                                                       #
6859#                       Z = X*(64/log2)*(1+eps), |eps| <= 2^(-24).      #
6860#                                                                       #
6861#               This error has to be considered later in Steps 3 and 4. #
6862#                                                                       #
6863#       Step 3. Calculate X - N*log2/64.                                #
6864#               3.1     R := X + N*L1,                                  #
6865#                               where L1 := single-precision(-log2/64). #
6866#               3.2     R := R + N*L2,                                  #
6867#                               L2 := extended-precision(-log2/64 - L1).#
6868#       Notes:  a) The way L1 and L2 are chosen ensures L1+L2           #
6869#               approximate the value -log2/64 to 88 bits of accuracy.  #
6870#               b) N*L1 is exact because N is no longer than 22 bits    #
6871#               and L1 is no longer than 24 bits.                       #
6872#               c) The calculation X+N*L1 is also exact due to          #
6873#               cancellation. Thus, R is practically X+N(L1+L2) to full #
6874#               64 bits.                                                #
6875#               d) It is important to estimate how large can |R| be     #
6876#               after Step 3.2.                                         #
6877#                                                                       #
6878#               N = rnd-to-int( X*64/log2 (1+eps) ), |eps|<=2^(-24)     #
6879#               X*64/log2 (1+eps)       =       N + f,  |f| <= 0.5      #
6880#               X*64/log2 - N   =       f - eps*X 64/log2               #
6881#               X - N*log2/64   =       f*log2/64 - eps*X               #
6882#                                                                       #
6883#                                                                       #
6884#               Now |X| <= 16446 log2, thus                             #
6885#                                                                       #
6886#                       |X - N*log2/64| <= (0.5 + 16446/2^(18))*log2/64 #
6887#                                       <= 0.57 log2/64.                #
6888#                This bound will be used in Step 4.                     #
6889#                                                                       #
6890#       Step 4. Approximate exp(R)-1 by a polynomial                    #
6891#               p = R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5))))      #
6892#       Notes:  a) In order to reduce memory access, the coefficients   #
6893#               are made as "short" as possible: A1 (which is 1/2), A4  #
6894#               and A5 are single precision; A2 and A3 are double       #
6895#               precision.                                              #
6896#               b) Even with the restrictions above,                    #
6897#                  |p - (exp(R)-1)| < 2^(-68.8) for all |R| <= 0.0062.  #
6898#               Note that 0.0062 is slightly bigger than 0.57 log2/64.  #
6899#               c) To fully utilize the pipeline, p is separated into   #
6900#               two independent pieces of roughly equal complexities    #
6901#                       p = [ R + R*S*(A2 + S*A4) ]     +               #
6902#                               [ S*(A1 + S*(A3 + S*A5)) ]              #
6903#               where S = R*R.                                          #
6904#                                                                       #
6905#       Step 5. Compute 2^(J/64)*exp(R) = 2^(J/64)*(1+p) by             #
6906#                               ans := T + ( T*p + t)                   #
6907#               where T and t are the stored values for 2^(J/64).       #
6908#       Notes:  2^(J/64) is stored as T and t where T+t approximates    #
6909#               2^(J/64) to roughly 85 bits; T is in extended precision #
6910#               and t is in single precision. Note also that T is       #
6911#               rounded to 62 bits so that the last two bits of T are   #
6912#               zero. The reason for such a special form is that T-1,   #
6913#               T-2, and T-8 will all be exact --- a property that will #
6914#               give much more accurate computation of the function     #
6915#               EXPM1.                                                  #
6916#                                                                       #
6917#       Step 6. Reconstruction of exp(X)                                #
6918#                       exp(X) = 2^M * 2^(J/64) * exp(R).               #
6919#               6.1     If AdjFlag = 0, go to 6.3                       #
6920#               6.2     ans := ans * AdjScale                           #
6921#               6.3     Restore the user FPCR                           #
6922#               6.4     Return ans := ans * Scale. Exit.                #
6923#       Notes:  If AdjFlag = 0, we have X = Mlog2 + Jlog2/64 + R,       #
6924#               |M| <= 16380, and Scale = 2^M. Moreover, exp(X) will    #
6925#               neither overflow nor underflow. If AdjFlag = 1, that    #
6926#               means that                                              #
6927#                       X = (M1+M)log2 + Jlog2/64 + R, |M1+M| >= 16380. #
6928#               Hence, exp(X) may overflow or underflow or neither.     #
6929#               When that is the case, AdjScale = 2^(M1) where M1 is    #
6930#               approximately M. Thus 6.2 will never cause              #
6931#               over/underflow. Possible exception in 6.4 is overflow   #
6932#               or underflow. The inexact exception is not generated in #
6933#               6.4. Although one can argue that the inexact flag       #
6934#               should always be raised, simulating that exception      #
6935#               costs more than the flag is worth in practical use.     #
6936#                                                                       #
6937#       Step 7. Return 1 + X.                                           #
6938#               7.1     ans := X                                        #
6939#               7.2     Restore user FPCR.                              #
6940#               7.3     Return ans := 1 + ans. Exit                     #
6941#       Notes:  For non-zero X, the inexact exception will always be    #
6942#               raised by 7.3. That is the only exception raised by 7.3.#
6943#               Note also that we use the FMOVEM instruction to move X  #
6944#               in Step 7.1 to avoid unnecessary trapping. (Although    #
6945#               the FMOVEM may not seem relevant since X is normalized, #
6946#               the precaution will be useful in the library version of #
6947#               this code where the separate entry for denormalized     #
6948#               inputs will be done away with.)                         #
6949#                                                                       #
6950#       Step 8. Handle exp(X) where |X| >= 16380log2.                   #
6951#               8.1     If |X| > 16480 log2, go to Step 9.              #
6952#               (mimic 2.2 - 2.6)                                       #
6953#               8.2     N := round-to-integer( X * 64/log2 )            #
6954#               8.3     Calculate J = N mod 64, J = 0,1,...,63          #
6955#               8.4     K := (N-J)/64, M1 := truncate(K/2), M = K-M1,   #
6956#                       AdjFlag := 1.                                   #
6957#               8.5     Calculate the address of the stored value       #
6958#                       2^(J/64).                                       #
6959#               8.6     Create the values Scale = 2^M, AdjScale = 2^M1. #
6960#               8.7     Go to Step 3.                                   #
6961#       Notes:  Refer to notes for 2.2 - 2.6.                           #
6962#                                                                       #
6963#       Step 9. Handle exp(X), |X| > 16480 log2.                        #
6964#               9.1     If X < 0, go to 9.3                             #
6965#               9.2     ans := Huge, go to 9.4                          #
6966#               9.3     ans := Tiny.                                    #
6967#               9.4     Restore user FPCR.                              #
6968#               9.5     Return ans := ans * ans. Exit.                  #
6969#       Notes:  Exp(X) will surely overflow or underflow, depending on  #
6970#               X's sign. "Huge" and "Tiny" are respectively large/tiny #
6971#               extended-precision numbers whose square over/underflow  #
6972#               with an inexact result. Thus, 9.5 always raises the     #
6973#               inexact together with either overflow or underflow.     #
6974#                                                                       #
6975#       setoxm1d                                                        #
6976#       --------                                                        #
6977#                                                                       #
6978#       Step 1. Set ans := 0                                            #
6979#                                                                       #
6980#       Step 2. Return  ans := X + ans. Exit.                           #
6981#       Notes:  This will return X with the appropriate rounding        #
6982#                precision prescribed by the user FPCR.                 #
6983#                                                                       #
6984#       setoxm1                                                         #
6985#       -------                                                         #
6986#                                                                       #
6987#       Step 1. Check |X|                                               #
6988#               1.1     If |X| >= 1/4, go to Step 1.3.                  #
6989#               1.2     Go to Step 7.                                   #
6990#               1.3     If |X| < 70 log(2), go to Step 2.               #
6991#               1.4     Go to Step 10.                                  #
6992#       Notes:  The usual case should take the branches 1.1 -> 1.3 -> 2.#
6993#               However, it is conceivable |X| can be small very often  #
6994#               because EXPM1 is intended to evaluate exp(X)-1          #
6995#               accurately when |X| is small. For further details on    #
6996#               the comparisons, see the notes on Step 1 of setox.      #
6997#                                                                       #
6998#       Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ).      #
6999#               2.1     N := round-to-nearest-integer( X * 64/log2 ).   #
7000#               2.2     Calculate       J = N mod 64; so J = 0,1,2,..., #
7001#                       or 63.                                          #
7002#               2.3     Calculate       M = (N - J)/64; so N = 64M + J. #
7003#               2.4     Calculate the address of the stored value of    #
7004#                       2^(J/64).                                       #
7005#               2.5     Create the values Sc = 2^M and                  #
7006#                       OnebySc := -2^(-M).                             #
7007#       Notes:  See the notes on Step 2 of setox.                       #
7008#                                                                       #
7009#       Step 3. Calculate X - N*log2/64.                                #
7010#               3.1     R := X + N*L1,                                  #
7011#                               where L1 := single-precision(-log2/64). #
7012#               3.2     R := R + N*L2,                                  #
7013#                               L2 := extended-precision(-log2/64 - L1).#
7014#       Notes:  Applying the analysis of Step 3 of setox in this case   #
7015#               shows that |R| <= 0.0055 (note that |X| <= 70 log2 in   #
7016#               this case).                                             #
7017#                                                                       #
7018#       Step 4. Approximate exp(R)-1 by a polynomial                    #
7019#                       p = R+R*R*(A1+R*(A2+R*(A3+R*(A4+R*(A5+R*A6))))) #
7020#       Notes:  a) In order to reduce memory access, the coefficients   #
7021#               are made as "short" as possible: A1 (which is 1/2), A5  #
7022#               and A6 are single precision; A2, A3 and A4 are double   #
7023#               precision.                                              #
7024#               b) Even with the restriction above,                     #
7025#                       |p - (exp(R)-1)| <      |R| * 2^(-72.7)         #
7026#               for all |R| <= 0.0055.                                  #
7027#               c) To fully utilize the pipeline, p is separated into   #
7028#               two independent pieces of roughly equal complexity      #
7029#                       p = [ R*S*(A2 + S*(A4 + S*A6)) ]        +       #
7030#                               [ R + S*(A1 + S*(A3 + S*A5)) ]          #
7031#               where S = R*R.                                          #
7032#                                                                       #
7033#       Step 5. Compute 2^(J/64)*p by                                   #
7034#                               p := T*p                                #
7035#               where T and t are the stored values for 2^(J/64).       #
7036#       Notes:  2^(J/64) is stored as T and t where T+t approximates    #
7037#               2^(J/64) to roughly 85 bits; T is in extended precision #
7038#               and t is in single precision. Note also that T is       #
7039#               rounded to 62 bits so that the last two bits of T are   #
7040#               zero. The reason for such a special form is that T-1,   #
7041#               T-2, and T-8 will all be exact --- a property that will #
7042#               be exploited in Step 6 below. The total relative error  #
7043#               in p is no bigger than 2^(-67.7) compared to the final  #
7044#               result.                                                 #
7045#                                                                       #
7046#       Step 6. Reconstruction of exp(X)-1                              #
7047#                       exp(X)-1 = 2^M * ( 2^(J/64) + p - 2^(-M) ).     #
7048#               6.1     If M <= 63, go to Step 6.3.                     #
7049#               6.2     ans := T + (p + (t + OnebySc)). Go to 6.6       #
7050#               6.3     If M >= -3, go to 6.5.                          #
7051#               6.4     ans := (T + (p + t)) + OnebySc. Go to 6.6       #
7052#               6.5     ans := (T + OnebySc) + (p + t).                 #
7053#               6.6     Restore user FPCR.                              #
7054#               6.7     Return ans := Sc * ans. Exit.                   #
7055#       Notes:  The various arrangements of the expressions give        #
7056#               accurate evaluations.                                   #
7057#                                                                       #
7058#       Step 7. exp(X)-1 for |X| < 1/4.                                 #
7059#               7.1     If |X| >= 2^(-65), go to Step 9.                #
7060#               7.2     Go to Step 8.                                   #
7061#                                                                       #
7062#       Step 8. Calculate exp(X)-1, |X| < 2^(-65).                      #
7063#               8.1     If |X| < 2^(-16312), goto 8.3                   #
7064#               8.2     Restore FPCR; return ans := X - 2^(-16382).     #
7065#                       Exit.                                           #
7066#               8.3     X := X * 2^(140).                               #
7067#               8.4     Restore FPCR; ans := ans - 2^(-16382).          #
7068#                Return ans := ans*2^(-140). Exit                       #
7069#       Notes:  The idea is to return "X - tiny" under the user         #
7070#               precision and rounding modes. To avoid unnecessary      #
7071#               inefficiency, we stay away from denormalized numbers    #
7072#               the best we can. For |X| >= 2^(-16312), the             #
7073#               straightforward 8.2 generates the inexact exception as  #
7074#               the case warrants.                                      #
7075#                                                                       #
7076#       Step 9. Calculate exp(X)-1, |X| < 1/4, by a polynomial          #
7077#                       p = X + X*X*(B1 + X*(B2 + ... + X*B12))         #
7078#       Notes:  a) In order to reduce memory access, the coefficients   #
7079#               are made as "short" as possible: B1 (which is 1/2), B9  #
7080#               to B12 are single precision; B3 to B8 are double        #
7081#               precision; and B2 is double extended.                   #
7082#               b) Even with the restriction above,                     #
7083#                       |p - (exp(X)-1)| < |X| 2^(-70.6)                #
7084#               for all |X| <= 0.251.                                   #
7085#               Note that 0.251 is slightly bigger than 1/4.            #
7086#               c) To fully preserve accuracy, the polynomial is        #
7087#               computed as                                             #
7088#                       X + ( S*B1 +    Q ) where S = X*X and           #
7089#                       Q       =       X*S*(B2 + X*(B3 + ... + X*B12)) #
7090#               d) To fully utilize the pipeline, Q is separated into   #
7091#               two independent pieces of roughly equal complexity      #
7092#                       Q = [ X*S*(B2 + S*(B4 + ... + S*B12)) ] +       #
7093#                               [ S*S*(B3 + S*(B5 + ... + S*B11)) ]     #
7094#                                                                       #
7095#       Step 10. Calculate exp(X)-1 for |X| >= 70 log 2.                #
7096#               10.1 If X >= 70log2 , exp(X) - 1 = exp(X) for all       #
7097#               practical purposes. Therefore, go to Step 1 of setox.   #
7098#               10.2 If X <= -70log2, exp(X) - 1 = -1 for all practical #
7099#               purposes.                                               #
7100#               ans := -1                                               #
7101#               Restore user FPCR                                       #
7102#               Return ans := ans + 2^(-126). Exit.                     #
7103#       Notes:  10.2 will always create an inexact and return -1 + tiny #
7104#               in the user rounding precision and mode.                #
7105#                                                                       #
7106#########################################################################
7107
# Constants shared by setox and setoxm1.
#
# L2: extended-precision low-order part of -log2/64. The single-precision
# leading part L1 is used as an immediate (0xBC317218) in the code, and
# L1 + L2 = -log2/64. Stored padded to 16 bytes.
7108L2:     long            0x3FDC0000,0x82E30865,0x4361C4C6,0x00000000
7109
# setox Step 4: double-precision polynomial coefficients A3 and A2.
7110EEXPA3: long            0x3FA55555,0x55554CC1
7111EEXPA2: long            0x3FC55555,0x55554A54
7112
# setoxm1 Step 4: double-precision polynomial coefficients A4 and A3.
7113EM1A4:  long            0x3F811111,0x11174385
7114EM1A3:  long            0x3FA55555,0x55554F5A
7115
# setoxm1 Step 4: coefficient A2. Read as a double (fadd.d); the two
# trailing zero longwords pad the entry to 16 bytes.
7116EM1A2:  long            0x3FC55555,0x55555555,0x00000000,0x00000000
7117
# setoxm1 Step 9 (|X| < 1/4 polynomial): coefficients B8 .. B3. The
# algorithm notes describe B3 to B8 as double precision.
7118EM1B8:  long            0x3EC71DE3,0xA5774682
7119EM1B7:  long            0x3EFA01A0,0x19D7CB68
7120
7121EM1B6:  long            0x3F2A01A0,0x1A019DF3
7122EM1B5:  long            0x3F56C16C,0x16C170E2
7123
7124EM1B4:  long            0x3F811111,0x11111111
7125EM1B3:  long            0x3FA55555,0x55555555
7126
# setoxm1 Step 9: coefficient B2 in extended precision (three longwords),
# followed by one zero longword of padding.
7127EM1B2:  long            0x3FFC0000,0xAAAAAAAA,0xAAAAAAAB
7128        long            0x00000000
7129
# Double-precision scale factors 2^(140) and 2^(-140); the algorithm notes
# for setoxm1 Step 8 scale very small X by 2^(140).
7130TWO140: long            0x48B00000,0x00000000
7131TWON140:
7132        long            0x37300000,0x00000000
7133
# EEXPTBL: table of 2^(J/64) for J = 0,1,...,63, one 16-byte entry per J
# (the code indexes it with J << 4).
# Each entry holds the value as T + t:
#   longwords 0-2: T, extended precision (per the algorithm notes it is
#                  rounded to 62 bits, so its last two bits are zero)
#   longword  3:   t, single-precision correction term
# setox reads T with "fmov.x (%a1)+" and then t from the advanced pointer;
# setoxm1 reads t at offset 12 from the entry.
7134EEXPTBL:
7135        long            0x3FFF0000,0x80000000,0x00000000,0x00000000
7136        long            0x3FFF0000,0x8164D1F3,0xBC030774,0x9F841A9B
7137        long            0x3FFF0000,0x82CD8698,0xAC2BA1D8,0x9FC1D5B9
7138        long            0x3FFF0000,0x843A28C3,0xACDE4048,0xA0728369
7139        long            0x3FFF0000,0x85AAC367,0xCC487B14,0x1FC5C95C
7140        long            0x3FFF0000,0x871F6196,0x9E8D1010,0x1EE85C9F
7141        long            0x3FFF0000,0x88980E80,0x92DA8528,0x9FA20729
7142        long            0x3FFF0000,0x8A14D575,0x496EFD9C,0xA07BF9AF
7143        long            0x3FFF0000,0x8B95C1E3,0xEA8BD6E8,0xA0020DCF
7144        long            0x3FFF0000,0x8D1ADF5B,0x7E5BA9E4,0x205A63DA
7145        long            0x3FFF0000,0x8EA4398B,0x45CD53C0,0x1EB70051
7146        long            0x3FFF0000,0x9031DC43,0x1466B1DC,0x1F6EB029
7147        long            0x3FFF0000,0x91C3D373,0xAB11C338,0xA0781494
7148        long            0x3FFF0000,0x935A2B2F,0x13E6E92C,0x9EB319B0
7149        long            0x3FFF0000,0x94F4EFA8,0xFEF70960,0x2017457D
7150        long            0x3FFF0000,0x96942D37,0x20185A00,0x1F11D537
7151        long            0x3FFF0000,0x9837F051,0x8DB8A970,0x9FB952DD
7152        long            0x3FFF0000,0x99E04593,0x20B7FA64,0x1FE43087
7153        long            0x3FFF0000,0x9B8D39B9,0xD54E5538,0x1FA2A818
7154        long            0x3FFF0000,0x9D3ED9A7,0x2CFFB750,0x1FDE494D
7155        long            0x3FFF0000,0x9EF53260,0x91A111AC,0x20504890
7156        long            0x3FFF0000,0xA0B0510F,0xB9714FC4,0xA073691C
7157        long            0x3FFF0000,0xA2704303,0x0C496818,0x1F9B7A05
7158        long            0x3FFF0000,0xA43515AE,0x09E680A0,0xA0797126
7159        long            0x3FFF0000,0xA5FED6A9,0xB15138EC,0xA071A140
7160        long            0x3FFF0000,0xA7CD93B4,0xE9653568,0x204F62DA
7161        long            0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x1F283C4A
7162        long            0x3FFF0000,0xAB7A39B5,0xA93ED338,0x9F9A7FDC
7163        long            0x3FFF0000,0xAD583EEA,0x42A14AC8,0xA05B3FAC
7164        long            0x3FFF0000,0xAF3B78AD,0x690A4374,0x1FDF2610
7165        long            0x3FFF0000,0xB123F581,0xD2AC2590,0x9F705F90
7166        long            0x3FFF0000,0xB311C412,0xA9112488,0x201F678A
7167        long            0x3FFF0000,0xB504F333,0xF9DE6484,0x1F32FB13
7168        long            0x3FFF0000,0xB6FD91E3,0x28D17790,0x20038B30
7169        long            0x3FFF0000,0xB8FBAF47,0x62FB9EE8,0x200DC3CC
7170        long            0x3FFF0000,0xBAFF5AB2,0x133E45FC,0x9F8B2AE6
7171        long            0x3FFF0000,0xBD08A39F,0x580C36C0,0xA02BBF70
7172        long            0x3FFF0000,0xBF1799B6,0x7A731084,0xA00BF518
7173        long            0x3FFF0000,0xC12C4CCA,0x66709458,0xA041DD41
7174        long            0x3FFF0000,0xC346CCDA,0x24976408,0x9FDF137B
7175        long            0x3FFF0000,0xC5672A11,0x5506DADC,0x201F1568
7176        long            0x3FFF0000,0xC78D74C8,0xABB9B15C,0x1FC13A2E
7177        long            0x3FFF0000,0xC9B9BD86,0x6E2F27A4,0xA03F8F03
7178        long            0x3FFF0000,0xCBEC14FE,0xF2727C5C,0x1FF4907D
7179        long            0x3FFF0000,0xCE248C15,0x1F8480E4,0x9E6E53E4
7180        long            0x3FFF0000,0xD06333DA,0xEF2B2594,0x1FD6D45C
7181        long            0x3FFF0000,0xD2A81D91,0xF12AE45C,0xA076EDB9
7182        long            0x3FFF0000,0xD4F35AAB,0xCFEDFA20,0x9FA6DE21
7183        long            0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x1EE69A2F
7184        long            0x3FFF0000,0xD99D15C2,0x78AFD7B4,0x207F439F
7185        long            0x3FFF0000,0xDBFBB797,0xDAF23754,0x201EC207
7186        long            0x3FFF0000,0xDE60F482,0x5E0E9124,0x9E8BE175
7187        long            0x3FFF0000,0xE0CCDEEC,0x2A94E110,0x20032C4B
7188        long            0x3FFF0000,0xE33F8972,0xBE8A5A50,0x2004DFF5
7189        long            0x3FFF0000,0xE5B906E7,0x7C8348A8,0x1E72F47A
7190        long            0x3FFF0000,0xE8396A50,0x3C4BDC68,0x1F722F22
7191        long            0x3FFF0000,0xEAC0C6E7,0xDD243930,0xA017E945
7192        long            0x3FFF0000,0xED4F301E,0xD9942B84,0x1F401A5B
7193        long            0x3FFF0000,0xEFE4B99B,0xDCDAF5CC,0x9FB9A9E3
7194        long            0x3FFF0000,0xF281773C,0x59FFB138,0x20744C05
7195        long            0x3FFF0000,0xF5257D15,0x2486CC2C,0x1F773A19
7196        long            0x3FFF0000,0xF7D0DF73,0x0AD13BB8,0x1FFE90D5
7197        long            0x3FFF0000,0xFA83B2DB,0x722A033C,0xA041ED22
7198        long            0x3FFF0000,0xFD3E0C0C,0xF486C174,0x1F853F3A
7199
# Aliases for the scratch locations (addressed off %a6) used below.
# SCALE/SC share FP_SCR0 and ADJSCALE/ONEBYSC share FP_SCR1: the first
# name of each pair is used by setox, the second by setoxm1.
7200        set             ADJFLAG,L_SCR2          # setox: 1 if ADJSCALE must be applied
7201        set             SCALE,FP_SCR0           # setox: 2^(M), extended
7202        set             ADJSCALE,FP_SCR1        # setox: 2^(M1), extended
7203        set             SC,FP_SCR0              # setoxm1: 2^(M), extended
7204        set             ONEBYSC,FP_SCR1         # setoxm1: -2^(-M), extended
7205
#-----------------------------------------------------------------------
# setox: exp(X) for an extended-precision X that is finite, non-zero
# and not a NaN.
#
# In:    a0 = pointer to X
#        d0 = user FPCR value; it is moved into %fpcr immediately
#             before the last arithmetic instruction of each path
#        a6 = base register for L_SCR1, ADJFLAG, SCALE and ADJSCALE
# Out:   fp0 = result. Every path leaves by branching to t_catch,
#        t_pinx2, t_unfl2 or t_ovfl2, which are defined outside this
#        section; on the t_catch path d1 is loaded with FMUL_OP first.
# Uses:  d1, a1, fp1 as scratch. fp2/fp3 are pushed on the stack and
#        popped again before the exit branch.
#
# Dispatch on |X| (thresholds compared on exponent + top 16 mantissa
# bits):
#        |X| <  2^(-65)                     -> EXPSM   (return 1 + X)
#        2^(-65) <= |X| < 16380 log2        -> EXPMAIN (table + polynomial)
#        16380 log2 <= |X| <= 16480 log2    -> EEXPBIG (two-step scaling)
#        |X| >  16480 log2                  -> EXP2BIG (overflow/underflow)
#-----------------------------------------------------------------------
7206        global          setox
7207setox:
7208#--entry point for EXP(X), here X is finite, non-zero, and not a NaN
7209
7210#--Step 1.
7211        mov.l           (%a0),%d1               # sign/exponent longword of X
7212        and.l           &0x7FFF0000,%d1         # biased expo. of X (sign dropped)
7213        cmp.l           %d1,&0x3FBE0000         # 2^(-65)
7214        bge.b           EXPC1                   # normal case
7215        bra             EXPSM                   # |X| < 2^(-65): Step 7
7216
7217EXPC1:
7218#--The case |X| >= 2^(-65)
7219        mov.w           4(%a0),%d1              # expo. and partial sig. of |X|
7220        cmp.l           %d1,&0x400CB167         # 16380 log2 trunc. 16 bits
7221        blt.b           EXPMAIN                 # normal case
7222        bra             EEXPBIG                 # |X| >= 16380 log2: Step 8
7223
7224EXPMAIN:
7225#--Step 2.
7226#--This is the normal branch:   2^(-65) <= |X| < 16380 log2.
7227        fmov.x          (%a0),%fp0              # load input from (a0)
7228
7229        fmov.x          %fp0,%fp1               # fp1 keeps X
7230        fmul.s          &0x42B8AA3B,%fp0        # 64/log2 * X
7231        fmovm.x         &0xc,-(%sp)             # save fp2/fp3 {%fp2/%fp3}
7232        mov.l           &0,ADJFLAG(%a6)         # AdjFlag := 0
7233        fmov.l          %fp0,%d1                # N = int( X * 64/log2 )
7234        lea             EEXPTBL(%pc),%a1        # a1 = base of 2^(J/64) table
7235        fmov.l          %d1,%fp0                # convert to floating-format
7236
7237        mov.l           %d1,L_SCR1(%a6)         # save N temporarily
7238        and.l           &0x3F,%d1               # d1 is J = N mod 64
7239        lsl.l           &4,%d1                  # J * 16 = byte offset of entry
7240        add.l           %d1,%a1                 # address of 2^(J/64)
7241        mov.l           L_SCR1(%a6),%d1         # reload N
7242        asr.l           &6,%d1                  # d1 is M
7243        add.w           &0x3FFF,%d1             # biased expo. of 2^(M)
7244        mov.w           L2(%pc),L_SCR1(%a6)     # prefetch L2, no need in CB
7245
7246EXPCONT1:
7247#--Step 3.
7248#--fp2,fp3 saved on the stack. fp0 is N, fp1 is X,
7249#--a1 points to 2^(J/64), d1 is biased expo. of 2^(M)
7250        fmov.x          %fp0,%fp2
7251        fmul.s          &0xBC317218,%fp0        # N * L1, L1 = lead(-log2/64)
7252        fmul.x          L2(%pc),%fp2            # N * L2, L1+L2 = -log2/64
7253        fadd.x          %fp1,%fp0               # X + N*L1
7254        fadd.x          %fp2,%fp0               # fp0 is R, reduced arg.
7255
7256#--Step 4.
7257#--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
7258#-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5))))
7259#--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
7260#--[R+R*S*(A2+S*A4)] + [S*(A1+S*(A3+S*A5))]
7261
7262        fmov.x          %fp0,%fp1
7263        fmul.x          %fp1,%fp1               # fp1 IS S = R*R
7264
7265        fmov.s          &0x3AB60B70,%fp2        # fp2 IS A5
7266
7267        fmul.x          %fp1,%fp2               # fp2 IS S*A5
7268        fmov.x          %fp1,%fp3
7269        fmul.s          &0x3C088895,%fp3        # fp3 IS S*A4
7270
7271        fadd.d          EEXPA3(%pc),%fp2        # fp2 IS A3+S*A5
7272        fadd.d          EEXPA2(%pc),%fp3        # fp3 IS A2+S*A4
7273
7274        fmul.x          %fp1,%fp2               # fp2 IS S*(A3+S*A5)
7275        mov.w           %d1,SCALE(%a6)          # SCALE is 2^(M) in extended
7276        mov.l           &0x80000000,SCALE+4(%a6) # mantissa = 1.0 (integer bit only)
7277        clr.l           SCALE+8(%a6)            # low mantissa longword = 0
7278
7279        fmul.x          %fp1,%fp3               # fp3 IS S*(A2+S*A4)
7280
7281        fadd.s          &0x3F000000,%fp2        # fp2 IS A1+S*(A3+S*A5), A1 = 1/2
7282        fmul.x          %fp0,%fp3               # fp3 IS R*S*(A2+S*A4)
7283
7284        fmul.x          %fp1,%fp2               # fp2 IS S*(A1+S*(A3+S*A5))
7285        fadd.x          %fp3,%fp0               # fp0 IS R+R*S*(A2+S*A4)
7286
7287        fmov.x          (%a1)+,%fp1             # fp1 is lead. pt. T of 2^(J/64); a1 -> t
7288        fadd.x          %fp2,%fp0               # fp0 is EXP(R) - 1
7289
7290#--Step 5
7291#--final reconstruction process
7292#--EXP(X) = 2^M * ( 2^(J/64) + 2^(J/64)*(EXP(R)-1) )
7293
7294        fmul.x          %fp1,%fp0               # 2^(J/64)*(Exp(R)-1)
7295        fmovm.x         (%sp)+,&0x30            # restore fp2/fp3 {%fp2/%fp3}
7296        fadd.s          (%a1),%fp0              # add t: accurate 2^(J/64)
7297
7298        fadd.x          %fp1,%fp0               # 2^(J/64) + 2^(J/64)*...
7299        mov.l           ADJFLAG(%a6),%d1
7300
7301#--Step 6
7302        tst.l           %d1                     # AdjFlag = 0?
7303        beq.b           NORMAL                  # yes: skip the AdjScale multiply
7304ADJUST:
7305        fmul.x          ADJSCALE(%a6),%fp0      # ans := ans * 2^(M1)
7306NORMAL:
7307        fmov.l          %d0,%fpcr               # restore user FPCR
7308        mov.b           &FMUL_OP,%d1            # last inst is MUL
7309        fmul.x          SCALE(%a6),%fp0         # multiply 2^(M)
7310        bra             t_catch
7311
7312EXPSM:
7313#--Step 7
7314        fmovm.x         (%a0),&0x80             # load X into fp0 (fmovm, see Step 7 notes)
7315        fmov.l          %d0,%fpcr               # restore user FPCR
7316        fadd.s          &0x3F800000,%fp0        # 1+X in user mode
7317        bra             t_pinx2
7318
7319EEXPBIG:
7320#--Step 8
7321        cmp.l           %d1,&0x400CB27C         # 16480 log2
7322        bgt.b           EXP2BIG                 # |X| > 16480 log2: Step 9
7323#--Steps 8.2 -- 8.6
7324        fmov.x          (%a0),%fp0              # load input from (a0)
7325
7326        fmov.x          %fp0,%fp1               # fp1 keeps X
7327        fmul.s          &0x42B8AA3B,%fp0        # 64/log2 * X
7328        fmovm.x         &0xc,-(%sp)             # save fp2/fp3 {%fp2/%fp3}
7329        mov.l           &1,ADJFLAG(%a6)         # AdjFlag := 1
7330        fmov.l          %fp0,%d1                # N = int( X * 64/log2 )
7331        lea             EEXPTBL(%pc),%a1        # a1 = base of 2^(J/64) table
7332        fmov.l          %d1,%fp0                # convert to floating-format
7333        mov.l           %d1,L_SCR1(%a6)         # save N temporarily
7334        and.l           &0x3F,%d1               # d1 is J = N mod 64
7335        lsl.l           &4,%d1                  # J * 16 = byte offset of entry
7336        add.l           %d1,%a1                 # address of 2^(J/64)
7337        mov.l           L_SCR1(%a6),%d1         # reload N
7338        asr.l           &6,%d1                  # d1 is K
7339        mov.l           %d1,L_SCR1(%a6)         # save K temporarily
7340        asr.l           &1,%d1                  # d1 is M1 (K shifted right by 1)
7341        sub.l           %d1,L_SCR1(%a6)         # L_SCR1 is M = K - M1
7342        add.w           &0x3FFF,%d1             # biased expo. of 2^(M1)
7343        mov.w           %d1,ADJSCALE(%a6)       # ADJSCALE := 2^(M1)
7344        mov.l           &0x80000000,ADJSCALE+4(%a6) # mantissa = 1.0 (integer bit only)
7345        clr.l           ADJSCALE+8(%a6)         # low mantissa longword = 0
7346        mov.l           L_SCR1(%a6),%d1         # d1 is M
7347        add.w           &0x3FFF,%d1             # biased expo. of 2^(M)
7348        bra.w           EXPCONT1                # go back to Step 3
7349
7350EXP2BIG:
7351#--Step 9
7352        tst.b           (%a0)                   # is X positive or negative?
7353        bmi             t_unfl2                 # X < 0
7354        bra             t_ovfl2                 # X > 0
7355
#-----------------------------------------------------------------------
# setoxd: exp(X) for a denormalized X.
#
# In:    a0 = pointer to X (only its first longword, holding the sign,
#             is read)
#        d0 = user FPCR value
# Out:   fp0 = 1 + sign(X)*2^(-126), added under the user FPCR;
#        exits by branching to t_pinx2 (defined outside this section).
#
# The sum is formed from 1.0 and a single-precision value with X's sign
# and magnitude 2^(-126), built in a temporary longword on the stack.
#-----------------------------------------------------------------------
7356        global          setoxd
7357setoxd:
7358#--entry point for EXP(X), X is denormalized
7359        mov.l           (%a0),-(%sp)            # push first longword of X
7360        andi.l          &0x80000000,(%sp)       # keep only the sign bit
7361        ori.l           &0x00800000,(%sp)       # sign(X)*2^(-126)
7362
7363        fmov.s          &0x3F800000,%fp0        # fp0 = 1.0, loaded before FPCR is restored
7364
7365        fmov.l          %d0,%fpcr               # restore user FPCR
7366        fadd.s          (%sp)+,%fp0             # 1 + sign(X)*2^(-126); pops the temporary
7367        bra             t_pinx2
7368
7369        global          setoxm1
7370setoxm1:
7371#--entry point for EXPM1(X), here X is finite, non-zero, non-NaN
7372
7373#--Step 1.
7374#--Step 1.1
7375        mov.l           (%a0),%d1               # load part of input X
7376        and.l           &0x7FFF0000,%d1         # biased expo. of X
7377        cmp.l           %d1,&0x3FFD0000         # 1/4
7378        bge.b           EM1CON1                 # |X| >= 1/4
7379        bra             EM1SM
7380
7381EM1CON1:
7382#--Step 1.3
7383#--The case |X| >= 1/4
7384        mov.w           4(%a0),%d1              # expo. and partial sig. of |X|
7385        cmp.l           %d1,&0x4004C215         # 70log2 rounded up to 16 bits
7386        ble.b           EM1MAIN                 # 1/4 <= |X| <= 70log2
7387        bra             EM1BIG
7388
7389EM1MAIN:
7390#--Step 2.
7391#--This is the case:    1/4 <= |X| <= 70 log2.
7392        fmov.x          (%a0),%fp0              # load input from (a0)
7393
7394        fmov.x          %fp0,%fp1
7395        fmul.s          &0x42B8AA3B,%fp0        # 64/log2 * X
7396        fmovm.x         &0xc,-(%sp)             # save fp2 {%fp2/%fp3}
7397        fmov.l          %fp0,%d1                # N = int( X * 64/log2 )
7398        lea             EEXPTBL(%pc),%a1
7399        fmov.l          %d1,%fp0                # convert to floating-format
7400
7401        mov.l           %d1,L_SCR1(%a6)         # save N temporarily
7402        and.l           &0x3F,%d1               # D0 is J = N mod 64
7403        lsl.l           &4,%d1
7404        add.l           %d1,%a1                 # address of 2^(J/64)
7405        mov.l           L_SCR1(%a6),%d1
7406        asr.l           &6,%d1                  # D0 is M
7407        mov.l           %d1,L_SCR1(%a6)         # save a copy of M
7408
7409#--Step 3.
7410#--fp1,fp2 saved on the stack. fp0 is N, fp1 is X,
7411#--a0 points to 2^(J/64), D0 and a1 both contain M
7412        fmov.x          %fp0,%fp2
7413        fmul.s          &0xBC317218,%fp0        # N * L1, L1 = lead(-log2/64)
7414        fmul.x          L2(%pc),%fp2            # N * L2, L1+L2 = -log2/64
7415        fadd.x          %fp1,%fp0               # X + N*L1
7416        fadd.x          %fp2,%fp0               # fp0 is R, reduced arg.
7417        add.w           &0x3FFF,%d1             # d1 is biased expo. of 2^M
7418
7419#--Step 4.  (state here: fp0 = R, d1 = biased exponent of 2^M)
7420#--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
7421#-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*(A5 + R*A6)))))
7422#--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
7423#--[R*S*(A2+S*(A4+S*A6))] + [R+S*(A1+S*(A3+S*A5))]
7424
7425        fmov.x          %fp0,%fp1               # fp1 = R (squared on the next line)
7426        fmul.x          %fp1,%fp1               # fp1 IS S = R*R
7427
7428        fmov.s          &0x3950097B,%fp2        # fp2 IS A6 (single-precision immediate)
7429
7430        fmul.x          %fp1,%fp2               # fp2 IS S*A6
7431        fmov.x          %fp1,%fp3               # fp3 = S
7432        fmul.s          &0x3AB60B6A,%fp3        # fp3 IS S*A5 (A5 is a single immediate)
7433
7434        fadd.d          EM1A4(%pc),%fp2         # fp2 IS A4+S*A6
7435        fadd.d          EM1A3(%pc),%fp3         # fp3 IS A3+S*A5
7436        mov.w           %d1,SC(%a6)             # SC exponent word = biased expo. of 2^M
7437        mov.l           &0x80000000,SC+4(%a6)   # SC mantissa hi = 1.0 (only the top bit set)
7438        clr.l           SC+8(%a6)               # SC mantissa lo = 0; SC is 2^(M) in extended
7439
7440        fmul.x          %fp1,%fp2               # fp2 IS S*(A4+S*A6)
7441        mov.l           L_SCR1(%a6),%d1         # d1 is M (reloaded from L_SCR1)
7442        neg.w           %d1                     # d1 is -M
7443        fmul.x          %fp1,%fp3               # fp3 IS S*(A3+S*A5)
7444        add.w           &0x3FFF,%d1             # biased expo. of 2^(-M)
7445        fadd.d          EM1A2(%pc),%fp2         # fp2 IS A2+S*(A4+S*A6)
7446        fadd.s          &0x3F000000,%fp3        # fp3 IS A1+S*(A3+S*A5); A1 = 0.5
7447
7448        fmul.x          %fp1,%fp2               # fp2 IS S*(A2+S*(A4+S*A6))
7449        or.w            &0x8000,%d1             # signed/expo. of -2^(-M)
7450        mov.w           %d1,ONEBYSC(%a6)        # OnebySc is -2^(-M): sign/exponent word
7451        mov.l           &0x80000000,ONEBYSC+4(%a6) # OnebySc mantissa hi = 1.0
7452        clr.l           ONEBYSC+8(%a6)          # OnebySc mantissa lo = 0
7453        fmul.x          %fp3,%fp1               # fp1 IS S*(A1+S*(A3+S*A5))
7454
7455        fmul.x          %fp0,%fp2               # fp2 IS R*S*(A2+S*(A4+S*A6))
7456        fadd.x          %fp1,%fp0               # fp0 IS R+S*(A1+S*(A3+S*A5))
7457
7458        fadd.x          %fp2,%fp0               # fp0 IS EXP(R)-1
7459
7460        fmovm.x         (%sp)+,&0x30            # fp2/fp3 restored {%fp2/%fp3}
7461
7462#--Step 5
7463#--Compute 2^(J/64)*p
7464
7465        fmul.x          (%a1),%fp0              # p := 2^(J/64)*(Exp(R)-1); (a1) holds T
7466
7467#--Step 6
7468#--Step 6.1     select the summation order from the size of M
7469        mov.l           L_SCR1(%a6),%d1         # retrieve M
7470        cmp.l           %d1,&63                 # M <= 63 ?
7471        ble.b           MLE63                   # yes
7472#--Step 6.2     M >= 64
7473        fmov.s          12(%a1),%fp1            # fp1 is t (single stored 12 bytes after T)
7474        fadd.x          ONEBYSC(%a6),%fp1       # fp1 is t+OnebySc
7475        fadd.x          %fp1,%fp0               # p+(t+OnebySc), fp1 released
7476        fadd.x          (%a1),%fp0              # T+(p+(t+OnebySc))
7477        bra             EM1SCALE
7478MLE63:
7479#--Step 6.3     M <= 63
7480        cmp.l           %d1,&-3                 # M >= -3 ?
7481        bge.b           MGEN3                   # yes
7482MLTN3:
7483#--Step 6.4     M <= -4
7484        fadd.s          12(%a1),%fp0            # p+t
7485        fadd.x          (%a1),%fp0              # T+(p+t)
7486        fadd.x          ONEBYSC(%a6),%fp0       # OnebySc + (T+(p+t))
7487        bra             EM1SCALE
7488MGEN3:
7489#--Step 6.5     -3 <= M <= 63
7490        fmov.x          (%a1)+,%fp1             # fp1 is T; a1 now points at t
7491        fadd.s          (%a1),%fp0              # fp0 is p+t
7492        fadd.x          ONEBYSC(%a6),%fp1       # fp1 is T+OnebySc
7493        fadd.x          %fp1,%fp0               # (T+OnebySc)+(p+t)
7494
7495EM1SCALE:
7496#--Step 6.6     scale by 2^M; all three Step 6 paths join here
7497        fmov.l          %d0,%fpcr               # fpcr := d0 (presumably caller's round prec,mode)
7498        fmul.x          SC(%a6),%fp0            # multiply by SC = 2^M
7499        bra             t_inx2                  # exit through t_inx2 (defined elsewhere)
7500
7501EM1SM:
7502#--Step 7       |X| < 1/4.
7503        cmp.l           %d1,&0x3FBE0000         # 2^(-65): upper word 0x3FBE is its biased exponent
7504        bge.b           EM1POLY                 # |X| >= 2^(-65): use the polynomial
7505
7506EM1TINY:
7507#--Step 8       |X| < 2^(-65)
7508        cmp.l           %d1,&0x00330000         # exponent word 0x0033 = 2^(-16332); NOTE(review): comment previously read 2^(-16312) - confirm intended threshold
7509        blt.b           EM12TINY                # below threshold: use the scaled path
7510#--Step 8.2     result is X + (-2^(-16382)), rounded per d0
7511        mov.l           &0x80010000,SC(%a6)     # SC is -2^(-16382)
7512        mov.l           &0x80000000,SC+4(%a6)   # SC mantissa hi = 1.0
7513        clr.l           SC+8(%a6)               # SC mantissa lo = 0
7514        fmov.x          (%a0),%fp0              # fp0 is X
7515        fmov.l          %d0,%fpcr               # fpcr := d0 (presumably caller's round prec,mode)
7516        mov.b           &FADD_OP,%d1            # last inst is ADD
7517        fadd.x          SC(%a6),%fp0            # X - 2^(-16382)
7518        bra             t_catch                 # exit through t_catch (defined elsewhere)
7519
7520EM12TINY:
7521#--Step 8.3     scale X up, add the tiny constant, then scale back down
7522        fmov.x          (%a0),%fp0              # fp0 is X
7523        fmul.d          TWO140(%pc),%fp0        # scale up by TWO140 (presumably 2^140; defined elsewhere)
7524        mov.l           &0x80010000,SC(%a6)     # SC sign/exponent: negative, biased exponent 1
7525        mov.l           &0x80000000,SC+4(%a6)   # SC mantissa hi = 1.0
7526        clr.l           SC+8(%a6)               # SC mantissa lo = 0; SC is -2^(-16382)
7527        fadd.x          SC(%a6),%fp0            # add SC to the scaled X
7528        fmov.l          %d0,%fpcr               # fpcr := d0 just before the final op
7529        mov.b           &FMUL_OP,%d1            # last inst is MUL
7530        fmul.d          TWON140(%pc),%fp0       # scale back by TWON140 (presumably 2^(-140))
7531        bra             t_catch                 # exit through t_catch (defined elsewhere)
7532
7533EM1POLY:
7534#--Step 9       exp(X)-1 by a simple polynomial
7535        fmov.x          (%a0),%fp0              # fp0 is X
7536        fmul.x          %fp0,%fp0               # fp0 is S := X*X
7537        fmovm.x         &0xc,-(%sp)             # save fp2/fp3 {%fp2/%fp3}
7538        fmov.s          &0x2F30CAA8,%fp1        # fp1 is B12
7539        fmul.x          %fp0,%fp1               # fp1 is S*B12
7540        fmov.s          &0x310F8290,%fp2        # fp2 is B11
7541        fadd.s          &0x32D73220,%fp1        # fp1 is B10+S*B12
7542
7543        fmul.x          %fp0,%fp2               # fp2 is S*B11
7544        fmul.x          %fp0,%fp1               # fp1 is S*(B10 + ...
7545
7546        fadd.s          &0x3493F281,%fp2        # fp2 is B9+S*...
7547        fadd.d          EM1B8(%pc),%fp1         # fp1 is B8+S*...
7548
7549        fmul.x          %fp0,%fp2               # fp2 is S*(B9+...
7550        fmul.x          %fp0,%fp1               # fp1 is S*(B8+...
7551
7552        fadd.d          EM1B7(%pc),%fp2         # fp2 is B7+S*...
7553        fadd.d          EM1B6(%pc),%fp1         # fp1 is B6+S*...
7554
7555        fmul.x          %fp0,%fp2               # fp2 is S*(B7+...
7556        fmul.x          %fp0,%fp1               # fp1 is S*(B6+...
7557
7558        fadd.d          EM1B5(%pc),%fp2         # fp2 is B5+S*...
7559        fadd.d          EM1B4(%pc),%fp1         # fp1 is B4+S*...
7560
7561        fmul.x          %fp0,%fp2               # fp2 is S*(B5+...
7562        fmul.x          %fp0,%fp1               # fp1 is S*(B4+...
7563
7564        fadd.d          EM1B3(%pc),%fp2         # fp2 is B3+S*...
7565        fadd.x          EM1B2(%pc),%fp1         # fp1 is B2+S*... (B2 is read as extended)
7566
7567        fmul.x          %fp0,%fp2               # fp2 is S*(B3+...
7568        fmul.x          %fp0,%fp1               # fp1 is S*(B2+...
7569
7570        fmul.x          %fp0,%fp2               # fp2 is S*S*(B3+...)
7571        fmul.x          (%a0),%fp1              # fp1 is X*S*(B2...
7572
7573        fmul.s          &0x3F000000,%fp0        # fp0 is S*B1 (B1 = 0.5)
7574        fadd.x          %fp2,%fp1               # fp1 is Q
7575
7576        fmovm.x         (%sp)+,&0x30            # fp2/fp3 restored {%fp2/%fp3}
7577
7578        fadd.x          %fp1,%fp0               # fp0 is S*B1+Q
7579
7580        fmov.l          %d0,%fpcr               # fpcr := d0 just before the final op
7581        fadd.x          (%a0),%fp0              # fp0 is X + (S*B1+Q)
7582        bra             t_inx2                  # exit through t_inx2 (defined elsewhere)
7583
7584EM1BIG:
7585#--Step 10      |X| > 70 log2
7586        mov.l           (%a0),%d1               # d1 = first longword of X (sign in the top bit)
7587        cmp.l           %d1,&0                  # is X positive?
7588        bgt.w           EXPC1                   # yes: continue at EXPC1 (defined elsewhere)
7589#--Step 10.2    X is negative: result is -1 + 2^(-126), rounded per d0
7590        fmov.s          &0xBF800000,%fp0        # fp0 is -1
7591        fmov.l          %d0,%fpcr               # fpcr := d0 just before the final op
7592        fadd.s          &0x00800000,%fp0        # -1 + 2^(-126)
7593        bra             t_minx2                 # exit through t_minx2 (defined elsewhere)
7594
7595        global          setoxm1d
7596setoxm1d:
7597#--entry point for EXPM1(X), here X is denormalized
7598#--Step 0.       no computation here; the result comes from t_extdnrm
7599        bra             t_extdnrm               # shared denormalized-input exit (defined elsewhere)
7600
7601#########################################################################
7602# sgetexp():  returns the exponent portion of the input argument.       #
7603#             The exponent bias is removed and the exponent value is    #
7604#             returned as an extended precision number in fp0.          #
7605# sgetexpd(): handles denormalized numbers.                             #
7606#                                                                       #
7607# sgetman():  extracts the mantissa of the input argument. The          #
7608#             mantissa is converted to an extended precision number w/  #
7609#             an exponent of $3fff and is returned in fp0. The range of #
7610#             the result is [1.0 - 2.0).                                #
7611# sgetmand(): handles denormalized numbers.                             #
7612#                                                                       #
7613# INPUT *************************************************************** #
7614#       a0  = pointer to extended precision input                       #
7615#                                                                       #
7616# OUTPUT ************************************************************** #
7617#       fp0 = exponent(X) or mantissa(X)                                #
7618#                                                                       #
7619#########################################################################
7620
7621        global          sgetexp
7622sgetexp:
7623        mov.w           SRC_EX(%a0),%d0         # get the sign/exponent word
7624        bclr            &0xf,%d0                # clear the sign bit
7625        subi.w          &0x3fff,%d0             # subtract off the bias
7626        fmov.w          %d0,%fp0                # return exp in fp0
7627        blt.b           sgetexpn                # it's negative (tests the flags left by subi.w)
7628        rts
7629
7630sgetexpn:
7631        mov.b           &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
7632        rts
7633
7634        global          sgetexpd
7635sgetexpd:
7636        bsr.l           norm                    # normalize; d0 is then used as the shift amount
7637        neg.w           %d0                     # new exp = -(shft amt)
7638        subi.w          &0x3fff,%d0             # subtract off the bias
7639        fmov.w          %d0,%fp0                # return exp in fp0
7640        mov.b           &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit (done unconditionally on this path)
7641        rts
7642
7643        global          sgetman
7644sgetman:
7645        mov.w           SRC_EX(%a0),%d0         # get the sign/exponent word
7646        ori.w           &0x7fff,%d0             # overwrite old exp with all ones (sign bit kept)
7647        bclr            &0xe,%d0                # make it the new exp +-3fff
7648
7649# here, we build the result in a tmp location so as not to disturb the input
7650        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6) # copy to tmp loc
7651        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6) # copy to tmp loc
7652        mov.w           %d0,FP_SCR0_EX(%a6)     # insert new exponent
7653        fmov.x          FP_SCR0(%a6),%fp0       # put new value back in fp0
7654        bmi.b           sgetmann                # it's negative (N flag comes from the mov.w of d0 above)
7655        rts
7656
7657sgetmann:
7658        mov.b           &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
7659        rts
7660
7661#
7662# For denormalized numbers, shift the mantissa until the j-bit = 1,
7663# then load the exponent with +/- $3fff.
7664#
7665        global          sgetmand
7666sgetmand:
7667        bsr.l           norm                    # normalize the mantissa (norm is defined elsewhere)
7668        bra.b           sgetman                 # finish through the normalized-input path
7669
7670#########################################################################
7671# scosh():  computes the hyperbolic cosine of a normalized input        #
7672# scoshd(): computes the hyperbolic cosine of a denormalized input      #
7673#                                                                       #
7674# INPUT *************************************************************** #
7675#       a0 = pointer to extended precision input                        #
7676#       d0 = round precision,mode                                       #
7677#                                                                       #
7678# OUTPUT ************************************************************** #
7679#       fp0 = cosh(X)                                                   #
7680#                                                                       #
7681# ACCURACY and MONOTONICITY ******************************************* #
7682#       The returned result is within 3 ulps in 64 significant bit,     #
7683#       i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
7684#       rounded to double precision. The result is provably monotonic   #
7685#       in double precision.                                            #
7686#                                                                       #
7687# ALGORITHM *********************************************************** #
7688#                                                                       #
7689#       COSH                                                            #
7690#       1. If |X| > 16380 log2, go to 3.                                #
7691#                                                                       #
7692#       2. (|X| <= 16380 log2) Cosh(X) is obtained by the formulae      #
7693#               y = |X|, z = exp(Y), and                                #
7694#               cosh(X) = (1/2)*( z + 1/z ).                            #
7695#               Exit.                                                   #
7696#                                                                       #
7697#       3. (|X| > 16380 log2). If |X| > 16480 log2, go to 5.            #
7698#                                                                       #
7699#       4. (16380 log2 < |X| <= 16480 log2)                             #
7700#               cosh(X) = exp(|X|)/2.                                   #
7701#               However, invoking exp(|X|) may cause premature          #
7702#               overflow. Thus, we calculate cosh(X) as follows:        #
7703#               Y       := |X|                                          #
7704#               Fact    :=      2**(16380)                              #
7705#               Y'      := Y - 16381 log2                               #
7706#               cosh(X) := Fact * exp(Y').                              #
7707#               Exit.                                                   #
7708#                                                                       #
7709#       5. (|X| > 16480 log2) cosh(X) must overflow. Return             #
7710#               Huge*Huge to generate overflow and an infinity with     #
7711#               the appropriate sign. Huge is the largest finite number #
7712#               in extended format. Exit.                               #
7713#                                                                       #
7714#########################################################################
7715
7716TWO16380:
7717        long            0x7FFB0000,0x80000000,0x00000000,0x00000000 # 2^16380 in extended format (exp 0x7FFB, mantissa 1.0), plus one zero longword
7718
7719        global          scosh
7720scosh:
7721        fmov.x          (%a0),%fp0              # LOAD INPUT
7722
7723        mov.l           (%a0),%d1               # d1 upper word = sign/exponent of X
7724        mov.w           4(%a0),%d1              # d1 lower word = top 16 bits of the mantissa
7725        and.l           &0x7FFFFFFF,%d1         # drop the sign: d1 is compacted |X|
7726        cmp.l           %d1,&0x400CB167         # |X| > 16380 log2 ?
7727        bgt.b           COSHBIG                 # yes: large-argument path
7728
7729#--THIS IS THE USUAL CASE, |X| < 16380 LOG2
7730#--COSH(X) = (1/2) * ( EXP(X) + 1/EXP(X) )
7731
7732        fabs.x          %fp0                    # |X|
7733
7734        mov.l           %d0,-(%sp)              # save caller's round prec,mode
7735        clr.l           %d0                     # d0 = 0 for the inner setox call
7736        fmovm.x         &0x01,-(%sp)            # save |X| to stack
7737        lea             (%sp),%a0               # pass ptr to |X|
7738        bsr             setox                   # FP0 IS EXP(|X|)
7739        add.l           &0xc,%sp                # erase |X| from stack
7740        fmul.s          &0x3F000000,%fp0        # (1/2)EXP(|X|)
7741        mov.l           (%sp)+,%d0              # recover caller's round prec,mode
7742
7743        fmov.s          &0x3E800000,%fp1        # (1/4)
7744        fdiv.x          %fp0,%fp1               # 1/(2 EXP(|X|))
7745
7746        fmov.l          %d0,%fpcr               # fpcr := caller's round prec,mode for the final op
7747        mov.b           &FADD_OP,%d1            # last inst is ADD
7748        fadd.x          %fp1,%fp0               # (1/2)EXP(|X|) + 1/(2 EXP(|X|))
7749        bra             t_catch                 # exit through t_catch (defined elsewhere)
7750
7751COSHBIG:
7752        cmp.l           %d1,&0x400CB2B3         # |X| > 16480 log2 ?
7753        bgt.b           COSHHUGE                # yes: result must overflow
7754
7755        fabs.x          %fp0                    # |X|
7756        fsub.d          T1(%pc),%fp0            # (|X|-16381LOG2_LEAD)
7757        fsub.d          T2(%pc),%fp0            # |X| - 16381 LOG2, ACCURATE
7758
7759        mov.l           %d0,-(%sp)              # save caller's round prec,mode
7760        clr.l           %d0                     # d0 = 0 for the inner setox call
7761        fmovm.x         &0x01,-(%sp)            # save fp0 to stack
7762        lea             (%sp),%a0               # pass ptr to fp0
7763        bsr             setox                   # fp0 is exp(|X| - 16381 log2)
7764        add.l           &0xc,%sp                # clear fp0 from stack
7765        mov.l           (%sp)+,%d0              # recover caller's round prec,mode
7766
7767        fmov.l          %d0,%fpcr               # fpcr := caller's round prec,mode for the final op
7768        mov.b           &FMUL_OP,%d1            # last inst is MUL
7769        fmul.x          TWO16380(%pc),%fp0      # times 2^16380, giving exp(|X|)/2
7770        bra             t_catch                 # exit through t_catch (defined elsewhere)
7771
7772COSHHUGE:
7773        bra             t_ovfl2                 # |X| > 16480 log2: overflow exit (t_ovfl2 is defined elsewhere)
7774
7775        global          scoshd
7776#--COSH(X) = 1 FOR DENORMALIZED X
7777scoshd:
7778        fmov.s          &0x3F800000,%fp0        # fp0 is 1.0
7779
7780        fmov.l          %d0,%fpcr               # fpcr := caller's round prec,mode for the final op
7781        fadd.s          &0x00800000,%fp0        # 1 + 2^(-126)
7782        bra             t_pinx2                 # exit through t_pinx2 (defined elsewhere)
7783
7784#########################################################################
7785# ssinh():  computes the hyperbolic sine of a normalized input          #
7786# ssinhd(): computes the hyperbolic sine of a denormalized input        #
7787#                                                                       #
7788# INPUT *************************************************************** #
7789#       a0 = pointer to extended precision input                        #
7790#       d0 = round precision,mode                                       #
7791#                                                                       #
7792# OUTPUT ************************************************************** #
7793#       fp0 = sinh(X)                                                   #
7794#                                                                       #
7795# ACCURACY and MONOTONICITY ******************************************* #
7796#       The returned result is within 3 ulps in 64 significant bit,     #
7797#       i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
7798#       rounded to double precision. The result is provably monotonic   #
7799#       in double precision.                                            #
7800#                                                                       #
7801# ALGORITHM *********************************************************** #
7802#                                                                       #
7803#       SINH                                                            #
7804#       1. If |X| > 16380 log2, go to 3.                                #
7805#                                                                       #
7806#       2. (|X| <= 16380 log2) Sinh(X) is obtained by the formula       #
7807#               y = |X|, sgn = sign(X), and z = expm1(Y),               #
7808#               sinh(X) = sgn*(1/2)*( z + z/(1+z) ).                    #
7809#          Exit.                                                        #
7810#                                                                       #
7811#       3. If |X| > 16480 log2, go to 5.                                #
7812#                                                                       #
7813#       4. (16380 log2 < |X| <= 16480 log2)                             #
7814#               sinh(X) = sign(X) * exp(|X|)/2.                         #
7815#          However, invoking exp(|X|) may cause premature overflow.     #
7816#          Thus, we calculate sinh(X) as follows:                       #
7817#             Y       := |X|                                            #
7818#             sgn     := sign(X)                                        #
7819#             sgnFact := sgn * 2**(16380)                               #
7820#             Y'      := Y - 16381 log2                                 #
7821#             sinh(X) := sgnFact * exp(Y').                             #
7822#          Exit.                                                        #
7823#                                                                       #
7824#       5. (|X| > 16480 log2) sinh(X) must overflow. Return             #
7825#          sign(X)*Huge*Huge to generate overflow and an infinity with  #
7826#          the appropriate sign. Huge is the largest finite number in   #
7827#          extended format. Exit.                                       #
7828#                                                                       #
7829#########################################################################
7830
7831        global          ssinh
7832ssinh:
7833        fmov.x          (%a0),%fp0              # LOAD INPUT
7834
7835        mov.l           (%a0),%d1               # d1 upper word = sign/exponent of X
7836        mov.w           4(%a0),%d1              # d1 lower word = top 16 bits of the mantissa
7837        mov.l           %d1,%a1                 # save (compacted) operand
7838        and.l           &0x7FFFFFFF,%d1         # drop the sign: d1 is compacted |X|
7839        cmp.l           %d1,&0x400CB167         # |X| > 16380 log2 ?
7840        bgt.b           SINHBIG                 # yes: large-argument path
7841
7842#--THIS IS THE USUAL CASE, |X| < 16380 LOG2
7843#--Y = |X|, Z = EXPM1(Y), SINH(X) = SIGN(X)*(1/2)*( Z + Z/(1+Z) )
7844
7845        fabs.x          %fp0                    # Y = |X|
7846
7847        movm.l          &0x8040,-(%sp)          # save a1 and d0 across the call {a1/d0}
7848        fmovm.x         &0x01,-(%sp)            # save Y on stack
7849        lea             (%sp),%a0               # pass ptr to Y
7850        clr.l           %d0                     # d0 = 0 for the inner setoxm1 call
7851        bsr             setoxm1                 # FP0 IS Z = EXPM1(Y)
7852        add.l           &0xc,%sp                # clear Y from stack
7853        fmov.l          &0,%fpcr                # fpcr = 0 for the intermediate arithmetic
7854        movm.l          (%sp)+,&0x0201          # restore a1 and d0 {a1/d0}
7855
7856        fmov.x          %fp0,%fp1               # fp1 = Z
7857        fadd.s          &0x3F800000,%fp1        # 1+Z
7858        fmov.x          %fp0,-(%sp)             # push Z (extended) for the add below
7859        fdiv.x          %fp1,%fp0               # Z/(1+Z)
7860        mov.l           %a1,%d1                 # d1 = saved compacted X
7861        and.l           &0x80000000,%d1         # keep only sign(X)
7862        or.l            &0x3F000000,%d1         # d1 = sign(X)*(1/2) in single format
7863        fadd.x          (%sp)+,%fp0             # Z + Z/(1+Z); pops Z
7864        mov.l           %d1,-(%sp)              # push sign(X)*(1/2) for the final multiply
7865
7866        fmov.l          %d0,%fpcr               # fpcr := caller's round prec,mode for the final op
7867        mov.b           &FMUL_OP,%d1            # last inst is MUL
7868        fmul.s          (%sp)+,%fp0             # last fp inst - possible exceptions set
7869        bra             t_catch                 # exit through t_catch (defined elsewhere)
7870
7871SINHBIG:
7872        cmp.l           %d1,&0x400CB2B3         # |X| > 16480 log2 ?
7873        bgt             t_ovfl                  # yes: overflow exit (t_ovfl is defined elsewhere)
7874        fabs.x          %fp0                    # |X|
7875        fsub.d          T1(%pc),%fp0            # (|X|-16381LOG2_LEAD)
7876        mov.l           &0,-(%sp)               # sgnFact mantissa lo = 0
7877        mov.l           &0x80000000,-(%sp)      # sgnFact mantissa hi = 1.0
7878        mov.l           %a1,%d1                 # d1 = saved compacted X
7879        and.l           &0x80000000,%d1         # keep only sign(X)
7880        or.l            &0x7FFB0000,%d1         # sign(X) with the exponent of 2^16380
7881        mov.l           %d1,-(%sp)              # EXTENDED FMT: sgnFact = sign(X)*2^16380 now on stack
7882        fsub.d          T2(%pc),%fp0            # |X| - 16381 LOG2, ACCURATE
7883
7884        mov.l           %d0,-(%sp)              # save caller's round prec,mode
7885        clr.l           %d0                     # d0 = 0 for the inner setox call
7886        fmovm.x         &0x01,-(%sp)            # save fp0 on stack
7887        lea             (%sp),%a0               # pass ptr to fp0
7888        bsr             setox                   # fp0 is exp(|X| - 16381 log2)
7889        add.l           &0xc,%sp                # clear fp0 from stack
7890
7891        mov.l           (%sp)+,%d0              # recover caller's round prec,mode
7892        fmov.l          %d0,%fpcr               # fpcr := caller's round prec,mode for the final op
7893        mov.b           &FMUL_OP,%d1            # last inst is MUL
7894        fmul.x          (%sp)+,%fp0             # possible exception; multiplies by sgnFact and pops it
7895        bra             t_catch                 # exit through t_catch (defined elsewhere)
7896
7897        global          ssinhd
7898#--SINH(X) = X FOR DENORMALIZED X
7899ssinhd:
7900        bra             t_extdnrm               # shared denormalized-input exit (defined elsewhere)
7901
7902#########################################################################
7903# stanh():  computes the hyperbolic tangent of a normalized input       #
7904# stanhd(): computes the hyperbolic tangent of a denormalized input     #
7905#                                                                       #
7906# INPUT *************************************************************** #
7907#       a0 = pointer to extended precision input                        #
7908#       d0 = round precision,mode                                       #
7909#                                                                       #
7910# OUTPUT ************************************************************** #
7911#       fp0 = tanh(X)                                                   #
7912#                                                                       #
7913# ACCURACY and MONOTONICITY ******************************************* #
7914#       The returned result is within 3 ulps in 64 significant bit,     #
7915#       i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
7916#       rounded to double precision. The result is provably monotonic   #
7917#       in double precision.                                            #
7918#                                                                       #
7919# ALGORITHM *********************************************************** #
7920#                                                                       #
7921#       TANH                                                            #
7922#       1. If |X| >= (5/2) log2 or |X| <= 2**(-40), go to 3.            #
7923#                                                                       #
7924#       2. (2**(-40) < |X| < (5/2) log2) Calculate tanh(X) by           #
7925#               sgn := sign(X), y := 2|X|, z := expm1(Y), and           #
7926#               tanh(X) = sgn*( z/(2+z) ).                              #
7927#               Exit.                                                   #
7928#                                                                       #
7929#       3. (|X| <= 2**(-40) or |X| >= (5/2) log2). If |X| < 1,          #
7930#               go to 7.                                                #
7931#                                                                       #
7932#       4. (|X| >= (5/2) log2) If |X| >= 50 log2, go to 6.              #
7933#                                                                       #
7934#       5. ((5/2) log2 <= |X| < 50 log2) Calculate tanh(X) by           #
7935#               sgn := sign(X), y := 2|X|, z := exp(Y),                 #
7936#               tanh(X) = sgn - [ sgn*2/(1+z) ].                        #
7937#               Exit.                                                   #
7938#                                                                       #
7939#       6. (|X| >= 50 log2) Tanh(X) = +-1 (round to nearest). Thus, we  #
7940#               calculate Tanh(X) by                                    #
7941#               sgn := sign(X), Tiny := 2**(-126),                      #
7942#               tanh(X) := sgn - sgn*Tiny.                              #
7943#               Exit.                                                   #
7944#                                                                       #
7945#       7. (|X| < 2**(-40)). Tanh(X) = X.       Exit.                   #
7946#                                                                       #
7947#########################################################################
7948
7949        set             X,FP_SCR0               # X: scratch copy of the operand, kept in FP_SCR0
7950        set             XFRAC,X+4               # XFRAC: the longword at offset 4 within X
7951
7952        set             SGN,L_SCR3              # SGN: sign longword, kept in L_SCR3
7953
7954        set             V,FP_SCR0               # V: shares the FP_SCR0 slot with X
7955
7956        global          stanh
7957stanh:
7958        fmov.x          (%a0),%fp0              # LOAD INPUT
7959
7960        fmov.x          %fp0,X(%a6)             # keep a copy of X in scratch
7961        mov.l           (%a0),%d1               # d1 upper word = sign/exponent of X
7962        mov.w           4(%a0),%d1              # d1 lower word = top 16 bits of the mantissa
7963        mov.l           %d1,X(%a6)              # first longword of X := compacted form
7964        and.l           &0x7FFFFFFF,%d1         # drop the sign: d1 is compacted |X|
7965        cmp.l           %d1, &0x3fd78000        # is |X| < 2^(-40)?
7966        blt.w           TANHBORS                # yes
7967        cmp.l           %d1, &0x3fffddce        # is |X| > (5/2)LOG2?
7968        bgt.w           TANHBORS                # yes
7969
7970#--THIS IS THE USUAL CASE
7971#--Y = 2|X|, Z = EXPM1(Y), TANH(X) = SIGN(X) * Z / (Z+2).
7972
7973        mov.l           X(%a6),%d1              # d1 = compacted X (sign still present)
7974        mov.l           %d1,SGN(%a6)            # stash it; reduced to the sign bit below
7975        and.l           &0x7FFF0000,%d1         # keep only the exponent field
7976        add.l           &0x00010000,%d1         # EXPONENT OF 2|X|
7977        mov.l           %d1,X(%a6)              # X now holds 2|X| (sign cleared, exponent + 1)
7978        and.l           &0x80000000,SGN(%a6)    # SGN = sign bit of X only
7979        fmov.x          X(%a6),%fp0             # FP0 IS Y = 2|X|
7980
7981        mov.l           %d0,-(%sp)              # save caller's round prec,mode
7982        clr.l           %d0                     # d0 = 0 for the inner setoxm1 call
7983        fmovm.x         &0x1,-(%sp)             # save Y on stack
7984        lea             (%sp),%a0               # pass ptr to Y
7985        bsr             setoxm1                 # FP0 IS Z = EXPM1(Y)
7986        add.l           &0xc,%sp                # clear Y from stack
7987        mov.l           (%sp)+,%d0              # recover caller's round prec,mode
7988
7989        fmov.x          %fp0,%fp1               # fp1 = Z
7990        fadd.s          &0x40000000,%fp1        # Z+2
7991        mov.l           SGN(%a6),%d1            # d1 = sign bit of X
7992        fmov.x          %fp1,V(%a6)             # V = Z+2
7993        eor.l           %d1,V(%a6)              # V = SIGN(X)*(Z+2): sign bit flipped in memory
7994
7995        fmov.l          %d0,%fpcr               # restore users round prec,mode
7996        fdiv.x          V(%a6),%fp0             # Z / (SIGN(X)*(Z+2)) = SIGN(X)*Z/(Z+2)
7997        bra             t_inx2                  # exit through t_inx2 (defined elsewhere)
7998
7999TANHBORS:
8000        cmp.l           %d1,&0x3FFF8000         # is |X| < 1 ? (only the tiny inputs get here below 1)
8001        blt.w           TANHSM                  # yes: tanh(X) = X
8002
8003        cmp.l           %d1,&0x40048AA1         # is |X| > 50 log2 ?
8004        bgt.w           TANHHUGE                # yes: tanh(X) = +-1
8005
8006#-- (5/2) LOG2 < |X| < 50 LOG2,
8007#--TANH(X) = 1 - (2/[EXP(2X)+1]). LET Y = 2|X|, SGN = SIGN(X),
8008#--TANH(X) = SGN -      SGN*2/[EXP(Y)+1].
8009
8010        mov.l           X(%a6),%d1              # d1 = compacted X (sign still present)
8011        mov.l           %d1,SGN(%a6)            # stash it; reduced to the sign bit below
8012        and.l           &0x7FFF0000,%d1         # keep only the exponent field
8013        add.l           &0x00010000,%d1         # EXPO OF 2|X|
8014        mov.l           %d1,X(%a6)              # Y = 2|X|
8015        and.l           &0x80000000,SGN(%a6)    # SGN = sign bit of X only
8016        mov.l           SGN(%a6),%d1            # d1 = SGN (loaded again after the setox call)
8017        fmov.x          X(%a6),%fp0             # Y = 2|X|
8018
8019        mov.l           %d0,-(%sp)              # save caller's round prec,mode
8020        clr.l           %d0                     # d0 = 0 for the inner setox call
8021        fmovm.x         &0x01,-(%sp)            # save Y on stack
8022        lea             (%sp),%a0               # pass ptr to Y
8023        bsr             setox                   # FP0 IS EXP(Y)
8024        add.l           &0xc,%sp                # clear Y from stack
8025        mov.l           (%sp)+,%d0              # recover caller's round prec,mode
8026        mov.l           SGN(%a6),%d1            # d1 = sign bit of X
8027        fadd.s          &0x3F800000,%fp0        # EXP(Y)+1
8028
8029        eor.l           &0xC0000000,%d1         # -SIGN(X)*2
8030        fmov.s          %d1,%fp1                # -SIGN(X)*2 IN SGL FMT
8031        fdiv.x          %fp0,%fp1               # -SIGN(X)2 / [EXP(Y)+1 ]
8032
8033        mov.l           SGN(%a6),%d1            # d1 = sign bit of X
8034        or.l            &0x3F800000,%d1         # SGN
8035        fmov.s          %d1,%fp0                # SGN IN SGL FMT
8036
8037        fmov.l          %d0,%fpcr               # restore users round prec,mode
8038        mov.b           &FADD_OP,%d1            # last inst is ADD
8039        fadd.x          %fp1,%fp0               # SGN - SGN*2/[EXP(Y)+1]
8040        bra             t_inx2                  # exit through t_inx2 (defined elsewhere)
8041
8042TANHSM:
8043        fmov.l          %d0,%fpcr               # restore users round prec,mode
8044        mov.b           &FMOV_OP,%d1            # last inst is MOVE
8045        fmov.x          X(%a6),%fp0             # last inst - possible exception set
8046        bra             t_catch                 # exit through t_catch (defined elsewhere)
8047
8048#---RETURN SGN(X) - SGN(X)EPS
8049TANHHUGE:
8050        mov.l           X(%a6),%d1              # d1 = compacted X
8051        and.l           &0x80000000,%d1         # keep only sign(X)
8052        or.l            &0x3F800000,%d1         # d1 = SIGN(X)*1.0 in single format
8053        fmov.s          %d1,%fp0                # fp0 = SIGN(X)
8054        and.l           &0x80000000,%d1         # back to just the sign bit
8055        eor.l           &0x80800000,%d1         # -SIGN(X)*EPS
8056
8057        fmov.l          %d0,%fpcr               # restore users round prec,mode
8058        fadd.s          %d1,%fp0                # SIGN(X) - SIGN(X)*EPS, EPS = 2^(-126)
8059        bra             t_inx2                  # exit through t_inx2 (defined elsewhere)
8060
8061        global          stanhd
8062#--TANH(X) = X FOR DENORMALIZED X
8063stanhd:
8064        bra             t_extdnrm               # shared denormalized-input exit (defined elsewhere)
8065
8066#########################################################################
8067# slogn():    computes the natural logarithm of a normalized input      #
8068# slognd():   computes the natural logarithm of a denormalized input    #
8069# slognp1():  computes the log(1+X) of a normalized input               #
8070# slognp1d(): computes the log(1+X) of a denormalized input             #
8071#                                                                       #
8072# INPUT *************************************************************** #
8073#       a0 = pointer to extended precision input                        #
8074#       d0 = round precision,mode                                       #
8075#                                                                       #
8076# OUTPUT ************************************************************** #
8077#       fp0 = log(X) or log(1+X)                                        #
8078#                                                                       #
8079# ACCURACY and MONOTONICITY ******************************************* #
8080#       The returned result is within 2 ulps in 64 significant bit,     #
8081#       i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
8082#       rounded to double precision. The result is provably monotonic   #
8083#       in double precision.                                            #
8084#                                                                       #
8085# ALGORITHM *********************************************************** #
8086#       LOGN:                                                           #
8087#       Step 1. If |X-1| < 1/16, approximate log(X) by an odd           #
8088#               polynomial in u, where u = 2(X-1)/(X+1). Otherwise,     #
8089#               move on to Step 2.                                      #
8090#                                                                       #
8091#       Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first #
8092#               seven significant bits of Y plus 2**(-7), i.e.          #
8093#               F = 1.xxxxxx1 in base 2 where the six "x" match those   #
8094#               of Y. Note that |Y-F| <= 2**(-7).                       #
8095#                                                                       #
8096#       Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a           #
8097#               polynomial in u, log(1+u) = poly.                       #
8098#                                                                       #
8099#       Step 4. Reconstruct                                             #
8100#               log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u) #
8101#               by k*log(2) + (log(F) + poly). The values of log(F) are #
8102#               calculated beforehand and stored in the program.        #
8103#                                                                       #
8104#       lognp1:                                                         #
8105#       Step 1: If |X| < 1/16, approximate log(1+X) by an odd           #
8106#               polynomial in u where u = 2X/(2+X). Otherwise, move on  #
8107#               to Step 2.                                              #
8108#                                                                       #
8109#       Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done  #
8110#               in Step 2 of the algorithm for LOGN and compute         #
8111#               log(1+X) as k*log(2) + log(F) + poly where poly         #
8112#               approximates log(1+u), u = (Y-F)/F.                     #
8113#                                                                       #
8114#       Implementation Notes:                                           #
8115#       Note 1. There are 64 different possible values for F, thus 64   #
8116#               log(F)'s need to be tabulated. Moreover, the values of  #
8117#               1/F are also tabulated so that the division in (Y-F)/F  #
8118#               can be performed by a multiplication.                   #
8119#                                                                       #
8120#       Note 2. In Step 2 of lognp1, in order to preserve accuracy,     #
8121#               the value Y-F has to be calculated carefully when       #
8122#               1/2 <= X < 3/2.                                         #
8123#                                                                       #
8124#       Note 3. To fully exploit the pipeline, polynomials are usually  #
8125#               separated into two parts evaluated independently before #
8126#               being added up.                                         #
8127#                                                                       #
8128#########################################################################
8129LOGOF2:
# log(2) stored as an extended-precision value (12 bytes + one zero pad
# longword); LP1CONT1 multiplies it (fmul.x) by K to form K*log(2).
8130        long            0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
8131
# single-precision constants; the log routines read them with .s operand size
8132one:
8133        long            0x3F800000              # +1.0
8134zero:
8135        long            0x00000000              # +0.0
8136infty:
8137        long            0x7F800000              # +infinity
8138negone:
8139        long            0xBF800000              # -1.0
8140
# LOGA1..LOGA6: double-precision coefficients A1..A6 of the polynomial
# U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))), V = U*U, evaluated at LP1CONT1
# with fmul.d/fadd.d.
8141LOGA6:
8142        long            0x3FC2499A,0xB5E4040B
8143LOGA5:
8144        long            0xBFC555B5,0x848CB7DB
8145
8146LOGA4:
8147        long            0x3FC99999,0x987D8730
8148LOGA3:
8149        long            0xBFCFFFFF,0xFF6F7E97
8150
8151LOGA2:
8152        long            0x3FD55555,0x555555A4
8153LOGA1:
8154        long            0xBFE00000,0x00000008
8155
# LOGB1..LOGB5: double-precision coefficients B1..B5 of the odd polynomial
# U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) evaluated at LP1CONT2.
8156LOGB5:
8157        long            0x3F175496,0xADD7DAD6
8158LOGB4:
8159        long            0x3F3C71C2,0xFE80C7E0
8160
8161LOGB3:
8162        long            0x3F624924,0x928BCCFF
8163LOGB2:
8164        long            0x3F899999,0x999995EC
8165
8166LOGB1:
8167        long            0x3FB55555,0x55555555
# 2.0; KISNEG1 loads only the first longword, as a single (fmov.s)
8168TWO:
8169        long            0x40000000,0x00000000
8170
# extended-precision 2^(-102): slognp1 only goes on to LP1REAL when |X| is
# greater than this; otherwise it takes its small-argument exit.
8171LTHOLD:
8172        long            0x3f990000,0x80000000,0x00000000,0x00000000
8173
8174LOGTBL:
# Lookup table indexed by F = 1.xxxxxx1 (binary): 64 pairs of 16-byte entries.
# Each pair is 1/F followed by log(F), each an extended-precision value padded
# with a zero longword. LOGMAIN/KISNEG1/KISZERO compute the byte offset of a
# pair as (FFRAC & 0x7E000000) >> 20, i.e. the six "x" bits times 32; LP1CONT1
# reads 1/F at that address and log(F) 16 bytes further on.
8175        long            0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000
8176        long            0x3FF70000,0xFF015358,0x833C47E2,0x00000000
8177        long            0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000
8178        long            0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000
8179        long            0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000
8180        long            0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000
8181        long            0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000
8182        long            0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000
8183        long            0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000
8184        long            0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000
8185        long            0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000
8186        long            0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000
8187        long            0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000
8188        long            0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000
8189        long            0x3FFE0000,0xE525982A,0xF70C880E,0x00000000
8190        long            0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000
8191        long            0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000
8192        long            0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000
8193        long            0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000
8194        long            0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000
8195        long            0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000
8196        long            0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000
8197        long            0x3FFE0000,0xD901B203,0x6406C80E,0x00000000
8198        long            0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000
8199        long            0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000
8200        long            0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000
8201        long            0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000
8202        long            0x3FFC0000,0xC3FD0329,0x06488481,0x00000000
8203        long            0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000
8204        long            0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000
8205        long            0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000
8206        long            0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000
8207        long            0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000
8208        long            0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000
8209        long            0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000
8210        long            0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000
8211        long            0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000
8212        long            0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000
8213        long            0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000
8214        long            0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000
8215        long            0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000
8216        long            0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000
8217        long            0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000
8218        long            0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000
8219        long            0x3FFE0000,0xBD691047,0x07661AA3,0x00000000
8220        long            0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000
8221        long            0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000
8222        long            0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000
8223        long            0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000
8224        long            0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000
8225        long            0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000
8226        long            0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000
8227        long            0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000
8228        long            0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000
8229        long            0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000
8230        long            0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000
8231        long            0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000
8232        long            0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000
8233        long            0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000
8234        long            0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000
8235        long            0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000
8236        long            0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000
8237        long            0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000
8238        long            0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000
8239        long            0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000
8240        long            0x3FFD0000,0xD2420487,0x2DD85160,0x00000000
8241        long            0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000
8242        long            0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000
8243        long            0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000
8244        long            0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000
8245        long            0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000
8246        long            0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000
8247        long            0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000
8248        long            0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000
8249        long            0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000
8250        long            0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000
8251        long            0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000
8252        long            0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000
8253        long            0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000
8254        long            0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000
8255        long            0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000
8256        long            0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000
8257        long            0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000
8258        long            0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000
8259        long            0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000
8260        long            0x3FFE0000,0x825EFCED,0x49369330,0x00000000
8261        long            0x3FFE0000,0x9868C809,0x868C8098,0x00000000
8262        long            0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000
8263        long            0x3FFE0000,0x97012E02,0x5C04B809,0x00000000
8264        long            0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000
8265        long            0x3FFE0000,0x95A02568,0x095A0257,0x00000000
8266        long            0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000
8267        long            0x3FFE0000,0x94458094,0x45809446,0x00000000
8268        long            0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000
8269        long            0x3FFE0000,0x92F11384,0x0497889C,0x00000000
8270        long            0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000
8271        long            0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000
8272        long            0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000
8273        long            0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000
8274        long            0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000
8275        long            0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000
8276        long            0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000
8277        long            0x3FFE0000,0x8DDA5202,0x37694809,0x00000000
8278        long            0x3FFE0000,0x9723A1B7,0x20134203,0x00000000
8279        long            0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000
8280        long            0x3FFE0000,0x995899C8,0x90EB8990,0x00000000
8281        long            0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000
8282        long            0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000
8283        long            0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000
8284        long            0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000
8285        long            0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000
8286        long            0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000
8287        long            0x3FFE0000,0x87F78087,0xF78087F8,0x00000000
8288        long            0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000
8289        long            0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000
8290        long            0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000
8291        long            0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000
8292        long            0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000
8293        long            0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000
8294        long            0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000
8295        long            0x3FFE0000,0x83993052,0x3FBE3368,0x00000000
8296        long            0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000
8297        long            0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000
8298        long            0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000
8299        long            0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000
8300        long            0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000
8301        long            0x3FFE0000,0x80808080,0x80808081,0x00000000
8302        long            0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000
8303
8304        set             ADJK,L_SCR1             # exponent adjustment: input = 2^(ADJK) * FP0
8305
# X, KLOG2 and SAVEU below all name the same FP_SCR0 slot; each is only valid
# during the phase of the computation that uses it.
8306        set             X,FP_SCR0               # working copy of the 12-byte extended operand
8307        set             XDCARE,X+2              # word following the sign/exponent word
8308        set             XFRAC,X+4               # high longword of the mantissa
8309
8310        set             F,FP_SCR1               # extended value F = 1.xxxxxx1
8311        set             FFRAC,F+4               # high longword of F's mantissa
8312
8313        set             KLOG2,FP_SCR0           # K*log(2), stored by LP1CONT1
8314
8315        set             SAVEU,FP_SCR0           # U, stored by LP1CONT2
8316
8317        global          slogn
8318#--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
# in:  a0 = pointer to the extended-precision operand, d0 = round prec,mode
# out: control passes to LOGNEG (X < 0), LOGNEAR1 (X close to 1) or falls
#      through into LOGMAIN; the result is produced there, in fp0.
8319slogn:
8320        fmov.x          (%a0),%fp0              # LOAD INPUT
8321        mov.l           &0x00000000,ADJK(%a6)   # normalized input: no exponent adjustment
8322
# LOGBGN is also the join point used by slognd, which arrives with
# a0 -> X(%a6), fp0 = the normalized operand and ADJK = -(shift count).
8323LOGBGN:
8324#--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
8325#--A FINITE, NON-ZERO, NORMALIZED NUMBER.
8326
# build the "compact form" of X in d1: sign/exponent word in the upper 16
# bits, the top 16 bits of the mantissa in the lower 16 bits.
8327        mov.l           (%a0),%d1
8328        mov.w           4(%a0),%d1
8329
# copy the 12-byte operand to the X scratch slot
8330        mov.l           (%a0),X(%a6)
8331        mov.l           4(%a0),X+4(%a6)
8332        mov.l           8(%a0),X+8(%a6)
8333
8334        cmp.l           %d1,&0                  # CHECK IF X IS NEGATIVE
8335        blt.w           LOGNEG                  # LOG OF NEGATIVE ARGUMENT IS INVALID
8336# X IS POSITIVE, CHECK IF X IS NEAR 1
# (LP1NEAR1 tests the same two compact-form bounds and describes them as
# EXP(-1/16) and EXP(1/16))
8337        cmp.l           %d1,&0x3ffef07d         # IS X < 15/16?
8338        blt.b           LOGMAIN                 # YES
8339        cmp.l           %d1,&0x3fff8841         # IS X > 17/16?
8340        ble.w           LOGNEAR1                # NO
8341
8342LOGMAIN:
8343#--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1
# reached by fall-through from LOGBGN and by branch from LP1REAL.
# on entry: d1 = X in compact form, X(%a6) = the extended operand,
# ADJK(%a6) = exponent adjustment. Falls through into LP1CONT1.
8344
8345#--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
8346#--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
8347#--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
8348#--                      = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
8349#--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
8350#--LOG(1+U) CAN BE VERY EFFICIENT.
8351#--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
8352#--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.
8353
8354#--GET K, Y, F, AND ADDRESS OF 1/F.
8355        asr.l           &8,%d1
8356        asr.l           &8,%d1                  # SHIFTED 16 BITS, BIASED EXPO. OF X
8357        sub.l           &0x3FFF,%d1             # THIS IS K
8358        add.l           ADJK(%a6),%d1           # ADJUST K, ORIGINAL INPUT MAY BE DENORM.
8359        lea             LOGTBL(%pc),%a0         # BASE ADDRESS OF 1/F AND LOG(F)
8360        fmov.l          %d1,%fp1                # CONVERT K TO FLOATING-POINT FORMAT
8361
8362#--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
8363        mov.l           &0x3FFF0000,X(%a6)      # X IS NOW Y, I.E. 2^(-K)*X
8364        mov.l           XFRAC(%a6),FFRAC(%a6)
8365        and.l           &0xFE000000,FFRAC(%a6)  # FIRST 7 BITS OF Y
8366        or.l            &0x01000000,FFRAC(%a6)  # GET F: ATTACH A 1 AT THE EIGHTH BIT
8367        mov.l           FFRAC(%a6),%d1  # READY TO GET ADDRESS OF 1/F
8368        and.l           &0x7E000000,%d1         # keep only the six "x" bits of F
8369        asr.l           &8,%d1
8370        asr.l           &8,%d1
8371        asr.l           &4,%d1                  # SHIFTED 20, D1 IS THE DISPLACEMENT
8372        add.l           %d1,%a0                 # A0 IS THE ADDRESS FOR 1/F
8373
8374        fmov.x          X(%a6),%fp0             # fp0 = Y
8375        mov.l           &0x3fff0000,F(%a6)      # F gets exponent 0x3FFF, like Y
8376        clr.l           F+8(%a6)                # low mantissa longword of F is zero
8377        fsub.x          F(%a6),%fp0             # Y-F
8378        fmovm.x         &0xc,-(%sp)             # SAVE FP2-3 WHILE FP0 IS NOT READY
8379#--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
8380#--REGISTERS SAVED: FPCR, FP1, FP2
8381
8382LP1CONT1:
8383#--A RE-ENTRY POINT FOR LOGNP1
# on entry: fp0 = Y-F, fp1 = K, a0 = address of the 1/F table entry,
# fp2/fp3 already pushed on the stack, d0 = caller's round prec,mode.
# on exit:  fp0 = K*log(2) + log(F) + log(1+U); leaves through t_inx2.
8384        fmul.x          (%a0),%fp0              # FP0 IS U = (Y-F)/F
8385        fmul.x          LOGOF2(%pc),%fp1        # GET K*LOG2 WHILE FP0 IS NOT READY
8386        fmov.x          %fp0,%fp2
8387        fmul.x          %fp2,%fp2               # FP2 IS V=U*U
8388        fmov.x          %fp1,KLOG2(%a6)         # PUT K*LOG2 IN MEMORY, FREE FP1
8389
8390#--LOG(1+U) IS APPROXIMATED BY
8391#--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
8392#--[U + V*(A1+V*(A3+V*A5))]  +  [U*V*(A2+V*(A4+V*A6))]
8393
# the two bracketed halves are accumulated in fp2 (odd A's) and fp1 (even
# A's) in interleaved steps; fp3 holds V throughout.
8394        fmov.x          %fp2,%fp3
8395        fmov.x          %fp2,%fp1
8396
8397        fmul.d          LOGA6(%pc),%fp1         # V*A6
8398        fmul.d          LOGA5(%pc),%fp2         # V*A5
8399
8400        fadd.d          LOGA4(%pc),%fp1         # A4+V*A6
8401        fadd.d          LOGA3(%pc),%fp2         # A3+V*A5
8402
8403        fmul.x          %fp3,%fp1               # V*(A4+V*A6)
8404        fmul.x          %fp3,%fp2               # V*(A3+V*A5)
8405
8406        fadd.d          LOGA2(%pc),%fp1         # A2+V*(A4+V*A6)
8407        fadd.d          LOGA1(%pc),%fp2         # A1+V*(A3+V*A5)
8408
8409        fmul.x          %fp3,%fp1               # V*(A2+V*(A4+V*A6))
8410        add.l           &16,%a0                 # ADDRESS OF LOG(F)
8411        fmul.x          %fp3,%fp2               # V*(A1+V*(A3+V*A5))
8412
8413        fmul.x          %fp0,%fp1               # U*V*(A2+V*(A4+V*A6))
8414        fadd.x          %fp2,%fp0               # U+V*(A1+V*(A3+V*A5))
8415
8416        fadd.x          (%a0),%fp1              # LOG(F)+U*V*(A2+V*(A4+V*A6))
8417        fmovm.x         (%sp)+,&0x30            # RESTORE FP2-3
8418        fadd.x          %fp1,%fp0               # FP0 IS LOG(F) + LOG(1+U)
8419
8420        fmov.l          %d0,%fpcr               # caller's round prec,mode applies to the last add only
8421        fadd.x          KLOG2(%a6),%fp0         # FINAL ADD
8422        bra             t_inx2
8423
8424
8425LOGNEAR1:
# reached from LOGBGN when X lies between the two near-1 bounds tested there.
# on entry fp0 = X. Sets up fp1 = 2(X-1) and fp0 = X+1, then falls through
# into LP1CONT2.
8426
8427# if the input is exactly equal to one, then exit through ld_pzero.
8428# if these 2 lines weren't here, the correct answer would be returned
8429# but the INEX2 bit would be set.
8430        fcmp.b          %fp0,&0x1               # is it equal to one?
8431        fbeq.l          ld_pzero                # yes
8432
8433#--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
8434        fmov.x          %fp0,%fp1
8435        fsub.s          one(%pc),%fp1           # FP1 IS X-1
8436        fadd.s          one(%pc),%fp0           # FP0 IS X+1
8437        fadd.x          %fp1,%fp1               # FP1 IS 2(X-1)
8438#--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
8439#--IN U, U = 2(X-1)/(X+1) = FP1/FP0
8440
8441LP1CONT2:
8442#--THIS IS A RE-ENTRY POINT FOR LOGNP1
# on entry: fp1 = numerator, fp0 = denominator of U; d0 = caller's round
# prec,mode. fp2/fp3 are saved and restored here.
# on exit:  fp0 = U + U*V*(B1 + ... ); leaves through t_inx2.
8443        fdiv.x          %fp0,%fp1               # FP1 IS U
8444        fmovm.x         &0xc,-(%sp)             # SAVE FP2-3
8445#--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
8446#--LET V=U*U, W=V*V, CALCULATE
8447#--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
8448#--U + U*V*(  [B1 + W*(B3 + W*B5)]  +  [V*(B2 + W*B4)]  )
8449        fmov.x          %fp1,%fp0
8450        fmul.x          %fp0,%fp0               # FP0 IS V
8451        fmov.x          %fp1,SAVEU(%a6)         # STORE U IN MEMORY, FREE FP1
8452        fmov.x          %fp0,%fp1
8453        fmul.x          %fp1,%fp1               # FP1 IS W
8454
8455        fmov.d          LOGB5(%pc),%fp3
8456        fmov.d          LOGB4(%pc),%fp2
8457
8458        fmul.x          %fp1,%fp3               # W*B5
8459        fmul.x          %fp1,%fp2               # W*B4
8460
8461        fadd.d          LOGB3(%pc),%fp3         # B3+W*B5
8462        fadd.d          LOGB2(%pc),%fp2         # B2+W*B4
8463
8464        fmul.x          %fp3,%fp1               # W*(B3+W*B5), FP3 RELEASED
8465
8466        fmul.x          %fp0,%fp2               # V*(B2+W*B4)
8467
8468        fadd.d          LOGB1(%pc),%fp1         # B1+W*(B3+W*B5)
8469        fmul.x          SAVEU(%a6),%fp0         # FP0 IS U*V
8470
8471        fadd.x          %fp2,%fp1               # B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
8472        fmovm.x         (%sp)+,&0x30            # FP2-3 RESTORED
8473
8474        fmul.x          %fp1,%fp0               # U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )
8475
8476        fmov.l          %d0,%fpcr               # caller's round prec,mode applies to the last add only
8477        fadd.x          SAVEU(%a6),%fp0         # add the leading U term
8478        bra             t_inx2
8479
8480#--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID
# reached from LOGBGN when the compact form of X in d1 is negative; the
# handling is delegated entirely to t_operr (defined outside this section).
8481LOGNEG:
8482        bra             t_operr
8483
8484        global          slognd
8485slognd:
8486#--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
# in: a0 = pointer to the extended-precision denormalized operand.
# The 64-bit mantissa is shifted left by k bits until its top bit is set,
# -k is recorded in ADJK, the shifted copy is written to X(%a6) and loaded
# into fp0, and the routine joins the normal path at LOGBGN with
# a0 -> X(%a6). d2-d7 are saved on entry and restored before leaving.
8487
8488        mov.l           &-100,ADJK(%a6)         # INPUT = 2^(ADJK) * FP0
# NOTE(review): both paths below overwrite ADJK with -k before reaching
# LOGBGN, so the -100 stored above looks vestigial.
8489
8490#----normalize the input value by left shifting k bits (k to be determined
8491#----below), adjusting exponent and storing -k to  ADJK
8492#----the value TWOTO100 is no longer needed.
8493#----Note that this code assumes the denormalized input is NON-ZERO.
8494
8495        movm.l          &0x3f00,-(%sp)          # save some registers  {d2-d7}
8496        mov.l           (%a0),%d3               # D3 = sign/exponent longword, reused unchanged below
8497        mov.l           4(%a0),%d4
8498        mov.l           8(%a0),%d5              # (D4,D5) is (Hi_X,Lo_X)
8499        clr.l           %d2                     # D2 used for holding K
8500
8501        tst.l           %d4                     # any bits set in the high mantissa longword?
8502        bne.b           Hi_not0
8503
# high longword is zero: the low longword becomes the high one (32-bit shift)
# and is then shifted further until its top bit is set.
8504Hi_0:
8505        mov.l           %d5,%d4
8506        clr.l           %d5
8507        mov.l           &32,%d2
8508        clr.l           %d6
8509        bfffo           %d4{&0:&32},%d6         # d6 = offset of the first 1 bit
8510        lsl.l           %d6,%d4
8511        add.l           %d6,%d2                 # (D3,D4,D5) is normalized
8512
8513        mov.l           %d3,X(%a6)
8514        mov.l           %d4,XFRAC(%a6)
8515        mov.l           %d5,XFRAC+4(%a6)
8516        neg.l           %d2
8517        mov.l           %d2,ADJK(%a6)           # ADJK = -k
8518        fmov.x          X(%a6),%fp0
8519        movm.l          (%sp)+,&0xfc            # restore registers {d2-d7}
8520        lea             X(%a6),%a0              # LOGBGN reads the operand through a0
8521        bra.w           LOGBGN                  # begin regular log(X)
8522
# high longword is non-zero: shift the 64-bit pair (D4,D5) left by k bits,
# carrying the top k bits of D5 into the bottom of D4.
8523Hi_not0:
8524        clr.l           %d6
8525        bfffo           %d4{&0:&32},%d6         # find first 1
8526        mov.l           %d6,%d2                 # get k
8527        lsl.l           %d6,%d4
8528        mov.l           %d5,%d7                 # a copy of D5
8529        lsl.l           %d6,%d5
8530        neg.l           %d6
8531        add.l           &32,%d6                 # d6 = 32 - k
8532        lsr.l           %d6,%d7                 # d7 = bits of D5 that move into D4
8533        or.l            %d7,%d4                 # (D3,D4,D5) normalized
8534
8535        mov.l           %d3,X(%a6)
8536        mov.l           %d4,XFRAC(%a6)
8537        mov.l           %d5,XFRAC+4(%a6)
8538        neg.l           %d2
8539        mov.l           %d2,ADJK(%a6)           # ADJK = -k
8540        fmov.x          X(%a6),%fp0
8541        movm.l          (%sp)+,&0xfc            # restore registers {d2-d7}
8542        lea             X(%a6),%a0              # LOGBGN reads the operand through a0
8543        bra.w           LOGBGN                  # begin regular log(X)
8544
8545        global          slognp1
8546#--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
# in: a0 = pointer to the extended-precision operand, d0 = round prec,mode.
# If |X| is greater than LTHOLD the real computation starts at LP1REAL.
# Otherwise the operand itself is reloaded under the caller's rounding
# control and the routine leaves through t_catch with d1 = FMOV_OP.
8547slognp1:
8548        fmov.x          (%a0),%fp0              # LOAD INPUT
8549        fabs.x          %fp0                    # test magnitude
8550        fcmp.x          %fp0,LTHOLD(%pc)        # compare with min threshold
8551        fbgt.w          LP1REAL                 # if greater, continue
8552        fmov.l          %d0,%fpcr               # apply caller's round prec,mode
8553        mov.b           &FMOV_OP,%d1            # last inst is MOVE
8554        fmov.x          (%a0),%fp0              # return signed argument
8555        bra             t_catch
8556
8557LP1REAL:
# Z = the input, X = ROUND(1+Z). Builds the compact form of X in d1 and
# dispatches: X <= 0 -> LP1NEG0; X outside [1/2,3/2] -> LOGMAIN (plain log(X));
# otherwise falls through into LP1NEAR1. fp1 keeps Z, fp0 keeps X.
8558        fmov.x          (%a0),%fp0              # LOAD INPUT
8559        mov.l           &0x00000000,ADJK(%a6)
8560        fmov.x          %fp0,%fp1               # FP1 IS INPUT Z
8561        fadd.s          one(%pc),%fp0           # X := ROUND(1+Z)
8562        fmov.x          %fp0,X(%a6)
8563        mov.w           XFRAC(%a6),XDCARE(%a6)  # top 16 mantissa bits next to the exponent word
8564        mov.l           X(%a6),%d1              # d1 = X in compact form
8565        cmp.l           %d1,&0
8566        ble.w           LP1NEG0                 # LOG OF ZERO OR -VE
8567        cmp.l           %d1,&0x3ffe8000         # IS X WITHIN BOUNDS [1/2,3/2]?
8568        blt.w           LOGMAIN
8569        cmp.l           %d1,&0x3fffc000
8570        bgt.w           LOGMAIN
8571#--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
8572#--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
8573#--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).
8574
8575LP1NEAR1:
# on entry: d1 = X in compact form with X in [1/2,3/2], fp0 = X, fp1 = Z.
# X outside the two bounds below goes to LP1CARE; otherwise LP1ONE16 sets up
# fp1 = 2Z and fp0 = 1+X and joins the odd-polynomial path at LP1CONT2.
8576#--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
8577        cmp.l           %d1,&0x3ffef07d
8578        blt.w           LP1CARE
8579        cmp.l           %d1,&0x3fff8841
8580        bgt.w           LP1CARE
8581
8582LP1ONE16:
8583#--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
8584#--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
8585        fadd.x          %fp1,%fp1               # FP1 IS 2Z
8586        fadd.s          one(%pc),%fp0           # FP0 IS 1+X
8587#--U = FP1/FP0
8588        bra.w           LP1CONT2
8589
8590LP1CARE:
8591#--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
8592#--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
8593#--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
8594#--THERE ARE ONLY TWO CASES.
8595#--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
8596#--CASE 2: 1+Z > 1, THEN K = 0  AND Y-F = (1-F) + Z
8597#--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
8598#--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
# on entry: d1 = X in compact form, X(%a6) = X = ROUND(1+Z), fp1 = Z.
8599
8600        mov.l           XFRAC(%a6),FFRAC(%a6)
8601        and.l           &0xFE000000,FFRAC(%a6)  # first 7 bits of the mantissa
8602        or.l            &0x01000000,FFRAC(%a6)  # F OBTAINED
8603        cmp.l           %d1,&0x3FFF8000         # SEE IF 1+Z > 1
8604        bge.b           KISZERO
8605
# case 1: K = -1, Y-F = (2-F) + 2Z
8606KISNEG1:
8607        fmov.s          TWO(%pc),%fp0
8608        mov.l           &0x3fff0000,F(%a6)
8609        clr.l           F+8(%a6)
8610        fsub.x          F(%a6),%fp0             # 2-F
8611        mov.l           FFRAC(%a6),%d1
8612        and.l           &0x7E000000,%d1
8613        asr.l           &8,%d1
8614        asr.l           &8,%d1
8615        asr.l           &4,%d1                  # D1 CONTAINS DISPLACEMENT FOR 1/F
8616        fadd.x          %fp1,%fp1               # GET 2Z
8617        fmovm.x         &0xc,-(%sp)             # SAVE FP2  {%fp2/%fp3}
8618        fadd.x          %fp1,%fp0               # FP0 IS Y-F = (2-F)+2Z
8619        lea             LOGTBL(%pc),%a0         # A0 IS ADDRESS OF 1/F
8620        add.l           %d1,%a0
8621        fmov.s          negone(%pc),%fp1        # FP1 IS K = -1
8622        bra.w           LP1CONT1
8623
# case 2: K = 0, Y-F = (1-F) + Z
8624KISZERO:
8625        fmov.s          one(%pc),%fp0
8626        mov.l           &0x3fff0000,F(%a6)
8627        clr.l           F+8(%a6)
8628        fsub.x          F(%a6),%fp0             # 1-F
8629        mov.l           FFRAC(%a6),%d1
8630        and.l           &0x7E000000,%d1
8631        asr.l           &8,%d1
8632        asr.l           &8,%d1
8633        asr.l           &4,%d1                  # d1 = displacement of the 1/F entry
8634        fadd.x          %fp1,%fp0               # FP0 IS Y-F
8635        fmovm.x         &0xc,-(%sp)             # FP2 SAVED {%fp2/%fp3}
8636        lea             LOGTBL(%pc),%a0
8637        add.l           %d1,%a0                 # A0 IS ADDRESS OF 1/F
8638        fmov.s          zero(%pc),%fp1          # FP1 IS K = 0
8639        bra.w           LP1CONT1
8640
8641LP1NEG0:
8642#--FPCR SAVED. D1 IS X IN COMPACT FORM.
# reached from LP1REAL when X = ROUND(1+Z) is zero or negative.
# zero     -> fp0 = -1, caller's round prec,mode applied, exit through t_dz
# negative -> fp0 =  0, caller's round prec,mode applied, exit through t_operr
8643        cmp.l           %d1,&0
8644        blt.b           LP1NEG
8645LP1ZERO:
8646        fmov.s          negone(%pc),%fp0
8647
8648        fmov.l          %d0,%fpcr
8649        bra             t_dz
8650
8651LP1NEG:
8652        fmov.s          zero(%pc),%fp0
8653
8654        fmov.l          %d0,%fpcr
8655        bra             t_operr
8656
8657        global          slognp1d
8658#--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
8659# Simply return the denorm
# No arithmetic is done here; the work is delegated to t_extdnrm, which is
# defined outside this section.
8660slognp1d:
8661        bra             t_extdnrm
8662
8663#########################################################################
8664# satanh():  computes the inverse hyperbolic tangent of a norm input    #
8665# satanhd(): computes the inverse hyperbolic tangent of a denorm input  #
8666#                                                                       #
8667# INPUT *************************************************************** #
8668#       a0 = pointer to extended precision input                        #
8669#       d0 = round precision,mode                                       #
8670#                                                                       #
8671# OUTPUT ************************************************************** #
8672#       fp0 = arctanh(X)                                                #
8673#                                                                       #
8674# ACCURACY and MONOTONICITY ******************************************* #
8675#       The returned result is within 3 ulps in 64 significant bits,    #
8676#       i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
8677#       rounded to double precision. The result is provably monotonic   #
8678#       in double precision.                                            #
8679#                                                                       #
8680# ALGORITHM *********************************************************** #
8681#                                                                       #
8682#       ATANH                                                           #
8683#       1. If |X| >= 1, go to 3.                                        #
8684#                                                                       #
8685#       2. (|X| < 1) Calculate atanh(X) by                              #
8686#               sgn := sign(X)                                          #
8687#               y := |X|                                                #
8688#               z := 2y/(1-y)                                           #
8689#               atanh(X) := sgn * (1/2) * logp1(z)                      #
8690#               Exit.                                                   #
8691#                                                                       #
8692#       3. If |X| > 1, go to 5.                                         #
8693#                                                                       #
8694#       4. (|X| = 1) Generate infinity with an appropriate sign and     #
8695#               divide-by-zero by                                       #
8696#               sgn := sign(X)                                          #
8697#               atanh(X) := sgn / (+0).                                 #
8698#               Exit.                                                   #
8699#                                                                       #
8700#       5. (|X| > 1) Generate an invalid operation by 0 * infinity.     #
8701#               Exit.                                                   #
8702#                                                                       #
8703#########################################################################
8704
8705        global          satanh
8706satanh:
8707        mov.l           (%a0),%d1               # upper word <- sign/exponent word of X
8708        mov.w           4(%a0),%d1              # lower word <- top 16 bits of the mantissa
8709        and.l           &0x7FFFFFFF,%d1         # clear the sign: d1 = compact form of |X|
8710        cmp.l           %d1,&0x3FFF8000         # 0x3FFF8000 = compact form of 1.0
8711        bge.b           ATANHBIG                # |X| >= 1: no finite result, see ATANHBIG
8712
8713#--THIS IS THE USUAL CASE, |X| < 1
8714#--Y = |X|, Z = 2Y/(1-Y), ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z).
8715
8716        fabs.x          (%a0),%fp0              # Y = |X|
8717        fmov.x          %fp0,%fp1               # fp1 = Y
8718        fneg.x          %fp1                    # -Y
8719        fadd.x          %fp0,%fp0               # 2Y
8720        fadd.s          &0x3F800000,%fp1        # 1-Y
8721        fdiv.x          %fp1,%fp0               # 2Y/(1-Y)
8722        mov.l           (%a0),%d1               # refetch the sign/exponent long of X
8723        and.l           &0x80000000,%d1         # keep only the sign bit
8724        or.l            &0x3F000000,%d1         # SIGN(X)*HALF
8725        mov.l           %d1,-(%sp)              # push it; consumed by the final fmul.s below
8726
8727        mov.l           %d0,-(%sp)              # save rnd prec,mode
8728        clr.l           %d0                     # pass ext prec,RN
8729        fmovm.x         &0x01,-(%sp)            # save Z on stack
8730        lea             (%sp),%a0               # pass ptr to Z
8731        bsr             slognp1                 # LOG1P(Z)
8732        add.l           &0xc,%sp                # clear Z from stack
8733
8734        mov.l           (%sp)+,%d0              # fetch old prec,mode
8735        fmov.l          %d0,%fpcr               # load it
8736        mov.b           &FMUL_OP,%d1            # last inst is MUL
8737        fmul.s          (%sp)+,%fp0             # fp0 = SIGN(X)*HALF * LOG1P(Z); pops the constant
8738        bra             t_catch                 # exit through t_catch (defined elsewhere in the file)
8739
8740ATANHBIG:
8741        fabs.x          (%a0),%fp0              # |X|
8742        fcmp.s          %fp0,&0x3F800000        # compare |X| with 1.0
8743        fbgt            t_operr                 # |X| > 1: invalid operation
8744        bra             t_dz                    # |X| = 1: divide-by-zero, signed infinity
8745
8746        global          satanhd
8747#--ENTRY POINT FOR ATANH(X), X DENORMALIZED: ATANH(X) = X FOR DENORMALIZED X
8748satanhd:
8749        bra             t_extdnrm               # no computation here; t_extdnrm (defined elsewhere) produces the result
8750
8751#########################################################################
8752# slog10():  computes the base-10 logarithm of a normalized input       #
8753# slog10d(): computes the base-10 logarithm of a denormalized input     #
8754# slog2():   computes the base-2 logarithm of a normalized input        #
8755# slog2d():  computes the base-2 logarithm of a denormalized input      #
8756#                                                                       #
8757# INPUT *************************************************************** #
8758#       a0 = pointer to extended precision input                        #
8759#       d0 = round precision,mode                                       #
8760#                                                                       #
8761# OUTPUT ************************************************************** #
8762#       fp0 = log_10(X) or log_2(X)                                     #
8763#                                                                       #
8764# ACCURACY and MONOTONICITY ******************************************* #
8765#       The returned result is within 1.7 ulps in 64 significant bit,   #
8766#       i.e. within 0.5003 ulp to 53 bits if the result is subsequently #
8767#       rounded to double precision. The result is provably monotonic   #
8768#       in double precision.                                            #
8769#                                                                       #
8770# ALGORITHM *********************************************************** #
8771#                                                                       #
8772#       slog10d:                                                        #
8773#                                                                       #
8774#       Step 0. If X < 0, create a NaN and raise the invalid operation  #
8775#               flag. Otherwise, save FPCR in D1; set FpCR to default.  #
8776#       Notes:  Default means round-to-nearest mode, no floating-point  #
8777#               traps, and precision control = double extended.         #
8778#                                                                       #
8779#       Step 1. Call slognd to obtain Y = log(X), the natural log of X. #
8780#       Notes:  Even if X is denormalized, log(X) is always normalized. #
8781#                                                                       #
8782#       Step 2.  Compute log_10(X) = log(X) * (1/log(10)).              #
8783#            2.1 Restore the user FPCR                                  #
8784#            2.2 Return ans := Y * INV_L10.                             #
8785#                                                                       #
8786#       slog10:                                                         #
8787#                                                                       #
8788#       Step 0. If X < 0, create a NaN and raise the invalid operation  #
8789#               flag. Otherwise, save FPCR in D1; set FpCR to default.  #
8790#       Notes:  Default means round-to-nearest mode, no floating-point  #
8791#               traps, and precision control = double extended.         #
8792#                                                                       #
8793#       Step 1. Call sLogN to obtain Y = log(X), the natural log of X.  #
8794#                                                                       #
8795#       Step 2.   Compute log_10(X) = log(X) * (1/log(10)).             #
8796#            2.1  Restore the user FPCR                                 #
8797#            2.2  Return ans := Y * INV_L10.                            #
8798#                                                                       #
8799#       sLog2d:                                                         #
8800#                                                                       #
8801#       Step 0. If X < 0, create a NaN and raise the invalid operation  #
8802#               flag. Otherwise, save FPCR in D1; set FpCR to default.  #
8803#       Notes:  Default means round-to-nearest mode, no floating-point  #
8804#               traps, and precision control = double extended.         #
8805#                                                                       #
8806#       Step 1. Call slognd to obtain Y = log(X), the natural log of X. #
8807#       Notes:  Even if X is denormalized, log(X) is always normalized. #
8808#                                                                       #
8809#       Step 2.   Compute log_2(X) = log(X) * (1/log(2)).               #
8810#            2.1  Restore the user FPCR                                 #
8811#            2.2  Return ans := Y * INV_L2.                             #
8812#                                                                       #
8813#       sLog2:                                                          #
8814#                                                                       #
8815#       Step 0. If X < 0, create a NaN and raise the invalid operation  #
8816#               flag. Otherwise, save FPCR in D1; set FpCR to default.  #
8817#       Notes:  Default means round-to-nearest mode, no floating-point  #
8818#               traps, and precision control = double extended.         #
8819#                                                                       #
8820#       Step 1. If X is not an integer power of two, i.e., X != 2^k,    #
8821#               go to Step 3.                                           #
8822#                                                                       #
8823#       Step 2.   Return k.                                             #
8824#            2.1  Get integer k, X = 2^k.                               #
8825#            2.2  Restore the user FPCR.                                #
8826#            2.3  Return ans := convert-to-double-extended(k).          #
8827#                                                                       #
8828#       Step 3. Call sLogN to obtain Y = log(X), the natural log of X.  #
8829#                                                                       #
8830#       Step 4.   Compute log_2(X) = log(X) * (1/log(2)).               #
8831#            4.1  Restore the user FPCR                                 #
8832#            4.2  Return ans := Y * INV_L2.                             #
8833#                                                                       #
8834#########################################################################
8835
8836INV_L10:                                        # 1/log(10), read as extended precision by slog10/slog10d
8837        long            0x3FFD0000,0xDE5BD8A9,0x37287195,0x00000000     # 4th long is not read by fmul.x (presumably padding)
8838
8839INV_L2:                                         # 1/log(2), read as extended precision by slog2/slog2d
8840        long            0x3FFF0000,0xB8AA3B29,0x5C17F0BC,0x00000000     # 4th long is not read by fmul.x (presumably padding)
8841
8842        global          slog10
8843#--entry point for Log10(X), X is normalized; a0 = ptr to X, d0 = rnd prec,mode, result in fp0
8844slog10:
8845        fmov.b          &0x1,%fp0               # fp0 = 1.0
8846        fcmp.x          %fp0,(%a0)              # if operand == 1,
8847        fbeq.l          ld_pzero                # return an EXACT zero
8848
8849        mov.l           (%a0),%d1               # sign/exponent long of X; sets N if X < 0
8850        blt.w           invalid                 # X < 0: invalid operation
8851        mov.l           %d0,-(%sp)              # save user's rnd prec,mode
8852        clr.l           %d0                     # call slogn with d0 = 0 (ext prec, RN)
8853        bsr             slogn                   # log(X), X normal.
8854        fmov.l          (%sp)+,%fpcr            # pop user's rnd prec,mode straight into FPCR
8855        fmul.x          INV_L10(%pc),%fp0       # log_10(X) = log(X) * (1/log(10))
8856        bra             t_inx2                  # exit through t_inx2 (defined elsewhere in the file)
8857
8858        global          slog10d
8859#--entry point for Log10(X), X is denormalized; a0 = ptr to X, d0 = rnd prec,mode, result in fp0
8860slog10d:
8861        mov.l           (%a0),%d1               # sign/exponent long of X; sets N if X < 0
8862        blt.w           invalid                 # X < 0: invalid operation
8863        mov.l           %d0,-(%sp)              # save user's rnd prec,mode
8864        clr.l           %d0                     # call slognd with d0 = 0 (ext prec, RN)
8865        bsr             slognd                  # log(X), X denorm.
8866        fmov.l          (%sp)+,%fpcr            # pop user's rnd prec,mode straight into FPCR
8867        fmul.x          INV_L10(%pc),%fp0       # log_10(X) = log(X) * (1/log(10))
8868        bra             t_minx2                 # exit through t_minx2 (defined elsewhere in the file)
8869
8870        global          slog2
8871#--entry point for Log2(X), X is normalized; a0 = ptr to X, d0 = rnd prec,mode, result in fp0
8872slog2:
8873        mov.l           (%a0),%d1               # sign/exponent long of X; sets N if X < 0
8874        blt.w           invalid                 # X < 0: invalid operation
8875
8876        mov.l           8(%a0),%d1              # low long of the mantissa
8877        bne.b           continue                # X is not 2^k
8878
8879        mov.l           4(%a0),%d1              # high long of the mantissa
8880        and.l           &0x7FFFFFFF,%d1         # ignore the leading (integer) bit
8881        bne.b           continue                # other mantissa bits set: X is not 2^k
8882
8883#--X = 2^k.
8884        mov.w           (%a0),%d1               # exponent word (upper half of d1 is 0 after the test above)
8885        and.l           &0x00007FFF,%d1         # biased exponent
8886        sub.l           &0x3FFF,%d1             # k = exponent - bias
8887        beq.l           ld_pzero                # k = 0, i.e. X = 1: return zero via ld_pzero
8888        fmov.l          %d0,%fpcr               # convert under the user's rnd prec,mode
8889        fmov.l          %d1,%fp0                # ans = k converted to floating point
8890        bra             t_inx2                  # exit through t_inx2 (defined elsewhere in the file)
8891
8892continue:
8893        mov.l           %d0,-(%sp)              # save user's rnd prec,mode
8894        clr.l           %d0                     # call slogn with d0 = 0 (ext prec, RN)
8895        bsr             slogn                   # log(X), X normal.
8896        fmov.l          (%sp)+,%fpcr            # pop user's rnd prec,mode straight into FPCR
8897        fmul.x          INV_L2(%pc),%fp0        # log_2(X) = log(X) * (1/log(2))
8898        bra             t_inx2                  # exit through t_inx2 (defined elsewhere in the file)
8899
8900invalid:
8901        bra             t_operr                 # shared negative-operand exit for slog10/slog10d/slog2/slog2d
8902
8903        global          slog2d
8904#--entry point for Log2(X), X is denormalized; a0 = ptr to X, d0 = rnd prec,mode, result in fp0
8905slog2d:
8906        mov.l           (%a0),%d1               # sign/exponent long of X; sets N if X < 0
8907        blt.w           invalid                 # X < 0: invalid operation
8908        mov.l           %d0,-(%sp)              # save user's rnd prec,mode
8909        clr.l           %d0                     # call slognd with d0 = 0 (ext prec, RN)
8910        bsr             slognd                  # log(X), X denorm.
8911        fmov.l          (%sp)+,%fpcr            # pop user's rnd prec,mode straight into FPCR
8912        fmul.x          INV_L2(%pc),%fp0        # log_2(X) = log(X) * (1/log(2))
8913        bra             t_minx2                 # exit through t_minx2 (defined elsewhere in the file)
8914
8915#########################################################################
8916# stwotox():  computes 2**X for a normalized input                      #
8917# stwotoxd(): computes 2**X for a denormalized input                    #
8918# stentox():  computes 10**X for a normalized input                     #
8919# stentoxd(): computes 10**X for a denormalized input                   #
8920#                                                                       #
8921# INPUT *************************************************************** #
8922#       a0 = pointer to extended precision input                        #
8923#       d0 = round precision,mode                                       #
8924#                                                                       #
8925# OUTPUT ************************************************************** #
8926#       fp0 = 2**X or 10**X                                             #
8927#                                                                       #
8928# ACCURACY and MONOTONICITY ******************************************* #
8929#       The returned result is within 2 ulps in 64 significant bit,     #
8930#       i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
8931#       rounded to double precision. The result is provably monotonic   #
8932#       in double precision.                                            #
8933#                                                                       #
8934# ALGORITHM *********************************************************** #
8935#                                                                       #
8936#       twotox                                                          #
8937#       1. If |X| > 16480, go to ExpBig.                                #
8938#                                                                       #
8939#       2. If |X| < 2**(-70), go to ExpSm.                              #
8940#                                                                       #
8941#       3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore  #
8942#               decompose N as                                          #
8943#                N = 64(M + M') + j,  j = 0,1,2,...,63.                 #
8944#                                                                       #
8945#       4. Overwrite r := r * log2. Then                                #
8946#               2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).           #
8947#               Go to expr to compute that expression.                  #
8948#                                                                       #
8949#       tentox                                                          #
8950#       1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig.   #
8951#                                                                       #
8952#       2. If |X| < 2**(-70), go to ExpSm.                              #
8953#                                                                       #
8954#       3. Set y := X*log_2(10)*64 (base 2 log of 10). Set              #
8955#               N := round-to-int(y). Decompose N as                    #
8956#                N = 64(M + M') + j,  j = 0,1,2,...,63.                 #
8957#                                                                       #
8958#       4. Define r as                                                  #
8959#               r := ((X - N*L1)-N*L2) * L10                            #
8960#               where L1, L2 are the leading and trailing parts of      #
8961#               log_10(2)/64 and L10 is the natural log of 10. Then     #
8962#               10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).          #
8963#               Go to expr to compute that expression.                  #
8964#                                                                       #
8965#       expr                                                            #
8966#       1. Fetch 2**(j/64) from table as Fact1 and Fact2.               #
8967#                                                                       #
8968#       2. Overwrite Fact1 and Fact2 by                                 #
8969#               Fact1 := 2**(M) * Fact1                                 #
8970#               Fact2 := 2**(M) * Fact2                                 #
8971#               Thus Fact1 + Fact2 = 2**(M) * 2**(j/64).                #
8972#                                                                       #
8973#       3. Calculate P where 1 + P approximates exp(r):                 #
8974#               P = r + r*r*(A1+r*(A2+...+r*A5)).                       #
8975#                                                                       #
8976#       4. Let AdjFact := 2**(M'). Return                               #
8977#               AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ).              #
8978#               Exit.                                                   #
8979#                                                                       #
8980#       ExpBig                                                          #
8981#       1. Generate overflow by Huge * Huge if X > 0; otherwise,        #
8982#               generate underflow by Tiny * Tiny.                      #
8983#                                                                       #
8984#       ExpSm                                                           #
8985#       1. Return 1 + X.                                                #
8986#                                                                       #
8987#########################################################################
8988
8989L2TEN64:                                        # double precision (read with fmul.d in stentox)
8990        long            0x406A934F,0x0979A371   # 64LOG10/LOG2
8991L10TWO1:                                        # double precision, leading part (read with fmul.d in stentox)
8992        long            0x3F734413,0x509F8000   # LOG2/64LOG10
8993
8994L10TWO2:                                        # extended precision, trailing part of LOG2/64LOG10 (read with fmul.x)
8995        long            0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000
8996
8997LOG10:  long            0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000     # natural log of 10, extended precision
8998
8999LOG2:   long            0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000     # natural log of 2, extended precision
9000
9001EXPA5:  long            0x3F56C16D,0x6F7BD0B2   # A5..A1: double precision coefficients of the exp(r) polynomial in expr
9002EXPA4:  long            0x3F811112,0x302C712C
9003EXPA3:  long            0x3FA55555,0x55554CC1
9004EXPA2:  long            0x3FC55555,0x55554A54
9005EXPA1:  long            0x3FE00000,0x00000000,0x00000000,0x00000000     # read with fadd.d, so only the first two longs are used
9006
9007TEXPTBL:                                        # 2^(J/64), J = 0..63, 16 bytes each: longs 0-2 = leading part (extended), long 3 = trailing part packed as sign/exponent word | top 16 mantissa bits
9008        long            0x3FFF0000,0x80000000,0x00000000,0x3F738000
9009        long            0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA
9010        long            0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9
9011        long            0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9
9012        long            0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA
9013        long            0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C
9014        long            0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1
9015        long            0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA
9016        long            0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373
9017        long            0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670
9018        long            0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700
9019        long            0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0
9020        long            0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D
9021        long            0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319
9022        long            0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B
9023        long            0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5
9024        long            0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A
9025        long            0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B
9026        long            0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF
9027        long            0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA
9028        long            0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD
9029        long            0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E
9030        long            0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B
9031        long            0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB
9032        long            0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB
9033        long            0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274
9034        long            0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C
9035        long            0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00
9036        long            0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301
9037        long            0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367
9038        long            0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F
9039        long            0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C
9040        long            0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB
9041        long            0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB
9042        long            0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C
9043        long            0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA
9044        long            0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD
9045        long            0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51
9046        long            0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A
9047        long            0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2
9048        long            0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB
9049        long            0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17
9050        long            0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C
9051        long            0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8
9052        long            0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53
9053        long            0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE
9054        long            0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124
9055        long            0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243
9056        long            0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A
9057        long            0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61
9058        long            0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610
9059        long            0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1
9060        long            0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12
9061        long            0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE
9062        long            0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4
9063        long            0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F
9064        long            0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A
9065        long            0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A
9066        long            0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC
9067        long            0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F
9068        long            0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A
9069        long            0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795
9070        long            0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B
9071        long            0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581
9072
9073        set             INT,L_SCR1              # long scratch: N, the rounded integer multiple of 1/64
9074
9075        set             X,FP_SCR0               # copy of the input operand
9076        set             XDCARE,X+2              # offset 2 within X
9077        set             XFRAC,X+4               # offset 4 within X
9078
9079        set             ADJFACT,FP_SCR0         # 2^(M'); shares FP_SCR0 with X and FACT1, built only after FACT1 is consumed
9080
9081        set             FACT1,FP_SCR0           # leading part of 2^(M) * 2^(J/64); overwrites X
9082        set             FACT1HI,FACT1+4         # high mantissa long of FACT1
9083        set             FACT1LOW,FACT1+8        # low mantissa long of FACT1
9084
9085        set             FACT2,FP_SCR1           # trailing part of 2^(M) * 2^(J/64)
9086        set             FACT2HI,FACT2+4         # high mantissa long of FACT2
9087        set             FACT2LOW,FACT2+8        # low mantissa long of FACT2
9088
9089        global          stwotox
9090#--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
9091stwotox:
9092        fmovm.x         (%a0),&0x80             # LOAD INPUT
9093
9094        mov.l           (%a0),%d1               # upper word <- sign/exponent word of X
9095        mov.w           4(%a0),%d1              # lower word <- top 16 bits of the mantissa
9096        fmov.x          %fp0,X(%a6)             # keep a copy of X (its sign is tested in TEXPBIG)
9097        and.l           &0x7FFFFFFF,%d1         # clear the sign: d1 = compact form of |X|
9098
9099        cmp.l           %d1,&0x3FB98000         # |X| >= 2**(-70)?
9100        bge.b           TWOOK1                  # yes: go check the upper bound
9101        bra.w           EXPBORS                 # no: |X| is tiny
9102
9103TWOOK1:
9104        cmp.l           %d1,&0x400D80C0         # |X| > 16480?
9105        ble.b           TWOMAIN                 # no: usual case
9106        bra.w           EXPBORS                 # yes: |X| is huge
9107
9108TWOMAIN:
9109#--USUAL CASE, 2^(-70) <= |X| <= 16480
9110
9111        fmov.x          %fp0,%fp1               # fp1 = X
9112        fmul.s          &0x42800000,%fp1        # 64 * X
9113        fmov.l          %fp1,INT(%a6)           # N = ROUND-TO-INT(64 X)
9114        mov.l           %d2,-(%sp)              # save caller's d2 (popped again in expr)
9115        lea             TEXPTBL(%pc),%a1        # LOAD ADDRESS OF TABLE OF 2^(J/64)
9116        fmov.l          INT(%a6),%fp1           # N --> FLOATING FMT
9117        mov.l           INT(%a6),%d1            # d1 = N
9118        mov.l           %d1,%d2                 # d2 = N
9119        and.l           &0x3F,%d1               # d1 IS J
9120        asl.l           &4,%d1                  # DISPLACEMENT FOR 2^(J/64)
9121        add.l           %d1,%a1                 # ADDRESS FOR 2^(J/64)
9122        asr.l           &6,%d2                  # d2 IS L, N = 64L + J
9123        mov.l           %d2,%d1                 # d1 = L
9124        asr.l           &1,%d1                  # d1 IS M
9125        sub.l           %d1,%d2                 # d2 IS M', N = 64(M+M') + J
9126        add.l           &0x3FFF,%d2             # add the exponent bias: d2 = exponent word of 2^(M')
9127
9128#--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
9129#--d1 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
9130#--d2 = M' + 0x3FFF IS THE EXPONENT WORD OF ADJFACT = 2^(M'), STORED LATER IN expr.
9131#--SO FAR ONLY d2 HAS BEEN PUSHED ON THE STACK; fp2/fp3 ARE PUSHED NEXT.
9132
9133        fmovm.x         &0x0c,-(%sp)            # save fp2/fp3
9134
9135        fmul.s          &0x3C800000,%fp1        # (1/64)*N
9136        mov.l           (%a1)+,FACT1(%a6)       # FACT1 <- leading part of 2^(J/64) (overwrites X in FP_SCR0)
9137        mov.l           (%a1)+,FACT1HI(%a6)     # high mantissa long
9138        mov.l           (%a1)+,FACT1LOW(%a6)    # low mantissa long
9139        mov.w           (%a1)+,FACT2(%a6)       # FACT2 sign/exponent word from the 4th table long
9140
9141        fsub.x          %fp1,%fp0               # X - (1/64)*INT(64 X)
9142
9143        mov.w           (%a1)+,FACT2HI(%a6)     # top 16 bits of the FACT2 mantissa
9144        clr.w           FACT2HI+2(%a6)          # clear the rest of the high mantissa long
9145        clr.l           FACT2LOW(%a6)           # clear the low mantissa long
9146        add.w           %d1,FACT1(%a6)          # FACT1 := 2^(M) * FACT1 (M added to the exponent word)
9147        fmul.x          LOG2(%pc),%fp0          # FP0 IS R
9148        add.w           %d1,FACT2(%a6)          # FACT2 := 2^(M) * FACT2
9149
9150        bra.w           expr                    # shared polynomial/reconstruction code after stentox
9151
9152EXPBORS:
9153#--NOTHING HAS BEEN PUSHED ON THE STACK HERE; d0 STILL HOLDS THE USER'S ROUND PREC,MODE
9154        cmp.l           %d1,&0x3FFF8000         # compact |X| against compact 1.0
9155        bgt.b           TEXPBIG                 # |X| > 1 here means |X| exceeded the upper bound
9156
9157#--|X| IS SMALL, RETURN 1 + X
9158
9159        fmov.l          %d0,%fpcr               # restore users round prec,mode
9160        fadd.s          &0x3F800000,%fp0        # RETURN 1 + X
9161        bra             t_pinx2                 # exit through t_pinx2 (defined elsewhere in the file)
9162
9163TEXPBIG:
9164#--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW
9165#--NOTHING HAS BEEN PUSHED ON THE STACK ON THIS PATH
9166        mov.l           X(%a6),%d1              # sign/exponent long of the saved X
9167        cmp.l           %d1,&0                  # test the sign of X
9168        blt.b           EXPNEG                  # X < 0: underflow
9169
9170        bra             t_ovfl2                 # t_ovfl expects positive value
9171
9172EXPNEG:
9173        bra             t_unfl2                 # t_unfl expects positive value
9174
9175        global          stwotoxd
9176stwotoxd:
9177#--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT
9178
9179        fmov.l          %d0,%fpcr               # set user's rounding mode/precision
9180        fmov.s          &0x3F800000,%fp0        # RETURN 1 + X
9181        mov.l           (%a0),%d1               # sign/exponent long of X
9182        or.l            &0x00800001,%d1         # build a tiny single precision value that keeps the sign bit of X
9183        fadd.s          %d1,%fp0                # fp0 = 1 + that tiny value, under the user's rounding
9184        bra             t_pinx2                 # exit through t_pinx2 (defined elsewhere in the file)
9185
9186        global          stentox
9187#--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
9188stentox:
9189        fmovm.x         (%a0),&0x80             # LOAD INPUT
9190
9191        mov.l           (%a0),%d1               # upper word <- sign/exponent word of X
9192        mov.w           4(%a0),%d1              # lower word <- top 16 bits of the mantissa
9193        fmov.x          %fp0,X(%a6)             # keep a copy of X (its sign is tested in TEXPBIG)
9194        and.l           &0x7FFFFFFF,%d1         # clear the sign: d1 = compact form of |X|
9195
9196        cmp.l           %d1,&0x3FB98000         # |X| >= 2**(-70)?
9197        bge.b           TENOK1                  # yes: go check the upper bound
9198        bra.w           EXPBORS                 # no: |X| is tiny
9199
9200TENOK1:
9201        cmp.l           %d1,&0x400B9B07         # |X| <= 16480*log2/log10 ?
9202        ble.b           TENMAIN                 # yes: usual case
9203        bra.w           EXPBORS                 # no: |X| is huge
9204
9205TENMAIN:
9206#--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10
9207
9208        fmov.x          %fp0,%fp1               # fp1 = X
9209        fmul.d          L2TEN64(%pc),%fp1       # X*64*LOG10/LOG2
9210        fmov.l          %fp1,INT(%a6)           # N=INT(X*64*LOG10/LOG2)
9211        mov.l           %d2,-(%sp)              # save caller's d2 (popped again in expr)
9212        lea             TEXPTBL(%pc),%a1        # LOAD ADDRESS OF TABLE OF 2^(J/64)
9213        fmov.l          INT(%a6),%fp1           # N --> FLOATING FMT
9214        mov.l           INT(%a6),%d1            # d1 = N
9215        mov.l           %d1,%d2                 # d2 = N
9216        and.l           &0x3F,%d1               # d1 IS J
9217        asl.l           &4,%d1                  # DISPLACEMENT FOR 2^(J/64)
9218        add.l           %d1,%a1                 # ADDRESS FOR 2^(J/64)
9219        asr.l           &6,%d2                  # d2 IS L, N = 64L + J
9220        mov.l           %d2,%d1                 # d1 = L
9221        asr.l           &1,%d1                  # d1 IS M
9222        sub.l           %d1,%d2                 # d2 IS M', N = 64(M+M') + J
9223        add.l           &0x3FFF,%d2             # add the exponent bias: d2 = exponent word of 2^(M')
9224
9225#--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
9226#--d1 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
9227#--d2 = M' + 0x3FFF IS THE EXPONENT WORD OF ADJFACT = 2^(M'), STORED LATER IN expr.
9228#--SO FAR ONLY d2 HAS BEEN PUSHED ON THE STACK; fp2/fp3 ARE PUSHED NEXT.
9229        fmovm.x         &0x0c,-(%sp)            # save fp2/fp3
9230
9231        fmov.x          %fp1,%fp2               # fp2 = N as well, for the trailing product
9232
9233        fmul.d          L10TWO1(%pc),%fp1       # N*(LOG2/64LOG10)_LEAD
9234        mov.l           (%a1)+,FACT1(%a6)       # FACT1 <- leading part of 2^(J/64) (overwrites X in FP_SCR0)
9235
9236        fmul.x          L10TWO2(%pc),%fp2       # N*(LOG2/64LOG10)_TRAIL
9237
9238        mov.l           (%a1)+,FACT1HI(%a6)     # high mantissa long
9239        mov.l           (%a1)+,FACT1LOW(%a6)    # low mantissa long
9240        fsub.x          %fp1,%fp0               # X - N L_LEAD
9241        mov.w           (%a1)+,FACT2(%a6)       # FACT2 sign/exponent word from the 4th table long
9242
9243        fsub.x          %fp2,%fp0               # (X - N L_LEAD) - N L_TRAIL
9244
9245        mov.w           (%a1)+,FACT2HI(%a6)     # top 16 bits of the FACT2 mantissa
9246        clr.w           FACT2HI+2(%a6)          # clear the rest of the high mantissa long
9247        clr.l           FACT2LOW(%a6)           # clear the low mantissa long
9248
9249        fmul.x          LOG10(%pc),%fp0         # FP0 IS R
9250        add.w           %d1,FACT1(%a6)          # FACT1 := 2^(M) * FACT1 (M added to the exponent word)
9251        add.w           %d1,FACT2(%a6)          # FACT2 := 2^(M) * FACT2
9252
9253expr:
9254#--SHARED BY stwotox AND stentox. THE STACK HOLDS fp2/fp3 ON TOP OF THE CALLER'S d2.
9255#--d2 HOLDS THE EXPONENT WORD OF ADJFACT = 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64).
9256#--FP0 IS R. THE FOLLOWING CODE COMPUTES
9257#--     2**(M'+M) * 2**(J/64) * EXP(R)
9258
9259        fmov.x          %fp0,%fp1               # fp1 = R
9260        fmul.x          %fp1,%fp1               # FP1 IS S = R*R
9261
9262        fmov.d          EXPA5(%pc),%fp2         # FP2 IS A5
9263        fmov.d          EXPA4(%pc),%fp3         # FP3 IS A4
9264
9265        fmul.x          %fp1,%fp2               # FP2 IS S*A5
9266        fmul.x          %fp1,%fp3               # FP3 IS S*A4
9267
9268        fadd.d          EXPA3(%pc),%fp2         # FP2 IS A3+S*A5
9269        fadd.d          EXPA2(%pc),%fp3         # FP3 IS A2+S*A4
9270
9271        fmul.x          %fp1,%fp2               # FP2 IS S*(A3+S*A5)
9272        fmul.x          %fp1,%fp3               # FP3 IS S*(A2+S*A4)
9273
9274        fadd.d          EXPA1(%pc),%fp2         # FP2 IS A1+S*(A3+S*A5)
9275        fmul.x          %fp0,%fp3               # FP3 IS R*S*(A2+S*A4)
9276
9277        fmul.x          %fp1,%fp2               # FP2 IS S*(A1+S*(A3+S*A5))
9278        fadd.x          %fp3,%fp0               # FP0 IS R+R*S*(A2+S*A4)
9279        fadd.x          %fp2,%fp0               # FP0 IS EXP(R) - 1
9280
9281        fmovm.x         (%sp)+,&0x30            # restore fp2/fp3
9282
9283#--FINAL RECONSTRUCTION PROCESS
9284#--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1)  -  (1 OR 0)
9285
9286        fmul.x          FACT1(%a6),%fp0         # FACT1 * (EXP(R)-1)
9287        fadd.x          FACT2(%a6),%fp0         # ... + FACT2
9288        fadd.x          FACT1(%a6),%fp0         # ... + FACT1; last read of FACT1
9289
9290        fmov.l          %d0,%fpcr               # restore users round prec,mode
9291        mov.w           %d2,ADJFACT(%a6)        # INSERT EXPONENT
9292        mov.l           (%sp)+,%d2              # restore caller's d2
9293        mov.l           &0x80000000,ADJFACT+4(%a6)      # mantissa = 1.0, so ADJFACT = 2^(M')
9294        clr.l           ADJFACT+8(%a6)          # low mantissa long = 0
9295        mov.b           &FMUL_OP,%d1            # last inst is MUL
9296        fmul.x          ADJFACT(%a6),%fp0       # FINAL ADJUSTMENT
9297        bra             t_catch                 # exit through t_catch (defined elsewhere in the file)
9298
9299        global          stentoxd
9300stentoxd:
9301#--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT
#--INPUT:  a0 = pointer to the operand X, d0 = value loaded into FPCR.
#--OUTPUT: fp0 = 1.0 plus a tiny single-precision value; the routine
#--        exits through t_pinx2 (defined outside this section).
#--No polynomial is evaluated here: the answer is taken to be 1 + X, and
#--a small stand-in for X is added to 1.0 after the caller's FPCR has
#--been installed.
9302
9303        fmov.l          %d0,%fpcr               # set user's rounding mode/precision
9304        fmov.s          &0x3F800000,%fp0        # fp0 = 1.0 (single); RETURN 1 + X
9305        mov.l           (%a0),%d1               # d1 = first longword of X (sign/exponent word on top)
9306        or.l            &0x00800001,%d1         # force single exponent lsb and mantissa lsb on
#                                               # NOTE(review): with a zero exponent field (denorm) and
#                                               # zero pad bits this leaves sign(X) on a tiny single
9307        fadd.s          %d1,%fp0                # fp0 = 1.0 + that tiny value
9308        bra             t_pinx2
9309
9310#########################################################################
9311# smovcr(): returns the ROM constant at the offset specified in d1      #
9312#           rounded to the mode and precision specified in d0.          #
9313#                                                                       #
9314# INPUT *************************************************************** #
9315#       d0 = rnd prec,mode                                              #
9316#       d1 = ROM offset                                                 #
9317#                                                                       #
9318# OUTPUT ************************************************************** #
9319#       fp0 = the ROM constant rounded to the user's rounding mode,prec #
9320#                                                                       #
# NOTES *************************************************************** #
#       Offsets with no table entry branch to ld_pzero.                 #
#       INEX2/AINEX are OR'ed into USER_FPSR(a6) for inexact constants. #
#       On the table paths a0, d0 and d1 are overwritten.               #
#       FP_SCR1(a6) is scratch when the precision is not extended.      #
#                                                                       #
9321#########################################################################
9322
9323        global          smovcr
9324smovcr:
9325        mov.l           %d1,-(%sp)              # save rom offset for a sec
9326
# repack d0: rounding precision ends up in the high word, rounding mode
# in the low word. d1 is used as a temporary while the offset is stacked.
9327        lsr.b           &0x4,%d0                # shift ctrl bits to lo
9328        mov.l           %d0,%d1                 # make a copy
9329        andi.w          &0x3,%d1                # extract rnd mode
9330        andi.w          &0xc,%d0                # extract rnd prec
9331        swap            %d0                     # put rnd prec in hi
9332        mov.w           %d1,%d0                 # put rnd mode in lo
9333
9334        mov.l           (%sp)+,%d1              # get rom offset
9335
9336#
9337# check range of offset
9338#
# only the low byte of d1 is examined. the compares are signed (ble), so
# a byte with bit 7 set takes the first ble and returns zero.
9339        tst.b           %d1                     # if zero, offset is to pi
9340        beq.b           pi_tbl                  # it is pi
9341        cmpi.b          %d1,&0x0a               # check range $01 - $0a
9342        ble.b           z_val                   # if in this range, return zero
9343        cmpi.b          %d1,&0x0e               # check range $0b - $0e
9344        ble.b           sm_tbl                  # valid constants in this range
9345        cmpi.b          %d1,&0x2f               # check range $0f - $2f
9346        ble.b           z_val                   # if in this range, return zero
9347        cmpi.b          %d1,&0x3f               # check range $30 - $3f
9348        ble.b           bg_tbl                  # valid constants in this range
# anything above $3f falls through to z_val.
9349
9350z_val:
9351        bra.l           ld_pzero                # return a zero
9352
9353#
9354# the answer is PI rounded to the proper precision.
9355#
9356# fetch a pointer to the answer table relating to the proper rounding
9357# precision.
9358#
# d1 (low byte) is zero here, so the table index used at no_finx is 0.
9359pi_tbl:
9360        tst.b           %d0                     # is rmode RN?
9361        bne.b           pi_not_rn               # no
9362pi_rn:
9363        lea.l           PIRN(%pc),%a0           # yes; load PI RN table addr
9364        bra.w           set_finx
9365pi_not_rn:
9366        cmpi.b          %d0,&rp_mode            # is rmode RP?
9367        beq.b           pi_rp                   # yes
9368pi_rzrm:
9369        lea.l           PIRZRM(%pc),%a0         # no; load PI RZ,RM table addr
9370        bra.b           set_finx
9371pi_rp:
9372        lea.l           PIRP(%pc),%a0           # load PI RP table addr
9373        bra.b           set_finx
9374
9375#
9376# the answer is one of:
9377#       $0B     log10(2)        (inexact)
9378#       $0C     e               (inexact)
9379#       $0D     log2(e)         (inexact)
9380#       $0E     log10(e)        (exact)
9381#
9382# fetch a pointer to the answer table relating to the proper rounding
9383# precision.
9384#
# "exact" above reflects what the code does: index 3 ($0E) skips
# set_finx, so INEX2/AINEX are not set for it.
9385sm_tbl:
9386        subi.b          &0xb,%d1                # make offset in 0-3 range
9387        tst.b           %d0                     # is rmode RN?
9388        bne.b           sm_not_rn               # no
9389sm_rn:
9390        lea.l           SMALRN(%pc),%a0         # yes; load RN table addr
9391sm_tbl_cont:
9392        cmpi.b          %d1,&0x2                # is index <= 2 ($0B - $0D)?
9393        ble.b           set_finx                # yes; answer is inexact
9394        bra.b           no_finx                 # no; log10(e), treated as exact
9395sm_not_rn:
9396        cmpi.b          %d0,&rp_mode            # is rmode RP?
9397        beq.b           sm_rp                   # yes
9398sm_rzrm:
9399        lea.l           SMALRZRM(%pc),%a0       # no; load RZ,RM table addr
9400        bra.b           sm_tbl_cont
9401sm_rp:
9402        lea.l           SMALRP(%pc),%a0         # load RP table addr
9403        bra.b           sm_tbl_cont
9404
9405#
9406# the answer is one of:
9407#       $30     ln(2)           (inexact)
9408#       $31     ln(10)          (inexact)
9409#       $32     10^0            (exact)
9410#       $33     10^1            (exact)
9411#       $34     10^2            (exact)
9412#       $35     10^4            (exact)
9413#       $36     10^8            (exact)
9414#       $37     10^16           (exact)
9415#       $38     10^32           (inexact)
9416#       $39     10^64           (inexact)
9417#       $3A     10^128          (inexact)
9418#       $3B     10^256          (inexact)
9419#       $3C     10^512          (inexact)
9420#       $3D     10^1024         (inexact)
9421#       $3E     10^2048         (inexact)
9422#       $3F     10^4096         (inexact)
9423#
9424# fetch a pointer to the answer table relating to the proper rounding
9425# precision.
9426#
9427bg_tbl:
9428        subi.b          &0x30,%d1               # make offset in 0-f range
9429        tst.b           %d0                     # is rmode RN?
9430        bne.b           bg_not_rn               # no
9431bg_rn:
9432        lea.l           BIGRN(%pc),%a0          # yes; load RN table addr
9433bg_tbl_cont:
9434        cmpi.b          %d1,&0x1                # is offset <= $31?
9435        ble.b           set_finx                # yes; answer is inexact
9436        cmpi.b          %d1,&0x7                # is $32 <= offset <= $37?
9437        ble.b           no_finx                 # yes; answer is exact
9438        bra.b           set_finx                # no; answer is inexact
9439bg_not_rn:
9440        cmpi.b          %d0,&rp_mode            # is rmode RP?
9441        beq.b           bg_rp                   # yes
9442bg_rzrm:
9443        lea.l           BIGRZRM(%pc),%a0        # no; load RZ,RM table addr
9444        bra.b           bg_tbl_cont
9445bg_rp:
9446        lea.l           BIGRP(%pc),%a0          # load RP table addr
9447        bra.b           bg_tbl_cont
9448
9449# answer is inexact, so set INEX2 and AINEX in the user's FPSR.
9450set_finx:
9451        ori.l           &inx2a_mask,USER_FPSR(%a6) # set INEX2/AINEX
# common tail: a0 = selected table, d1 = entry index, d0 = prec:mode.
# each table entry is 12 bytes, hence the multiply by $c.
# NOTE(review): mulu.w uses the whole low word of d1 while the code above
# only adjusted its low byte - assumes bits 15:8 of the incoming offset
# are zero; confirm against the caller.
9452no_finx:
9453        mulu.w          &0xc,%d1                # offset points into tables
9454        swap            %d0                     # put rnd prec in lo word
9455        tst.b           %d0                     # is precision extended?
9456
9457        bne.b           not_ext                 # nonzero prec bits: sgl/dbl, must round
9458
9459# Precision is extended
9460        fmovm.x         (%a0,%d1.w),&0x80       # return result in fp0
9461        rts
9462
9463# Precision is single or double
9464not_ext:
9465        swap            %d0                     # rnd prec in upper word
9466
9467# call round() to round the answer to the proper precision.
9468# exponents out of range for single or double DO NOT cause underflow
9469# or overflow.
# the table entry is copied to FP_SCR1 so _round works on a scratch copy.
9470        mov.w           0x0(%a0,%d1.w),FP_SCR1_EX(%a6) # copy exponent word
9471        mov.l           0x4(%a0,%d1.w),FP_SCR1_HI(%a6) # copy hi mantissa longword
9472        mov.l           0x8(%a0,%d1.w),FP_SCR1_LO(%a6) # copy lo mantissa longword
9473        mov.l           %d0,%d1                 # d1 = prec (hi word) : mode (lo word)
9474        clr.l           %d0                     # clear g,r,s
9475        lea             FP_SCR1(%a6),%a0        # pass ptr to answer
9476        clr.w           LOCAL_SGN(%a0)          # sign always positive
9477        bsr.l           _round                  # round the mantissa
9478
9479        fmovm.x         (%a0),&0x80             # return rounded result in fp0
9480        rts
9481
9482        align           0x4
9483
# Constant tables used by smovcr.
# Every entry is three longwords (12 bytes): the exponent in the upper
# word of the first longword, then the high and low mantissa longwords.
# smovcr indexes them with (entry index * $c).
# There is one table per rounding-mode group: RN, RZ/RM and RP. The
# variants differ only by one unit in the low mantissa longword, and only
# for constants that are not exactly representable.
#
# PI ($00): a single entry per rounding-mode group.
9484PIRN:   long            0x40000000,0xc90fdaa2,0x2168c235        # pi
9485PIRZRM: long            0x40000000,0xc90fdaa2,0x2168c234        # pi
9486PIRP:   long            0x40000000,0xc90fdaa2,0x2168c235        # pi
9487
# Small constants, offsets $0B - $0E (indices 0 - 3). The trailing 0.0
# entry (index 4) is not selected by the $0B - $0E range check in smovcr.
9488SMALRN: long            0x3ffd0000,0x9a209a84,0xfbcff798        # log10(2)
9489        long            0x40000000,0xadf85458,0xa2bb4a9a        # e
9490        long            0x3fff0000,0xb8aa3b29,0x5c17f0bc        # log2(e)
9491        long            0x3ffd0000,0xde5bd8a9,0x37287195        # log10(e)
9492        long            0x00000000,0x00000000,0x00000000        # 0.0
9493
9494SMALRZRM:
9495        long            0x3ffd0000,0x9a209a84,0xfbcff798        # log10(2)
9496        long            0x40000000,0xadf85458,0xa2bb4a9a        # e
9497        long            0x3fff0000,0xb8aa3b29,0x5c17f0bb        # log2(e)
9498        long            0x3ffd0000,0xde5bd8a9,0x37287195        # log10(e)
9499        long            0x00000000,0x00000000,0x00000000        # 0.0
9500
9501SMALRP: long            0x3ffd0000,0x9a209a84,0xfbcff799        # log10(2)
9502        long            0x40000000,0xadf85458,0xa2bb4a9b        # e
9503        long            0x3fff0000,0xb8aa3b29,0x5c17f0bc        # log2(e)
9504        long            0x3ffd0000,0xde5bd8a9,0x37287195        # log10(e)
9505        long            0x00000000,0x00000000,0x00000000        # 0.0
9506
# Large constants, offsets $30 - $3F (indices 0 - $f).
# Indices 2 - 7 (10^0 .. 10^16) are identical in all three tables.
9507BIGRN:  long            0x3ffe0000,0xb17217f7,0xd1cf79ac        # ln(2)
9508        long            0x40000000,0x935d8ddd,0xaaa8ac17        # ln(10)
9509
9510        long            0x3fff0000,0x80000000,0x00000000        # 10 ^ 0
9511        long            0x40020000,0xA0000000,0x00000000        # 10 ^ 1
9512        long            0x40050000,0xC8000000,0x00000000        # 10 ^ 2
9513        long            0x400C0000,0x9C400000,0x00000000        # 10 ^ 4
9514        long            0x40190000,0xBEBC2000,0x00000000        # 10 ^ 8
9515        long            0x40340000,0x8E1BC9BF,0x04000000        # 10 ^ 16
9516        long            0x40690000,0x9DC5ADA8,0x2B70B59E        # 10 ^ 32
9517        long            0x40D30000,0xC2781F49,0xFFCFA6D5        # 10 ^ 64
9518        long            0x41A80000,0x93BA47C9,0x80E98CE0        # 10 ^ 128
9519        long            0x43510000,0xAA7EEBFB,0x9DF9DE8E        # 10 ^ 256
9520        long            0x46A30000,0xE319A0AE,0xA60E91C7        # 10 ^ 512
9521        long            0x4D480000,0xC9767586,0x81750C17        # 10 ^ 1024
9522        long            0x5A920000,0x9E8B3B5D,0xC53D5DE5        # 10 ^ 2048
9523        long            0x75250000,0xC4605202,0x8A20979B        # 10 ^ 4096
9524
9525BIGRZRM:
9526        long            0x3ffe0000,0xb17217f7,0xd1cf79ab        # ln(2)
9527        long            0x40000000,0x935d8ddd,0xaaa8ac16        # ln(10)
9528
9529        long            0x3fff0000,0x80000000,0x00000000        # 10 ^ 0
9530        long            0x40020000,0xA0000000,0x00000000        # 10 ^ 1
9531        long            0x40050000,0xC8000000,0x00000000        # 10 ^ 2
9532        long            0x400C0000,0x9C400000,0x00000000        # 10 ^ 4
9533        long            0x40190000,0xBEBC2000,0x00000000        # 10 ^ 8
9534        long            0x40340000,0x8E1BC9BF,0x04000000        # 10 ^ 16
9535        long            0x40690000,0x9DC5ADA8,0x2B70B59D        # 10 ^ 32
9536        long            0x40D30000,0xC2781F49,0xFFCFA6D5        # 10 ^ 64
9537        long            0x41A80000,0x93BA47C9,0x80E98CDF        # 10 ^ 128
9538        long            0x43510000,0xAA7EEBFB,0x9DF9DE8D        # 10 ^ 256
9539        long            0x46A30000,0xE319A0AE,0xA60E91C6        # 10 ^ 512
9540        long            0x4D480000,0xC9767586,0x81750C17        # 10 ^ 1024
9541        long            0x5A920000,0x9E8B3B5D,0xC53D5DE4        # 10 ^ 2048
9542        long            0x75250000,0xC4605202,0x8A20979A        # 10 ^ 4096
9543
9544BIGRP:
9545        long            0x3ffe0000,0xb17217f7,0xd1cf79ac        # ln(2)
9546        long            0x40000000,0x935d8ddd,0xaaa8ac17        # ln(10)
9547
9548        long            0x3fff0000,0x80000000,0x00000000        # 10 ^ 0
9549        long            0x40020000,0xA0000000,0x00000000        # 10 ^ 1
9550        long            0x40050000,0xC8000000,0x00000000        # 10 ^ 2
9551        long            0x400C0000,0x9C400000,0x00000000        # 10 ^ 4
9552        long            0x40190000,0xBEBC2000,0x00000000        # 10 ^ 8
9553        long            0x40340000,0x8E1BC9BF,0x04000000        # 10 ^ 16
9554        long            0x40690000,0x9DC5ADA8,0x2B70B59E        # 10 ^ 32
9555        long            0x40D30000,0xC2781F49,0xFFCFA6D6        # 10 ^ 64
9556        long            0x41A80000,0x93BA47C9,0x80E98CE0        # 10 ^ 128
9557        long            0x43510000,0xAA7EEBFB,0x9DF9DE8E        # 10 ^ 256
9558        long            0x46A30000,0xE319A0AE,0xA60E91C7        # 10 ^ 512
9559        long            0x4D480000,0xC9767586,0x81750C18        # 10 ^ 1024
9560        long            0x5A920000,0x9E8B3B5D,0xC53D5DE5        # 10 ^ 2048
9561        long            0x75250000,0xC4605202,0x8A20979B        # 10 ^ 4096
9562
9563#########################################################################
9564# sscale(): computes the destination operand scaled by the source       #
9565#           operand. If the absolute value of the source operand is     #
9566#           >= 2^14, an overflow or underflow is returned.              #
9567#                                                                       #
9568# INPUT *************************************************************** #
9569#       a0  = pointer to double-extended source operand X               #
9570#       a1  = pointer to double-extended destination operand Y          #
#       d0  = ctrl bits (pushed on entry, later loaded into FPCR)       #
9571#                                                                       #
9572# OUTPUT ************************************************************** #
9573#       fp0 =  scale(X,Y)                                               #
9574#                                                                       #
9575#########################################################################
9576
# SIGN aliases the L_SCR1 scratch location; its low byte receives the
# sign of the destination operand.
9577set     SIGN,           L_SCR1
9578
9579        global          sscale
9580sscale:
9581        mov.l           %d0,-(%sp)              # store off ctrl bits for now
9582
9583        mov.w           DST_EX(%a1),%d1         # get dst exponent
9584        smi.b           SIGN(%a6)               # use SIGN to hold dst sign
9585        andi.l          &0x00007fff,%d1         # strip sign from dst exp
9586
# classify the source by its biased exponent:
#   below $3fff   -> |src| < 1, the scale amount truncates to zero
#   above $400c   -> |src| >= 2^14, out of range
9587        mov.w           SRC_EX(%a0),%d0         # check src bounds
9588        andi.w          &0x7fff,%d0             # clr src sign bit
9589        cmpi.w          %d0,&0x3fff             # is src ~ ZERO?
9590        blt.w           src_small               # yes
9591        cmpi.w          %d0,&0x400c             # no; is src too big?
9592        bgt.w           src_out                 # yes
9593
9594#
9595# Source is within 2^14 range.
9596#
9597src_ok:
9598        fintrz.x        SRC(%a0),%fp0           # calc int of src
9599        fmov.l          %fp0,%d0                # int src to d0
9600# don't want any accrued bits from the fintrz showing up later since
9601# we may need to read the fpsr for the last fp op in t_catch2().
9602        fmov.l          &0x0,%fpsr
9603
# the top bit of the hi mantissa byte is set for a normalized dst.
9604        tst.b           DST_HI(%a1)             # is dst denormalized?
9605        bmi.b           sok_norm                # no; msb of mantissa is set
9606
9607# the dst is a DENORM. normalize the DENORM and add the adjustment to
9608# the src value. then, jump to the norm part of the routine.
9609sok_dnrm:
9610        mov.l           %d0,-(%sp)              # save src for now
9611
9612        mov.w           DST_EX(%a1),FP_SCR0_EX(%a6) # make a copy
9613        mov.l           DST_HI(%a1),FP_SCR0_HI(%a6)
9614        mov.l           DST_LO(%a1),FP_SCR0_LO(%a6)
9615
9616        lea             FP_SCR0(%a6),%a0        # pass ptr to DENORM
9617        bsr.l           norm                    # normalize the DENORM
# NOTE(review): norm presumably returns the normalization shift count in
# d0; it is negated and folded into the scale amount here.
9618        neg.l           %d0
9619        add.l           (%sp)+,%d0              # add adjustment to src
9620
9621        fmovm.x         FP_SCR0(%a6),&0x80      # load normalized DENORM
9622
9623        cmpi.w          %d0,&-0x3fff            # is the shft amt really low?
9624        bge.b           sok_norm2               # thank goodness no
9625
9626# the multiply factor that we're trying to create should be a denorm
9627# for the multiply to work. Therefore, we're going to actually do a
9628# multiply with a denorm which will cause an unimplemented data type
9629# exception to be put into the machine which will be caught and corrected
9630# later. we don't do this with the DENORMs above because this method
9631# is slower. but, don't fret, I don't see it being used much either.
# the factor is built on the stack, lowest-addressed longword pushed
# last: zero exponent, then hi mantissa, then lo mantissa.
9632        fmov.l          (%sp)+,%fpcr            # restore user fpcr
9633        mov.l           &0x80000000,%d1         # load normalized mantissa
9634        subi.l          &-0x3fff,%d0            # how many should we shift?
9635        neg.l           %d0                     # make it positive
9636        cmpi.b          %d0,&0x20               # is it >= 32?
9637        bge.b           sok_dnrm_32             # yes
9638        lsr.l           %d0,%d1                 # no; bit stays in upper lw
9639        clr.l           -(%sp)                  # insert zero low mantissa
9640        mov.l           %d1,-(%sp)              # insert new high mantissa
9641        clr.l           -(%sp)                  # make zero exponent
9642        bra.b           sok_norm_cont
9643sok_dnrm_32:
9644        subi.b          &0x20,%d0               # get shift count
9645        lsr.l           %d0,%d1                 # make low mantissa longword
9646        mov.l           %d1,-(%sp)              # insert new low mantissa
9647        clr.l           -(%sp)                  # insert zero high mantissa
9648        clr.l           -(%sp)                  # make zero exponent
9649        bra.b           sok_norm_cont
9650
9651# the src will force the dst to a DENORM value or worse. so, let's
9652# create an fp multiply that will create the result.
9653sok_norm:
9654        fmovm.x         DST(%a1),&0x80          # load fp0 with normalized dst
9655sok_norm2:
9656        fmov.l          (%sp)+,%fpcr            # restore user fpcr
9657
# build the factor on the stack: biased exponent = scale amount + $3fff,
# mantissa = $80000000_00000000. the lo mantissa is pushed first so the
# exponent longword ends up at the lowest address.
9658        addi.w          &0x3fff,%d0             # turn src amt into exp value
9659        swap            %d0                     # put exponent in high word
9660        clr.l           -(%sp)                  # insert zero lo mantissa
9661        mov.l           &0x80000000,-(%sp)      # insert new high mantissa
9662        mov.l           %d0,-(%sp)              # insert new exponent longword
9663
9664sok_norm_cont:
9665        fmov.l          %fpcr,%d0               # d0 needs fpcr for t_catch2
9666        mov.b           &FMUL_OP,%d1            # last inst is MUL
9667        fmul.x          (%sp)+,%fp0             # do the multiply; pops the factor
9668        bra             t_catch2                # catch any exceptions
9669
9670#
9671# Source is outside of 2^14 range.  Test the sign and branch
9672# to the appropriate exception handler.
9673#
9674src_out:
9675        mov.l           (%sp)+,%d0              # restore ctrl bits
9676        exg             %a0,%a1                 # swap src,dst ptrs
9677        tst.b           SRC_EX(%a1)             # is src negative? (a1 now points at src)
9678        bmi             t_unfl                  # yes; underflow
9679        bra             t_ovfl_sc               # no; overflow
9680
9681#
9682# The source input is below 1, so we check for denormalized numbers
9683# and set unfl.
9684#
9685src_small:
9686        tst.b           DST_HI(%a1)             # is dst denormalized?
9687        bpl.b           ssmall_done             # yes
9688
9689        mov.l           (%sp)+,%d0
9690        fmov.l          %d0,%fpcr               # no; load control bits
9691        mov.b           &FMOV_OP,%d1            # last inst is MOVE
9692        fmov.x          DST(%a1),%fp0           # simply return dest
9693        bra             t_catch2
9694ssmall_done:
9695        mov.l           (%sp)+,%d0              # load control bits into d0
9696        mov.l           %a1,%a0                 # pass ptr to dst
9697        bra             t_resdnrm
9698
9699#########################################################################
9700# smod(): computes the fp MOD of the input values X,Y.                  #
9701# srem(): computes the fp (IEEE) REM of the input values X,Y.           #
9702#                                                                       #
9703# INPUT *************************************************************** #
9704#       a0 = pointer to extended precision input Y (source)             #
9705#       a1 = pointer to extended precision input X (destination)        #
9706#       d0 = round precision,mode                                       #
9707#                                                                       #
9708#       The input operands X and Y can be either normalized or          #
9709#       denormalized.                                                   #
9710#                                                                       #
9711# OUTPUT ************************************************************** #
9712#      fp0 = FREM(X,Y) or FMOD(X,Y)                                     #
9713#                                                                       #
9714# ALGORITHM *********************************************************** #
9715#                                                                       #
9716#       Step 1.  Save and strip signs of X and Y: signX := sign(X),     #
9717#                signY := sign(Y), X := |X|, Y := |Y|,                  #
9718#                signQ := signX EOR signY. Record whether MOD or REM    #
9719#                is requested.                                          #
9720#                                                                       #
9721#       Step 2.  Set L := expo(X)-expo(Y), k := 0, Q := 0.              #
9722#                If (L < 0) then                                        #
9723#                   R := X, go to Step 4.                               #
9724#                else                                                   #
9725#                   R := 2^(-L)X, j := L.                               #
9726#                endif                                                  #
9727#                                                                       #
9728#       Step 3.  Perform MOD(X,Y)                                       #
9729#            3.1 If R = Y, go to Step 9.                                #
9730#            3.2 If R > Y, then { R := R - Y, Q := Q + 1}               #
9731#            3.3 If j = 0, go to Step 4.                                #
9732#            3.4 k := k + 1, j := j - 1, Q := 2Q, R := 2R. Go to        #
9733#                Step 3.1.                                              #
9734#                                                                       #
9735#       Step 4.  At this point, R = X - QY = MOD(X,Y). Set              #
9736#                Last_Subtract := false (used in Step 7 below). If      #
9737#                MOD is requested, go to Step 6.                        #
9738#                                                                       #
9739#       Step 5.  R = MOD(X,Y), but REM(X,Y) is requested.               #
9740#            5.1 If R < Y/2, then R = MOD(X,Y) = REM(X,Y). Go to        #
9741#                Step 6.                                                #
9742#            5.2 If R > Y/2, then { set Last_Subtract := true,          #
9743#                Q := Q + 1, Y := signY*Y }. Go to Step 6.              #
9744#            5.3 This is the tricky case of R = Y/2. If Q is odd,       #
9745#                then { Q := Q + 1, signX := -signX }.                  #
9746#                                                                       #
9747#       Step 6.  R := signX*R.                                          #
9748#                                                                       #
9749#       Step 7.  If Last_Subtract = true, R := R - Y.                   #
9750#                                                                       #
9751#       Step 8.  Return signQ, last 7 bits of Q, and R as required.     #
9752#                                                                       #
9753#       Step 9.  At this point, R = 2^(-j)*X - Q Y = Y. Thus,           #
9754#                X = 2^(j)*(Q+1)Y. set Q := 2^(j)*(Q+1),                #
9755#                R := 0. Return signQ, last 7 bits of Q, and R.         #
9756#                                                                       #
9757#########################################################################
9758
# Scratch-area layout used by smod()/srem(). All names are offsets from
# a6 that alias the shared L_SCR2/L_SCR3/FP_SCR0/FP_SCR1 locations.
9759        set             Mod_Flag,L_SCR3         # byte: cleared by smod, set to 1 by srem
9760        set             Sc_Flag,L_SCR3+1        # byte following Mod_Flag
9761
9762        set             SignY,L_SCR2            # word: sign/exponent word read via a0
9763        set             SignX,L_SCR2+2          # word: sign/exponent word read via a1
9764        set             SignQ,L_SCR3+2          # word: sign(X) eor sign(Y) in bit 15
9765
9766        set             Y,FP_SCR0               # Y aliases FP_SCR0
9767        set             Y_Hi,Y+4                # longword at offset 4
9768        set             Y_Lo,Y+8                # longword at offset 8
9769
9770        set             R,FP_SCR1               # R aliases FP_SCR1
9771        set             R_Hi,R+4                # longword at offset 4
9772        set             R_Lo,R+8                # longword at offset 8
9773
# Four longwords: exponent word $0001, hi mantissa $80000000, lo mantissa
# zero, plus one trailing zero longword. Not referenced by the part of
# the routine that follows directly below.
9774Scale:
9775        long            0x00010000,0x80000000,0x00000000,0x00000000
9776
9777        global          smod
9778smod:
9779        clr.b           FPSR_QBYTE(%a6)
9780        mov.l           %d0,-(%sp)              # save ctrl bits
9781        clr.b           Mod_Flag(%a6)
9782        bra.b           Mod_Rem
9783
9784        global          srem
9785srem:
9786        clr.b           FPSR_QBYTE(%a6)
9787        mov.l           %d0,-(%sp)              # save ctrl bits
9788        mov.b           &0x1,Mod_Flag(%a6)
9789
9790Mod_Rem:
9791#..Save sign of X and Y
9792        movm.l          &0x3f00,-(%sp)          # save data registers
9793        mov.w           SRC_EX(%a0),%d3
9794        mov.w           %d3,SignY(%a6)
9795        and.l           &0x00007FFF,%d3         # Y := |Y|
9796
9797#
9798        mov.l           SRC_HI(%a0),%d4
9799        mov.l           SRC_LO(%a0),%d5         # (D3,D4,D5) is |Y|
9800
9801        tst.l           %d3
9802        bne.b           Y_Normal
9803
9804        mov.l           &0x00003FFE,%d3         # $3FFD + 1
9805        tst.l           %d4
9806        bne.b           HiY_not0
9807
9808HiY_0:
9809        mov.l           %d5,%d4
9810        clr.l           %d5
9811        sub.l           &32,%d3
9812        clr.l           %d6
9813        bfffo           %d4{&0:&32},%d6
9814        lsl.l           %d6,%d4
9815        sub.l           %d6,%d3                 # (D3,D4,D5) is normalized
9816#                                               ...with bias $7FFD
9817        bra.b           Chk_X
9818
9819HiY_not0:
9820        clr.l           %d6
9821        bfffo           %d4{&0:&32},%d6
9822        sub.l           %d6,%d3
9823        lsl.l           %d6,%d4
9824        mov.l           %d5,%d7                 # a copy of D5
9825        lsl.l           %d6,%d5
9826        neg.l           %d6
9827        add.l           &32,%d6
9828        lsr.l           %d6,%d7
9829        or.l            %d7,%d4                 # (D3,D4,D5) normalized
9830#                                       ...with bias $7FFD
9831        bra.b           Chk_X
9832
9833Y_Normal:
9834        add.l           &0x00003FFE,%d3         # (D3,D4,D5) normalized
9835#                                       ...with bias $7FFD
9836
9837Chk_X:
9838        mov.w           DST_EX(%a1),%d0
9839        mov.w           %d0,SignX(%a6)
9840        mov.w           SignY(%a6),%d1
9841        eor.l           %d0,%d1
9842        and.l           &0x00008000,%d1
9843        mov.w           %d1,SignQ(%a6)          # sign(Q) obtained
9844        and.l           &0x00007FFF,%d0
9845        mov.l           DST_HI(%a1),%d1
9846        mov.l           DST_LO(%a1),%d2         # (D0,D1,D2) is |X|
9847        tst.l           %d0
9848        bne.b           X_Normal
9849        mov.l           &0x00003FFE,%d0
9850        tst.l           %d1
9851        bne.b           HiX_not0
9852
9853HiX_0:
9854        mov.l           %d2,%d1
9855        clr.l           %d2
9856        sub.l           &32,%d0
9857        clr.l           %d6
9858        bfffo           %d1{&0:&32},%d6
9859        lsl.l           %d6,%d1
9860        sub.l           %d6,%d0                 # (D0,D1,D2) is normalized
9861#                                       ...with bias $7FFD
9862        bra.b           Init
9863
9864HiX_not0:
9865        clr.l           %d6
9866        bfffo           %d1{&0:&32},%d6
9867        sub.l           %d6,%d0
9868        lsl.l           %d6,%d1
9869        mov.l           %d2,%d7                 # a copy of D2
9870        lsl.l           %d6,%d2
9871        neg.l           %d6
9872        add.l           &32,%d6
9873        lsr.l           %d6,%d7
9874        or.l            %d7,%d1                 # (D0,D1,D2) normalized
9875#                                       ...with bias $7FFD
9876        bra.b           Init
9877
9878X_Normal:
9879        add.l           &0x00003FFE,%d0         # (D0,D1,D2) normalized
9880#                                       ...with bias $7FFD
9881
9882Init:
9883#
9884        mov.l           %d3,L_SCR1(%a6)         # save biased exp(Y)
9885        mov.l           %d0,-(%sp)              # save biased exp(X)
9886        sub.l           %d3,%d0                 # L := expo(X)-expo(Y)
9887
9888        clr.l           %d6                     # D6 := carry <- 0
9889        clr.l           %d3                     # D3 is Q
9890        mov.l           &0,%a1                  # A1 is k; j+k=L, Q=0
9891
9892#..(Carry,D1,D2) is R
9893        tst.l           %d0
9894        bge.b           Mod_Loop_pre
9895
9896#..expo(X) < expo(Y). Thus X = mod(X,Y)
9897#
9898        mov.l           (%sp)+,%d0              # restore d0
9899        bra.w           Get_Mod
9900
9901Mod_Loop_pre:
9902        addq.l          &0x4,%sp                # erase exp(X)
9903#..At this point  R = 2^(-L)X; Q = 0; k = 0; and  k+j = L
9904Mod_Loop:
9905        tst.l           %d6                     # test carry bit
9906        bgt.b           R_GT_Y                  # carry set (d6 = 0xFF from scs)
9907
9908#..At this point carry = 0, R = (D1,D2), Y = (D4,D5)
9909        cmp.l           %d1,%d4                 # compare hi(R) and hi(Y)
9910        bne.b           R_NE_Y                  # differ; borrow tells which is larger
9911        cmp.l           %d2,%d5                 # compare lo(R) and lo(Y)
9912        bne.b           R_NE_Y                  # differ; borrow tells which is larger
9913
9914#..At this point, R = Y
9915        bra.w           Rem_is_0                # exact: remainder is zero
9916
9917R_NE_Y:
9918#..use the borrow of the previous compare
9919        bcs.b           R_LT_Y                  # borrow is set iff R < Y
9920
9921R_GT_Y:
9922#..If Carry is set, then Y < (Carry,D1,D2) < 2Y. Otherwise, Carry = 0
9923#..and Y < (D1,D2) < 2Y. Either way, perform R - Y
9924        sub.l           %d5,%d2                 # lo(R) - lo(Y)
9925        subx.l          %d4,%d1                 # hi(R) - hi(Y)
9926        clr.l           %d6                     # clear carry
9927        addq.l          &1,%d3                  # Q := Q + 1
9928
9929R_LT_Y:
9930#..At this point, Carry=0, R < Y. R = 2^(k-L)X - QY; k+j = L; j >= 0.
9931        tst.l           %d0                     # see if j = 0.
9932        beq.b           PostLoop                # yes; all L steps are done
9933
9934        add.l           %d3,%d3                 # Q := 2Q
9935        add.l           %d2,%d2                 # lo(R) = 2lo(R)
9936        roxl.l          &1,%d1                  # hi(R) = 2hi(R) + carry
9937        scs             %d6                     # set Carry if 2(R) overflows
9938        addq.l          &1,%a1                  # k := k+1
9939        subq.l          &1,%d0                  # j := j - 1
9940#..At this point, R=(Carry,D1,D2) = 2^(k-L)X - QY, j+k=L, j >= 0, R < 2Y.
9941
9942        bra.b           Mod_Loop                # next shift-and-subtract step
9943
9944PostLoop:
9945#..k = L, j = 0, Carry = 0, R = (D1,D2) = X - QY, R < Y.
9946
9947#..normalize R.
9948        mov.l           L_SCR1(%a6),%d0         # new biased expo of R
9949        tst.l           %d1                     # is hi(R) zero?
9950        bne.b           HiR_not0                # no; shift by less than 32
9951
9952HiR_0:
9953        mov.l           %d2,%d1                 # hi(R) := lo(R): shift left 32
9954        clr.l           %d2                     # lo(R) := 0
9955        sub.l           &32,%d0                 # account for the 32-bit shift
9956        clr.l           %d6
9957        bfffo           %d1{&0:&32},%d6         # d6 := offset of first set bit
9958        lsl.l           %d6,%d1                 # left-justify hi(R)
9959        sub.l           %d6,%d0                 # (D0,D1,D2) is normalized
9960#                                       ...with bias $7FFD
9961        bra.b           Get_Mod
9962
9963HiR_not0:
9964        clr.l           %d6
9965        bfffo           %d1{&0:&32},%d6         # d6 := offset of first set bit
9966        bmi.b           Get_Mod                 # already normalized
9967        sub.l           %d6,%d0                 # exponent -= shift count
9968        lsl.l           %d6,%d1                 # shift hi(R) up
9969        mov.l           %d2,%d7                 # a copy of D2
9970        lsl.l           %d6,%d2                 # shift lo(R) up
9971        neg.l           %d6
9972        add.l           &32,%d6                 # d6 := 32 - shift count
9973        lsr.l           %d6,%d7                 # bits of lo(R) that move into hi(R)
9974        or.l            %d7,%d1                 # (D0,D1,D2) normalized
9975
9976#..Load R into fp0 and stage Y in memory for ModOrRem/Last_Sub
9977Get_Mod:
9978        cmp.l           %d0,&0x000041FE         # is expo(R) >= $41FE?
9979        bge.b           No_Scale                # yes; rebase both exponents now
9980Do_Scale:
9981        mov.w           %d0,R(%a6)              # store expo(R) unadjusted
9982        mov.l           %d1,R_Hi(%a6)           # store hi(R)
9983        mov.l           %d2,R_Lo(%a6)           # store lo(R)
9984        mov.l           L_SCR1(%a6),%d6         # biased expo(Y) saved at Init
9985        mov.w           %d6,Y(%a6)              # store expo(Y) unadjusted
9986        mov.l           %d4,Y_Hi(%a6)           # store hi(Y)
9987        mov.l           %d5,Y_Lo(%a6)           # store lo(Y)
9988        fmov.x          R(%a6),%fp0             # no exception
9989        mov.b           &1,Sc_Flag(%a6)         # Restore must multiply by Scale
9990        bra.b           ModOrRem
9991No_Scale:
9992        mov.l           %d1,R_Hi(%a6)           # store hi(R)
9993        mov.l           %d2,R_Lo(%a6)           # store lo(R)
9994        sub.l           &0x3FFE,%d0             # remove the extra $3FFE from expo(R)
9995        mov.w           %d0,R(%a6)              # store rebased expo(R)
9996        mov.l           L_SCR1(%a6),%d6         # biased expo(Y) saved at Init
9997        sub.l           &0x3FFE,%d6             # remove the extra $3FFE from expo(Y)
9998        mov.l           %d6,L_SCR1(%a6)         # keep rebased expo(Y) for ModOrRem
9999        fmov.x          R(%a6),%fp0             # fp0 := R
10000        mov.w           %d6,Y(%a6)              # store rebased expo(Y)
10001        mov.l           %d4,Y_Hi(%a6)           # store hi(Y)
10002        mov.l           %d5,Y_Lo(%a6)           # store lo(Y)
10003        clr.b           Sc_Flag(%a6)            # no scaling needed at exit
10004
10005#..Mod_Flag = 0: R is final. Else compare R with Y/2 (presumably FREM; flag set outside this view)
10006ModOrRem:
10007        tst.b           Mod_Flag(%a6)           # extra rounding step requested?
10008        beq.b           Fix_Sign                # no; keep R as is
10009
10010        mov.l           L_SCR1(%a6),%d6         # new biased expo(Y)
10011        subq.l          &1,%d6                  # biased expo(Y/2)
10012        cmp.l           %d0,%d6                 # compare expo(R) with expo(Y/2)
10013        blt.b           Fix_Sign                # R < Y/2; keep R
10014        bgt.b           Last_Sub                # R > Y/2; subtract Y once more
10015
10016        cmp.l           %d1,%d4                 # same exponent: compare hi mantissas
10017        bne.b           Not_EQ
10018        cmp.l           %d2,%d5                 # ...then the lo mantissas
10019        bne.b           Not_EQ
10020        bra.w           Tie_Case                # R = Y/2 exactly
10021
10022Not_EQ:
10023        bcs.b           Fix_Sign                # borrow set: R < Y/2; keep R
10024
10025Last_Sub:
10026#..R > Y/2: take one more Y out of R and count it in Q
10027        fsub.x          Y(%a6),%fp0             # no exceptions
10028        addq.l          &1,%d3                  # Q := Q + 1
10029
10030#..Apply the sign of X to the result, then build the quotient byte
10031Fix_Sign:
10032#..Get sign of X
10033        mov.w           SignX(%a6),%d6          # flags reflect the SignX word
10034        bge.b           Get_Q                   # not negative; leave fp0 alone
10035        fneg.x          %fp0                    # negative; negate the result
10036
10037#..Get Q
10038#..Quotient byte = sign(Q) in bit 7 plus the low 7 bits of Q
10039Get_Q:
10040        clr.l           %d6
10041        mov.w           SignQ(%a6),%d6          # D6 is sign(Q)
10042        mov.l           &8,%d7
10043        lsr.l           %d7,%d6                 # shift right 8: bit 15 lands in bit 7
10044        and.l           &0x0000007F,%d3         # 7 bits of Q
10045        or.l            %d6,%d3                 # sign and bits of Q
10046#       swap            %d3
10047#       fmov.l          %fpsr,%d6
10048#       and.l           &0xFF00FFFF,%d6
10049#       or.l            %d3,%d6
10050#       fmov.l          %d6,%fpsr               # put Q in fpsr
10051        mov.b           %d3,FPSR_QBYTE(%a6)     # put Q in fpsr
10052
10053#..Pop saved registers, reload the FPCR and produce the final result in fp0
10054Restore:
10055        movm.l          (%sp)+,&0xfc            #  {%d2-%d7}
10056        mov.l           (%sp)+,%d0              # pop the longword pushed below d2-d7
10057        fmov.l          %d0,%fpcr               # ...and load it into the FPCR
10058        tst.b           Sc_Flag(%a6)            # is a final scaling multiply pending?
10059        beq.b           Finish                  # no
10060        mov.b           &FMUL_OP,%d1            # last inst is MUL
10061        fmul.x          Scale(%pc),%fp0         # may cause underflow
10062        bra             t_catch2
10063# the '040 package did this apparently to see if the dst operand for the
10064# preceding fmul was a denorm. but, it better not have been since the
10065# algorithm just got done playing with fp0 and expected no exceptions
10066# as a result. trust me...
10067#       bra             t_avoid_unsupp          # check for denorm as a
10068#                                               ;result of the scaling
10069
10070Finish:
10071        mov.b           &FMOV_OP,%d1            # last inst is MOVE
10072        fmov.x          %fp0,%fp0               # capture exceptions & round
10073        bra             t_catch2
10074
10075Rem_is_0:
10076#..R = 2^(-j)X - Q Y = Y, thus R = 0 and quotient = 2^j (Q+1)
10077        addq.l          &1,%d3                  # Q := Q + 1
10078        cmp.l           %d0,&8                  # D0 is j
10079        bge.b           Q_Big                   # j >= 8: no low-order bits of Q survive
10080
10081        lsl.l           %d0,%d3                 # Q := 2^j (Q+1)
10082        bra.b           Set_R_0
10083
10084Q_Big:
10085        clr.l           %d3                     # low bits of the quotient are all 0
10086
10087Set_R_0:
10088        fmov.s          &0x00000000,%fp0        # remainder is +0 (Fix_Sign may negate)
10089        clr.b           Sc_Flag(%a6)            # zero needs no scaling at exit
10090        bra.w           Fix_Sign
10091
10092Tie_Case:
10093#..Check parity of Q
10094        mov.l           %d3,%d6                 # work on a copy of Q
10095        and.l           &0x00000001,%d6         # isolate the low bit
10096        tst.l           %d6
10097        beq.w           Fix_Sign                # Q is even
10098
10099#..Q is odd, Q := Q + 1, signX := -signX
10100        addq.l          &1,%d3                  # Q := Q + 1
10101        mov.w           SignX(%a6),%d6
10102        eor.l           &0x00008000,%d6         # flip bit 15 of the SignX word
10103        mov.w           %d6,SignX(%a6)          # Fix_Sign will use the flipped sign
10104        bra.w           Fix_Sign
10105
10106qnan:   long            0x7fff0000, 0xffffffff, 0xffffffff # default NAN, 3 longwords; loaded by t_operr and ssincosi
10107
10108#########################################################################
10109# XDEF **************************************************************** #
10110#       t_dz(): Handle DZ exception during transcendental emulation.    #
10111#               Sets N bit according to sign of source operand.         #
10112#       t_dz2(): Handle DZ exception during transcendental emulation.   #
10113#                Sets N bit always.                                     #
10114#                                                                       #
10115# XREF **************************************************************** #
10116#       None                                                            #
10117#                                                                       #
10118# INPUT *************************************************************** #
10119#       a0 = pointer to source operand                                  #
10120#                                                                       #
10121# OUTPUT ************************************************************** #
10122#       fp0 = default result                                            #
10123#                                                                       #
10124# ALGORITHM *********************************************************** #
10125#       - Store properly signed INF into fp0.                           #
10126#       - Set FPSR exception status dz bit, ccode inf bit, and          #
10127#         accrued dz bit.                                               #
10128#                                                                       #
10129#########################################################################
10130
10131        global          t_dz
10132t_dz:
10133        tst.b           SRC_EX(%a0)             # is src negative?
10134        bmi.b           t_dz2                   # yes; return -INF
10135
10136dz_pinf:
10137        fmov.s          &0x7f800000,%fp0        # return +INF in fp0
10138        ori.l           &dzinf_mask,USER_FPSR(%a6) # set I/DZ/ADZ
10139        rts
10140
10141        global          t_dz2
10142t_dz2:
10143        fmov.s          &0xff800000,%fp0        # return -INF in fp0
10144        ori.l           &dzinf_mask+neg_mask,USER_FPSR(%a6) # set N/I/DZ/ADZ
10145        rts
10146
10147#################################################################
10148# OPERR exception:                                              #
10149#       - set FPSR exception status operr bit, condition code   #
10150#         nan bit; Store default NAN into fp0                   #
10151#################################################################
10152        global          t_operr
10153t_operr:
10154        ori.l           &opnan_mask,USER_FPSR(%a6) # set NaN/OPERR/AIOP
10155        fmovm.x         qnan(%pc),&0x80         # return default NAN (qnan constant) in fp0
10156        rts
10157
10158#################################################################
10159# Extended DENORM:                                              #
10160#       - For all functions that have a denormalized input and  #
10161#         that f(x)=x, this is the entry point.                 #
10162#       - we only return the EXOP here if either underflow or   #
10163#         inexact is enabled.                                   #
10164#################################################################
10165
10166# Entry point for scale w/ extended denorm. The function does
10167# NOT set INEX2/AUNFL/AINEX.
10168        global          t_resdnrm
10169t_resdnrm:
10170        ori.l           &unfl_mask,USER_FPSR(%a6) # set UNFL
10171        bra.b           xdnrm_con               # skip the unfinx_mask update below
10172
10173        global          t_extdnrm
10174t_extdnrm:
10175        ori.l           &unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX
10176
10177xdnrm_con:
10178        mov.l           %a0,%a1                 # make copy of src ptr
10179        mov.l           %d0,%d1                 # make copy of rnd prec,mode
10180        andi.b          &0xc0,%d1               # extended precision?
10181        bne.b           xdnrm_sd                # no
10182
10183# result precision is extended.
10184        tst.b           LOCAL_EX(%a0)           # is denorm negative?
10185        bpl.b           xdnrm_exit              # no
10186
10187        bset            &neg_bit,FPSR_CC(%a6)   # yes; set 'N' ccode bit
10188        bra.b           xdnrm_exit
10189
10190# result precision is single or double
10191xdnrm_sd:
10192        mov.l           %a1,-(%sp)              # save src ptr copy around unf_sub
10193        tst.b           LOCAL_EX(%a0)           # is denorm pos or neg?
10194        smi.b           %d1                     # set d1 accordingly (sign flag for unf_sub)
10195        bsr.l           unf_sub                 # a0 := default result; NOTE(review): unf_sub ORs in unfinx_mask even on the t_resdnrm path
10196        mov.l           (%sp)+,%a1              # a1 = original src ptr again
10197xdnrm_exit:
10198        fmovm.x         (%a0),&0x80             # return default result in fp0
10199
10200        mov.b           FPCR_ENABLE(%a6),%d0
10201        andi.b          &0x0a,%d0               # is UNFL or INEX enabled?
10202        bne.b           xdnrm_ena               # yes
10203        rts
10204
10205################
10206# unfl enabled #
10207################
10208# we have a DENORM that needs to be converted into an EXOP.
10209# so, normalize the mantissa, add 0x6000 to the new exponent,
10210# and return the result in fp1.
10211xdnrm_ena:
10212        mov.w           LOCAL_EX(%a1),FP_SCR0_EX(%a6)   # copy the original src (a1) to FP_SCR0
10213        mov.l           LOCAL_HI(%a1),FP_SCR0_HI(%a6)
10214        mov.l           LOCAL_LO(%a1),FP_SCR0_LO(%a6)
10215
10216        lea             FP_SCR0(%a6),%a0        # pass ptr to the copy
10217        bsr.l           norm                    # normalize mantissa
10218        addi.l          &0x6000,%d0             # add extra bias
10219        andi.w          &0x8000,FP_SCR0_EX(%a6) # keep old sign
10220        or.w            %d0,FP_SCR0_EX(%a6)     # insert new exponent
10221
10222        fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
10223        rts
10224
10225#################################################################
10226# UNFL exception:                                               #
10227#       - This routine is for cases where even an EXOP isn't    #
10228#         large enough to hold the range of this result.        #
10229#         In such a case, the EXOP equals zero.                 #
10230#       - Return the default result to the proper precision     #
10231#         with the sign of this result being the same as that   #
10232#         of the src operand.                                   #
10233#       - t_unfl2() is provided to force the result sign to     #
10234#         positive which is the desired result for fetox().     #
10235#################################################################
10236        global          t_unfl
10237t_unfl:
10238        ori.l           &unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX
10239
10240        tst.b           (%a0)                   # is result pos or neg?
10241        smi.b           %d1                     # set d1 accordingly
10242        bsr.l           unf_sub                 # calc default unfl result
10243        fmovm.x         (%a0),&0x80             # return default result in fp0
10244
10245        fmov.s          &0x00000000,%fp1        # return EXOP in fp1
10246        rts
10247
10248# t_unfl2 ALWAYS tells unf_sub to create a positive result
10249        global          t_unfl2
10250t_unfl2:
10251        ori.l           &unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX
10252
10253        sf.b            %d1                     # clear d1: sign flag says positive
10254        bsr.l           unf_sub                 # calc default unfl result
10255        fmovm.x         (%a0),&0x80             # return default result in fp0
10256
10257        fmov.s          &0x0000000,%fp1         # return EXOP in fp1
10258        rts
10259
10260#################################################################
10261# OVFL exception:                                               #
10262#       - This routine is for cases where even an EXOP isn't    #
10263#         large enough to hold the range of this result.        #
10264#       - Return the default result to the proper precision     #
10265#         with the sign of this result being the same as that   #
10266#         of the src operand.                                   #
10267#       - t_ovfl2() is provided to force the result sign to     #
10268#         positive which is the desired result for fcosh().     #
10269#       - t_ovfl_sc() is provided for scale() which only sets   #
10270#         the inexact bits if the number is inexact for the     #
10271#         precision indicated.                                  #
10272#################################################################
10273
10274        global          t_ovfl_sc
10275t_ovfl_sc:
10276        ori.l           &ovfl_inx_mask,USER_FPSR(%a6) # set OVFL/AOVFL/AINEX
10277
10278        mov.b           %d0,%d1                 # fetch rnd mode/prec
10279        andi.b          &0xc0,%d1               # extract rnd prec
10280        beq.b           ovfl_work               # prec is extended
10281
10282        tst.b           LOCAL_HI(%a0)           # is dst a DENORM?
10283        bmi.b           ovfl_sc_norm            # no
10284
10285# dst op is a DENORM. we have to normalize the mantissa to see if the
10286# result would be inexact for the given precision. make a copy of the
10287# dst so we don't screw up the version passed to us.
10288        mov.w           LOCAL_EX(%a0),FP_SCR0_EX(%a6)
10289        mov.l           LOCAL_HI(%a0),FP_SCR0_HI(%a6)
10290        mov.l           LOCAL_LO(%a0),FP_SCR0_LO(%a6)
10291        lea             FP_SCR0(%a6),%a0        # pass ptr to FP_SCR0
10292        movm.l          &0xc080,-(%sp)          # save d0-d1/a0
10293        bsr.l           norm                    # normalize mantissa
10294        movm.l          (%sp)+,&0x0103          # restore d0-d1/a0
10295
10296ovfl_sc_norm:
10297        cmpi.b          %d1,&0x40               # is prec sgl? (0x40 = sgl, 0x80 = dbl)
10298        bne.b           ovfl_sc_dbl             # no; dbl
10299ovfl_sc_sgl:
10300        tst.l           LOCAL_LO(%a0)           # is lo lw of sgl set?
10301        bne.b           ovfl_sc_inx             # yes
10302        tst.b           3+LOCAL_HI(%a0)         # is lo byte of hi lw set?
10303        bne.b           ovfl_sc_inx             # yes
10304        bra.b           ovfl_work               # don't set INEX2
10305ovfl_sc_dbl:
10306        mov.l           LOCAL_LO(%a0),%d1       # are any of lo 11 bits of
10307        andi.l          &0x7ff,%d1              # dbl mantissa set?
10308        beq.b           ovfl_work               # no; don't set INEX2
10309ovfl_sc_inx:
10310        ori.l           &inex2_mask,USER_FPSR(%a6) # set INEX2
10311        bra.b           ovfl_work               # continue
10312
10313        global          t_ovfl
10314t_ovfl:
10315        ori.l           &ovfinx_mask,USER_FPSR(%a6) # set OVFL/INEX2/AOVFL/AINEX
10316
10317ovfl_work:
10318        tst.b           LOCAL_EX(%a0)           # what is the sign?
10319        smi.b           %d1                     # set d1 accordingly
10320        bsr.l           ovf_res                 # calc default ovfl result
10321        mov.b           %d0,FPSR_CC(%a6)        # insert new ccodes
10322        fmovm.x         (%a0),&0x80             # return default result in fp0
10323
10324        fmov.s          &0x00000000,%fp1        # return EXOP in fp1
10325        rts
10326
10327# t_ovfl2 ALWAYS tells ovf_res to create a positive result
10328        global          t_ovfl2
10329t_ovfl2:
10330        ori.l           &ovfinx_mask,USER_FPSR(%a6) # set OVFL/INEX2/AOVFL/AINEX
10331
10332        sf.b            %d1                     # clear sign flag for positive
10333        bsr.l           ovf_res                 # calc default ovfl result
10334        mov.b           %d0,FPSR_CC(%a6)        # insert new ccodes
10335        fmovm.x         (%a0),&0x80             # return default result in fp0
10336
10337        fmov.s          &0x00000000,%fp1        # return EXOP in fp1
10338        rts
10339
10340#################################################################
10341# t_catch():                                                    #
10342#       - the last operation of a transcendental emulation      #
10343#         routine may have caused an underflow or overflow.     #
10344#         we find out if this occurred by doing an fsave and    #
10345#         checking the exception bit. if one did occur, then we #
10346#         jump to fgen_except() which creates the default       #
10347#         result and EXOP for us.                               #
10348#################################################################
10349        global          t_catch
10350t_catch:
10351
10352        fsave           -(%sp)                  # push the FPU state frame
10353        tst.b           0x2(%sp)                # exception bit set in the frame?
10354        bmi.b           catch                   # yes; build default result and EXOP
10355        add.l           &0xc,%sp                # no; drop the frame, fall into t_inx2
10356
10357#################################################################
10358# INEX2 exception:                                              #
10359#       - The inex2 and ainex bits are set.                     #
10360#################################################################
10361        global          t_inx2
10362t_inx2:
10363        fblt.w          t_minx2                 # FPU ccodes say negative
10364        fbeq.w          inx2_zero               # FPU ccodes say zero
10365
10366        global          t_pinx2
10367t_pinx2:
10368        ori.w           &inx2a_mask,2+USER_FPSR(%a6) # set INEX2/AINEX
10369        rts
10370
10371        global          t_minx2
10372t_minx2:
10373        ori.l           &inx2a_mask+neg_mask,USER_FPSR(%a6) # set N/INEX2/AINEX
10374        rts
10375
10376inx2_zero:
10377        mov.b           &z_bmask,FPSR_CC(%a6)   # set 'Z' ccode bit
10378        ori.w           &inx2a_mask,2+USER_FPSR(%a6) # set INEX2/AINEX
10379        rts
10380
10381# an underflow or overflow exception occurred.
10382# we must set INEX/AINEX since the fmul/fdiv/fmov emulation may not!
10383catch:
10384        ori.w           &inx2a_mask,FPSR_EXCEPT(%a6) # set INEX2/AINEX
10385catch2:
10386        bsr.l           fgen_except             # create default result and EXOP
10387        add.l           &0xc,%sp                # drop the fsave frame
10388        rts
10389
10390        global          t_catch2
10391t_catch2:
10392
10393        fsave           -(%sp)                  # push the FPU state frame
10394
10395        tst.b           0x2(%sp)                # exception bit set in the frame?
10396        bmi.b           catch2                  # yes; like catch but without forcing INEX2/AINEX
10397        add.l           &0xc,%sp                # no; drop the frame
10398
10399        fmov.l          %fpsr,%d0               # read the hardware FPSR
10400        or.l            %d0,USER_FPSR(%a6)      # ...and merge it into USER_FPSR
10401
10402        rts
10403
10404#########################################################################
10405
10406#########################################################################
10407# unf_sub(): underflow default result calculation for transcendentals   #
10408#                                                                       #
10409# INPUT:                                                                #
10410#       d0   : rnd mode,precision                                       #
10411#       d1.b : sign bit of result ('11111111 = (-) ; '00000000 = (+))   #
10412# OUTPUT:                                                               #
10413#       a0   : points to result (in instruction memory)                 #
10414#########################################################################
10415unf_sub:
10416        ori.l           &unfinx_mask,USER_FPSR(%a6)
10417
10418        andi.w          &0x10,%d1               # keep sign bit in 4th spot
10419
10420        lsr.b           &0x4,%d0                # shift rnd prec,mode to lo bits
10421        andi.b          &0xf,%d0                # strip hi rnd mode bit
10422        or.b            %d1,%d0                 # concat {sgn,mode,prec}
10423
10424        mov.l           %d0,%d1                 # make a copy
10425        lsl.b           &0x1,%d1                # mult index 2 by 2 (x8 scale below => 16-byte entries)
10426
10427        mov.b           (tbl_unf_cc.b,%pc,%d0.w*1),FPSR_CC(%a6) # insert ccode bits
10428        lea             (tbl_unf_result.b,%pc,%d1.w*8),%a0 # grab result ptr
10429        rts
10430
10431tbl_unf_cc:
10432        byte            0x4, 0x4, 0x4, 0x0      # (+) ext: 0x4 with ZERO, 0x0 with MIN
10433        byte            0x4, 0x4, 0x4, 0x0      # (+) sgl
10434        byte            0x4, 0x4, 0x4, 0x0      # (+) dbl
10435        byte            0x0, 0x0, 0x0, 0x0      # (+) prec field = 3: filler
10436        byte            0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4 # (-) ext: 0x8+0x4 with ZERO, 0x8 with MIN
10437        byte            0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4 # (-) sgl
10438        byte            0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4 # (-) dbl
10439
10440tbl_unf_result:
10441        long            0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10442        long            0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10443        long            0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10444        long            0x00000000, 0x00000000, 0x00000001, 0x0 # MIN; ext
10445
10446        long            0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10447        long            0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10448        long            0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10449        long            0x3f810000, 0x00000100, 0x00000000, 0x0 # MIN; sgl
10450
10451        long            0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10452        long            0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10453        long            0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10454        long            0x3c010000, 0x00000000, 0x00000800, 0x0 # MIN; dbl
10455
10456        long            0x0,0x0,0x0,0x0         # filler for prec field = 3
10457        long            0x0,0x0,0x0,0x0         # filler
10458        long            0x0,0x0,0x0,0x0         # filler
10459        long            0x0,0x0,0x0,0x0         # filler
10460
10461        long            0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10462        long            0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10463        long            0x80000000, 0x00000000, 0x00000001, 0x0 # MIN; ext
10464        long            0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10465
10466        long            0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10467        long            0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10468        long            0xbf810000, 0x00000100, 0x00000000, 0x0 # MIN; sgl
10469        long            0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10470
10471        long            0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10472        long            0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10473        long            0xbc010000, 0x00000000, 0x00000800, 0x0 # MIN; dbl
10474        long            0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10475
10476############################################################
10477
10478#########################################################################
10479# src_zero(): Return signed zero according to sign of src operand.      #
10480#########################################################################
10481        global          src_zero
10482src_zero:
10483        tst.b           SRC_EX(%a0)             # get sign of src operand
10484        bmi.b           ld_mzero                # if neg, load neg zero
10485
10486#
10487# ld_pzero(): return a positive zero. src_zero falls through to here.
10488#
10489        global          ld_pzero
10490ld_pzero:
10491        fmov.s          &0x00000000,%fp0        # load +0
10492        mov.b           &z_bmask,FPSR_CC(%a6)   # set 'Z' ccode bit
10493        rts
10494
10495# ld_mzero(): return a negative zero.
10496        global          ld_mzero
10497ld_mzero:
10498        fmov.s          &0x80000000,%fp0        # load -0
10499        mov.b           &neg_bmask+z_bmask,FPSR_CC(%a6) # set 'N','Z' ccode bits
10500        rts
10501
10502#########################################################################
10503# dst_zero(): Return signed zero according to sign of dst operand.      #
10504#########################################################################
10505        global          dst_zero
10506dst_zero:
10507        tst.b           DST_EX(%a1)             # get sign of dst operand (a1 = dst ptr)
10508        bmi.b           ld_mzero                # if neg, load neg zero
10509        bra.b           ld_pzero                # load positive zero
10510
10511#########################################################################
10512# src_inf(): Return signed inf according to sign of src operand.        #
10513#########################################################################
10514        global          src_inf
10515src_inf:
10516        tst.b           SRC_EX(%a0)             # get sign of src operand
10517        bmi.b           ld_minf                 # if negative branch
10518
10519#
10520# ld_pinf(): return a positive infinity. src_inf falls through to here.
10521#
10522        global          ld_pinf
10523ld_pinf:
10524        fmov.s          &0x7f800000,%fp0        # load +INF
10525        mov.b           &inf_bmask,FPSR_CC(%a6) # set 'INF' ccode bit
10526        rts
10527
10528#
10529# ld_minf():return a negative infinity.
10530#
10531        global          ld_minf
10532ld_minf:
10533        fmov.s          &0xff800000,%fp0        # load -INF
10534        mov.b           &neg_bmask+inf_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
10535        rts
10536
10537#########################################################################
10538# dst_inf(): Return signed inf according to sign of dst operand.        #
10539#########################################################################
10540        global          dst_inf
10541dst_inf:
10542        tst.b           DST_EX(%a1)             # get sign of dst operand
10543        bmi.b           ld_minf                 # if negative branch
10544        bra.b           ld_pinf                 # else load positive infinity
10545
10546        global          szr_inf
10547#################################################################
10548# szr_inf(): Return +ZERO for a negative src operand or         #
10549#                   +INF for a positive src operand.            #
10550#            Routine used for fetox, ftwotox, and ftentox.      #
10551#################################################################
10552szr_inf:
10553        tst.b           SRC_EX(%a0)             # check sign of source
10554        bmi.b           ld_pzero                # negative; return +ZERO
10555        bra.b           ld_pinf                 # positive; return +INF
10556
10557#########################################################################
10558# sopr_inf(): Return +INF for a positive src operand or                 #
10559#             jump to operand error routine for a negative src operand. #
10560#             Routine used for flogn, flognp1, flog10, and flog2.       #
10561#########################################################################
10562        global          sopr_inf
10563sopr_inf:
10564        tst.b           SRC_EX(%a0)             # check sign of source
10565        bmi.w           t_operr                 # negative; operand error
10566        bra.b           ld_pinf                 # positive; return +INF
10567
10568#################################################################
10569# setoxm1i(): Return minus one for a negative src operand or    #
10570#             positive infinity for a positive src operand.     #
10571#             Routine used for fetoxm1.                         #
10572#################################################################
10573        global          setoxm1i
10574setoxm1i:
10575        tst.b           SRC_EX(%a0)             # check sign of source
10576        bmi.b           ld_mone                 # negative; return -1
10577        bra.b           ld_pinf                 # positive; return +INF
10578
10579#########################################################################
10580# src_one(): Return signed one according to sign of src operand.        #
10581#########################################################################
10582        global          src_one
10583src_one:
10584        tst.b           SRC_EX(%a0)             # check sign of source
10585        bmi.b           ld_mone                 # negative; return -1
10586
10587#
10588# ld_pone(): return positive one. src_one falls through to here.
10589#
10590        global          ld_pone
10591ld_pone:
10592        fmov.s          &0x3f800000,%fp0        # load +1
10593        clr.b           FPSR_CC(%a6)            # no ccode bits set
10594        rts
10595
10596#
10597# ld_mone(): return negative one.
10598#
10599        global          ld_mone
10600ld_mone:
10601        fmov.s          &0xbf800000,%fp0        # load -1
10602        mov.b           &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
10603        rts
10604
10605ppiby2: long            0x3fff0000, 0xc90fdaa2, 0x2168c235
10606mpiby2: long            0xbfff0000, 0xc90fdaa2, 0x2168c235
10607
10608#################################################################
10609# spi_2(): Return signed PI/2 according to sign of src operand. #
10610#################################################################
10611        global          spi_2
10612spi_2:
10613        tst.b           SRC_EX(%a0)             # check sign of source
10614        bmi.b           ld_mpi2
10615
10616#
10617# ld_ppi2(): return positive PI/2.
10618#
10619        global          ld_ppi2
10620ld_ppi2:
10621        fmov.l          %d0,%fpcr
10622        fmov.x          ppiby2(%pc),%fp0        # load +pi/2
10623        bra.w           t_pinx2                 # set INEX2
10624
10625#
10626# ld_mpi2(): return negative PI/2 in fp0 and finish through t_minx2.
10627#
10628        global          ld_mpi2
10629ld_mpi2:
10630        fmov.l          %d0,%fpcr               # FPCR = d0 (presumably the caller's rounding info - confirm)
10631        fmov.x          mpiby2(%pc),%fp0        # load -pi/2
10632        bra.w           t_minx2                 # set INEX2
10633
10634####################################################
10635# The following routines give support for fsincos. #
10636####################################################
10637
10638#
10639# ssincosz(): When the src operand is ZERO, store a one in the
10640#             cosine register and return a ZERO in fp0 w/ the same sign
10641#             as the src operand. Exits through sto_cos, whose rts returns.
10642#
10643        global          ssincosz
10644ssincosz:
10645        fmov.s          &0x3f800000,%fp1        # cosine result: +1 in fp1
10646        tst.b           SRC_EX(%a0)             # test sign
10647        bpl.b           sincoszp                # non-negative source: return +0
10648        fmov.s          &0x80000000,%fp0        # return sin result in fp0
10649        mov.b           &z_bmask+neg_bmask,FPSR_CC(%a6)  # ccodes: zero and negative
10650        bra.b           sto_cos                 # store cosine result
10651sincoszp:
10652        fmov.s          &0x00000000,%fp0        # return sin result in fp0
10653        mov.b           &z_bmask,FPSR_CC(%a6)   # ccodes: zero
10654        bra.b           sto_cos                 # store cosine result
10655
10656#
10657# ssincosi(): When the src operand is INF, store a QNAN in the cosine
10658#             register and jump to the operand error routine. The jump
10659#             is unconditional; the sign of the src operand is not tested.
10660#
10661        global          ssincosi
10662ssincosi:
10663        fmov.x          qnan(%pc),%fp1          # load NAN
10664        bsr.l           sto_cos                 # store cosine result
10665        bra.w           t_operr                 # sine result comes from the operand error routine
10666
10667#
10668# ssincosqnan(): When the src operand is a QNAN, store the QNAN in the cosine
10669#                register and branch to the src QNAN routine.
10670#
10671        global          ssincosqnan
10672ssincosqnan:
10673        fmov.x          LOCAL_EX(%a0),%fp1      # fp1 = src operand (the QNAN)
10674        bsr.l           sto_cos                 # write fp1 to the cosine destination register
10675        bra.w           src_qnan                # sine result: the src QNAN in fp0
10676
10677#
10678# ssincossnan(): When the src operand is an SNAN, store the SNAN w/ the SNAN bit set
10679#                in the cosine register and branch to the src SNAN routine.
10680#
10681        global          ssincossnan
10682ssincossnan:
10683        fmov.x          LOCAL_EX(%a0),%fp1      # fp1 = src operand (the SNAN)
10684        bsr.l           sto_cos                 # write fp1 to the cosine destination register
10685        bra.w           src_snan                # sine result and status come from src_snan
10686
10687########################################################################
10688
10689#########################################################################
10690# sto_cos(): store fp1 to the fpreg designated by the CMDREG dst field. #
10691#            fp1 holds the result of the cosine portion of ssincos().   #
10692#            the value in fp1 will not take any exceptions when moved.  #
10693# INPUT:                                                                #
10694#       fp1 : fp value to store                                         #
10695# MODIFIED:                                                             #
10696#       d0                                                              #
10697#########################################################################
10698        global          sto_cos
10699sto_cos:
10700        mov.b           1+EXC_CMDREG(%a6),%d0   # byte at offset 1 of the saved command word
10701        andi.w          &0x7,%d0                # keep the low three bits: register number 0-7
10702        mov.w           (tbl_sto_cos.b,%pc,%d0.w*2),%d0  # fetch the 16-bit table entry for that register
10703        jmp             (tbl_sto_cos.b,%pc,%d0.w*1)      # entry is an offset from tbl_sto_cos
10704
10705tbl_sto_cos:
10706        short           sto_cos_0 - tbl_sto_cos # register 0
10707        short           sto_cos_1 - tbl_sto_cos # register 1
10708        short           sto_cos_2 - tbl_sto_cos # register 2
10709        short           sto_cos_3 - tbl_sto_cos # register 3
10710        short           sto_cos_4 - tbl_sto_cos # register 4
10711        short           sto_cos_5 - tbl_sto_cos # register 5
10712        short           sto_cos_6 - tbl_sto_cos # register 6
10713        short           sto_cos_7 - tbl_sto_cos # register 7
10714
10715sto_cos_0:
10716        fmovm.x         &0x40,EXC_FP0(%a6)      # fp1 goes to the EXC_FP0 slot, not the live fp0 (presumably reloaded on exit - confirm)
10717        rts
10718sto_cos_1:
10719        fmovm.x         &0x40,EXC_FP1(%a6)      # fp1 goes to the EXC_FP1 slot, not a live register
10720        rts
10721sto_cos_2:
10722        fmov.x          %fp1,%fp2               # fp2-fp7 are written directly
10723        rts
10724sto_cos_3:
10725        fmov.x          %fp1,%fp3
10726        rts
10727sto_cos_4:
10728        fmov.x          %fp1,%fp4
10729        rts
10730sto_cos_5:
10731        fmov.x          %fp1,%fp5
10732        rts
10733sto_cos_6:
10734        fmov.x          %fp1,%fp6
10735        rts
10736sto_cos_7:
10737        fmov.x          %fp1,%fp7
10738        rts
10739
10740##################################################################
10741        global          smod_sdnrm
10742        global          smod_snorm
10743smod_sdnrm:
10744smod_snorm:
10745        mov.b           DTAG(%a6),%d1           # fmod entry for NORM and DENORM src (see fmod): d1 = dst operand tag
10746        beq.l           smod                    # dst tag 0 (presumably NORM): smod
10747        cmpi.b          %d1,&ZERO
10748        beq.w           smod_zro                # ZERO dst: signed zero result
10749        cmpi.b          %d1,&INF
10750        beq.l           t_operr                 # INF dst: operand error
10751        cmpi.b          %d1,&DENORM
10752        beq.l           smod                    # DENORM dst: also handled by smod
10753        cmpi.b          %d1,&SNAN
10754        beq.l           dst_snan                # SNAN dst
10755        bra.l           dst_qnan                # any remaining dst tag is handled as a QNAN
10756
10757        global          smod_szero
10758smod_szero:
10759        mov.b           DTAG(%a6),%d1           # fmod with a ZERO src: d1 = dst operand tag
10760        beq.l           t_operr                 # dst tag 0 (presumably NORM): operand error
10761        cmpi.b          %d1,&ZERO
10762        beq.l           t_operr                 # ZERO dst: operand error
10763        cmpi.b          %d1,&INF
10764        beq.l           t_operr                 # INF dst: operand error
10765        cmpi.b          %d1,&DENORM
10766        beq.l           t_operr                 # DENORM dst: operand error
10767        cmpi.b          %d1,&QNAN
10768        beq.l           dst_qnan                # QNAN dst: return it
10769        bra.l           dst_snan                # any remaining dst tag is handled as an SNAN
10770
10771        global          smod_sinf
10772smod_sinf:
10773        mov.b           DTAG(%a6),%d1           # fmod with an INF src: d1 = dst operand tag
10774        beq.l           smod_fpn                # dst tag 0 (presumably NORM): result is the dst itself
10775        cmpi.b          %d1,&ZERO
10776        beq.l           smod_zro                # ZERO dst: signed zero result
10777        cmpi.b          %d1,&INF
10778        beq.l           t_operr                 # INF dst: operand error
10779        cmpi.b          %d1,&DENORM
10780        beq.l           smod_fpn                # DENORM dst: smod_fpn checks DTAG again
10781        cmpi.b          %d1,&QNAN
10782        beq.l           dst_qnan                # QNAN dst: return it
10783        bra.l           dst_snan                # any remaining dst tag is handled as an SNAN
10784
10785smod_zro:
10786srem_zro:
10787        mov.b           SRC_EX(%a0),%d1         # get src sign (shared fmod/frem path for a ZERO dst)
10788        mov.b           DST_EX(%a1),%d0         # get dst sign
10789        eor.b           %d0,%d1                 # get qbyte sign
10790        andi.b          &0x80,%d1               # keep only the sign bit (bit 7)
10791        mov.b           %d1,FPSR_QBYTE(%a6)     # quotient byte = sign only
10792        tst.b           %d0                     # result sign follows the dst sign
10793        bpl.w           ld_pzero                # dst non-negative (ld_pzero presumably loads +0 - confirm)
10794        bra.w           ld_mzero                # dst negative (ld_mzero presumably loads -0 - confirm)
10795
10796smod_fpn:
10797srem_fpn:
10798        clr.b           FPSR_QBYTE(%a6)         # shared fmod/frem path: src INF, dst NORM/DENORM; this clear is overwritten below
10799        mov.l           %d0,-(%sp)              # save d0; it is used as scratch for the dst sign
10800        mov.b           SRC_EX(%a0),%d1         # get src sign
10801        mov.b           DST_EX(%a1),%d0         # get dst sign
10802        eor.b           %d0,%d1                 # get qbyte sign
10803        andi.b          &0x80,%d1               # keep only the sign bit (bit 7)
10804        mov.b           %d1,FPSR_QBYTE(%a6)     # quotient byte = sign only
10805        cmpi.b          DTAG(%a6),&DENORM
10806        bne.b           smod_nrm                # dst is not a DENORM: return it directly
10807        lea             DST(%a1),%a0            # DENORM dst: pass a pointer to it in a0
10808        mov.l           (%sp)+,%d0              # restore the saved d0
10809        bra             t_resdnrm               # finish in t_resdnrm
10810smod_nrm:
10811        fmov.l          (%sp)+,%fpcr            # pop the saved d0 value straight into FPCR
10812        fmov.x          DST(%a1),%fp0           # result = dst operand
10813        tst.b           DST_EX(%a1)             # check sign of the dst
10814        bmi.b           smod_nrm_neg            # negative result needs the 'N' ccode
10815        rts
10816
10817smod_nrm_neg:
10818        mov.b           &neg_bmask,FPSR_CC(%a6) # set 'N' ccode
10819        rts
10820
10821#########################################################################
10822        global          srem_snorm
10823        global          srem_sdnrm
10824srem_sdnrm:
10825srem_snorm:
10826        mov.b           DTAG(%a6),%d1           # frem entry for NORM and DENORM src (see frem): d1 = dst operand tag
10827        beq.l           srem                    # dst tag 0 (presumably NORM): srem
10828        cmpi.b          %d1,&ZERO
10829        beq.w           srem_zro                # ZERO dst: signed zero result
10830        cmpi.b          %d1,&INF
10831        beq.l           t_operr                 # INF dst: operand error
10832        cmpi.b          %d1,&DENORM
10833        beq.l           srem                    # DENORM dst: also handled by srem
10834        cmpi.b          %d1,&QNAN
10835        beq.l           dst_qnan                # QNAN dst: return it
10836        bra.l           dst_snan                # any remaining dst tag is handled as an SNAN
10837
10838        global          srem_szero
10839srem_szero:
10840        mov.b           DTAG(%a6),%d1           # frem with a ZERO src: d1 = dst operand tag
10841        beq.l           t_operr                 # dst tag 0 (presumably NORM): operand error
10842        cmpi.b          %d1,&ZERO
10843        beq.l           t_operr                 # ZERO dst: operand error
10844        cmpi.b          %d1,&INF
10845        beq.l           t_operr                 # INF dst: operand error
10846        cmpi.b          %d1,&DENORM
10847        beq.l           t_operr                 # DENORM dst: operand error
10848        cmpi.b          %d1,&QNAN
10849        beq.l           dst_qnan                # QNAN dst: return it
10850        bra.l           dst_snan                # any remaining dst tag is handled as an SNAN
10851
10852        global          srem_sinf
10853srem_sinf:
10854        mov.b           DTAG(%a6),%d1           # frem with an INF src: d1 = dst operand tag
10855        beq.w           srem_fpn                # dst tag 0 (presumably NORM): result is the dst itself
10856        cmpi.b          %d1,&ZERO
10857        beq.w           srem_zro                # ZERO dst: signed zero result
10858        cmpi.b          %d1,&INF
10859        beq.l           t_operr                 # INF dst: operand error
10860        cmpi.b          %d1,&DENORM
10861        beq.l           srem_fpn                # DENORM dst: srem_fpn checks DTAG again
10862        cmpi.b          %d1,&QNAN
10863        beq.l           dst_qnan                # QNAN dst: return it
10864        bra.l           dst_snan                # any remaining dst tag is handled as an SNAN
10865
10866#########################################################################
10867        global          sscale_snorm
10868        global          sscale_sdnrm
10869sscale_snorm:
10870sscale_sdnrm:
10871        mov.b           DTAG(%a6),%d1           # fscale entry for NORM and DENORM src (see fscale): d1 = dst operand tag
10872        beq.l           sscale                  # dst tag 0 (presumably NORM): sscale
10873        cmpi.b          %d1,&ZERO
10874        beq.l           dst_zero                # ZERO dst
10875        cmpi.b          %d1,&INF
10876        beq.l           dst_inf                 # INF dst
10877        cmpi.b          %d1,&DENORM
10878        beq.l           sscale                  # DENORM dst: also handled by sscale
10879        cmpi.b          %d1,&QNAN
10880        beq.l           dst_qnan                # QNAN dst: return it
10881        bra.l           dst_snan                # any remaining dst tag is handled as an SNAN
10882
10883        global          sscale_szero
10884sscale_szero:
10885        mov.b           DTAG(%a6),%d1           # fscale with a ZERO src: d1 = dst operand tag
10886        beq.l           sscale                  # dst tag 0 (presumably NORM): sscale
10887        cmpi.b          %d1,&ZERO
10888        beq.l           dst_zero                # ZERO dst
10889        cmpi.b          %d1,&INF
10890        beq.l           dst_inf                 # INF dst
10891        cmpi.b          %d1,&DENORM
10892        beq.l           sscale                  # DENORM dst: also handled by sscale
10893        cmpi.b          %d1,&QNAN
10894        beq.l           dst_qnan                # QNAN dst: return it
10895        bra.l           dst_snan                # any remaining dst tag is handled as an SNAN
10896
10897        global          sscale_sinf
10898sscale_sinf:
10899        mov.b           DTAG(%a6),%d1           # fscale with an INF src: d1 = dst operand tag
10900        beq.l           t_operr                 # dst tag 0 (presumably NORM): operand error
10901        cmpi.b          %d1,&QNAN
10902        beq.l           dst_qnan                # QNAN dst: return it
10903        cmpi.b          %d1,&SNAN
10904        beq.l           dst_snan                # SNAN dst
10905        bra.l           t_operr                 # every other dst tag: operand error
10906
10907########################################################################
10908
10909#
10910# sop_sqnan(): The src op for frem/fmod/fscale was a QNAN.
10911#              A NAN in the dst takes priority over the src QNAN.
10912        global          sop_sqnan
10913sop_sqnan:
10914        mov.b           DTAG(%a6),%d1           # d1 = dst operand tag
10915        cmpi.b          %d1,&QNAN
10916        beq.b           dst_qnan                # dst is a QNAN: return the dst
10917        cmpi.b          %d1,&SNAN
10918        beq.b           dst_snan                # dst is an SNAN: return the dst
10919        bra.b           src_qnan                # dst is not a NAN: return the src QNAN
10920
10921#
10922# sop_ssnan(): The src op for frem/fmod/fscale was an SNAN.
10923#              A NAN in the dst is returned in preference to the src SNAN.
10924        global          sop_ssnan
10925sop_ssnan:
10926        mov.b           DTAG(%a6),%d1           # d1 = dst operand tag
10927        cmpi.b          %d1,&QNAN
10928        beq.b           dst_qnan_src_snan       # dst QNAN is returned, but the src SNAN is still recorded
10929        cmpi.b          %d1,&SNAN
10930        beq.b           dst_snan                # dst is an SNAN: return the dst
10931        bra.b           src_snan                # dst is not a NAN: return the src SNAN
10932
10933dst_qnan_src_snan:
10934        ori.l           &snaniop_mask,USER_FPSR(%a6) # set NAN/SNAN/AIOP
10935        bra.b           dst_qnan                # then return the dst QNAN
10936
10937#
10938# dst_snan(): Return the dst SNAN w/ the SNAN bit set.
10939#
10940        global          dst_snan
10941dst_snan:
10942        fmov.x          DST(%a1),%fp0           # the fmove sets the SNAN bit
10943        fmov.l          %fpsr,%d0               # catch resulting status
10944        or.l            %d0,USER_FPSR(%a6)      # store status
10945        rts
10946
10947#
10948# dst_qnan(): Return the dst QNAN in fp0 and set the NAN (and 'N') ccodes.
10949#
10950        global          dst_qnan
10951dst_qnan:
10952        fmov.x          DST(%a1),%fp0           # return the non-signalling nan
10953        tst.b           DST_EX(%a1)             # set ccodes according to QNAN sign
10954        bmi.b           dst_qnan_m              # negative QNAN: also set 'N'
10955dst_qnan_p:
10956        mov.b           &nan_bmask,FPSR_CC(%a6) # ccodes: NAN only
10957        rts
10958dst_qnan_m:
10959        mov.b           &neg_bmask+nan_bmask,FPSR_CC(%a6)  # ccodes: NAN and negative
10960        rts
10961
10962#
10963# src_snan(): Return the src SNAN w/ the SNAN bit set.
10964#             The FPSR produced by the load is OR'ed into USER_FPSR.
10965        global          src_snan
10966src_snan:
10967        fmov.x          SRC(%a0),%fp0           # the fmove sets the SNAN bit
10968        fmov.l          %fpsr,%d0               # catch resulting status
10969        or.l            %d0,USER_FPSR(%a6)      # store status
10970        rts
10971
10972#
10973# src_qnan(): Return the src QNAN in fp0 and set the NAN (and 'N') ccodes.
10974#
10975        global          src_qnan
10976src_qnan:
10977        fmov.x          SRC(%a0),%fp0           # return the non-signalling nan
10978        tst.b           SRC_EX(%a0)             # set ccodes according to QNAN sign
10979        bmi.b           src_qnan_m              # negative QNAN: use this routine's own 'N'+NAN exit
10980src_qnan_p:
10981        mov.b           &nan_bmask,FPSR_CC(%a6) # ccodes: NAN only
10982        rts
10983src_qnan_m:
10984        mov.b           &neg_bmask+nan_bmask,FPSR_CC(%a6)  # ccodes: NAN and negative
10985        rts
10986
10987#
10988# fkern2.s:
10989#       These entry points are used by the exception handler
10990# routines where an instruction is selected by an index into
10991# a large jump table corresponding to a given instruction which
10992# has been decoded. Flow continues here where we now decode
10993# further according to the source operand type.
10994#
10995
10996        global          fsinh
10997fsinh:
10998        mov.b           STAG(%a6),%d1           # fsinh dispatch: d1 = src operand tag (Z set when tag is 0)
10999        beq.l           ssinh                   # tag 0 (presumably NORM): ssinh
11000        cmpi.b          %d1,&ZERO
11001        beq.l           src_zero                # ZERO src
11002        cmpi.b          %d1,&INF
11003        beq.l           src_inf                 # INF src
11004        cmpi.b          %d1,&DENORM
11005        beq.l           ssinhd                  # DENORM src
11006        cmpi.b          %d1,&QNAN
11007        beq.l           src_qnan                # QNAN src: return it
11008        bra.l           src_snan                # any remaining tag is handled as an SNAN
11009
11010        global          flognp1
11011flognp1:
11012        mov.b           STAG(%a6),%d1           # flognp1 dispatch: d1 = src operand tag (Z set when tag is 0)
11013        beq.l           slognp1                 # tag 0 (presumably NORM): slognp1
11014        cmpi.b          %d1,&ZERO
11015        beq.l           src_zero                # ZERO src
11016        cmpi.b          %d1,&INF
11017        beq.l           sopr_inf                # INF src
11018        cmpi.b          %d1,&DENORM
11019        beq.l           slognp1d                # DENORM src
11020        cmpi.b          %d1,&QNAN
11021        beq.l           src_qnan                # QNAN src: return it
11022        bra.l           src_snan                # any remaining tag is handled as an SNAN
11023
11024        global          fetoxm1
11025fetoxm1:
11026        mov.b           STAG(%a6),%d1           # fetoxm1 dispatch: d1 = src operand tag (Z set when tag is 0)
11027        beq.l           setoxm1                 # tag 0 (presumably NORM): setoxm1
11028        cmpi.b          %d1,&ZERO
11029        beq.l           src_zero                # ZERO src
11030        cmpi.b          %d1,&INF
11031        beq.l           setoxm1i                # INF src: -1 or ld_pinf depending on sign
11032        cmpi.b          %d1,&DENORM
11033        beq.l           setoxm1d                # DENORM src
11034        cmpi.b          %d1,&QNAN
11035        beq.l           src_qnan                # QNAN src: return it
11036        bra.l           src_snan                # any remaining tag is handled as an SNAN
11037
11038        global          ftanh
11039ftanh:
11040        mov.b           STAG(%a6),%d1           # ftanh dispatch: d1 = src operand tag (Z set when tag is 0)
11041        beq.l           stanh                   # tag 0 (presumably NORM): stanh
11042        cmpi.b          %d1,&ZERO
11043        beq.l           src_zero                # ZERO src
11044        cmpi.b          %d1,&INF
11045        beq.l           src_one                 # INF src: signed one
11046        cmpi.b          %d1,&DENORM
11047        beq.l           stanhd                  # DENORM src
11048        cmpi.b          %d1,&QNAN
11049        beq.l           src_qnan                # QNAN src: return it
11050        bra.l           src_snan                # any remaining tag is handled as an SNAN
11051
11052        global          fatan
11053fatan:
11054        mov.b           STAG(%a6),%d1           # fatan dispatch: d1 = src operand tag (Z set when tag is 0)
11055        beq.l           satan                   # tag 0 (presumably NORM): satan
11056        cmpi.b          %d1,&ZERO
11057        beq.l           src_zero                # ZERO src
11058        cmpi.b          %d1,&INF
11059        beq.l           spi_2                   # INF src: signed pi/2
11060        cmpi.b          %d1,&DENORM
11061        beq.l           satand                  # DENORM src
11062        cmpi.b          %d1,&QNAN
11063        beq.l           src_qnan                # QNAN src: return it
11064        bra.l           src_snan                # any remaining tag is handled as an SNAN
11065
11066        global          fasin
11067fasin:
11068        mov.b           STAG(%a6),%d1           # fasin dispatch: d1 = src operand tag (Z set when tag is 0)
11069        beq.l           sasin                   # tag 0 (presumably NORM): sasin
11070        cmpi.b          %d1,&ZERO
11071        beq.l           src_zero                # ZERO src
11072        cmpi.b          %d1,&INF
11073        beq.l           t_operr                 # INF src: operand error
11074        cmpi.b          %d1,&DENORM
11075        beq.l           sasind                  # DENORM src
11076        cmpi.b          %d1,&QNAN
11077        beq.l           src_qnan                # QNAN src: return it
11078        bra.l           src_snan                # any remaining tag is handled as an SNAN
11079
11080        global          fatanh
11081fatanh:
11082        mov.b           STAG(%a6),%d1           # fatanh dispatch: d1 = src operand tag (Z set when tag is 0)
11083        beq.l           satanh                  # tag 0 (presumably NORM): satanh
11084        cmpi.b          %d1,&ZERO
11085        beq.l           src_zero                # ZERO src
11086        cmpi.b          %d1,&INF
11087        beq.l           t_operr                 # INF src: operand error
11088        cmpi.b          %d1,&DENORM
11089        beq.l           satanhd                 # DENORM src
11090        cmpi.b          %d1,&QNAN
11091        beq.l           src_qnan                # QNAN src: return it
11092        bra.l           src_snan                # any remaining tag is handled as an SNAN
11093
11094        global          fsine
11095fsine:
11096        mov.b           STAG(%a6),%d1           # fsin dispatch: d1 = src operand tag (Z set when tag is 0)
11097        beq.l           ssin                    # tag 0 (presumably NORM): ssin
11098        cmpi.b          %d1,&ZERO
11099        beq.l           src_zero                # ZERO src
11100        cmpi.b          %d1,&INF
11101        beq.l           t_operr                 # INF src: operand error
11102        cmpi.b          %d1,&DENORM
11103        beq.l           ssind                   # DENORM src
11104        cmpi.b          %d1,&QNAN
11105        beq.l           src_qnan                # QNAN src: return it
11106        bra.l           src_snan                # any remaining tag is handled as an SNAN
11107
11108        global          ftan
11109ftan:
11110        mov.b           STAG(%a6),%d1           # ftan dispatch: d1 = src operand tag (Z set when tag is 0)
11111        beq.l           stan                    # tag 0 (presumably NORM): stan
11112        cmpi.b          %d1,&ZERO
11113        beq.l           src_zero                # ZERO src
11114        cmpi.b          %d1,&INF
11115        beq.l           t_operr                 # INF src: operand error
11116        cmpi.b          %d1,&DENORM
11117        beq.l           stand                   # DENORM src
11118        cmpi.b          %d1,&QNAN
11119        beq.l           src_qnan                # QNAN src: return it
11120        bra.l           src_snan                # any remaining tag is handled as an SNAN
11121
11122        global          fetox
11123fetox:
11124        mov.b           STAG(%a6),%d1           # fetox dispatch: d1 = src operand tag (Z set when tag is 0)
11125        beq.l           setox                   # tag 0 (presumably NORM): setox
11126        cmpi.b          %d1,&ZERO
11127        beq.l           ld_pone                 # ZERO src: return +1
11128        cmpi.b          %d1,&INF
11129        beq.l           szr_inf                 # INF src
11130        cmpi.b          %d1,&DENORM
11131        beq.l           setoxd                  # DENORM src
11132        cmpi.b          %d1,&QNAN
11133        beq.l           src_qnan                # QNAN src: return it
11134        bra.l           src_snan                # any remaining tag is handled as an SNAN
11135
11136        global          ftwotox
11137ftwotox:
11138        mov.b           STAG(%a6),%d1           # ftwotox dispatch: d1 = src operand tag (Z set when tag is 0)
11139        beq.l           stwotox                 # tag 0 (presumably NORM): stwotox
11140        cmpi.b          %d1,&ZERO
11141        beq.l           ld_pone                 # ZERO src: return +1
11142        cmpi.b          %d1,&INF
11143        beq.l           szr_inf                 # INF src
11144        cmpi.b          %d1,&DENORM
11145        beq.l           stwotoxd                # DENORM src
11146        cmpi.b          %d1,&QNAN
11147        beq.l           src_qnan                # QNAN src: return it
11148        bra.l           src_snan                # any remaining tag is handled as an SNAN
11149
11150        global          ftentox
11151ftentox:
11152        mov.b           STAG(%a6),%d1           # ftentox dispatch: d1 = src operand tag (Z set when tag is 0)
11153        beq.l           stentox                 # tag 0 (presumably NORM): stentox
11154        cmpi.b          %d1,&ZERO
11155        beq.l           ld_pone                 # ZERO src: return +1
11156        cmpi.b          %d1,&INF
11157        beq.l           szr_inf                 # INF src
11158        cmpi.b          %d1,&DENORM
11159        beq.l           stentoxd                # DENORM src
11160        cmpi.b          %d1,&QNAN
11161        beq.l           src_qnan                # QNAN src: return it
11162        bra.l           src_snan                # any remaining tag is handled as an SNAN
11163
11164        global          flogn
11165flogn:
11166        mov.b           STAG(%a6),%d1           # flogn dispatch: d1 = src operand tag (Z set when tag is 0)
11167        beq.l           slogn                   # tag 0 (presumably NORM): slogn
11168        cmpi.b          %d1,&ZERO
11169        beq.l           t_dz2                   # ZERO src: t_dz2 (presumably a divide-by-zero exit - confirm)
11170        cmpi.b          %d1,&INF
11171        beq.l           sopr_inf                # INF src
11172        cmpi.b          %d1,&DENORM
11173        beq.l           slognd                  # DENORM src
11174        cmpi.b          %d1,&QNAN
11175        beq.l           src_qnan                # QNAN src: return it
11176        bra.l           src_snan                # any remaining tag is handled as an SNAN
11177
11178        global          flog10
11179flog10:
11180        mov.b           STAG(%a6),%d1           # flog10 dispatch: d1 = src operand tag (Z set when tag is 0)
11181        beq.l           slog10                  # tag 0 (presumably NORM): slog10
11182        cmpi.b          %d1,&ZERO
11183        beq.l           t_dz2                   # ZERO src: t_dz2 (presumably a divide-by-zero exit - confirm)
11184        cmpi.b          %d1,&INF
11185        beq.l           sopr_inf                # INF src
11186        cmpi.b          %d1,&DENORM
11187        beq.l           slog10d                 # DENORM src
11188        cmpi.b          %d1,&QNAN
11189        beq.l           src_qnan                # QNAN src: return it
11190        bra.l           src_snan                # any remaining tag is handled as an SNAN
11191
11192        global          flog2
11193flog2:
11194        mov.b           STAG(%a6),%d1           # flog2 dispatch: d1 = src operand tag (Z set when tag is 0)
11195        beq.l           slog2                   # tag 0 (presumably NORM): slog2
11196        cmpi.b          %d1,&ZERO
11197        beq.l           t_dz2                   # ZERO src: t_dz2 (presumably a divide-by-zero exit - confirm)
11198        cmpi.b          %d1,&INF
11199        beq.l           sopr_inf                # INF src
11200        cmpi.b          %d1,&DENORM
11201        beq.l           slog2d                  # DENORM src
11202        cmpi.b          %d1,&QNAN
11203        beq.l           src_qnan                # QNAN src: return it
11204        bra.l           src_snan                # any remaining tag is handled as an SNAN
11205
11206        global          fcosh
11207fcosh:
11208        mov.b           STAG(%a6),%d1           # fcosh dispatch: d1 = src operand tag (Z set when tag is 0)
11209        beq.l           scosh                   # tag 0 (presumably NORM): scosh
11210        cmpi.b          %d1,&ZERO
11211        beq.l           ld_pone                 # ZERO src: return +1
11212        cmpi.b          %d1,&INF
11213        beq.l           ld_pinf                 # INF src: ld_pinf, without testing the sign
11214        cmpi.b          %d1,&DENORM
11215        beq.l           scoshd                  # DENORM src
11216        cmpi.b          %d1,&QNAN
11217        beq.l           src_qnan                # QNAN src: return it
11218        bra.l           src_snan                # any remaining tag is handled as an SNAN
11219
11220        global          facos
11221facos:
11222        mov.b           STAG(%a6),%d1           # facos dispatch: d1 = src operand tag (Z set when tag is 0)
11223        beq.l           sacos                   # tag 0 (presumably NORM): sacos
11224        cmpi.b          %d1,&ZERO
11225        beq.l           ld_ppi2                 # ZERO src: return +pi/2
11226        cmpi.b          %d1,&INF
11227        beq.l           t_operr                 # INF src: operand error
11228        cmpi.b          %d1,&DENORM
11229        beq.l           sacosd                  # DENORM src
11230        cmpi.b          %d1,&QNAN
11231        beq.l           src_qnan                # QNAN src: return it
11232        bra.l           src_snan                # any remaining tag is handled as an SNAN
11233
11234        global          fcos
11235fcos:
11236        mov.b           STAG(%a6),%d1           # fcos dispatch: d1 = src operand tag (Z set when tag is 0)
11237        beq.l           scos                    # tag 0 (presumably NORM): scos
11238        cmpi.b          %d1,&ZERO
11239        beq.l           ld_pone                 # ZERO src: return +1
11240        cmpi.b          %d1,&INF
11241        beq.l           t_operr                 # INF src: operand error
11242        cmpi.b          %d1,&DENORM
11243        beq.l           scosd                   # DENORM src
11244        cmpi.b          %d1,&QNAN
11245        beq.l           src_qnan                # QNAN src: return it
11246        bra.l           src_snan                # any remaining tag is handled as an SNAN
11247
11248        global          fgetexp
11249fgetexp:
11250        mov.b           STAG(%a6),%d1           # fgetexp dispatch: d1 = src operand tag (Z set when tag is 0)
11251        beq.l           sgetexp                 # tag 0 (presumably NORM): sgetexp
11252        cmpi.b          %d1,&ZERO
11253        beq.l           src_zero                # ZERO src
11254        cmpi.b          %d1,&INF
11255        beq.l           t_operr                 # INF src: operand error
11256        cmpi.b          %d1,&DENORM
11257        beq.l           sgetexpd                # DENORM src
11258        cmpi.b          %d1,&QNAN
11259        beq.l           src_qnan                # QNAN src: return it
11260        bra.l           src_snan                # any remaining tag is handled as an SNAN
11261
11262        global          fgetman
11263fgetman:
11264        mov.b           STAG(%a6),%d1           # fgetman dispatch: d1 = src operand tag (Z set when tag is 0)
11265        beq.l           sgetman                 # tag 0 (presumably NORM): sgetman
11266        cmpi.b          %d1,&ZERO
11267        beq.l           src_zero                # ZERO src
11268        cmpi.b          %d1,&INF
11269        beq.l           t_operr                 # INF src: operand error
11270        cmpi.b          %d1,&DENORM
11271        beq.l           sgetmand                # DENORM src
11272        cmpi.b          %d1,&QNAN
11273        beq.l           src_qnan                # QNAN src: return it
11274        bra.l           src_snan                # any remaining tag is handled as an SNAN
11275
11276        global          fsincos
11277fsincos:
11278        mov.b           STAG(%a6),%d1           # fsincos dispatch: d1 = src operand tag (Z set when tag is 0)
11279        beq.l           ssincos                 # tag 0 (presumably NORM): ssincos
11280        cmpi.b          %d1,&ZERO
11281        beq.l           ssincosz                # ZERO src: sin = signed zero, cos = +1
11282        cmpi.b          %d1,&INF
11283        beq.l           ssincosi                # INF src: cos = QNAN, then operand error
11284        cmpi.b          %d1,&DENORM
11285        beq.l           ssincosd                # DENORM src
11286        cmpi.b          %d1,&QNAN
11287        beq.l           ssincosqnan             # QNAN src: stored to the cosine register as well
11288        bra.l           ssincossnan             # any remaining tag is handled as an SNAN
11289
11290        global          fmod
11291fmod:
11292        mov.b           STAG(%a6),%d1           # fmod dispatch: d1 = src operand tag; each target then tests DTAG
11293        beq.l           smod_snorm              # src tag 0 (presumably NORM)
11294        cmpi.b          %d1,&ZERO
11295        beq.l           smod_szero              # ZERO src
11296        cmpi.b          %d1,&INF
11297        beq.l           smod_sinf               # INF src
11298        cmpi.b          %d1,&DENORM
11299        beq.l           smod_sdnrm              # DENORM src (same code as smod_snorm)
11300        cmpi.b          %d1,&QNAN
11301        beq.l           sop_sqnan               # QNAN src
11302        bra.l           sop_ssnan               # any remaining tag is handled as an SNAN
11303
11304        global          frem
11305frem:
11306        mov.b           STAG(%a6),%d1           # frem dispatch: d1 = src operand tag; each target then tests DTAG
11307        beq.l           srem_snorm              # src tag 0 (presumably NORM)
11308        cmpi.b          %d1,&ZERO
11309        beq.l           srem_szero              # ZERO src
11310        cmpi.b          %d1,&INF
11311        beq.l           srem_sinf               # INF src
11312        cmpi.b          %d1,&DENORM
11313        beq.l           srem_sdnrm              # DENORM src (same code as srem_snorm)
11314        cmpi.b          %d1,&QNAN
11315        beq.l           sop_sqnan               # QNAN src
11316        bra.l           sop_ssnan               # any remaining tag is handled as an SNAN
11317
11318        global          fscale
11319fscale:
11320        mov.b           STAG(%a6),%d1           # fscale dispatch: d1 = src operand tag; each target then tests DTAG
11321        beq.l           sscale_snorm            # src tag 0 (presumably NORM)
11322        cmpi.b          %d1,&ZERO
11323        beq.l           sscale_szero            # ZERO src
11324        cmpi.b          %d1,&INF
11325        beq.l           sscale_sinf             # INF src
11326        cmpi.b          %d1,&DENORM
11327        beq.l           sscale_sdnrm            # DENORM src (same code as sscale_snorm)
11328        cmpi.b          %d1,&QNAN
11329        beq.l           sop_sqnan               # QNAN src
11330        bra.l           sop_ssnan               # any remaining tag is handled as an SNAN
11331
11332#########################################################################
11333# XDEF **************************************************************** #
11334#       fgen_except(): catch an exception during transcendental         #
11335#                      emulation                                        #
11336#                                                                       #
11337# XREF **************************************************************** #
11338#       fmul() - emulate a multiply instruction                         #
11339#       fadd() - emulate an add instruction                             #
11340#       fin() - emulate an fmove instruction                            #
11341#                                                                       #
11342# INPUT *************************************************************** #
11343#       fp0 = destination operand                                       #
11344#       d1  = type of instruction that took exception                   #
11345#       fsave frame = source operand                                    #
11346#                                                                       #
11347# OUTPUT ************************************************************** #
11348#       fp0 = result                                                    #
11349#       fp1 = EXOP                                                      #
11350#                                                                       #
11351# ALGORITHM *********************************************************** #
11352#       An exception occurred on the last instruction of the            #
11353# transcendental emulation. hopefully, this won't be happening much     #
11354# because it will be VERY slow.                                         #
11355#       The only exceptions capable of passing through here are         #
11356# Overflow, Underflow, and Unsupported Data Type.                       #
11357#                                                                       #
11358#########################################################################
11359
11360        global          fgen_except
11361fgen_except:
11362        cmpi.b          0x3(%sp),&0x7           # is exception UNSUPP?
11363        beq.b           fge_unsupp              # yes
11364
11365        mov.b           &NORM,STAG(%a6)         # otherwise tag the src operand as NORM
11366
11367fge_cont:
11368        mov.b           &NORM,DTAG(%a6)         # the dst operand is always tagged NORM
11369
11370# ok, I have a problem with putting the dst op at FP_DST. the emulation
11371# routines aren't supposed to alter the operands but we've just squashed
11372# FP_DST here...
11373
11374# 8/17/93 - this turns out to be more of a "cleanliness" standpoint
11375# than a potential bug. to begin with, only the dyadic functions
11376# frem,fmod, and fscale would get the dst trashed here. But, for
11377# the 060SP, the FP_DST is never used again anyways.
11378        fmovm.x         &0x80,FP_DST(%a6)       # dst op is in fp0
11379
11380        lea             0x4(%sp),%a0            # pass: ptr to src op
11381        lea             FP_DST(%a6),%a1         # pass: ptr to dst op
11382
11383        cmpi.b          %d1,&FMOV_OP            # d1 selects which instruction to re-emulate
11384        beq.b           fge_fin                 # it was an "fmov"
11385        cmpi.b          %d1,&FADD_OP
11386        beq.b           fge_fadd                # it was an "fadd"
11387fge_fmul:
11388        bsr.l           fmul                    # any other d1 value is emulated as a multiply
11389        rts
11390fge_fadd:
11391        bsr.l           fadd
11392        rts
11393fge_fin:
11394        bsr.l           fin
11395        rts
11396
11397fge_unsupp:
11398        mov.b           &DENORM,STAG(%a6)       # UNSUPP: tag the src operand as DENORM instead of NORM
11399        bra.b           fge_cont
11400
11401#
11402# This table holds the offsets of the emulation routines for each individual
11403# math operation relative to the address of this table. Included are
11404# routines like fadd/fmul/fabs as well as the transcendentals.
11405# The location within the table is determined by the extension bits of the
11406# operation longword.
11407#
11408
11409        swbeg           &109
11410tbl_unsupp:
11411        long            fin             - tbl_unsupp    # 00: fmove
11412        long            fint            - tbl_unsupp    # 01: fint
11413        long            fsinh           - tbl_unsupp    # 02: fsinh
11414        long            fintrz          - tbl_unsupp    # 03: fintrz
11415        long            fsqrt           - tbl_unsupp    # 04: fsqrt
11416        long            tbl_unsupp      - tbl_unsupp
11417        long            flognp1         - tbl_unsupp    # 06: flognp1
11418        long            tbl_unsupp      - tbl_unsupp
11419        long            fetoxm1         - tbl_unsupp    # 08: fetoxm1
11420        long            ftanh           - tbl_unsupp    # 09: ftanh
11421        long            fatan           - tbl_unsupp    # 0a: fatan
11422        long            tbl_unsupp      - tbl_unsupp
11423        long            fasin           - tbl_unsupp    # 0c: fasin
11424        long            fatanh          - tbl_unsupp    # 0d: fatanh
11425        long            fsine           - tbl_unsupp    # 0e: fsin
11426        long            ftan            - tbl_unsupp    # 0f: ftan
11427        long            fetox           - tbl_unsupp    # 10: fetox
11428        long            ftwotox         - tbl_unsupp    # 11: ftwotox
11429        long            ftentox         - tbl_unsupp    # 12: ftentox
11430        long            tbl_unsupp      - tbl_unsupp
11431        long            flogn           - tbl_unsupp    # 14: flogn
11432        long            flog10          - tbl_unsupp    # 15: flog10
11433        long            flog2           - tbl_unsupp    # 16: flog2
11434        long            tbl_unsupp      - tbl_unsupp
11435        long            fabs            - tbl_unsupp    # 18: fabs
11436        long            fcosh           - tbl_unsupp    # 19: fcosh
11437        long            fneg            - tbl_unsupp    # 1a: fneg
11438        long            tbl_unsupp      - tbl_unsupp
11439        long            facos           - tbl_unsupp    # 1c: facos
11440        long            fcos            - tbl_unsupp    # 1d: fcos
11441        long            fgetexp         - tbl_unsupp    # 1e: fgetexp
11442        long            fgetman         - tbl_unsupp    # 1f: fgetman
11443        long            fdiv            - tbl_unsupp    # 20: fdiv
11444        long            fmod            - tbl_unsupp    # 21: fmod
11445        long            fadd            - tbl_unsupp    # 22: fadd
11446        long            fmul            - tbl_unsupp    # 23: fmul
11447        long            fsgldiv         - tbl_unsupp    # 24: fsgldiv
11448        long            frem            - tbl_unsupp    # 25: frem
11449        long            fscale          - tbl_unsupp    # 26: fscale
11450        long            fsglmul         - tbl_unsupp    # 27: fsglmul
11451        long            fsub            - tbl_unsupp    # 28: fsub
11452        long            tbl_unsupp      - tbl_unsupp
11453        long            tbl_unsupp      - tbl_unsupp
11454        long            tbl_unsupp      - tbl_unsupp
11455        long            tbl_unsupp      - tbl_unsupp
11456        long            tbl_unsupp      - tbl_unsupp
11457        long            tbl_unsupp      - tbl_unsupp
11458        long            tbl_unsupp      - tbl_unsupp
11459        long            fsincos         - tbl_unsupp    # 30: fsincos
11460        long            fsincos         - tbl_unsupp    # 31: fsincos
11461        long            fsincos         - tbl_unsupp    # 32: fsincos
11462        long            fsincos         - tbl_unsupp    # 33: fsincos
11463        long            fsincos         - tbl_unsupp    # 34: fsincos
11464        long            fsincos         - tbl_unsupp    # 35: fsincos
11465        long            fsincos         - tbl_unsupp    # 36: fsincos
11466        long            fsincos         - tbl_unsupp    # 37: fsincos
11467        long            fcmp            - tbl_unsupp    # 38: fcmp
11468        long            tbl_unsupp      - tbl_unsupp
11469        long            ftst            - tbl_unsupp    # 3a: ftst
11470        long            tbl_unsupp      - tbl_unsupp
11471        long            tbl_unsupp      - tbl_unsupp
11472        long            tbl_unsupp      - tbl_unsupp
11473        long            tbl_unsupp      - tbl_unsupp
11474        long            tbl_unsupp      - tbl_unsupp
11475        long            fsin            - tbl_unsupp    # 40: fsmove
11476        long            fssqrt          - tbl_unsupp    # 41: fssqrt
11477        long            tbl_unsupp      - tbl_unsupp
11478        long            tbl_unsupp      - tbl_unsupp
11479        long            fdin            - tbl_unsupp    # 44: fdmove
11480        long            fdsqrt          - tbl_unsupp    # 45: fdsqrt
11481        long            tbl_unsupp      - tbl_unsupp
11482        long            tbl_unsupp      - tbl_unsupp
11483        long            tbl_unsupp      - tbl_unsupp
11484        long            tbl_unsupp      - tbl_unsupp
11485        long            tbl_unsupp      - tbl_unsupp
11486        long            tbl_unsupp      - tbl_unsupp
11487        long            tbl_unsupp      - tbl_unsupp
11488        long            tbl_unsupp      - tbl_unsupp
11489        long            tbl_unsupp      - tbl_unsupp
11490        long            tbl_unsupp      - tbl_unsupp
11491        long            tbl_unsupp      - tbl_unsupp
11492        long            tbl_unsupp      - tbl_unsupp
11493        long            tbl_unsupp      - tbl_unsupp
11494        long            tbl_unsupp      - tbl_unsupp
11495        long            tbl_unsupp      - tbl_unsupp
11496        long            tbl_unsupp      - tbl_unsupp
11497        long            tbl_unsupp      - tbl_unsupp
11498        long            tbl_unsupp      - tbl_unsupp
11499        long            fsabs           - tbl_unsupp    # 58: fsabs
11500        long            tbl_unsupp      - tbl_unsupp
11501        long            fsneg           - tbl_unsupp    # 5a: fsneg
11502        long            tbl_unsupp      - tbl_unsupp
11503        long            fdabs           - tbl_unsupp    # 5c: fdabs
11504        long            tbl_unsupp      - tbl_unsupp
11505        long            fdneg           - tbl_unsupp    # 5e: fdneg
11506        long            tbl_unsupp      - tbl_unsupp
11507        long            fsdiv           - tbl_unsupp    # 60: fsdiv
11508        long            tbl_unsupp      - tbl_unsupp
11509        long            fsadd           - tbl_unsupp    # 62: fsadd
11510        long            fsmul           - tbl_unsupp    # 63: fsmul
11511        long            fddiv           - tbl_unsupp    # 64: fddiv
11512        long            tbl_unsupp      - tbl_unsupp
11513        long            fdadd           - tbl_unsupp    # 66: fdadd
11514        long            fdmul           - tbl_unsupp    # 67: fdmul
11515        long            fssub           - tbl_unsupp    # 68: fssub
11516        long            tbl_unsupp      - tbl_unsupp
11517        long            tbl_unsupp      - tbl_unsupp
11518        long            tbl_unsupp      - tbl_unsupp
11519        long            fdsub           - tbl_unsupp    # 6c: fdsub
11520
11521#########################################################################
11522# XDEF **************************************************************** #
11523#       fmul(): emulates the fmul instruction                           #
11524#       fsmul(): emulates the fsmul instruction                         #
11525#       fdmul(): emulates the fdmul instruction                         #
11526#                                                                       #
11527# XREF **************************************************************** #
11528#       scale_to_zero_src() - scale src exponent to zero                #
11529#       scale_to_zero_dst() - scale dst exponent to zero                #
11530#       unf_res() - return default underflow result                     #
11531#       ovf_res() - return default overflow result                      #
11532#       res_qnan() - return QNAN result                                 #
11533#       res_snan() - return SNAN result                                 #
11534#                                                                       #
11535# INPUT *************************************************************** #
11536#       a0 = pointer to extended precision source operand               #
11537#       a1 = pointer to extended precision destination operand          #
11538#       d0  rnd prec,mode                                               #
11539#                                                                       #
11540# OUTPUT ************************************************************** #
11541#       fp0 = result                                                    #
11542#       fp1 = EXOP (if exception occurred)                              #
11543#                                                                       #
11544# ALGORITHM *********************************************************** #
11545#       Handle NANs, infinities, and zeroes as special cases. Divide    #
11546# norms/denorms into ext/sgl/dbl precision.                             #
11547#       For norms/denorms, scale the exponents such that a multiply     #
11548# instruction won't cause an exception. Use the regular fmul to         #
11549# compute a result. Check if the regular operands would have taken      #
11550# an exception. If so, return the default overflow/underflow result     #
11551# and return the EXOP if exceptions are enabled. Else, scale the        #
11552# result operand to the proper exponent.                                #
11553#                                                                       #
11554#########################################################################
11555
11556        align           0x10
# Scale-factor limits read by fmul_norm. Each table holds one long per
# rounding precision (ext, sgl, dbl per the entry comments) and is indexed
# with the precision field shifted down to the low bits, times 4.
# fmul_norm compares the combined scale factor in d0 against the selected
# entry: equality sends it to the "may overflow"/"may underflow" path.
11557tbl_fmul_ovfl:
11558        long            0x3fff - 0x7ffe         # ext_max
11559        long            0x3fff - 0x407e         # sgl_max
11560        long            0x3fff - 0x43fe         # dbl_max
11561tbl_fmul_unfl:
11562        long            0x3fff + 0x0001         # ext_unfl
11563        long            0x3fff - 0x3f80         # sgl_unfl
11564        long            0x3fff - 0x3c00         # dbl_unfl
11565
11566        global          fsmul
# fsmul(): rewrite the rounding info in d0 to single precision, then
# continue in the common fmul code.
11567fsmul:
11568        andi.b          &0x30,%d0               # keep only bits 0x30; clears rnd prec
11569        ori.b           &s_mode*0x10,%d0        # insert sgl prec
11570        bra.b           fmul                    # join the common multiply path
11571
11572        global          fdmul
# fdmul(): rewrite the rounding info in d0 to double precision. There is no
# branch at the end: execution falls through into fmul, which follows.
11573fdmul:
11574        andi.b          &0x30,%d0               # keep only bits 0x30; clears rnd prec
11575        ori.b           &d_mode*0x10,%d0        # insert dbl prec
11576
11577        global          fmul
# fmul(): common multiply entry (also reached from fsmul/fdmul).
# Saves the rounding info, then dispatches on the operand tags: when both
# DTAG and STAG are zero it falls into fmul_norm, otherwise it goes to
# fmul_not_norm, which indexes tbl_fmul_op with (DTAG*8 + STAG).
11578fmul:
11579        mov.l           %d0,L_SCR3(%a6)         # store rnd info
11580
11581        clr.w           %d1                     # d1.w = 0 so the tag index is clean
11582        mov.b           DTAG(%a6),%d1           # fetch dst optype tag
11583        lsl.b           &0x3,%d1                # dst tag * 8 selects the table row
11584        or.b            STAG(%a6),%d1           # combine src tags
11585        bne.w           fmul_not_norm           # optimize on non-norm input
11586
# fmul_norm: copy both operands into scratch space, scale them, and pick
# the normal / overflow / underflow path from the combined scale factor.
# This label is also a tbl_fmul_op target for the NORM/DENORM combinations.
11587fmul_norm:
11588        mov.w           DST_EX(%a1),FP_SCR1_EX(%a6) # FP_SCR1 = copy of dst operand
11589        mov.l           DST_HI(%a1),FP_SCR1_HI(%a6)
11590        mov.l           DST_LO(%a1),FP_SCR1_LO(%a6)
11591
11592        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6) # FP_SCR0 = copy of src operand
11593        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
11594        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
11595
11596        bsr.l           scale_to_zero_src       # scale src exponent
11597        mov.l           %d0,-(%sp)              # save scale factor 1
11598
11599        bsr.l           scale_to_zero_dst       # scale dst exponent
11600
11601        add.l           %d0,(%sp)               # SCALE_FACTOR = scale1 + scale2
11602
# NOTE(review): the table index below uses d1.w, but lsr.b only shifts the
# low byte; this appears to rely on the upper byte of the fetched word
# being zero -- confirm against what callers pass in d0.
11603        mov.w           2+L_SCR3(%a6),%d1       # fetch precision
11604        lsr.b           &0x6,%d1                # shift to lo bits
11605        mov.l           (%sp)+,%d0              # load S.F.
11606        cmp.l           %d0,(tbl_fmul_ovfl.w,%pc,%d1.w*4) # would result ovfl?
11607        beq.w           fmul_may_ovfl           # result may rnd to overflow
11608        blt.w           fmul_ovfl               # result will overflow
11609
11610        cmp.l           %d0,(tbl_fmul_unfl.w,%pc,%d1.w*4) # would result unfl?
11611        beq.w           fmul_may_unfl           # result may rnd to no unfl
11612        bgt.w           fmul_unfl               # result will underflow
# otherwise fall through into fmul_normal with d0 = combined scale factor
11613
11614#
11615# NORMAL:
11616# - the result of the multiply operation will neither overflow nor underflow.
11617# - do the multiply to the proper precision and rounding mode.
11618# - scale the result exponent using the scale factor. if both operands were
11619# normalized then we really don't need to go through this scaling. but for now,
11620# this will do.
# - on entry d0 holds the combined scale factor; it is only read here.
11621#
11622fmul_normal:
11623        fmovm.x         FP_SCR1(%a6),&0x80      # load dst operand
11624
11625        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
11626        fmov.l          &0x0,%fpsr              # clear FPSR
11627
11628        fmul.x          FP_SCR0(%a6),%fp0       # execute multiply
11629
11630        fmov.l          %fpsr,%d1               # save status
11631        fmov.l          &0x0,%fpcr              # clear FPCR
11632
11633        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
11634
# fmul_normal_exit: undo the operand scaling on the result in fp0.
# Also branched to by fmul_may_ovfl and fmul_may_unfl once they have
# decided that no overflow/underflow occurred.
11635fmul_normal_exit:
11636        fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
11637        mov.l           %d2,-(%sp)              # save d2
11638        mov.w           FP_SCR0_EX(%a6),%d1     # load {sgn,exp}
11639        mov.l           %d1,%d2                 # make a copy
11640        andi.l          &0x7fff,%d1             # strip sign
11641        andi.w          &0x8000,%d2             # keep old sign
11642        sub.l           %d0,%d1                 # exp - scale factor (undo scaling)
11643        or.w            %d2,%d1                 # concat old sign,new exp
11644        mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
11645        mov.l           (%sp)+,%d2              # restore d2
11646        fmovm.x         FP_SCR0(%a6),&0x80      # return rescaled result in fp0
11647        rts
11648
11649#
11650# OVERFLOW:
11651# - the result of the multiply operation is an overflow.
11652# - do the multiply to the proper precision and rounding mode in order to
11653# set the inexact bits.
11654# - calculate the default result and return it in fp0.
11655# - if overflow or inexact is enabled, we need a multiply result rounded to
11656# extended precision. if the original operation was extended, then we have this
11657# result. if the original operation was single or double, we have to do another
11658# multiply using extended precision and the correct rounding mode. the result
11659# of this operation then has its exponent scaled by -0x6000 to create the
11660# exceptional operand.
11661#
11662fmul_ovfl:
11663        fmovm.x         FP_SCR1(%a6),&0x80      # load dst operand
11664
11665        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
11666        fmov.l          &0x0,%fpsr              # clear FPSR
11667
11668        fmul.x          FP_SCR0(%a6),%fp0       # execute multiply
11669
11670        fmov.l          %fpsr,%d1               # save status
11671        fmov.l          &0x0,%fpcr              # clear FPCR
11672
11673        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
11674
11675# save setting this until now because this is where fmul_may_ovfl may jump in
11676fmul_ovfl_tst:
11677        or.l            &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
11678
11679        mov.b           FPCR_ENABLE(%a6),%d1
11680        andi.b          &0x13,%d1               # is OVFL or INEX enabled?
11681        bne.b           fmul_ovfl_ena           # yes
11682
11683# calculate the default result
# (fmul_ovfl_ena_cont also branches here after it has built the EXOP in fp1)
# NOTE(review): judging by the instructions after the call, ovf_res hands
# back condition-code bits in d0 and a result pointer in a0 -- confirm
# against ovf_res itself, which is not visible here.
11684fmul_ovfl_dis:
11685        btst            &neg_bit,FPSR_CC(%a6)   # is result negative?
11686        sne             %d1                     # set sign param accordingly
11687        mov.l           L_SCR3(%a6),%d0         # pass rnd prec,mode
11688        bsr.l           ovf_res                 # calculate default result
11689        or.b            %d0,FPSR_CC(%a6)        # set INF,N if applicable
11690        fmovm.x         (%a0),&0x80             # return default result in fp0
11691        rts
11692
11693#
11694# OVFL or INEX is enabled; Create EXOP:
11695# - if precision is extended, then we have the EXOP. simply bias the exponent
11696# with an extra -0x6000. if the precision is single or double, we need to
11697# calculate a result rounded to extended precision.
# - d0 still holds the combined scale factor when this code is reached.
11698#
11699fmul_ovfl_ena:
11700        mov.l           L_SCR3(%a6),%d1
11701        andi.b          &0xc0,%d1               # test the rnd prec
11702        bne.b           fmul_ovfl_ena_sd        # it's sgl or dbl
11703
# fmul_ovfl_ena_cont: fp0 holds the extended-precision product; rescale its
# exponent, apply the -0x6000 bias and return it as the EXOP in fp1.
11704fmul_ovfl_ena_cont:
11705        fmovm.x         &0x80,FP_SCR0(%a6)      # move result to stack
11706
11707        mov.l           %d2,-(%sp)              # save d2
11708        mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
11709        mov.w           %d1,%d2                 # make a copy
11710        andi.l          &0x7fff,%d1             # strip sign
11711        sub.l           %d0,%d1                 # exp - scale factor (undo scaling)
11712        subi.l          &0x6000,%d1             # subtract bias
11713        andi.w          &0x7fff,%d1             # clear sign bit
11714        andi.w          &0x8000,%d2             # keep old sign
11715        or.w            %d2,%d1                 # concat old sign,new exp
11716        mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
11717        mov.l           (%sp)+,%d2              # restore d2
11718        fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
11719        bra.b           fmul_ovfl_dis           # now compute the default result
11720
# fmul_ovfl_ena_sd: precision was sgl or dbl, so redo the multiply with only
# the rounding-mode bits (0x30) in the FPCR to get the product that
# fmul_ovfl_ena_cont turns into the EXOP.
11721fmul_ovfl_ena_sd:
11722        fmovm.x         FP_SCR1(%a6),&0x80      # load dst operand
11723
11724        mov.l           L_SCR3(%a6),%d1
11725        andi.b          &0x30,%d1               # keep rnd mode only
11726        fmov.l          %d1,%fpcr               # set FPCR
11727
11728        fmul.x          FP_SCR0(%a6),%fp0       # execute multiply
11729
11730        fmov.l          &0x0,%fpcr              # clear FPCR
11731        bra.b           fmul_ovfl_ena_cont
11732
11733#
11734# may OVERFLOW:
11735# - the result of the multiply operation MAY overflow.
11736# - do the multiply to the proper precision and rounding mode in order to
11737# set the inexact bits.
11738# - if the scaled |result| is >= 2, overflow occurred: continue at
# fmul_ovfl_tst. otherwise the result is correct: finish via fmul_normal_exit.
11739#
11740fmul_may_ovfl:
11741        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
11742
11743        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
11744        fmov.l          &0x0,%fpsr              # clear FPSR
11745
11746        fmul.x          FP_SCR0(%a6),%fp0       # execute multiply
11747
11748        fmov.l          %fpsr,%d1               # save status
11749        fmov.l          &0x0,%fpcr              # clear FPCR
11750
11751        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
11752
11753        fabs.x          %fp0,%fp1               # fp1 = |result|; fp0 is left intact
11754        fcmp.b          %fp1,&0x2               # is |result| >= 2.b?
11755        fbge.w          fmul_ovfl_tst           # yes; overflow has occurred
11756
11757# no, it didn't overflow; we have correct result
11758        bra.w           fmul_normal_exit
11759
11760#
11761# UNDERFLOW:
11762# - the result of the multiply operation is an underflow.
11763# - do the multiply to the proper precision and rounding mode in order to
11764# set the inexact bits.
11765# - calculate the default result and return it in fp0.
11766# - if underflow or inexact is enabled, we need a multiply result rounded to
11767# extended precision. if the original operation was extended, then we have this
11768# result. if the original operation was single or double, we have to do another
11769# multiply using extended precision and the correct rounding mode. the result
11770# of this operation then has its exponent scaled by +0x6000 to create the
11771# exceptional operand.
11772#
11773fmul_unfl:
11774        bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
11775
11776# for fun, let's use only extended precision, round to zero. then, let
11777# the unf_res() routine figure out all the rest.
11778# will we get the correct answer?
11779        fmovm.x         FP_SCR1(%a6),&0x80      # load dst operand
11780
11781        fmov.l          &rz_mode*0x10,%fpcr     # set FPCR
11782        fmov.l          &0x0,%fpsr              # clear FPSR
11783
11784        fmul.x          FP_SCR0(%a6),%fp0       # execute multiply
11785
11786        fmov.l          %fpsr,%d1               # save status
11787        fmov.l          &0x0,%fpcr              # clear FPCR
11788
11789        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
11790
11791        mov.b           FPCR_ENABLE(%a6),%d1
11792        andi.b          &0x0b,%d1               # is UNFL or INEX enabled?
11793        bne.b           fmul_unfl_ena           # yes
11794
# fmul_unfl_dis: hand the product in fp0 to unf_res and return its result.
# fmul_unfl_ena_cont also branches here after it has built the EXOP in fp1.
11795fmul_unfl_dis:
11796        fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
11797
11798        lea             FP_SCR0(%a6),%a0        # pass: result addr
11799        mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
11800        bsr.l           unf_res                 # calculate default result
11801        or.b            %d0,FPSR_CC(%a6)        # unf_res may have set 'Z'
11802        fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
11803        rts
11804
11805#
11806# UNFL or INEX is enabled (fmul_unfl's enable test was non-zero); create EXOP:
# - redo the multiply into fp1 with the user's rounding mode (extended
# precision in all cases), rescale the exponent and add the 0x6000 bias.
# - d0 still holds the combined scale factor; fp0 keeps fmul_unfl's product.
11807#
11808fmul_unfl_ena:
11809        fmovm.x         FP_SCR1(%a6),&0x40      # load dst op into fp1
11810
11811        mov.l           L_SCR3(%a6),%d1
11812        andi.b          &0xc0,%d1               # is precision extended?
11813        bne.b           fmul_unfl_ena_sd        # no, sgl or dbl
11814
11815# if the rnd mode is anything but RZ, then we have to re-do the above
11816# multiplication because we used RZ for all.
11817        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
11818
11819fmul_unfl_ena_cont:
11820        fmov.l          &0x0,%fpsr              # clear FPSR
11821
11822        fmul.x          FP_SCR0(%a6),%fp1       # execute multiply
11823
11824        fmov.l          &0x0,%fpcr              # clear FPCR
11825
11826        fmovm.x         &0x40,FP_SCR0(%a6)      # save result to stack
11827        mov.l           %d2,-(%sp)              # save d2
11828        mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
11829        mov.l           %d1,%d2                 # make a copy
11830        andi.l          &0x7fff,%d1             # strip sign
11831        andi.w          &0x8000,%d2             # keep old sign
11832        sub.l           %d0,%d1                 # exp - scale factor (undo scaling)
11833        addi.l          &0x6000,%d1             # add bias
11834        andi.w          &0x7fff,%d1             # clear sign bit
11835        or.w            %d2,%d1                 # concat old sign,new exp
11836        mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
11837        mov.l           (%sp)+,%d2              # restore d2
11838        fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
11839        bra.w           fmul_unfl_dis           # now compute the default result
11840
# fmul_unfl_ena_sd: precision was sgl or dbl; load only the rounding-mode
# bits (0x30) into the FPCR before the shared multiply above.
11841fmul_unfl_ena_sd:
11842        mov.l           L_SCR3(%a6),%d1
11843        andi.b          &0x30,%d1               # use only rnd mode
11844        fmov.l          %d1,%fpcr               # set FPCR
11845
11846        bra.b           fmul_unfl_ena_cont
11847
11848# MAY UNDERFLOW:
11849# -use the correct rounding mode and precision. this code favors operations
11850# that do not underflow.
# -the scaled |result| is compared against 2: greater means no underflow,
# less means underflow, and equality needs the RZ re-check further down.
11851fmul_may_unfl:
11852        fmovm.x         FP_SCR1(%a6),&0x80      # load dst operand
11853
11854        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
11855        fmov.l          &0x0,%fpsr              # clear FPSR
11856
11857        fmul.x          FP_SCR0(%a6),%fp0       # execute multiply
11858
11859        fmov.l          %fpsr,%d1               # save status
11860        fmov.l          &0x0,%fpcr              # clear FPCR
11861
11862        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
11863
11864        fabs.x          %fp0,%fp1               # fp1 = |result|; fp0 is left intact
11865        fcmp.b          %fp1,&0x2               # compare |result| with 2.b
11866        fbgt.w          fmul_normal_exit        # |result| > 2; no underflow occurred
11867        fblt.w          fmul_unfl               # |result| < 2; underflow occurred
11868
11869#
11870# we still don't know if underflow occurred. result is ~ equal to 2. but,
11871# we don't know if the result was an underflow that rounded up to a 2 or
11872# a normalized number that rounded down to a 2. so, redo the entire operation
11873# using RZ as the rounding mode to see what the pre-rounded result is.
11874# this case should be relatively rare.
11875#
11876        fmovm.x         FP_SCR1(%a6),&0x40      # load dst operand into fp1
11877
11878        mov.l           L_SCR3(%a6),%d1
11879        andi.b          &0xc0,%d1               # keep rnd prec
11880        ori.b           &rz_mode*0x10,%d1       # insert RZ
11881
11882        fmov.l          %d1,%fpcr               # set FPCR
11883        fmov.l          &0x0,%fpsr              # clear FPSR
11884
11885        fmul.x          FP_SCR0(%a6),%fp1       # execute multiply
11886
11887        fmov.l          &0x0,%fpcr              # clear FPCR
11888        fabs.x          %fp1                    # make absolute value
11889        fcmp.b          %fp1,&0x2               # is |result| < 2.b?
11890        fbge.w          fmul_normal_exit        # no; no underflow occurred
11891        bra.w           fmul_unfl               # yes, underflow occurred
11892
11893################################################################################
11894
11895#
11896# Multiply: inputs are not both normalized; what are they?
# On entry d1.w = (DTAG * 8) + STAG, as built in fmul. It indexes the table of
# 16-bit offsets below; the fetched offset is then used for a jump relative
# to tbl_fmul_op. Rows are selected by the dst tag, columns by the src tag.
11897#
11898fmul_not_norm:
11899        mov.w           (tbl_fmul_op.b,%pc,%d1.w*2),%d1
11900        jmp             (tbl_fmul_op.b,%pc,%d1.w)
11901
11902        swbeg           &48
11903tbl_fmul_op:
11904        short           fmul_norm       - tbl_fmul_op # NORM x NORM
11905        short           fmul_zero       - tbl_fmul_op # NORM x ZERO
11906        short           fmul_inf_src    - tbl_fmul_op # NORM x INF
11907        short           fmul_res_qnan   - tbl_fmul_op # NORM x QNAN
11908        short           fmul_norm       - tbl_fmul_op # NORM x DENORM
11909        short           fmul_res_snan   - tbl_fmul_op # NORM x SNAN
11910        short           tbl_fmul_op     - tbl_fmul_op # (unused slot, offset 0)
11911        short           tbl_fmul_op     - tbl_fmul_op # (unused slot, offset 0)
11912
11913        short           fmul_zero       - tbl_fmul_op # ZERO x NORM
11914        short           fmul_zero       - tbl_fmul_op # ZERO x ZERO
11915        short           fmul_res_operr  - tbl_fmul_op # ZERO x INF
11916        short           fmul_res_qnan   - tbl_fmul_op # ZERO x QNAN
11917        short           fmul_zero       - tbl_fmul_op # ZERO x DENORM
11918        short           fmul_res_snan   - tbl_fmul_op # ZERO x SNAN
11919        short           tbl_fmul_op     - tbl_fmul_op # (unused slot, offset 0)
11920        short           tbl_fmul_op     - tbl_fmul_op # (unused slot, offset 0)
11921
11922        short           fmul_inf_dst    - tbl_fmul_op # INF x NORM
11923        short           fmul_res_operr  - tbl_fmul_op # INF x ZERO
11924        short           fmul_inf_dst    - tbl_fmul_op # INF x INF
11925        short           fmul_res_qnan   - tbl_fmul_op # INF x QNAN
11926        short           fmul_inf_dst    - tbl_fmul_op # INF x DENORM
11927        short           fmul_res_snan   - tbl_fmul_op # INF x SNAN
11928        short           tbl_fmul_op     - tbl_fmul_op # (unused slot, offset 0)
11929        short           tbl_fmul_op     - tbl_fmul_op # (unused slot, offset 0)
11930
11931        short           fmul_res_qnan   - tbl_fmul_op # QNAN x NORM
11932        short           fmul_res_qnan   - tbl_fmul_op # QNAN x ZERO
11933        short           fmul_res_qnan   - tbl_fmul_op # QNAN x INF
11934        short           fmul_res_qnan   - tbl_fmul_op # QNAN x QNAN
11935        short           fmul_res_qnan   - tbl_fmul_op # QNAN x DENORM
11936        short           fmul_res_snan   - tbl_fmul_op # QNAN x SNAN
11937        short           tbl_fmul_op     - tbl_fmul_op # (unused slot, offset 0)
11938        short           tbl_fmul_op     - tbl_fmul_op # (unused slot, offset 0)
11939
11940        short           fmul_norm       - tbl_fmul_op # DENORM x NORM
11941        short           fmul_zero       - tbl_fmul_op # DENORM x ZERO
11942        short           fmul_inf_src    - tbl_fmul_op # DENORM x INF
11943        short           fmul_res_qnan   - tbl_fmul_op # DENORM x QNAN
11944        short           fmul_norm       - tbl_fmul_op # DENORM x DENORM
11945        short           fmul_res_snan   - tbl_fmul_op # DENORM x SNAN
11946        short           tbl_fmul_op     - tbl_fmul_op # (unused slot, offset 0)
11947        short           tbl_fmul_op     - tbl_fmul_op # (unused slot, offset 0)
11948
11949        short           fmul_res_snan   - tbl_fmul_op # SNAN x NORM
11950        short           fmul_res_snan   - tbl_fmul_op # SNAN x ZERO
11951        short           fmul_res_snan   - tbl_fmul_op # SNAN x INF
11952        short           fmul_res_snan   - tbl_fmul_op # SNAN x QNAN
11953        short           fmul_res_snan   - tbl_fmul_op # SNAN x DENORM
11954        short           fmul_res_snan   - tbl_fmul_op # SNAN x SNAN
11955        short           tbl_fmul_op     - tbl_fmul_op # (unused slot, offset 0)
11956        short           tbl_fmul_op     - tbl_fmul_op # (unused slot, offset 0)
11957
# Local stubs that forward to the shared result routines with long branches.
# NOTE(review): presumably these exist so the 16-bit table offsets above can
# reach the res_* handlers -- confirm.
11958fmul_res_operr:
11959        bra.l           res_operr
11960fmul_res_snan:
11961        bra.l           res_snan
11962fmul_res_qnan:
11963        bra.l           res_qnan
11964
11965#
11966# Multiply: (Zero x Zero) || (Zero x norm) || (Zero x denorm)
# Returns a zero in fp0 whose sign is the XOR of the operand signs, and
# overwrites FPSR_CC with Z (plus N for a negative zero).
11967#
11968        global          fmul_zero               # global for fsglmul
11969fmul_zero:
11970        mov.b           SRC_EX(%a0),%d0         # exclusive or the signs
11971        mov.b           DST_EX(%a1),%d1
11972        eor.b           %d0,%d1                 # bit 7 of d1 = sign of the product
11973        bpl.b           fmul_zero_p             # result ZERO is pos.
11974fmul_zero_n:
11975        fmov.s          &0x80000000,%fp0        # load -ZERO
11976        mov.b           &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
11977        rts
11978fmul_zero_p:
11979        fmov.s          &0x00000000,%fp0        # load +ZERO
11980        mov.b           &z_bmask,FPSR_CC(%a6)   # set Z
11981        rts
11982
11983#
11984# Multiply: (inf x inf) || (inf x norm) || (inf x denorm)
11985#
11986# Note: The j-bit for an infinity is a don't-care. However, to be
11987# strictly compatible w/ the 68881/882, we make sure to return an
11988# INF w/ the j-bit set if the input INF j-bit was set. Destination
11989# INFs take priority.
#
# fmul_inf_dst returns the destination operand as the INF, fmul_inf_src the
# source operand; both share the sign fix-up code at fmul_inf_dst_n/_p.
# The result sign is the XOR of the operand signs; FPSR_CC is overwritten.
11990#
11991        global          fmul_inf_dst            # global for fsglmul
11992fmul_inf_dst:
11993        fmovm.x         DST(%a1),&0x80          # return INF result in fp0
11994        mov.b           SRC_EX(%a0),%d0         # exclusive or the signs
11995        mov.b           DST_EX(%a1),%d1
11996        eor.b           %d0,%d1                 # bit 7 of d1 = sign of the product
11997        bpl.b           fmul_inf_dst_p          # result INF is pos.
11998fmul_inf_dst_n:
11999        fabs.x          %fp0                    # clear result sign
12000        fneg.x          %fp0                    # set result sign
12001        mov.b           &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
12002        rts
12003fmul_inf_dst_p:
12004        fabs.x          %fp0                    # clear result sign
12005        mov.b           &inf_bmask,FPSR_CC(%a6) # set INF
12006        rts
12007
12008        global          fmul_inf_src            # global for fsglmul
12009fmul_inf_src:
12010        fmovm.x         SRC(%a0),&0x80          # return INF result in fp0
12011        mov.b           SRC_EX(%a0),%d0         # exclusive or the signs
12012        mov.b           DST_EX(%a1),%d1
12013        eor.b           %d0,%d1                 # bit 7 of d1 = sign of the product
12014        bpl.b           fmul_inf_dst_p          # result INF is pos.
12015        bra.b           fmul_inf_dst_n          # result INF is neg.
12016
12017#########################################################################
12018# XDEF **************************************************************** #
12019#       fin(): emulates the fmove instruction                           #
12020#       fsin(): emulates the fsmove instruction                         #
12021#       fdin(): emulates the fdmove instruction                         #
12022#                                                                       #
12023# XREF **************************************************************** #
12024#       norm() - normalize mantissa for EXOP on denorm                  #
12025#       scale_to_zero_src() - scale src exponent to zero                #
12026#       ovf_res() - return default overflow result                      #
12027#       unf_res() - return default underflow result                     #
12028#       res_qnan_1op() - return QNAN result                             #
12029#       res_snan_1op() - return SNAN result                             #
12030#                                                                       #
12031# INPUT *************************************************************** #
12032#       a0 = pointer to extended precision source operand               #
12033#       d0 = round prec/mode                                            #
12034#                                                                       #
12035# OUTPUT ************************************************************** #
12036#       fp0 = result                                                    #
12037#       fp1 = EXOP (if exception occurred)                              #
12038#                                                                       #
12039# ALGORITHM *********************************************************** #
12040#       Handle NANs, infinities, and zeroes as special cases. Divide    #
12041# norms into extended, single, and double precision.                    #
12042#       Norms can be emulated w/ a regular fmove instruction. For       #
12043# sgl/dbl, must scale exponent and perform an "fmove". Check to see     #
12044# if the result would have overflowed/underflowed. If so, use unf_res() #
12045# or ovf_res() to return the default result. Also return EXOP if        #
12046# exception is enabled. If no exception, return the default result.     #
12047#       Unnorms don't pass through here.                                #
12048#                                                                       #
12049#########################################################################
12050
# fsin: single-precision entry point. Keeps only the bits selected by the
# 0x30 mask in d0 (the rounding mode), forces the precision field to single,
# and then joins the common move-in code at fin.
12051        global          fsin
12052fsin:
12053        andi.b          &0x30,%d0               # keep rnd mode; clear rnd prec
12054        ori.b           &s_mode*0x10,%d0        # insert sgl precision
12055        bra.b           fin
12056
# fdin: double-precision entry point. Keeps only the bits selected by the
# 0x30 mask in d0 (the rounding mode), forces the precision field to double,
# and falls through into fin (no branch needed).
12057        global          fdin
12058fdin:
12059        andi.b          &0x30,%d0               # keep rnd mode; clear rnd prec
12060        ori.b           &d_mode*0x10,%d0        # insert dbl precision
12061
# fin: common move-in entry.
#   a0 = pointer to the extended precision source operand
#   d0 = rounding precision/mode (saved in L_SCR3 for the later paths)
# A non-zero source tag is dispatched to fin_not_norm; a zero tag falls
# through to fin_norm. With extended precision selected the operand is
# returned unchanged in fp0, and only the 'N' condition code is updated.
12062        global          fin
12063fin:
12064        mov.l           %d0,L_SCR3(%a6)         # store rnd info
12065
12066        mov.b           STAG(%a6),%d1           # fetch src optype tag
12067        bne.w           fin_not_norm            # optimize on non-norm input
12068
12069#
12070# FP MOVE IN: NORMs and DENORMs ONLY!
12071#
12072fin_norm:
12073        andi.b          &0xc0,%d0               # is precision extended?
12074        bne.w           fin_not_ext             # no, so go handle dbl or sgl
12075
12076#
12077# precision selected is extended. so...we cannot get an underflow
12078# or overflow because of rounding to the correct precision. so...
12079# skip the scaling and unscaling...
12080#
12081        tst.b           SRC_EX(%a0)             # is the operand negative?
12082        bpl.b           fin_norm_done           # no
12083        bset            &neg_bit,FPSR_CC(%a6)   # yes, so set 'N' ccode bit
12084fin_norm_done:
12085        fmovm.x         SRC(%a0),&0x80          # return result in fp0
12086        rts
12087
12088#
12089# for an extended precision DENORM, the UNFL exception bit is set
12090# the accrued bit is NOT set in this instance(no inexactness!)
12091#
# fin_denorm: move-in of a DENORM source (reached from fin_not_norm).
# For sgl/dbl precision the operand takes the scaled path at fin_not_ext.
# For extended precision the UNFL exception bit is set, the operand itself
# is returned in fp0, and an EXOP is built only if UNFL is enabled.
12092fin_denorm:
12093        andi.b          &0xc0,%d0               # is precision extended?
12094        bne.w           fin_not_ext             # no, so go handle dbl or sgl
12095
12096        bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12097        tst.b           SRC_EX(%a0)             # is the operand negative?
12098        bpl.b           fin_denorm_done         # no
12099        bset            &neg_bit,FPSR_CC(%a6)   # yes, so set 'N' ccode bit
12100fin_denorm_done:
12101        fmovm.x         SRC(%a0),&0x80          # return result in fp0
12102        btst            &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
12103        bne.b           fin_denorm_unfl_ena     # yes; also build the EXOP
12104        rts
12105
12106#
12107# the input is an extended DENORM and underflow is enabled in the FPCR.
12108# normalize the mantissa and add the bias of 0x6000 to the resulting negative
12109# exponent and insert back into the operand.
12110#
# fin_denorm_unfl_ena: build the EXOP for an extended precision DENORM when
# UNFL is enabled. The operand is copied to FP_SCR0 and normalized there by
# norm(); the new exponent is (-d0 + 0x6000) masked to 15 bits, where d0 is
# the value norm() leaves behind (the shift count), recombined with the
# original sign. The EXOP is returned in fp1; fp0 was already loaded by the
# caller path.
12111fin_denorm_unfl_ena:
12112        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
12113        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
12114        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
12115        lea             FP_SCR0(%a6),%a0        # pass: ptr to operand
12116        bsr.l           norm                    # normalize result
12117        neg.w           %d0                     # new exponent = -(shft val)
12118        addi.w          &0x6000,%d0             # add new bias to exponent
12119        mov.w           FP_SCR0_EX(%a6),%d1     # fetch old sign,exp
12120        andi.w          &0x8000,%d1             # keep old sign
12121        andi.w          &0x7fff,%d0             # clear sign position
12122        or.w            %d1,%d0                 # concat new exp,old sign
12123        mov.w           %d0,FP_SCR0_EX(%a6)     # insert new exponent
12124        fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
12125        rts
12126
12127#
12128# operand is to be rounded to single or double precision
12129#
# fin_not_ext: precision is single or double. On entry the low byte of d0
# holds only the precision bits (it was masked with 0xc0 by the caller path).
# Anything other than single goes to fin_dbl; single falls into fin_sgl.
12130fin_not_ext:
12131        cmpi.b          %d0,&s_mode*0x10        # separate sgl/dbl prec
12132        bne.b           fin_dbl
12133
12134#
12135# operand is to be rounded to single precision
12136#
# fin_sgl: copy the operand to FP_SCR0, let scale_to_zero_src() scale it and
# return the scale factor in d0, then classify the move against the single
# precision underflow/overflow thresholds. If none of the three branches is
# taken, execution falls through into fin_sd_normal.
12137fin_sgl:
12138        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
12139        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
12140        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
12141        bsr.l           scale_to_zero_src       # calculate scale factor
12142
12143        cmpi.l          %d0,&0x3fff-0x3f80      # will move in underflow?
12144        bge.w           fin_sd_unfl             # yes; go handle underflow
12145        cmpi.l          %d0,&0x3fff-0x407e      # will move in overflow?
12146        beq.w           fin_sd_may_ovfl         # maybe; go check
12147        blt.w           fin_sd_ovfl             # yes; go handle overflow
12148
12149#
12150# operand will NOT overflow or underflow when moved into the fp reg file
12151#
# fin_sd_normal: the sgl/dbl move-in cannot overflow or underflow.
# Perform the move on the scaled operand in FP_SCR0 under the user's
# rounding precision/mode (L_SCR3), merge the resulting FPSR bits into
# USER_FPSR, then fall into fin_sd_normal_exit to undo the scaling.
12152fin_sd_normal:
12153        fmov.l          &0x0,%fpsr              # clear FPSR
12154        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
12155
12156        fmov.x          FP_SCR0(%a6),%fp0       # perform move
12157
12158        fmov.l          %fpsr,%d1               # save FPSR
12159        fmov.l          &0x0,%fpcr              # clear FPCR
12160
12161        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
12162
# fin_sd_normal_exit: fp0 holds the rounded, scaled result and d0 the scale
# factor. Store fp0 to FP_SCR0, replace its exponent with
# (exponent - scale factor) while keeping the sign bit, and reload fp0.
# d2 is used as scratch and preserved on the stack.
12163fin_sd_normal_exit:
12164        mov.l           %d2,-(%sp)              # save d2
12165        fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
12166        mov.w           FP_SCR0_EX(%a6),%d1     # load {sgn,exp}
12167        mov.w           %d1,%d2                 # make a copy
12168        andi.l          &0x7fff,%d1             # strip sign
12169        sub.l           %d0,%d1                 # subtract scale factor
12170        andi.w          &0x8000,%d2             # keep old sign
12171        or.w            %d1,%d2                 # concat old sign,new exponent
12172        mov.w           %d2,FP_SCR0_EX(%a6)     # insert new exponent
12173        mov.l           (%sp)+,%d2              # restore d2
12174        fmovm.x         FP_SCR0(%a6),&0x80      # return result in fp0
12175        rts
12176
12177#
12178# operand is to be rounded to double precision
12179#
# fin_dbl: double precision counterpart of fin_sgl. Copy the operand to
# FP_SCR0, obtain the scale factor in d0 from scale_to_zero_src(), and
# classify the move against the double precision thresholds. Unlike fin_sgl,
# the normal case needs an explicit branch to fin_sd_normal.
12180fin_dbl:
12181        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
12182        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
12183        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
12184        bsr.l           scale_to_zero_src       # calculate scale factor
12185
12186        cmpi.l          %d0,&0x3fff-0x3c00      # will move in underflow?
12187        bge.w           fin_sd_unfl             # yes; go handle underflow
12188        cmpi.l          %d0,&0x3fff-0x43fe      # will move in overflow?
12189        beq.w           fin_sd_may_ovfl         # maybe; go check
12190        blt.w           fin_sd_ovfl             # yes; go handle overflow
12191        bra.w           fin_sd_normal           # no; go handle normalized op
12192
12193#
12194# operand WILL underflow when moved in to the fp register file
12195#
# fin_sd_unfl: the sgl/dbl move-in will underflow. Set the UNFL exception
# bit and the 'N' condition code (from the sign of the scaled operand in
# FP_SCR0), then either build the EXOP first (UNFL or INEX enabled) or go
# straight to the default result.
12196fin_sd_unfl:
12197        bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12198
12199        tst.b           FP_SCR0_EX(%a6)         # is operand negative?
12200        bpl.b           fin_sd_unfl_tst
12201        bset            &neg_bit,FPSR_CC(%a6)   # set 'N' ccode bit
12202
12203# if underflow or inexact is enabled, then go calculate the EXOP first.
12204fin_sd_unfl_tst:
12205        mov.b           FPCR_ENABLE(%a6),%d1
12206        andi.b          &0x0b,%d1               # is UNFL or INEX enabled?
12207        bne.b           fin_sd_unfl_ena         # yes
12208
# fin_sd_unfl_dis: produce the default underflow result in place in FP_SCR0
# via unf_res() and return it in fp0. The value unf_res() leaves in d0 is
# OR'd into the condition code byte.
# NOTE(review): d0 still holds the scale factor when unf_res is called (no
# path here modifies it); presumably unf_res consumes it — confirm.
12209fin_sd_unfl_dis:
12210        lea             FP_SCR0(%a6),%a0        # pass: result addr
12211        mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
12212        bsr.l           unf_res                 # calculate default result
12213        or.b            %d0,FPSR_CC(%a6)        # unf_res may have set 'Z'
12214        fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
12215        rts
12216
12217#
12218# operand will underflow AND underflow or inexact is enabled.
12219# Therefore, we must return the result rounded to extended precision.
12220#
# fin_sd_unfl_ena: build the underflow EXOP in FP_SCR1 from the scaled
# operand in FP_SCR0. The mantissa is copied unchanged; the exponent becomes
# (exponent - scale factor + 0x6000) masked to 15 bits, with the original
# sign. The EXOP is returned in fp1, then the default result is produced by
# fin_sd_unfl_dis. d0 (scale factor) is left untouched for that path.
12221fin_sd_unfl_ena:
12222        mov.l           FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
12223        mov.l           FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
12224        mov.w           FP_SCR0_EX(%a6),%d1     # load current exponent
12225
12226        mov.l           %d2,-(%sp)              # save d2
12227        mov.w           %d1,%d2                 # make a copy
12228        andi.l          &0x7fff,%d1             # strip sign
12229        sub.l           %d0,%d1                 # subtract scale factor
12230        andi.w          &0x8000,%d2             # extract old sign
12231        addi.l          &0x6000,%d1             # add new bias
12232        andi.w          &0x7fff,%d1             # keep exponent within 15 bits
12233        or.w            %d1,%d2                 # concat old sign,new exp
12234        mov.w           %d2,FP_SCR1_EX(%a6)     # insert new exponent
12235        fmovm.x         FP_SCR1(%a6),&0x40      # return EXOP in fp1
12236        mov.l           (%sp)+,%d2              # restore d2
12237        bra.b           fin_sd_unfl_dis         # now create the default result
12238
12239#
12240# operand WILL overflow.
12241#
# fin_sd_ovfl: the sgl/dbl move-in will overflow. The move is still executed
# on the scaled operand so that the FPSR bits it produces can be merged into
# USER_FPSR before the overflow handling below.
12242fin_sd_ovfl:
12243        fmov.l          &0x0,%fpsr              # clear FPSR
12244        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
12245
12246        fmov.x          FP_SCR0(%a6),%fp0       # perform move
12247
12248        fmov.l          &0x0,%fpcr              # clear FPCR
12249        fmov.l          %fpsr,%d1               # save FPSR
12250
12251        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
12252
# fin_sd_ovfl_tst: common overflow tail (also entered from fin_sd_may_ovfl).
# Record the overflow status bits, then build the EXOP first if OVFL or
# INEX is enabled.
12253fin_sd_ovfl_tst:
12254        or.l            &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
12255
12256        mov.b           FPCR_ENABLE(%a6),%d1
12257        andi.b          &0x13,%d1               # is OVFL or INEX enabled?
12258        bne.b           fin_sd_ovfl_ena         # yes
12259
12260#
12261# OVFL is not enabled; therefore, we must create the default result by
12262# calling ovf_res().
12263#
# The result sign passed in d1 comes from the 'N' bit saved above. After
# the call, d0 is OR'd into the condition codes and fp0 is loaded from (a0).
# NOTE(review): a0 is presumably set by ovf_res to the default result — confirm.
12264fin_sd_ovfl_dis:
12265        btst            &neg_bit,FPSR_CC(%a6)   # is result negative?
12266        sne             %d1                     # set sign param accordingly
12267        mov.l           L_SCR3(%a6),%d0         # pass: prec,mode
12268        bsr.l           ovf_res                 # calculate default result
12269        or.b            %d0,FPSR_CC(%a6)        # set INF,N if applicable
12270        fmovm.x         (%a0),&0x80             # return default result in fp0
12271        rts
12272
12273#
12274# OVFL is enabled.
12275# the INEX2 bit has already been updated by the round to the correct precision.
12276# now, round to extended(and don't alter the FPSR).
12277#
# fin_sd_ovfl_ena: build the overflow EXOP from the operand held in FP_SCR0.
# Its exponent becomes (exponent - scale factor - 0x6000) masked to 15 bits,
# with the original sign. The EXOP is returned in fp1, then the default
# result is produced by fin_sd_ovfl_dis.
12278fin_sd_ovfl_ena:
12279        mov.l           %d2,-(%sp)              # save d2
12280        mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
12281        mov.l           %d1,%d2                 # make a copy
12282        andi.l          &0x7fff,%d1             # strip sign
12283        andi.w          &0x8000,%d2             # keep old sign
12284        sub.l           %d0,%d1                 # subtract scale factor
12285        sub.l           &0x6000,%d1             # subtract bias
12286        andi.w          &0x7fff,%d1             # keep exponent within 15 bits
12287        or.w            %d2,%d1                 # concat old sign,new exp
12288        mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
12289        mov.l           (%sp)+,%d2              # restore d2
12290        fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
12291        bra.b           fin_sd_ovfl_dis         # now create the default result
12292
12293#
12294# the move in MAY overflow. so...
12295#
# fin_sd_may_ovfl: the scale factor sits exactly on the overflow threshold,
# so whether the move overflows depends on rounding. Perform the move on the
# scaled operand and compare |result| with 2: if it is >= 2 the overflow
# tail is taken, otherwise fp0 already holds the correct scaled result and
# only needs its exponent restored.
12296fin_sd_may_ovfl:
12297        fmov.l          &0x0,%fpsr              # clear FPSR
12298        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
12299
12300        fmov.x          FP_SCR0(%a6),%fp0       # perform the move
12301
12302        fmov.l          %fpsr,%d1               # save status
12303        fmov.l          &0x0,%fpcr              # clear FPCR
12304
12305        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
12306
12307        fabs.x          %fp0,%fp1               # fp1 = |result|; fp0 is kept
12308        fcmp.b          %fp1,&0x2               # is |result| >= 2.b?
12309        fbge.w          fin_sd_ovfl_tst         # yes; overflow has occurred
12310
12311# no, it didn't overflow; we have correct result
12312        bra.w           fin_sd_normal_exit
12313
12314##########################################################################
12315
12316#
12317# operand is not a NORM: check its optype and branch accordingly
12318#
# fin_not_norm: dispatch on the source tag in d1 (loaded by fin).
# DENORMs go to fin_denorm, SNANs and QNANs to their one-operand result
# routines; whatever remains is moved into fp0 directly and the condition
# codes are taken from the FPSR the move produces.
12319fin_not_norm:
12320        cmpi.b          %d1,&DENORM             # weed out DENORM
12321        beq.w           fin_denorm
12322        cmpi.b          %d1,&SNAN               # weed out SNANs
12323        beq.l           res_snan_1op
12324        cmpi.b          %d1,&QNAN               # weed out QNANs
12325        beq.l           res_qnan_1op
12326
12327#
12328# do the fmove in; at this point, only possible ops are ZERO and INF.
12329# use fmov to determine ccodes.
12330# prec:mode should be zero at this point but it won't affect answer anyways.
12331#
12332        fmov.x          SRC(%a0),%fp0           # do fmove in
12333        fmov.l          %fpsr,%d0               # no exceptions possible
12334        rol.l           &0x8,%d0                # rotate top FPSR byte (ccodes) into lo byte
12335        mov.b           %d0,FPSR_CC(%a6)        # insert correct ccodes
12336        rts
12337
12338#########################################################################
12339# XDEF **************************************************************** #
12340#       fdiv(): emulates the fdiv instruction                           #
12341#       fsdiv(): emulates the fsdiv instruction                         #
12342#       fddiv(): emulates the fddiv instruction                         #
12343#                                                                       #
12344# XREF **************************************************************** #
12345#       scale_to_zero_src() - scale src exponent to zero                #
12346#       scale_to_zero_dst() - scale dst exponent to zero                #
12347#       unf_res() - return default underflow result                     #
12348#       ovf_res() - return default overflow result                      #
12349#       res_qnan() - return QNAN result                                 #
12350#       res_snan() - return SNAN result                                 #
12351#                                                                       #
12352# INPUT *************************************************************** #
12353#       a0 = pointer to extended precision source operand               #
12354#       a1 = pointer to extended precision destination operand          #
12355#       d0 = rnd prec,mode                                              #
12356#                                                                       #
12357# OUTPUT ************************************************************** #
12358#       fp0 = result                                                    #
12359#       fp1 = EXOP (if exception occurred)                              #
12360#                                                                       #
12361# ALGORITHM *********************************************************** #
12362#       Handle NANs, infinities, and zeroes as special cases. Divide    #
12363# norms/denorms into ext/sgl/dbl precision.                             #
12364#       For norms/denorms, scale the exponents such that a divide       #
12365# instruction won't cause an exception. Use the regular fdiv to         #
12366# compute a result. Check if the regular operands would have taken      #
12367# an exception. If so, return the default overflow/underflow result     #
12368# and return the EXOP if exceptions are enabled. Else, scale the        #
12369# result operand to the proper exponent.                                #
12370#                                                                       #
12371#########################################################################
12372
# Scale-factor thresholds used by fdiv_norm to classify a divide before it
# is executed. Each table holds one longword per rounding precision, in the
# order ext, sgl, dbl, and is indexed PC-relative with the precision index.
12373        align           0x10
12374tbl_fdiv_unfl:
12375        long            0x3fff - 0x0000         # ext_unfl
12376        long            0x3fff - 0x3f81         # sgl_unfl
12377        long            0x3fff - 0x3c01         # dbl_unfl
12378
12379tbl_fdiv_ovfl:
12380        long            0x3fff - 0x7ffe         # ext overflow exponent
12381        long            0x3fff - 0x407e         # sgl overflow exponent
12382        long            0x3fff - 0x43fe         # dbl overflow exponent
12383
# fsdiv: single-precision divide entry point. Keeps the bits selected by the
# 0x30 mask in d0 (the rounding mode), forces the precision field to single,
# and joins the common code at fdiv.
12384        global          fsdiv
12385fsdiv:
12386        andi.b          &0x30,%d0               # keep rnd mode; clear rnd prec
12387        ori.b           &s_mode*0x10,%d0        # insert sgl prec
12388        bra.b           fdiv
12389
# fddiv: double-precision divide entry point. Keeps the bits selected by the
# 0x30 mask in d0 (the rounding mode), forces the precision field to double,
# and falls through into fdiv.
12390        global          fddiv
12391fddiv:
12392        andi.b          &0x30,%d0               # keep rnd mode; clear rnd prec
12393        ori.b           &d_mode*0x10,%d0        # insert dbl prec
12394
# fdiv: common divide entry.
#   a0 = pointer to the extended precision source operand
#   a1 = pointer to the extended precision destination operand
#   d0 = rounding precision/mode (saved in L_SCR3)
# The dispatch index is (DTAG << 3) | STAG; zero means both operands carry
# the zero tag and the code falls into fdiv_norm, anything else goes through
# the jump table at fdiv_not_norm.
12395        global          fdiv
12396fdiv:
12397        mov.l           %d0,L_SCR3(%a6)         # store rnd info
12398
12399        clr.w           %d1
12400        mov.b           DTAG(%a6),%d1
12401        lsl.b           &0x3,%d1
12402        or.b            STAG(%a6),%d1           # combine dst and src tags
12403
12404        bne.w           fdiv_not_norm           # optimize on non-norm input
12405
12406#
12407# DIVIDE: NORMs and DENORMs ONLY!
12408#
# fdiv_norm: copy dst to FP_SCR1 and src to FP_SCR0, scale both, and combine
# the two scale factors into d0 as (dst scale - src scale). d1 becomes the
# precision index used for the threshold tables; later paths (fdiv_may_ovfl)
# rely on d1 still holding it.
# NOTE(review): only the low byte of d1 is shifted but d1.w is used as the
# index, so bits 8-15 of L_SCR3 are presumably zero — confirm.
12409fdiv_norm:
12410        mov.w           DST_EX(%a1),FP_SCR1_EX(%a6)
12411        mov.l           DST_HI(%a1),FP_SCR1_HI(%a6)
12412        mov.l           DST_LO(%a1),FP_SCR1_LO(%a6)
12413
12414        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
12415        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
12416        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
12417
12418        bsr.l           scale_to_zero_src       # scale src exponent
12419        mov.l           %d0,-(%sp)              # save scale factor 1
12420
12421        bsr.l           scale_to_zero_dst       # scale dst exponent
12422
12423        neg.l           (%sp)                   # (sp) = -scale1
12424        add.l           %d0,(%sp)               # SCALE FACTOR = scale2 - scale1
12425
12426        mov.w           2+L_SCR3(%a6),%d1       # fetch precision
12427        lsr.b           &0x6,%d1                # shift to lo bits
12428        mov.l           (%sp)+,%d0              # load S.F.
12429        cmp.l           %d0,(tbl_fdiv_ovfl.b,%pc,%d1.w*4) # will result overflow?
12430        ble.w           fdiv_may_ovfl           # result may overflow; go check
12431
12432        cmp.l           %d0,(tbl_fdiv_unfl.w,%pc,%d1.w*4) # will result underflow?
12433        beq.w           fdiv_may_unfl           # maybe
12434        bgt.w           fdiv_unfl               # yes; go handle underflow
12435
# fdiv_normal: the divide can neither overflow nor underflow. Divide the
# scaled dst (FP_SCR1) by the scaled src (FP_SCR0) under the user's rounding
# precision/mode and merge the resulting FPSR bits into USER_FPSR.
12436fdiv_normal:
12437        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
12438
12439        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
12440        fmov.l          &0x0,%fpsr              # clear FPSR
12441
12442        fdiv.x          FP_SCR0(%a6),%fp0       # perform divide
12443
12444        fmov.l          %fpsr,%d1               # save FPSR
12445        fmov.l          &0x0,%fpcr              # clear FPCR
12446
12447        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
12448
# fdiv_normal_exit: fp0 holds the scaled quotient and d0 the scale factor.
# Store fp0 to FP_SCR0, replace its exponent with (exponent - scale factor)
# while keeping the sign bit, and reload fp0. d2 is scratch, preserved on
# the stack.
12449fdiv_normal_exit:
12450        fmovm.x         &0x80,FP_SCR0(%a6)      # store result on stack
12451        mov.l           %d2,-(%sp)              # store d2
12452        mov.w           FP_SCR0_EX(%a6),%d1     # load {sgn,exp}
12453        mov.l           %d1,%d2                 # make a copy
12454        andi.l          &0x7fff,%d1             # strip sign
12455        andi.w          &0x8000,%d2             # keep old sign
12456        sub.l           %d0,%d1                 # subtract scale factor
12457        or.w            %d2,%d1                 # concat old sign,new exp
12458        mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
12459        mov.l           (%sp)+,%d2              # restore d2
12460        fmovm.x         FP_SCR0(%a6),&0x80      # return result in fp0
12461        rts
12462
# tbl_fdiv_ovfl2: per-precision exponent limits against which fdiv_may_ovfl
# compares the unscaled result exponent (one longword per precision index,
# same indexing as the tables used by fdiv_norm).
12463tbl_fdiv_ovfl2:
12464        long            0x7fff
12465        long            0x407f
12466        long            0x43ff
12467
# fdiv_no_ovfl: fdiv_may_ovfl found that the result did not overflow. Pop the
# saved scale factor back into d0 and finish through the normal exit.
12468fdiv_no_ovfl:
12469        mov.l           (%sp)+,%d0              # restore scale factor
12470        bra.b           fdiv_normal_exit
12471
# fdiv_may_ovfl: the scale factor is at or beyond the overflow threshold.
# Execute the divide on the scaled operands, then compute the unscaled
# result exponent (exponent - scale factor) and compare it with
# tbl_fdiv_ovfl2 to decide whether overflow really occurred.
# d0 is reused as scratch here, so the scale factor is kept on the stack.
# d1 must still hold the precision index set up in fdiv_norm; nothing in
# this path reloads it.
12472fdiv_may_ovfl:
12473        mov.l           %d0,-(%sp)              # save scale factor
12474
12475        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
12476
12477        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
12478        fmov.l          &0x0,%fpsr              # clear FPSR
12479
12480        fdiv.x          FP_SCR0(%a6),%fp0       # execute divide
12481
12482        fmov.l          %fpsr,%d0               # save status (d1 is still in use)
12483        fmov.l          &0x0,%fpcr              # clear FPCR
12484
12485        or.l            %d0,USER_FPSR(%a6)      # save INEX,N
12486
12487        fmovm.x         &0x01,-(%sp)            # save result to stack
12488        mov.w           (%sp),%d0               # fetch new exponent
12489        add.l           &0xc,%sp                # clear result from stack
12490        andi.l          &0x7fff,%d0             # strip sign
12491        sub.l           (%sp),%d0               # subtract scale factor
12492        cmp.l           %d0,(tbl_fdiv_ovfl2.b,%pc,%d1.w*4)
12493        blt.b           fdiv_no_ovfl            # below the limit; no overflow
12494        mov.l           (%sp)+,%d0              # restore scale factor
12495
# fdiv_ovfl_tst: overflow has occurred. Record the overflow status bits, then
# build the EXOP first if OVFL or INEX is enabled.
12496fdiv_ovfl_tst:
12497        or.l            &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
12498
12499        mov.b           FPCR_ENABLE(%a6),%d1
12500        andi.b          &0x13,%d1               # is OVFL or INEX enabled?
12501        bne.b           fdiv_ovfl_ena           # yes
12502
# fdiv_ovfl_dis: return the default overflow result from ovf_res() in fp0.
# The sign passed in d1 comes from the saved 'N' bit; d0 returned by ovf_res
# is OR'd into the condition codes and fp0 is loaded from (a0).
# NOTE(review): a0 is presumably set by ovf_res to the default result — confirm.
12503fdiv_ovfl_dis:
12504        btst            &neg_bit,FPSR_CC(%a6)   # is result negative?
12505        sne             %d1                     # set sign param accordingly
12506        mov.l           L_SCR3(%a6),%d0         # pass prec:rnd
12507        bsr.l           ovf_res                 # calculate default result
12508        or.b            %d0,FPSR_CC(%a6)        # set INF if applicable
12509        fmovm.x         (%a0),&0x80             # return default result in fp0
12510        rts
12511
# fdiv_ovfl_ena: OVFL or INEX is enabled, so an EXOP must be returned in fp1.
# For extended precision the quotient already in fp0 is used as is; for
# sgl/dbl the divide is redone first (fdiv_ovfl_ena_sd) with only the
# rounding mode in the FPCR.
12512fdiv_ovfl_ena:
12513        mov.l           L_SCR3(%a6),%d1
12514        andi.b          &0xc0,%d1               # is precision extended?
12515        bne.b           fdiv_ovfl_ena_sd        # no, do sgl or dbl
12516
# fdiv_ovfl_ena_cont: store fp0 to FP_SCR0 and rewrite its exponent as
# (exponent - scale factor - 0x6000) masked to 15 bits, keeping the sign.
# The EXOP is returned in fp1, then the default result is produced by
# fdiv_ovfl_dis.
12517fdiv_ovfl_ena_cont:
12518        fmovm.x         &0x80,FP_SCR0(%a6)      # move result to stack
12519
12520        mov.l           %d2,-(%sp)              # save d2
12521        mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
12522        mov.w           %d1,%d2                 # make a copy
12523        andi.l          &0x7fff,%d1             # strip sign
12524        sub.l           %d0,%d1                 # subtract scale factor
12525        subi.l          &0x6000,%d1             # subtract bias
12526        andi.w          &0x7fff,%d1             # clear sign bit
12527        andi.w          &0x8000,%d2             # keep old sign
12528        or.w            %d2,%d1                 # concat old sign,new exp
12529        mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
12530        mov.l           (%sp)+,%d2              # restore d2
12531        fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
12532        bra.b           fdiv_ovfl_dis           # now create the default result
12533
# fdiv_ovfl_ena_sd: sgl/dbl case. Redo the divide into fp0 with the precision
# bits cleared from the FPCR value (only the rounding mode is kept), then
# continue with the common EXOP construction.
12534fdiv_ovfl_ena_sd:
12535        fmovm.x         FP_SCR1(%a6),&0x80      # load dst operand
12536
12537        mov.l           L_SCR3(%a6),%d1
12538        andi.b          &0x30,%d1               # keep rnd mode
12539        fmov.l          %d1,%fpcr               # set FPCR
12540
12541        fdiv.x          FP_SCR0(%a6),%fp0       # execute divide
12542
12543        fmov.l          &0x0,%fpcr              # clear FPCR
12544        bra.b           fdiv_ovfl_ena_cont
12545
# fdiv_unfl: the divide underflows. Set the UNFL exception bit, execute the
# divide on the scaled operands with the FPCR set to rz_mode only, and merge
# the resulting FPSR bits into USER_FPSR. If UNFL or INEX is enabled the
# EXOP is built first (fdiv_unfl_ena).
12546fdiv_unfl:
12547        bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12548
12549        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
12550
12551        fmov.l          &rz_mode*0x10,%fpcr     # set FPCR
12552        fmov.l          &0x0,%fpsr              # clear FPSR
12553
12554        fdiv.x          FP_SCR0(%a6),%fp0       # execute divide
12555
12556        fmov.l          %fpsr,%d1               # save status
12557        fmov.l          &0x0,%fpcr              # clear FPCR
12558
12559        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
12560
12561        mov.b           FPCR_ENABLE(%a6),%d1
12562        andi.b          &0x0b,%d1               # is UNFL or INEX enabled?
12563        bne.b           fdiv_unfl_ena           # yes
12564
# fdiv_unfl_dis: store the fp0 quotient to FP_SCR0, let unf_res() turn it
# into the default underflow result in place, and return that in fp0.
# NOTE(review): d0 still holds the scale factor when unf_res is called;
# presumably unf_res consumes it — confirm.
12565fdiv_unfl_dis:
12566        fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
12567
12568        lea             FP_SCR0(%a6),%a0        # pass: result addr
12569        mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
12570        bsr.l           unf_res                 # calculate default result
12571        or.b            %d0,FPSR_CC(%a6)        # 'Z' may have been set
12572        fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
12573        rts
12574
12575#
12576# UNFL is enabled.
12577#
# fdiv_unfl_ena: UNFL or INEX is enabled, so an EXOP must be returned in fp1.
# The divide is redone into fp1 (fp0 keeps the quotient from fdiv_unfl):
# with the full L_SCR3 value as FPCR for extended precision, or with only
# the rounding mode for sgl/dbl (fdiv_unfl_ena_sd).
12578fdiv_unfl_ena:
12579        fmovm.x         FP_SCR1(%a6),&0x40      # load dst op into fp1
12580
12581        mov.l           L_SCR3(%a6),%d1
12582        andi.b          &0xc0,%d1               # is precision extended?
12583        bne.b           fdiv_unfl_ena_sd        # no, sgl or dbl
12584
12585        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
12586
# fdiv_unfl_ena_cont: execute the divide into fp1, store it to FP_SCR0 and
# rewrite its exponent as (exponent - scale factor + 0x6000) masked to
# 15 bits, keeping the sign. The EXOP is reloaded into fp1, then the default
# result is produced by fdiv_unfl_dis (which overwrites FP_SCR0 with fp0).
12587fdiv_unfl_ena_cont:
12588        fmov.l          &0x0,%fpsr              # clear FPSR
12589
12590        fdiv.x          FP_SCR0(%a6),%fp1       # execute divide
12591
12592        fmov.l          &0x0,%fpcr              # clear FPCR
12593
12594        fmovm.x         &0x40,FP_SCR0(%a6)      # save result to stack
12595        mov.l           %d2,-(%sp)              # save d2
12596        mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
12597        mov.l           %d1,%d2                 # make a copy
12598        andi.l          &0x7fff,%d1             # strip sign
12599        andi.w          &0x8000,%d2             # keep old sign
12600        sub.l           %d0,%d1                 # subtract scale factor
12601        addi.l          &0x6000,%d1             # add bias
12602        andi.w          &0x7fff,%d1             # keep exponent within 15 bits
12603        or.w            %d2,%d1                 # concat old sign,new exp
12604        mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exp
12605        mov.l           (%sp)+,%d2              # restore d2
12606        fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
12607        bra.w           fdiv_unfl_dis           # now create the default result
12608
# fdiv_unfl_ena_sd: sgl/dbl case. Load the FPCR with the precision bits
# cleared (only the rounding mode is kept) before the divide.
12609fdiv_unfl_ena_sd:
12610        mov.l           L_SCR3(%a6),%d1
12611        andi.b          &0x30,%d1               # use only rnd mode
12612        fmov.l          %d1,%fpcr               # set FPCR
12613
12614        bra.b           fdiv_unfl_ena_cont
12615
12616#
12617# the divide operation MAY underflow:
12618#
# fdiv_may_unfl: the scale factor sits exactly on the underflow threshold.
# Execute the divide on the scaled operands and compare |result| with 1:
#   |result| > 1  -> no underflow; finish through fdiv_normal_exit
#   |result| < 1  -> underflow; handle in fdiv_unfl
#   otherwise     -> undecided; redo the divide with RZ rounding (below)
12619fdiv_may_unfl:
12620        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
12621
12622        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
12623        fmov.l          &0x0,%fpsr              # clear FPSR
12624
12625        fdiv.x          FP_SCR0(%a6),%fp0       # execute divide
12626
12627        fmov.l          %fpsr,%d1               # save status
12628        fmov.l          &0x0,%fpcr              # clear FPCR
12629
12630        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
12631
12632        fabs.x          %fp0,%fp1               # fp1 = |result|; fp0 is kept
12633        fcmp.b          %fp1,&0x1               # compare |result| with 1.b
12634        fbgt.w          fdiv_normal_exit        # |result| > 1; no underflow occurred
12635        fblt.w          fdiv_unfl               # |result| < 1; underflow occurred
12636
12637#
12638# we still don't know if underflow occurred. result is ~ equal to 1. but,
12639# we don't know if the result was an underflow that rounded up to a 1
12640# or a normalized number that rounded down to a 1. so, redo the entire
12641# operation using RZ as the rounding mode to see what the pre-rounded
12642# result is. this case should be relatively rare.
12643#
12644        fmovm.x         FP_SCR1(%a6),&0x40      # load dst op into fp1
12645
12646        mov.l           L_SCR3(%a6),%d1
12647        andi.b          &0xc0,%d1               # keep rnd prec
12648        ori.b           &rz_mode*0x10,%d1       # insert RZ
12649
12650        fmov.l          %d1,%fpcr               # set FPCR
12651        fmov.l          &0x0,%fpsr              # clear FPSR
12652
12653        fdiv.x          FP_SCR0(%a6),%fp1       # execute divide
12654
12655        fmov.l          &0x0,%fpcr              # clear FPCR
12656        fabs.x          %fp1                    # make absolute value
12657        fcmp.b          %fp1,&0x1               # is |result| < 1.b?
12658        fbge.w          fdiv_normal_exit        # no; no underflow occurred
12659        bra.w           fdiv_unfl               # yes; underflow occurred
12660
12661############################################################################
12662
12663#
12664# Divide: inputs are not both normalized; what are they?
12665#
# fdiv_not_norm: dispatch on the combined tag index built in fdiv,
# d1 = (DTAG << 3) | STAG. The table holds 16-bit offsets relative to
# tbl_fdiv_op; the first move fetches the offset, the jmp applies it.
12666fdiv_not_norm:
12667        mov.w           (tbl_fdiv_op.b,%pc,%d1.w*2),%d1
12668        jmp             (tbl_fdiv_op.b,%pc,%d1.w*1)
12669
# tbl_fdiv_op: 6 groups of 8 entries, one group per destination tag and one
# entry per source tag. Each entry is labelled "dst / src"; the order within
# a group is NORM, ZERO, INF, QNAN, DENORM, SNAN. The last two entries of
# each group have offset 0 and no operand label.
# NOTE(review): those slots are presumably tag values that never occur — confirm.
12670        swbeg           &48
12671tbl_fdiv_op:
12672        short           fdiv_norm       - tbl_fdiv_op # NORM / NORM
12673        short           fdiv_inf_load   - tbl_fdiv_op # NORM / ZERO
12674        short           fdiv_zero_load  - tbl_fdiv_op # NORM / INF
12675        short           fdiv_res_qnan   - tbl_fdiv_op # NORM / QNAN
12676        short           fdiv_norm       - tbl_fdiv_op # NORM / DENORM
12677        short           fdiv_res_snan   - tbl_fdiv_op # NORM / SNAN
12678        short           tbl_fdiv_op     - tbl_fdiv_op #
12679        short           tbl_fdiv_op     - tbl_fdiv_op #
12680
12681        short           fdiv_zero_load  - tbl_fdiv_op # ZERO / NORM
12682        short           fdiv_res_operr  - tbl_fdiv_op # ZERO / ZERO
12683        short           fdiv_zero_load  - tbl_fdiv_op # ZERO / INF
12684        short           fdiv_res_qnan   - tbl_fdiv_op # ZERO / QNAN
12685        short           fdiv_zero_load  - tbl_fdiv_op # ZERO / DENORM
12686        short           fdiv_res_snan   - tbl_fdiv_op # ZERO / SNAN
12687        short           tbl_fdiv_op     - tbl_fdiv_op #
12688        short           tbl_fdiv_op     - tbl_fdiv_op #
12689
12690        short           fdiv_inf_dst    - tbl_fdiv_op # INF / NORM
12691        short           fdiv_inf_dst    - tbl_fdiv_op # INF / ZERO
12692        short           fdiv_res_operr  - tbl_fdiv_op # INF / INF
12693        short           fdiv_res_qnan   - tbl_fdiv_op # INF / QNAN
12694        short           fdiv_inf_dst    - tbl_fdiv_op # INF / DENORM
12695        short           fdiv_res_snan   - tbl_fdiv_op # INF / SNAN
12696        short           tbl_fdiv_op     - tbl_fdiv_op #
12697        short           tbl_fdiv_op     - tbl_fdiv_op #
12698
12699        short           fdiv_res_qnan   - tbl_fdiv_op # QNAN / NORM
12700        short           fdiv_res_qnan   - tbl_fdiv_op # QNAN / ZERO
12701        short           fdiv_res_qnan   - tbl_fdiv_op # QNAN / INF
12702        short           fdiv_res_qnan   - tbl_fdiv_op # QNAN / QNAN
12703        short           fdiv_res_qnan   - tbl_fdiv_op # QNAN / DENORM
12704        short           fdiv_res_snan   - tbl_fdiv_op # QNAN / SNAN
12705        short           tbl_fdiv_op     - tbl_fdiv_op #
12706        short           tbl_fdiv_op     - tbl_fdiv_op #
12707
12708        short           fdiv_norm       - tbl_fdiv_op # DENORM / NORM
12709        short           fdiv_inf_load   - tbl_fdiv_op # DENORM / ZERO
12710        short           fdiv_zero_load  - tbl_fdiv_op # DENORM / INF
12711        short           fdiv_res_qnan   - tbl_fdiv_op # DENORM / QNAN
12712        short           fdiv_norm       - tbl_fdiv_op # DENORM / DENORM
12713        short           fdiv_res_snan   - tbl_fdiv_op # DENORM / SNAN
12714        short           tbl_fdiv_op     - tbl_fdiv_op #
12715        short           tbl_fdiv_op     - tbl_fdiv_op #
12716
12717        short           fdiv_res_snan   - tbl_fdiv_op # SNAN / NORM
12718        short           fdiv_res_snan   - tbl_fdiv_op # SNAN / ZERO
12719        short           fdiv_res_snan   - tbl_fdiv_op # SNAN / INF
12720        short           fdiv_res_snan   - tbl_fdiv_op # SNAN / QNAN
12721        short           fdiv_res_snan   - tbl_fdiv_op # SNAN / DENORM
12722        short           fdiv_res_snan   - tbl_fdiv_op # SNAN / SNAN
12723        short           tbl_fdiv_op     - tbl_fdiv_op #
12724        short           tbl_fdiv_op     - tbl_fdiv_op #
12725
12726fdiv_res_qnan:
12727        bra.l           res_qnan                # tbl_fdiv_op stub: QNAN operand (no SNAN)
12728fdiv_res_snan:
12729        bra.l           res_snan                # tbl_fdiv_op stub: either operand is SNAN
12730fdiv_res_operr:
12731        bra.l           res_operr               # tbl_fdiv_op stub: ZERO/ZERO and INF/INF
12732
12733        global          fdiv_zero_load          # global for fsgldiv
12734fdiv_zero_load:
12735        mov.b           SRC_EX(%a0),%d0         # result sign is exclusive
12736        mov.b           DST_EX(%a1),%d1         # or of input signs.
12737        eor.b           %d0,%d1                 # bit 7 (N) = src sign ^ dst sign
12738        bpl.b           fdiv_zero_load_p        # result is positive
12739        fmov.s          &0x80000000,%fp0        # load a -ZERO
12740        mov.b           &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
12741        rts
12742fdiv_zero_load_p:
12743        fmov.s          &0x00000000,%fp0        # load a +ZERO
12744        mov.b           &z_bmask,FPSR_CC(%a6)   # set Z
12745        rts
12746
12747#
12748# The destination was In Range and the source was a ZERO. The result,
12749# therefore, is an INF w/ the proper sign.
12750# So, determine the sign and return a new INF (w/ the j-bit cleared).
12751#
12752        global          fdiv_inf_load           # global for fsgldiv
12753fdiv_inf_load:
12754        ori.w           &dz_mask+adz_mask,2+USER_FPSR(%a6) # set DZ/ADZ
12755        mov.b           SRC_EX(%a0),%d0         # load both signs
12756        mov.b           DST_EX(%a1),%d1
12757        eor.b           %d0,%d1                 # bit 7 (N) = src sign ^ dst sign
12758        bpl.b           fdiv_inf_load_p         # result is positive
12759        fmov.s          &0xff800000,%fp0        # make result -INF
12760        mov.b           &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
12761        rts
12762fdiv_inf_load_p:
12763        fmov.s          &0x7f800000,%fp0        # make result +INF
12764        mov.b           &inf_bmask,FPSR_CC(%a6) # set INF
12765        rts
12766
12767#
12768# The destination was an INF w/ an In Range or ZERO source, the result is
12769# an INF w/ the proper sign.
12770# The 68881/882 returns the destination INF w/ the new sign (if the j-bit of the
12771# dst INF is set, then the j-bit of the result INF is also set).
12772#
12773        global          fdiv_inf_dst            # global for fsgldiv
12774fdiv_inf_dst:
12775        mov.b           DST_EX(%a1),%d0         # load both signs
12776        mov.b           SRC_EX(%a0),%d1
12777        eor.b           %d0,%d1                 # bit 7 (N) = dst sign ^ src sign
12778        bpl.b           fdiv_inf_dst_p          # result is positive
12779
12780        fmovm.x         DST(%a1),&0x80          # load dst INF into fp0
12781        fabs.x          %fp0                    # clear sign bit
12782        fneg.x          %fp0                    # set sign bit
12783        mov.b           &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/NEG
12784        rts
12785
12786fdiv_inf_dst_p:
12787        fmovm.x         DST(%a1),&0x80          # load dst INF into fp0
12788        fabs.x          %fp0                    # return positive INF
12789        mov.b           &inf_bmask,FPSR_CC(%a6) # set INF
12790        rts
12791
12792#########################################################################
12793# XDEF **************************************************************** #
12794#       fneg(): emulates the fneg instruction                           #
12795#       fsneg(): emulates the fsneg instruction                         #
12796#       fdneg(): emulates the fdneg instruction                         #
12797#                                                                       #
12798# XREF **************************************************************** #
12799#       norm() - normalize a denorm to provide EXOP                     #
12800#       scale_to_zero_src() - scale sgl/dbl source exponent             #
12801#       ovf_res() - return default overflow result                      #
12802#       unf_res() - return default underflow result                     #
12803#       res_qnan_1op() - return QNAN result                             #
12804#       res_snan_1op() - return SNAN result                             #
12805#                                                                       #
12806# INPUT *************************************************************** #
12807#       a0 = pointer to extended precision source operand               #
12808#       d0 = rnd prec,mode                                              #
12809#                                                                       #
12810# OUTPUT ************************************************************** #
12811#       fp0 = result                                                    #
12812#       fp1 = EXOP (if exception occurred)                              #
12813#                                                                       #
12814# ALGORITHM *********************************************************** #
12815#       Handle NANs, zeroes, and infinities as special cases. Separate  #
12816# norms/denorms into ext/sgl/dbl precisions. Extended precision can be  #
12817# emulated by simply toggling sign bit. Sgl/dbl operands must be scaled #
12818# and an actual fneg performed to see if overflow/underflow would have  #
12819# occurred. If so, return default underflow/overflow result. Else,      #
12820# scale the result exponent and return result. FPSR gets set based on   #
12821# the result value.                                                     #
12822#                                                                       #
12823#########################################################################
12824
12825        global          fsneg
12826fsneg:
12827        andi.b          &0x30,%d0               # keep bits 4-5 only (clears rnd prec)
12828        ori.b           &s_mode*0x10,%d0        # insert sgl precision
12829        bra.b           fneg                    # join the common fneg path
12830
12831        global          fdneg
12832fdneg:
12833        andi.b          &0x30,%d0               # keep bits 4-5 only (clears rnd prec)
12834        ori.b           &d_mode*0x10,%d0        # insert dbl prec; falls through to fneg
12835
12836        global          fneg
12837fneg:
12838        mov.l           %d0,L_SCR3(%a6)         # store rnd info
12839        mov.b           STAG(%a6),%d1           # fetch source operand tag
12840        bne.w           fneg_not_norm           # optimize on non-norm input
12841
12842#
12843# NEGATE SIGN : norms and denorms ONLY!
12844#
12845fneg_norm:
12846        andi.b          &0xc0,%d0               # is precision extended?
12847        bne.w           fneg_not_ext            # no; go handle sgl or dbl
12848
12849#
12850# precision selected is extended. so...we can not get an underflow
12851# or overflow because of rounding to the correct precision. so...
12852# skip the scaling and unscaling...
12853#
12854        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6) # copy mantissa to scratch
12855        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
12856        mov.w           SRC_EX(%a0),%d0         # fetch src {sgn,exp}
12857        eori.w          &0x8000,%d0             # negate sign
12858        bpl.b           fneg_norm_load          # sign is positive
12859        mov.b           &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
12860fneg_norm_load:
12861        mov.w           %d0,FP_SCR0_EX(%a6)     # store sign-flipped {sgn,exp}
12862        fmovm.x         FP_SCR0(%a6),&0x80      # return result in fp0
12863        rts
12864
12865#
12866# for an extended precision DENORM, the UNFL exception bit is set
12867# the accrued bit is NOT set in this instance(no inexactness!)
12868#
12869fneg_denorm:
12870        andi.b          &0xc0,%d0               # is precision extended?
12871        bne.b           fneg_not_ext            # no; go handle sgl or dbl
12872
12873        bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12874
12875        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6) # copy mantissa to scratch
12876        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
12877        mov.w           SRC_EX(%a0),%d0         # fetch src {sgn,exp}
12878        eori.w          &0x8000,%d0             # negate sign
12879        bpl.b           fneg_denorm_done        # result is positive
12880        mov.b           &neg_bmask,FPSR_CC(%a6) # negative; set 'N' ccode bit
12881fneg_denorm_done:
12882        mov.w           %d0,FP_SCR0_EX(%a6)     # store sign-flipped {sgn,exp}
12883        fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
12884
12885        btst            &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
12886        bne.b           fneg_ext_unfl_ena       # yes
12887        rts
12888
12889#
12890# the input is an extended DENORM and underflow is enabled in the FPCR.
12891# normalize the mantissa and add the bias of 0x6000 to the resulting negative
12892# exponent and insert back into the operand.
12893#
12894fneg_ext_unfl_ena:
12895        lea             FP_SCR0(%a6),%a0        # pass: ptr to operand
12896        bsr.l           norm                    # normalize; d0 is used as shft val
12897        neg.w           %d0                     # new exponent = -(shft val)
12898        addi.w          &0x6000,%d0             # add new bias to exponent
12899        mov.w           FP_SCR0_EX(%a6),%d1     # fetch old sign,exp
12900        andi.w          &0x8000,%d1             # keep old sign
12901        andi.w          &0x7fff,%d0             # clear sign position
12902        or.w            %d1,%d0                 # concat old sign, new exponent
12903        mov.w           %d0,FP_SCR0_EX(%a6)     # insert new exponent
12904        fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
12905        rts
12906
12907#
12908# operand is either single or double
12909#
12910fneg_not_ext:
12911        cmpi.b          %d0,&s_mode*0x10        # separate sgl/dbl prec
12912        bne.b           fneg_dbl
12913
12914#
12915# operand is to be rounded to single precision
12916#
12917fneg_sgl:
12918        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
12919        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
12920        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
12921        bsr.l           scale_to_zero_src       # calculate scale factor (tested in d0)
12922
12923        cmpi.l          %d0,&0x3fff-0x3f80      # will move in underflow?
12924        bge.w           fneg_sd_unfl            # yes; go handle underflow
12925        cmpi.l          %d0,&0x3fff-0x407e      # will move in overflow?
12926        beq.w           fneg_sd_may_ovfl        # maybe; go check
12927        blt.w           fneg_sd_ovfl            # yes; go handle overflow
12928
12929#
12930# operand will NOT overflow or underflow when moved in to the fp reg file
12931#
12932fneg_sd_normal:
12933        fmov.l          &0x0,%fpsr              # clear FPSR
12934        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
12935
12936        fneg.x          FP_SCR0(%a6),%fp0       # perform negation
12937
12938        fmov.l          %fpsr,%d1               # save FPSR
12939        fmov.l          &0x0,%fpcr              # clear FPCR
12940
12941        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
12942
12943fneg_sd_normal_exit:
12944        mov.l           %d2,-(%sp)              # save d2
12945        fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
12946        mov.w           FP_SCR0_EX(%a6),%d1     # load sgn,exp
12947        mov.w           %d1,%d2                 # make a copy
12948        andi.l          &0x7fff,%d1             # strip sign
12949        sub.l           %d0,%d1                 # subtract scale factor
12950        andi.w          &0x8000,%d2             # keep old sign
12951        or.w            %d1,%d2                 # concat old sign,new exp
12952        mov.w           %d2,FP_SCR0_EX(%a6)     # insert new exponent
12953        mov.l           (%sp)+,%d2              # restore d2
12954        fmovm.x         FP_SCR0(%a6),&0x80      # return result in fp0
12955        rts
12956
12957#
12958# operand is to be rounded to double precision
12959#
12960fneg_dbl:
12961        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
12962        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
12963        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
12964        bsr.l           scale_to_zero_src       # calculate scale factor (tested in d0)
12965
12966        cmpi.l          %d0,&0x3fff-0x3c00      # will move in underflow?
12967        bge.b           fneg_sd_unfl            # yes; go handle underflow
12968        cmpi.l          %d0,&0x3fff-0x43fe      # will move in overflow?
12969        beq.w           fneg_sd_may_ovfl        # maybe; go check
12970        blt.w           fneg_sd_ovfl            # yes; go handle overflow
12971        bra.w           fneg_sd_normal          # no; go handle normalized op
12972
12973#
12974# operand WILL underflow when moved in to the fp register file
12975#
12976fneg_sd_unfl:
12977        bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12978
12979        eori.b          &0x80,FP_SCR0_EX(%a6)   # negate sign
12980        bpl.b           fneg_sd_unfl_tst        # result is positive
12981        bset            &neg_bit,FPSR_CC(%a6)   # set 'N' ccode bit
12982
12983# if underflow or inexact is enabled, go calculate EXOP first.
12984fneg_sd_unfl_tst:
12985        mov.b           FPCR_ENABLE(%a6),%d1
12986        andi.b          &0x0b,%d1               # is UNFL or INEX enabled?
12987        bne.b           fneg_sd_unfl_ena        # yes
12988
12989fneg_sd_unfl_dis:
12990        lea             FP_SCR0(%a6),%a0        # pass: result addr
12991        mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
12992        bsr.l           unf_res                 # calculate default result
12993        or.b            %d0,FPSR_CC(%a6)        # unf_res may have set 'Z'
12994        fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
12995        rts
12996
12997#
12998# operand will underflow AND underflow or inexact is enabled.
12999# Therefore, we must return the result rounded to extended precision.
13000#
13001fneg_sd_unfl_ena:
13002        mov.l           FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
13003        mov.l           FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
13004        mov.w           FP_SCR0_EX(%a6),%d1     # load current exponent
13005
13006        mov.l           %d2,-(%sp)              # save d2
13007        mov.l           %d1,%d2                 # make a copy
13008        andi.l          &0x7fff,%d1             # strip sign
13009        andi.w          &0x8000,%d2             # keep old sign
13010        sub.l           %d0,%d1                 # subtract scale factor
13011        addi.l          &0x6000,%d1             # add new bias
13012        andi.w          &0x7fff,%d1             # clear sign position
13013        or.w            %d2,%d1                 # concat new sign,new exp
13014        mov.w           %d1,FP_SCR1_EX(%a6)     # insert new exp
13015        fmovm.x         FP_SCR1(%a6),&0x40      # return EXOP in fp1
13016        mov.l           (%sp)+,%d2              # restore d2
13017        bra.b           fneg_sd_unfl_dis        # now build the default result too
13018
13019#
13020# operand WILL overflow.
13021#
13022fneg_sd_ovfl:
13023        fmov.l          &0x0,%fpsr              # clear FPSR
13024        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
13025
13026        fneg.x          FP_SCR0(%a6),%fp0       # perform negation
13027
13028        fmov.l          &0x0,%fpcr              # clear FPCR
13029        fmov.l          %fpsr,%d1               # save FPSR
13030
13031        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
13032
13033fneg_sd_ovfl_tst:
13034        or.l            &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
13035
13036        mov.b           FPCR_ENABLE(%a6),%d1
13037        andi.b          &0x13,%d1               # is OVFL or INEX enabled?
13038        bne.b           fneg_sd_ovfl_ena        # yes
13039
13040#
13041# neither OVFL nor INEX is enabled, or fneg_sd_ovfl_ena already built the EXOP:
13042# create the default result by calling ovf_res().
13043#
13044fneg_sd_ovfl_dis:
13045        btst            &neg_bit,FPSR_CC(%a6)   # is result negative?
13046        sne             %d1                     # set sign param accordingly
13047        mov.l           L_SCR3(%a6),%d0         # pass: prec,mode
13048        bsr.l           ovf_res                 # calculate default result
13049        or.b            %d0,FPSR_CC(%a6)        # set INF,N if applicable
13050        fmovm.x         (%a0),&0x80             # return default result in fp0
13051        rts
13052
13053#
13054# OVFL or INEX is enabled.
13055# the INEX2 bit has already been updated by the round to the correct precision.
13056# now, round to extended(and don't alter the FPSR).
13057#
13058fneg_sd_ovfl_ena:
13059        mov.l           %d2,-(%sp)              # save d2
13060        mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
13061        mov.l           %d1,%d2                 # make a copy
13062        andi.l          &0x7fff,%d1             # strip sign
13063        andi.w          &0x8000,%d2             # keep old sign
13064        sub.l           %d0,%d1                 # subtract scale factor
13065        subi.l          &0x6000,%d1             # subtract bias
13066        andi.w          &0x7fff,%d1             # clear sign position
13067        or.w            %d2,%d1                 # concat sign,exp
13068        mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
13069        fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
13070        mov.l           (%sp)+,%d2              # restore d2
13071        bra.b           fneg_sd_ovfl_dis        # now build the default result too
13072
13073#
13074# the move in MAY overflow. so...
13075#
13076fneg_sd_may_ovfl:
13077        fmov.l          &0x0,%fpsr              # clear FPSR
13078        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
13079
13080        fneg.x          FP_SCR0(%a6),%fp0       # perform negation
13081
13082        fmov.l          %fpsr,%d1               # save status
13083        fmov.l          &0x0,%fpcr              # clear FPCR
13084
13085        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
13086
13087        fabs.x          %fp0,%fp1               # fp1 = |result|
13088        fcmp.b          %fp1,&0x2               # is |result| >= 2.b?
13089        fbge.w          fneg_sd_ovfl_tst        # yes; overflow has occurred
13090
13091# no, it didn't overflow; we have correct result
13092        bra.w           fneg_sd_normal_exit     # go unscale the exponent and return
13093
13094##########################################################################
13095
13096#
13097# input is not normalized; what is it?
13098#
13099fneg_not_norm:
13100        cmpi.b          %d1,&DENORM             # weed out DENORM
13101        beq.w           fneg_denorm
13102        cmpi.b          %d1,&SNAN               # weed out SNAN
13103        beq.l           res_snan_1op
13104        cmpi.b          %d1,&QNAN               # weed out QNAN
13105        beq.l           res_qnan_1op
13106
13107#
13108# do the fneg; at this point, only possible ops are ZERO and INF.
13109# use fneg to determine ccodes.
13110# prec:mode should be zero at this point but it won't affect answer anyways.
13111#
13112        fneg.x          SRC_EX(%a0),%fp0        # do fneg
13113        fmov.l          %fpsr,%d0               # read FPSR produced by the fneg
13114        rol.l           &0x8,%d0                # put ccodes in lo byte
13115        mov.b           %d0,FPSR_CC(%a6)        # insert correct ccodes
13116        rts
13117
13118#########################################################################
13119# XDEF **************************************************************** #
13120#       ftst(): emulates the ftst instruction                           #
13121#                                                                       #
13122# XREF **************************************************************** #
13123#       res{s,q}nan_1op() - set NAN result for monadic instruction      #
13124#                                                                       #
13125# INPUT *************************************************************** #
13126#       a0 = pointer to extended precision source operand               #
13127#                                                                       #
13128# OUTPUT ************************************************************** #
13129#       none                                                            #
13130#                                                                       #
13131# ALGORITHM *********************************************************** #
13132#       Check the source operand tag (STAG) and set the FPSR according  #
13133# to the operand type and sign.                                         #
13134#                                                                       #
13135#########################################################################
13136
13137        global          ftst
13138ftst:
13139        mov.b           STAG(%a6),%d1           # fetch source operand tag
13140        bne.b           ftst_not_norm           # optimize on non-norm input
13141
13142#
13143# Norm:
13144#
13145ftst_norm:
13146        tst.b           SRC_EX(%a0)             # is operand negative?
13147        bmi.b           ftst_norm_m             # yes
13148        rts                                     # positive: FPSR_CC is not written here
13149ftst_norm_m:
13150        mov.b           &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
13151        rts
13152
13153#
13154# input is not normalized; what is it?
13155#
13156ftst_not_norm:
13157        cmpi.b          %d1,&ZERO               # weed out ZERO
13158        beq.b           ftst_zero
13159        cmpi.b          %d1,&INF                # weed out INF
13160        beq.b           ftst_inf
13161        cmpi.b          %d1,&SNAN               # weed out SNAN
13162        beq.l           res_snan_1op
13163        cmpi.b          %d1,&QNAN               # weed out QNAN
13164        beq.l           res_qnan_1op
13165
13166#
13167# Denorm:
13168#
13169ftst_denorm:
13170        tst.b           SRC_EX(%a0)             # is operand negative?
13171        bmi.b           ftst_denorm_m           # yes
13172        rts                                     # positive: FPSR_CC is not written here
13173ftst_denorm_m:
13174        mov.b           &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
13175        rts
13176
13177#
13178# Infinity:
13179#
13180ftst_inf:
13181        tst.b           SRC_EX(%a0)             # is operand negative?
13182        bmi.b           ftst_inf_m              # yes
13183ftst_inf_p:
13184        mov.b           &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
13185        rts
13186ftst_inf_m:
13187        mov.b           &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'I','N' ccode bits
13188        rts
13189
13190#
13191# Zero:
13192#
13193ftst_zero:
13194        tst.b           SRC_EX(%a0)             # is operand negative?
13195        bmi.b           ftst_zero_m             # yes
13196ftst_zero_p:
13197        mov.b           &z_bmask,FPSR_CC(%a6)   # set 'Z' ccode bit
13198        rts
13199ftst_zero_m:
13200        mov.b           &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
13201        rts
13202
13203#########################################################################
13204# XDEF **************************************************************** #
13205#       fint(): emulates the fint instruction                           #
13206#                                                                       #
13207# XREF **************************************************************** #
13208#       res_{s,q}nan_1op() - set NAN result for monadic operation       #
13209#                                                                       #
13210# INPUT *************************************************************** #
13211#       a0 = pointer to extended precision source operand               #
13212#       d0 = round precision/mode                                       #
13213#                                                                       #
13214# OUTPUT ************************************************************** #
13215#       fp0 = result                                                    #
13216#                                                                       #
13217# ALGORITHM *********************************************************** #
13218#       Separate according to operand type. Unnorms don't pass through  #
13219# here. For norms, load the rounding mode/prec, execute a "fint", then  #
13220# store the resulting FPSR bits.                                        #
13221#       For denorms, force the j-bit to a one and do the same as for    #
13222# norms. Denorms are so low that the answer will either be a zero or a  #
13223# one.                                                                  #
13224#       For zeroes/infs/NANs, return the same while setting the FPSR    #
13225# as appropriate.                                                       #
13226#                                                                       #
13227#########################################################################
13228
13229        global          fint
13230fint:
13231        mov.b           STAG(%a6),%d1           # fetch source operand tag
13232        bne.b           fint_not_norm           # optimize on non-norm input
13233
13234#
13235# Norm:
13236#
13237fint_norm:
13238        andi.b          &0x30,%d0               # set prec = ext
13239
13240        fmov.l          %d0,%fpcr               # set FPCR
13241        fmov.l          &0x0,%fpsr              # clear FPSR
13242
13243        fint.x          SRC(%a0),%fp0           # execute fint
13244
13245        fmov.l          &0x0,%fpcr              # clear FPCR
13246        fmov.l          %fpsr,%d0               # save FPSR
13247        or.l            %d0,USER_FPSR(%a6)      # set exception bits
13248
13249        rts
13250
13251#
13252# input is not normalized; what is it?
13253#
13254fint_not_norm:
13255        cmpi.b          %d1,&ZERO               # weed out ZERO
13256        beq.b           fint_zero
13257        cmpi.b          %d1,&INF                # weed out INF
13258        beq.b           fint_inf
13259        cmpi.b          %d1,&DENORM             # weed out DENORM
13260        beq.b           fint_denorm
13261        cmpi.b          %d1,&SNAN               # weed out SNAN
13262        beq.l           res_snan_1op
13263        bra.l           res_qnan_1op            # anything left is treated as QNAN
13264
13265#
13266# Denorm:
13267#
13268# for DENORMs, the result will be either (+/-)ZERO or (+/-)1.
13269# also, the INEX2 and AINEX exception bits will be set.
13270# so, we could either set these manually or force the DENORM
13271# to a very small NORM and ship it to the NORM routine.
13272# I do the latter.
13273#
13274fint_denorm:
13275        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
13276        mov.b           &0x80,FP_SCR0_HI(%a6)   # write top mantissa byte only: DENORM ==> small NORM
13277        lea             FP_SCR0(%a6),%a0        # operate on the scratch copy
13278        bra.b           fint_norm
13279
13280#
13281# Zero:
13282#
13283fint_zero:
13284        tst.b           SRC_EX(%a0)             # is ZERO negative?
13285        bmi.b           fint_zero_m             # yes
13286fint_zero_p:
13287        fmov.s          &0x00000000,%fp0        # return +ZERO in fp0
13288        mov.b           &z_bmask,FPSR_CC(%a6)   # set 'Z' ccode bit
13289        rts
13290fint_zero_m:
13291        fmov.s          &0x80000000,%fp0        # return -ZERO in fp0
13292        mov.b           &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
13293        rts
13294
13295#
13296# Infinity:
13297#
13298fint_inf:
13299        fmovm.x         SRC(%a0),&0x80          # return result in fp0
13300        tst.b           SRC_EX(%a0)             # is INF negative?
13301        bmi.b           fint_inf_m              # yes
13302fint_inf_p:
13303        mov.b           &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
13304        rts
13305fint_inf_m:
13306        mov.b           &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
13307        rts
13308
13309#########################################################################
13310# XDEF **************************************************************** #
13311#       fintrz(): emulates the fintrz instruction                       #
13312#                                                                       #
13313# XREF **************************************************************** #
13314#       res_{s,q}nan_1op() - set NAN result for monadic operation       #
13315#                                                                       #
13316# INPUT *************************************************************** #
13317#       a0 = pointer to extended precision source operand               #
13318#       d0 = round precision/mode                                       #
13319#                                                                       #
13320# OUTPUT ************************************************************** #
13321#       fp0 = result                                                    #
13322#                                                                       #
13323# ALGORITHM *********************************************************** #
13324#       Separate according to operand type. Unnorms don't pass through  #
13325# here. For norms, clear the FPSR, execute a "fintrz", then store the   #
13326# resulting FPSR bits (the rounding mode in d0 is not used).            #
13327#       For denorms, force the j-bit to a one and do the same as for    #
13328# norms. Denorms are so low that the answer will always be a signed     #
13329# zero.                                                                 #
13330#       For zeroes/infs/NANs, return the same while setting the FPSR    #
13331# as appropriate.                                                       #
13332#                                                                       #
13333#########################################################################
13334
13335        global          fintrz
13336fintrz:
13337        mov.b           STAG(%a6),%d1
13338        bne.b           fintrz_not_norm         # optimize on non-norm input
13339
13340#
13341# Norm:
13342#
13343fintrz_norm:
13344        fmov.l          &0x0,%fpsr              # clear FPSR
13345
13346        fintrz.x        SRC(%a0),%fp0           # execute fintrz
13347
13348        fmov.l          %fpsr,%d0               # save FPSR
13349        or.l            %d0,USER_FPSR(%a6)      # set exception bits
13350
13351        rts
13352
13353#
13354# input is not normalized; what is it?
13355#
13356fintrz_not_norm:
13357        cmpi.b          %d1,&ZERO               # weed out ZERO
13358        beq.b           fintrz_zero
13359        cmpi.b          %d1,&INF                # weed out INF
13360        beq.b           fintrz_inf
13361        cmpi.b          %d1,&DENORM             # weed out DENORM
13362        beq.b           fintrz_denorm
13363        cmpi.b          %d1,&SNAN               # weed out SNAN
13364        beq.l           res_snan_1op
13365        bra.l           res_qnan_1op            # weed out QNAN
13366
13367#
13368# Denorm:
13369#
13370# for DENORMs, the result will be (+/-)ZERO.
13371# also, the INEX2 and AINEX exception bits will be set.
13372# so, we could either set these manually or force the DENORM
13373# to a very small NORM and ship it to the NORM routine.
13374# I do the latter.
13375#
# Only the sign/exponent word and the top byte of the high mantissa
# longword of FP_SCR0 are written here; the remaining mantissa bytes of
# FP_SCR0 are left as they were. a0 is redirected to FP_SCR0 so that
# fintrz_norm operates on the substitute operand instead of the original.
13376fintrz_denorm:
13377        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
13378        mov.b           &0x80,FP_SCR0_HI(%a6)   # force DENORM ==> small NORM
13379        lea             FP_SCR0(%a6),%a0        # a0 = ptr to substitute operand
13380        bra.b           fintrz_norm
13381
13382#
13383# Zero:
13384#
# A zero input yields a zero of the same sign in fp0. The condition code
# byte FPSR_CC(%a6) is overwritten (not OR'ed) with 'Z', plus 'N' for -0.
# The sign is taken from the top bit of the first byte of the source.
13385fintrz_zero:
13386        tst.b           SRC_EX(%a0)             # is ZERO negative?
13387        bmi.b           fintrz_zero_m           # yes
13388fintrz_zero_p:
13389        fmov.s          &0x00000000,%fp0        # return +ZERO in fp0
13390        mov.b           &z_bmask,FPSR_CC(%a6)   # set 'Z' ccode bit
13391        rts
13392fintrz_zero_m:
13393        fmov.s          &0x80000000,%fp0        # return -ZERO in fp0
13394        mov.b           &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
13395        rts
13396
13397#
13398# Infinity:
13399#
# An infinite input is returned unchanged: the source is loaded into fp0
# as-is and FPSR_CC(%a6) is overwritten with 'I', plus 'N' when the sign
# bit (top bit of the first source byte) is set.
13400fintrz_inf:
13401        fmovm.x         SRC(%a0),&0x80          # return result in fp0
13402        tst.b           SRC_EX(%a0)             # is INF negative?
13403        bmi.b           fintrz_inf_m            # yes
13404fintrz_inf_p:
13405        mov.b           &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
13406        rts
13407fintrz_inf_m:
13408        mov.b           &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
13409        rts
13410
13411#########################################################################
13412# XDEF **************************************************************** #
13413#       fabs():  emulates the fabs instruction                          #
13414#       fsabs(): emulates the fsabs instruction                         #
13415#       fdabs(): emulates the fdabs instruction                         #
13416#                                                                       #
13417# XREF **************************************************************** #
13418#       norm() - normalize denorm mantissa to provide EXOP              #
13419#       scale_to_zero_src() - make exponent = 0; get scale factor       #
13420#       unf_res() - calculate underflow result                          #
13421#       ovf_res() - calculate overflow result                           #
13422#       res_{s,q}nan_1op() - set NAN result for monadic operation       #
13423#                                                                       #
13424# INPUT *************************************************************** #
13425#       a0 = pointer to extended precision source operand               #
13426#       d0 = rnd precision/mode                                         #
13427#                                                                       #
13428# OUTPUT ************************************************************** #
13429#       fp0 = result                                                    #
13430#       fp1 = EXOP (if exception occurred)                              #
13431#                                                                       #
13432# ALGORITHM *********************************************************** #
13433#       Handle NANs, infinities, and zeroes as special cases. Divide    #
13434# norms into extended, single, and double precision.                    #
13435#       Simply clear sign for extended precision norm. Ext prec denorm  #
13436# gets an EXOP created for it since it's an underflow.                  #
13437#       Double and single precision can overflow and underflow. First,  #
13438# scale the operand such that the exponent is zero. Perform an "fabs"   #
13439# using the correct rnd mode/prec. Check to see if the original         #
13440# exponent would take an exception. If so, use unf_res() or ovf_res()   #
13441# to calculate the default result. Also, create the EXOP for the        #
13442# exceptional case. If no exception should occur, insert the correct    #
13443# result exponent and return.                                           #
13444#       Unnorms don't pass through here.                                #
13445#                                                                       #
13446#########################################################################
13447
# fsabs / fdabs / fabs entry points.
#   in:  a0 = ptr to extended precision source operand
#        d0 = rnd precision/mode
#   out: fp0 = result
# fsabs and fdabs keep only bits 4-5 of the d0 low byte (mask 0x30) and OR
# in the single/double precision code before joining the common fabs path.
# The precision field is later tested with mask 0xc0 (see fabs_norm).
13448        global          fsabs
13449fsabs:
13450        andi.b          &0x30,%d0               # clear rnd prec
13451        ori.b           &s_mode*0x10,%d0        # insert sgl precision
13452        bra.b           fabs
13453
13454        global          fdabs
13455fdabs:
13456        andi.b          &0x30,%d0               # clear rnd prec
13457        ori.b           &d_mode*0x10,%d0        # insert dbl precision
# fdabs falls through into fabs.
13458
13459        global          fabs
13460fabs:
13461        mov.l           %d0,L_SCR3(%a6)         # store rnd info
13462        mov.b           STAG(%a6),%d1           # d1 = source operand tag
13463        bne.w           fabs_not_norm           # optimize on non-norm input
13464
13465#
13466# ABSOLUTE VALUE: norms and denorms ONLY!
13467#
# From here on d0 holds only the precision bits (mask 0xc0); the full
# rounding info remains available in L_SCR3(%a6).
13468fabs_norm:
13469        andi.b          &0xc0,%d0               # is precision extended?
13470        bne.b           fabs_not_ext            # no; go handle sgl or dbl
13471
13472#
13473# precision selected is extended. so...we can not get an underflow
13474# or overflow because of rounding to the correct precision. so...
13475# skip the scaling and unscaling...
13476#
# The operand is copied to FP_SCR0 with bit 15 of the sign/exponent word
# cleared, then loaded into fp0. No FPSR bits are written on this path.
13477        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
13478        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
13479        mov.w           SRC_EX(%a0),%d1
13480        bclr            &15,%d1                 # force absolute value
13481        mov.w           %d1,FP_SCR0_EX(%a6)     # insert exponent
13482        fmovm.x         FP_SCR0(%a6),&0x80      # return result in fp0
13483        rts
13484
13485#
13486# for an extended precision DENORM, the UNFL exception bit is set
13487# the accrued bit is NOT set in this instance(no inexactness!)
13488#
# Reached from fabs_not_norm when the source tag is DENORM. For single or
# double precision the operand is handed to fabs_not_ext with d0 reduced
# to its precision bits. For extended precision the default result is the
# source with its sign bit cleared; if UNFL is enabled in FPCR_ENABLE(%a6)
# an EXOP is additionally produced by fabs_ext_unfl_ena.
13489fabs_denorm:
13490        andi.b          &0xc0,%d0               # is precision extended?
13491        bne.b           fabs_not_ext            # no
13492
13493        bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
13494
13495        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
13496        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
13497        mov.w           SRC_EX(%a0),%d0
13498        bclr            &15,%d0                 # clear sign
13499        mov.w           %d0,FP_SCR0_EX(%a6)     # insert exponent
13500
13501        fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
13502
13503        btst            &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
13504        bne.b           fabs_ext_unfl_ena
13505        rts
13506
13507#
13508# the input is an extended DENORM and underflow is enabled in the FPCR.
13509# normalize the mantissa and add the bias of 0x6000 to the resulting negative
13510# exponent and insert back into the operand.
13511#
# norm() is called with a0 pointing at FP_SCR0; the code below treats the
# d0 it returns as the shift count. fabs_denorm already cleared the sign
# bit in FP_SCR0_EX, so the "old sign" merged back in here is zero.
# The finished EXOP is loaded into fp1; fp0 keeps the default result.
13512fabs_ext_unfl_ena:
13513        lea             FP_SCR0(%a6),%a0        # pass: ptr to operand
13514        bsr.l           norm                    # normalize result
13515        neg.w           %d0                     # new exponent = -(shft val)
13516        addi.w          &0x6000,%d0             # add new bias to exponent
13517        mov.w           FP_SCR0_EX(%a6),%d1     # fetch old sign,exp
13518        andi.w          &0x8000,%d1             # keep old sign
13519        andi.w          &0x7fff,%d0             # clear sign position
13520        or.w            %d1,%d0                 # concat old sign, new exponent
13521        mov.w           %d0,FP_SCR0_EX(%a6)     # insert new exponent
13522        fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
13523        rts
13524
13525#
13526# operand is either single or double
13527#
# On entry d0 has already been masked down to the precision bits (0xc0)
# by fabs_norm or fabs_denorm, so a byte compare selects sgl vs dbl.
13528fabs_not_ext:
13529        cmpi.b          %d0,&s_mode*0x10        # separate sgl/dbl prec
13530        bne.b           fabs_dbl
13531
13532#
13533# operand is to be rounded to single precision
13534#
# The source is copied to FP_SCR0 and scale_to_zero_src is called; the d0
# it leaves behind is used as the scale factor from here on. That value is
# compared against the single precision limits:
#   d0 >= 0x3fff-0x3f80  -> fabs_sd_unfl
#   d0 == 0x3fff-0x407e  -> fabs_sd_may_ovfl
#   d0 <  0x3fff-0x407e  -> fabs_sd_ovfl
# and otherwise execution falls through to fabs_sd_normal.
13535fabs_sgl:
13536        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
13537        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
13538        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
13539        bsr.l           scale_to_zero_src       # calculate scale factor
13540
13541        cmpi.l          %d0,&0x3fff-0x3f80      # will move in underflow?
13542        bge.w           fabs_sd_unfl            # yes; go handle underflow
13543        cmpi.l          %d0,&0x3fff-0x407e      # will move in overflow?
13544        beq.w           fabs_sd_may_ovfl        # maybe; go check
13545        blt.w           fabs_sd_ovfl            # yes; go handle overflow
13546
13547#
13548# operand will NOT overflow or underflow when moved in to the fp reg file
13549#
# Shared by the sgl and dbl paths. The user's rounding info (L_SCR3) is
# loaded into the FPCR only for the duration of the fabs.x; the FPCR is
# zeroed again right after the FPSR has been captured in d1.
13550fabs_sd_normal:
13551        fmov.l          &0x0,%fpsr              # clear FPSR
13552        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
13553
13554        fabs.x          FP_SCR0(%a6),%fp0       # perform absolute
13555
13556        fmov.l          %fpsr,%d1               # save FPSR
13557        fmov.l          &0x0,%fpcr              # clear FPCR
13558
13559        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
13560
# Also entered from fabs_sd_may_ovfl when no overflow occurred. The result
# in fp0 is written to FP_SCR0, the scale factor in d0 is subtracted from
# its exponent field, and the patched value is reloaded into fp0.
# d2 is used as scratch and preserved across the sequence via the stack.
13561fabs_sd_normal_exit:
13562        mov.l           %d2,-(%sp)              # save d2
13563        fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
13564        mov.w           FP_SCR0_EX(%a6),%d1     # load sgn,exp
13565        mov.l           %d1,%d2                 # make a copy
13566        andi.l          &0x7fff,%d1             # strip sign
13567        sub.l           %d0,%d1                 # subtract scale factor
13568        andi.w          &0x8000,%d2             # keep old sign
13569        or.w            %d1,%d2                 # concat old sign,new exp
13570        mov.w           %d2,FP_SCR0_EX(%a6)     # insert new exponent
13571        mov.l           (%sp)+,%d2              # restore d2
13572        fmovm.x         FP_SCR0(%a6),&0x80      # return result in fp0
13573        rts
13574
13575#
13576# operand is to be rounded to double precision
13577#
# Same structure as fabs_sgl, using the double precision limits
# 0x3fff-0x3c00 (underflow) and 0x3fff-0x43fe (overflow). The d0 left by
# scale_to_zero_src is used as the scale factor.
13578fabs_dbl:
13579        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
13580        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
13581        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
13582        bsr.l           scale_to_zero_src       # calculate scale factor
13583
13584        cmpi.l          %d0,&0x3fff-0x3c00      # will move in underflow?
13585        bge.b           fabs_sd_unfl            # yes; go handle underflow
13586        cmpi.l          %d0,&0x3fff-0x43fe      # will move in overflow?
13587        beq.w           fabs_sd_may_ovfl        # maybe; go check
13588        blt.w           fabs_sd_ovfl            # yes; go handle overflow
13589        bra.w           fabs_sd_normal          # no; go handle normalized op
13590
13591#
13592# operand WILL underflow when moved in to the fp register file
13593#
# FP_SCR0 holds the scaled operand and d0 the scale factor. The sign bit
# (bit 7 of the first byte of FP_SCR0_EX) is cleared in place, so both the
# default result and any EXOP built from FP_SCR0 are non-negative.
13594fabs_sd_unfl:
13595        bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
13596
13597        bclr            &0x7,FP_SCR0_EX(%a6)    # force absolute value
13598
13599# if underflow or inexact is enabled, go calculate EXOP first.
13600        mov.b           FPCR_ENABLE(%a6),%d1
13601        andi.b          &0x0b,%d1               # is UNFL or INEX enabled?
13602        bne.b           fabs_sd_unfl_ena        # yes
13603
# Default result: unf_res is called with a0 = FP_SCR0 and d1 = rounding
# info. The byte it leaves in d0 is OR'ed into FPSR_CC, and FP_SCR0 is
# then loaded into fp0. fabs_sd_unfl_ena branches back here when done.
13604fabs_sd_unfl_dis:
13605        lea             FP_SCR0(%a6),%a0        # pass: result addr
13606        mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
13607        bsr.l           unf_res                 # calculate default result
13608        or.b            %d0,FPSR_CC(%a6)        # set possible 'Z' ccode
13609        fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
13610        rts
13611
13612#
13613# operand will underflow AND underflow is enabled.
13614# Therefore, we must return the result rounded to extended precision.
13615#
# Builds the EXOP in FP_SCR1 so that FP_SCR0 stays intact for the default
# result: the mantissa is copied from FP_SCR0 and the exponent becomes
# (exp - scale factor + 0x6000), masked to 15 bits. The sign was already
# cleared by fabs_sd_unfl. The EXOP is loaded into fp1 and control returns
# to fabs_sd_unfl_dis. d0 (scale factor) is not modified; d2 is preserved.
13616fabs_sd_unfl_ena:
13617        mov.l           FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
13618        mov.l           FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
13619        mov.w           FP_SCR0_EX(%a6),%d1     # load current exponent
13620
13621        mov.l           %d2,-(%sp)              # save d2
13622        mov.l           %d1,%d2                 # make a copy
13623        andi.l          &0x7fff,%d1             # strip sign
13624        andi.w          &0x8000,%d2             # keep old sign
13625        sub.l           %d0,%d1                 # subtract scale factor
13626        addi.l          &0x6000,%d1             # add new bias
13627        andi.w          &0x7fff,%d1             # wrap exponent into 15 bits
13628        or.w            %d2,%d1                 # concat new sign,new exp
13629        mov.w           %d1,FP_SCR1_EX(%a6)     # insert new exp
13630        fmovm.x         FP_SCR1(%a6),&0x40      # return EXOP in fp1
13631        mov.l           (%sp)+,%d2              # restore d2
13632        bra.b           fabs_sd_unfl_dis
13633
13634#
13635# operand WILL overflow.
13636#
# The fabs.x is still executed on the scaled operand under the user's
# rounding info so that the resulting FPSR bits can be OR'ed into
# USER_FPSR. The result left in fp0 is not used further on this path.
13637fabs_sd_ovfl:
13638        fmov.l          &0x0,%fpsr              # clear FPSR
13639        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
13640
13641        fabs.x          FP_SCR0(%a6),%fp0       # perform absolute
13642
13643        fmov.l          &0x0,%fpcr              # clear FPCR
13644        fmov.l          %fpsr,%d1               # save FPSR
13645
13646        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
13647
# Also entered from fabs_sd_may_ovfl once an overflow has been detected.
13648fabs_sd_ovfl_tst:
13649        or.l            &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
13650
13651        mov.b           FPCR_ENABLE(%a6),%d1
13652        andi.b          &0x13,%d1               # is OVFL or INEX enabled?
13653        bne.b           fabs_sd_ovfl_ena        # yes
13654
13655#
13656# OVFL is not enabled; therefore, we must create the default result by
13657# calling ovf_res().
13658#
# ovf_res is passed d0 = precision/mode and d1 = sign flag (0xff in the
# low byte if the 'N' bit is set in FPSR_CC, else 0x00). Afterwards d0 is
# OR'ed into FPSR_CC and the default result is loaded from (a0).
# fabs_sd_ovfl_ena branches back here after building the EXOP.
13659fabs_sd_ovfl_dis:
13660        btst            &neg_bit,FPSR_CC(%a6)   # is result negative?
13661        sne             %d1                     # set sign param accordingly
13662        mov.l           L_SCR3(%a6),%d0         # pass: prec,mode
13663        bsr.l           ovf_res                 # calculate default result
13664        or.b            %d0,FPSR_CC(%a6)        # set INF,N if applicable
13665        fmovm.x         (%a0),&0x80             # return default result in fp0
13666        rts
13667
13668#
13669# OVFL is enabled.
13670# the INEX2 bit has already been updated by the round to the correct precision.
13671# now, round to extended(and don't alter the FPSR).
13672#
# Builds the EXOP in fp1 from the scaled operand in FP_SCR0:
#   exponent = (exp - scale factor - 0x6000), masked to 15 bits.
# in:  d0 = scale factor, FP_SCR0 = scaled source operand
# out: fp1 = EXOP; d0 unchanged; d2 preserved; continues at fabs_sd_ovfl_dis
#
# FP_SCR0 still carries the sign of the source operand on this path (the
# fabs.x result lives only in fp0), while the result of an absolute value
# is never negative. The sign is therefore cleared before the EXOP is
# assembled, exactly as fabs_sd_unfl does on the underflow path. Without
# this, a negative source produced a negative EXOP.
13673fabs_sd_ovfl_ena:
        bclr            &0x7,FP_SCR0_EX(%a6)    # force absolute value
13674        mov.l           %d2,-(%sp)              # save d2
13675        mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
13676        mov.l           %d1,%d2                 # make a copy
13677        andi.l          &0x7fff,%d1             # strip sign
13678        andi.w          &0x8000,%d2             # keep sign (now always clear)
13679        sub.l           %d0,%d1                 # subtract scale factor
13680        subi.l          &0x6000,%d1             # subtract bias
13681        andi.w          &0x7fff,%d1             # wrap exponent into 15 bits
13682        or.w            %d2,%d1                 # concat sign,exp
13683        mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
13684        fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
13685        mov.l           (%sp)+,%d2              # restore d2
13686        bra.b           fabs_sd_ovfl_dis
13687
13688#
13689# the move in MAY overflow. so...
13690#
# Reached when the scale factor equals the overflow limit exactly, so
# whether overflow occurs depends on rounding. The fabs.x is executed under
# the user's rounding info and the scaled result is compared with 2: a
# result >= 2 means the round carried into the exponent, so overflow is
# handled at fabs_sd_ovfl_tst; otherwise fp0 is a valid result and is
# unscaled at fabs_sd_normal_exit. fp1 is used as scratch for the compare.
13691fabs_sd_may_ovfl:
13692        fmov.l          &0x0,%fpsr              # clear FPSR
13693        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
13694
13695        fabs.x          FP_SCR0(%a6),%fp0       # perform absolute
13696
13697        fmov.l          %fpsr,%d1               # save status
13698        fmov.l          &0x0,%fpcr              # clear FPCR
13699
13700        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
13701
13702        fabs.x          %fp0,%fp1               # make a copy of result
13703        fcmp.b          %fp1,&0x2               # is |result| >= 2.b?
13704        fbge.w          fabs_sd_ovfl_tst        # yes; overflow has occurred
13705
13706# no, it didn't overflow; we have correct result
13707        bra.w           fabs_sd_normal_exit
13708
13709##########################################################################
13710
13711#
13712# input is not normalized; what is it?
13713#
# d1 holds the STAG byte loaded at fabs. DENORM, SNAN and QNAN are routed
# to their handlers. The remaining tags are handled by executing fabs.x
# directly on the source and then overwriting FPSR_CC(%a6): 'I' if the tag
# is INF, otherwise 'Z'.
13714fabs_not_norm:
13715        cmpi.b          %d1,&DENORM             # weed out DENORM
13716        beq.w           fabs_denorm
13717        cmpi.b          %d1,&SNAN               # weed out SNAN
13718        beq.l           res_snan_1op
13719        cmpi.b          %d1,&QNAN               # weed out QNAN
13720        beq.l           res_qnan_1op
13721
13722        fabs.x          SRC(%a0),%fp0           # force absolute value
13723
13724        cmpi.b          %d1,&INF                # weed out INF
13725        beq.b           fabs_inf
13726fabs_zero:
13727        mov.b           &z_bmask,FPSR_CC(%a6)   # set 'Z' ccode bit
13728        rts
13729fabs_inf:
13730        mov.b           &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
13731        rts
13732
13733#########################################################################
13734# XDEF **************************************************************** #
13735#       fcmp(): fp compare op routine                                   #
13736#                                                                       #
13737# XREF **************************************************************** #
13738#       res_qnan() - return QNAN result                                 #
13739#       res_snan() - return SNAN result                                 #
13740#                                                                       #
13741# INPUT *************************************************************** #
13742#       a0 = pointer to extended precision source operand               #
13743#       a1 = pointer to extended precision destination operand          #
13744#       d0 = round prec/mode                                            #
13745#                                                                       #
13746# OUTPUT ************************************************************** #
13747#       None                                                            #
13748#                                                                       #
13749# ALGORITHM *********************************************************** #
13750#       Handle NANs and denorms as special cases. For everything else,  #
13751# just use the actual fcmp instruction to produce the correct condition #
13752# codes.                                                                #
13753#                                                                       #
13754#########################################################################
13755
# fcmp entry point.
#   in:  a0 = ptr to extended precision source operand
#        a1 = ptr to extended precision destination operand
# A combined tag index is built in d1.w as (DTAG << 3) | STAG. Zero means
# both tags are zero and the compare is done directly; any other value is
# dispatched through tbl_fcmp_op at fcmp_not_norm.
13756        global          fcmp
13757fcmp:
13758        clr.w           %d1
13759        mov.b           DTAG(%a6),%d1
13760        lsl.b           &0x3,%d1                # dst tag selects the table row
13761        or.b            STAG(%a6),%d1           # src tag selects the column
13762        bne.b           fcmp_not_norm           # optimize on non-norm input
13763
13764#
13765# COMPARE FP OPs : NORMs, ZEROs, INFs, and "corrected" DENORMs
13766#
# Loads the destination into fp0 (overwriting it), compares it against the
# source with the hardware fcmp.x, and stores the top byte of the resulting
# FPSR into FPSR_CC(%a6). The DENORM helpers branch here after pointing
# a0 and/or a1 at substitute operands in FP_SCR0/FP_SCR1.
13767fcmp_norm:
13768        fmovm.x         DST(%a1),&0x80          # load dst op
13769
13770        fcmp.x          %fp0,SRC(%a0)           # do compare
13771
13772        fmov.l          %fpsr,%d0               # save FPSR
13773        rol.l           &0x8,%d0                # extract ccode bits
13774        mov.b           %d0,FPSR_CC(%a6)        # set ccode bits(no exc bits are set)
13775
13776        rts
13777
13778#
13779# fcmp: inputs are not both normalized; what are they?
13780#
# d1.w = (DTAG << 3) | STAG indexes a table of 16-bit offsets relative to
# tbl_fcmp_op. The offset is fetched and then jumped to, both PC-relative.
# The table is laid out as one row of 8 entries per destination tag and
# one column per source tag; each row comment reads "dst - src". Only the
# first 6 columns of a row are named; the last 2 are filler holding offset
# zero (tbl_fcmp_op - tbl_fcmp_op).
# The entry order and count must not change: the index arithmetic in fcmp
# and the swbeg count of 48 both depend on it.
13781fcmp_not_norm:
13782        mov.w           (tbl_fcmp_op.b,%pc,%d1.w*2),%d1
13783        jmp             (tbl_fcmp_op.b,%pc,%d1.w*1)
13784
13785        swbeg           &48
13786tbl_fcmp_op:
13787        short           fcmp_norm       - tbl_fcmp_op # NORM - NORM
13788        short           fcmp_norm       - tbl_fcmp_op # NORM - ZERO
13789        short           fcmp_norm       - tbl_fcmp_op # NORM - INF
13790        short           fcmp_res_qnan   - tbl_fcmp_op # NORM - QNAN
13791        short           fcmp_nrm_dnrm   - tbl_fcmp_op # NORM - DENORM
13792        short           fcmp_res_snan   - tbl_fcmp_op # NORM - SNAN
13793        short           tbl_fcmp_op     - tbl_fcmp_op #
13794        short           tbl_fcmp_op     - tbl_fcmp_op #
13795
13796        short           fcmp_norm       - tbl_fcmp_op # ZERO - NORM
13797        short           fcmp_norm       - tbl_fcmp_op # ZERO - ZERO
13798        short           fcmp_norm       - tbl_fcmp_op # ZERO - INF
13799        short           fcmp_res_qnan   - tbl_fcmp_op # ZERO - QNAN
13800        short           fcmp_dnrm_s     - tbl_fcmp_op # ZERO - DENORM
13801        short           fcmp_res_snan   - tbl_fcmp_op # ZERO - SNAN
13802        short           tbl_fcmp_op     - tbl_fcmp_op #
13803        short           tbl_fcmp_op     - tbl_fcmp_op #
13804
13805        short           fcmp_norm       - tbl_fcmp_op # INF - NORM
13806        short           fcmp_norm       - tbl_fcmp_op # INF - ZERO
13807        short           fcmp_norm       - tbl_fcmp_op # INF - INF
13808        short           fcmp_res_qnan   - tbl_fcmp_op # INF - QNAN
13809        short           fcmp_dnrm_s     - tbl_fcmp_op # INF - DENORM
13810        short           fcmp_res_snan   - tbl_fcmp_op # INF - SNAN
13811        short           tbl_fcmp_op     - tbl_fcmp_op #
13812        short           tbl_fcmp_op     - tbl_fcmp_op #
13813
13814        short           fcmp_res_qnan   - tbl_fcmp_op # QNAN - NORM
13815        short           fcmp_res_qnan   - tbl_fcmp_op # QNAN - ZERO
13816        short           fcmp_res_qnan   - tbl_fcmp_op # QNAN - INF
13817        short           fcmp_res_qnan   - tbl_fcmp_op # QNAN - QNAN
13818        short           fcmp_res_qnan   - tbl_fcmp_op # QNAN - DENORM
13819        short           fcmp_res_snan   - tbl_fcmp_op # QNAN - SNAN
13820        short           tbl_fcmp_op     - tbl_fcmp_op #
13821        short           tbl_fcmp_op     - tbl_fcmp_op #
13822
13823        short           fcmp_dnrm_nrm   - tbl_fcmp_op # DENORM - NORM
13824        short           fcmp_dnrm_d     - tbl_fcmp_op # DENORM - ZERO
13825        short           fcmp_dnrm_d     - tbl_fcmp_op # DENORM - INF
13826        short           fcmp_res_qnan   - tbl_fcmp_op # DENORM - QNAN
13827        short           fcmp_dnrm_sd    - tbl_fcmp_op # DENORM - DENORM
13828        short           fcmp_res_snan   - tbl_fcmp_op # DENORM - SNAN
13829        short           tbl_fcmp_op     - tbl_fcmp_op #
13830        short           tbl_fcmp_op     - tbl_fcmp_op #
13831
13832        short           fcmp_res_snan   - tbl_fcmp_op # SNAN - NORM
13833        short           fcmp_res_snan   - tbl_fcmp_op # SNAN - ZERO
13834        short           fcmp_res_snan   - tbl_fcmp_op # SNAN - INF
13835        short           fcmp_res_snan   - tbl_fcmp_op # SNAN - QNAN
13836        short           fcmp_res_snan   - tbl_fcmp_op # SNAN - DENORM
13837        short           fcmp_res_snan   - tbl_fcmp_op # SNAN - SNAN
13838        short           tbl_fcmp_op     - tbl_fcmp_op #
13839        short           tbl_fcmp_op     - tbl_fcmp_op #
13840
13841# unlike all other functions for QNAN and SNAN, fcmp does NOT set the
13842# 'N' bit for a negative QNAN or SNAN input so we must squelch it here.
# Both stubs call the shared NAN routine and then clear bit 3 of
# FPSR_CC(%a6) (mask 0xf7), leaving the other condition code bits as set.
13843fcmp_res_qnan:
13844        bsr.l           res_qnan
13845        andi.b          &0xf7,FPSR_CC(%a6)      # clear 'N' ccode bit
13846        rts
13847fcmp_res_snan:
13848        bsr.l           res_snan
13849        andi.b          &0xf7,FPSR_CC(%a6)      # clear 'N' ccode bit
13850        rts
13851
13852#
13853# DENORMs are a little more difficult.
13854# If you have 2 DENORMs, then you can just force the j-bit to a one
13855# and use the fcmp_norm routine.
13856# If you have a DENORM and an INF or ZERO, just force the DENORM's j-bit to a one
13857# and use the fcmp_norm routine.
13858# If you have a DENORM and a NORM with opposite signs, then use fcmp_norm, also.
13859# But with a DENORM and a NORM of the same sign, the neg bit is set if the
13860# (1) signs are (+) and the DENORM is the dst or
13861# (2) signs are (-) and the DENORM is the src
13862#
13863
# Source is a DENORM: copy it to FP_SCR0 with bit 31 of the high mantissa
# longword (the j-bit) forced to one, point a0 at the copy, and compare
# via fcmp_norm. The destination pointer a1 is left untouched.
13864fcmp_dnrm_s:
13865        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
13866        mov.l           SRC_HI(%a0),%d0
13867        bset            &31,%d0                 # DENORM src; make into small norm
13868        mov.l           %d0,FP_SCR0_HI(%a6)
13869        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
13870        lea             FP_SCR0(%a6),%a0        # a0 = ptr to substitute src
13871        bra.w           fcmp_norm
13872
# Destination is a DENORM: copy it to FP_SCR0 with bit 31 of the high
# mantissa longword (the j-bit) forced to one, point a1 at the copy, and
# compare via fcmp_norm. The source pointer a0 is left untouched.
# NOTE(review): the first move is a long where fcmp_dnrm_s and
# fcmp_dnrm_sd use a word move for the sign/exponent field. The following
# store to FP_SCR0_HI is the only later write to that area in this block;
# presumably the extra word copied is an unused pad, but confirm against
# the operand layout before changing it.
13873fcmp_dnrm_d:
13874        mov.l           DST_EX(%a1),FP_SCR0_EX(%a6)
13875        mov.l           DST_HI(%a1),%d0
13876        bset            &31,%d0                 # DENORM dst; make into small norm
13877        mov.l           %d0,FP_SCR0_HI(%a6)
13878        mov.l           DST_LO(%a1),FP_SCR0_LO(%a6)
13879        lea             FP_SCR0(%a6),%a1        # a1 = ptr to substitute dst
13880        bra.w           fcmp_norm
13881
# Both operands are DENORMs: the destination is copied to FP_SCR1 and the
# source to FP_SCR0, each with bit 31 of the high mantissa longword (the
# j-bit) forced to one. a1 and a0 are pointed at the copies before the
# compare is done by fcmp_norm. d0 is used as scratch.
13882fcmp_dnrm_sd:
13883        mov.w           DST_EX(%a1),FP_SCR1_EX(%a6)
13884        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
13885        mov.l           DST_HI(%a1),%d0
13886        bset            &31,%d0                 # DENORM dst; make into small norm
13887        mov.l           %d0,FP_SCR1_HI(%a6)
13888        mov.l           SRC_HI(%a0),%d0
13889        bset            &31,%d0                 # DENORM src; make into small norm
13890        mov.l           %d0,FP_SCR0_HI(%a6)
13891        mov.l           DST_LO(%a1),FP_SCR1_LO(%a6)
13892        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
13893        lea             FP_SCR1(%a6),%a1        # a1 = ptr to substitute dst
13894        lea             FP_SCR0(%a6),%a0        # a0 = ptr to substitute src
13895        bra.w           fcmp_norm
13896
# Table entry "NORM - DENORM": destination is a NORM, source is a DENORM.
# The first bytes of both operands (which carry the sign in bit 7) are
# XOR'ed. If the signs differ, the source is patched by fcmp_dnrm_s and
# compared in hardware. If both are negative, FPSR_CC(%a6) is overwritten
# with the 'N' bit; if both are positive the routine returns without
# writing FPSR_CC.
# NOTE(review): the both-positive case appears to rely on FPSR_CC already
# being clear on entry; confirm with the caller.
13897fcmp_nrm_dnrm:
13898        mov.b           SRC_EX(%a0),%d0         # determine if like signs
13899        mov.b           DST_EX(%a1),%d1
13900        eor.b           %d0,%d1
13901        bmi.w           fcmp_dnrm_s
13902
13903# signs are the same, so must determine the answer ourselves.
13904        tst.b           %d0                     # is src op negative?
13905        bmi.b           fcmp_nrm_dnrm_m         # yes
13906        rts
13907fcmp_nrm_dnrm_m:
13908        mov.b           &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
13909        rts
13910
# Table entry "DENORM - NORM": destination is a DENORM, source is a NORM.
# The first bytes of both operands (which carry the sign in bit 7) are
# XOR'ed. If the signs differ, the destination is patched by fcmp_dnrm_d
# and compared in hardware. If both are positive, FPSR_CC(%a6) is
# overwritten with the 'N' bit; if both are negative the routine returns
# without writing FPSR_CC.
# NOTE(review): the both-negative case appears to rely on FPSR_CC already
# being clear on entry; confirm with the caller.
13911fcmp_dnrm_nrm:
13912        mov.b           SRC_EX(%a0),%d0         # determine if like signs
13913        mov.b           DST_EX(%a1),%d1
13914        eor.b           %d0,%d1
13915        bmi.w           fcmp_dnrm_d
13916
13917# signs are the same, so must determine the answer ourselves.
13918        tst.b           %d0                     # is src op negative?
13919        bpl.b           fcmp_dnrm_nrm_m         # no
13920        rts
13921fcmp_dnrm_nrm_m:
13922        mov.b           &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
13923        rts
13924
13925#########################################################################
13926# XDEF **************************************************************** #
13927#       fsglmul(): emulates the fsglmul instruction                     #
13928#                                                                       #
13929# XREF **************************************************************** #
13930#       scale_to_zero_src() - scale src exponent to zero                #
13931#       scale_to_zero_dst() - scale dst exponent to zero                #
13932#       unf_res4() - return default underflow result for sglop          #
13933#       ovf_res() - return default overflow result                      #
13934#       res_qnan() - return QNAN result                                 #
13935#       res_snan() - return SNAN result                                 #
13936#                                                                       #
13937# INPUT *************************************************************** #
13938#       a0 = pointer to extended precision source operand               #
13939#       a1 = pointer to extended precision destination operand          #
13940#       d0  rnd prec,mode                                               #
13941#                                                                       #
13942# OUTPUT ************************************************************** #
13943#       fp0 = result                                                    #
13944#       fp1 = EXOP (if exception occurred)                              #
13945#                                                                       #
13946# ALGORITHM *********************************************************** #
13947#       Handle NANs, infinities, and zeroes as special cases. Divide    #
13948# norms/denorms into ext/sgl/dbl precision.                             #
13949#       For norms/denorms, scale the exponents such that a multiply     #
13950# instruction won't cause an exception. Use the regular fsglmul to      #
13951# compute a result. Check if the regular operands would have taken      #
13952# an exception. If so, return the default overflow/underflow result     #
13953# and return the EXOP if exceptions are enabled. Else, scale the        #
13954# result operand to the proper exponent.                                #
13955#                                                                       #
13956#########################################################################
13957
# fsglmul entry point.
#   in:  a0 = ptr to extended precision source operand
#        a1 = ptr to extended precision destination operand
#        d0 = rnd prec,mode (saved in L_SCR3)
# A combined tag index is built in d1.w as (DTAG << 3) | STAG; any
# non-zero value is handled at fsglmul_not_norm.
13958        global          fsglmul
13959fsglmul:
13960        mov.l           %d0,L_SCR3(%a6)         # store rnd info
13961
13962        clr.w           %d1
13963        mov.b           DTAG(%a6),%d1
13964        lsl.b           &0x3,%d1
13965        or.b            STAG(%a6),%d1
13966
13967        bne.w           fsglmul_not_norm        # optimize on non-norm input
13968
# The destination is copied to FP_SCR1 and the source to FP_SCR0. The two
# scale routines are then called and their d0 values summed (the first one
# is parked on the stack across the second call). The sum is classified:
#   == 0x3fff-0x7ffe -> fsglmul_may_ovfl     <  that -> fsglmul_ovfl
#   == 0x3fff+0x0001 -> fsglmul_may_unfl     >  that -> fsglmul_unfl
# Anything in between continues at the code that follows this block.
13969fsglmul_norm:
13970        mov.w           DST_EX(%a1),FP_SCR1_EX(%a6)
13971        mov.l           DST_HI(%a1),FP_SCR1_HI(%a6)
13972        mov.l           DST_LO(%a1),FP_SCR1_LO(%a6)
13973
13974        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
13975        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
13976        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
13977
13978        bsr.l           scale_to_zero_src       # scale exponent
13979        mov.l           %d0,-(%sp)              # save scale factor 1
13980
13981        bsr.l           scale_to_zero_dst       # scale dst exponent
13982
13983        add.l           (%sp)+,%d0              # SCALE_FACTOR = scale1 + scale2
13984
13985        cmpi.l          %d0,&0x3fff-0x7ffe      # would result ovfl?
13986        beq.w           fsglmul_may_ovfl        # result may rnd to overflow
13987        blt.w           fsglmul_ovfl            # result will overflow
13988
13989        cmpi.l          %d0,&0x3fff+0x0001      # would result unfl?
13990        beq.w           fsglmul_may_unfl        # result may rnd to no unfl
13991        bgt.w           fsglmul_unfl            # result will underflow
13992
# No overflow or underflow expected. The scaled destination (FP_SCR1) is
# loaded into fp0 and multiplied by the scaled source (FP_SCR0) with the
# hardware fsglmul.x under the user's rounding info. The FPCR is zeroed
# again once the FPSR has been captured, and the captured bits are OR'ed
# into USER_FPSR. d0 must still hold the combined scale factor.
13993fsglmul_normal:
13994        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
13995
13996        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
13997        fmov.l          &0x0,%fpsr              # clear FPSR
13998
13999        fsglmul.x       FP_SCR0(%a6),%fp0       # execute sgl multiply
14000
14001        fmov.l          %fpsr,%d1               # save status
14002        fmov.l          &0x0,%fpcr              # clear FPCR
14003
14004        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
14005
# The result in fp0 is written to FP_SCR0, the scale factor in d0 is
# subtracted from its exponent field, and the patched value is reloaded
# into fp0. d2 is used as scratch and preserved via the stack.
14006fsglmul_normal_exit:
14007        fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
14008        mov.l           %d2,-(%sp)              # save d2
14009        mov.w           FP_SCR0_EX(%a6),%d1     # load {sgn,exp}
14010        mov.l           %d1,%d2                 # make a copy
14011        andi.l          &0x7fff,%d1             # strip sign
14012        andi.w          &0x8000,%d2             # keep old sign
14013        sub.l           %d0,%d1                 # subtract scale factor
14014        or.w            %d2,%d1                 # concat old sign,new exp
14015        mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
14016        mov.l           (%sp)+,%d2              # restore d2
14017        fmovm.x         FP_SCR0(%a6),&0x80      # return result in fp0
14018        rts
14019
14020fsglmul_ovfl:
     # Overflow path: the scale-factor test that branches here has already
     # decided the result will overflow. The multiply is still executed so
     # that its FPSR bits (INEX2, N) can be ORed into USER_FPSR and so that
     # fp0 holds the product used to build the EXOP in fsglmul_ovfl_ena.
14021        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
14022
14023        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
14024        fmov.l          &0x0,%fpsr              # clear FPSR
14025
14026        fsglmul.x       FP_SCR0(%a6),%fp0       # execute sgl multiply
14027
14028        fmov.l          %fpsr,%d1               # save status
14029        fmov.l          &0x0,%fpcr              # clear FPCR
14030
14031        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
14032
     # Entry point shared with fsglmul_may_ovfl (which arrives here with the
     # product already in fp0 and its FPSR already merged into USER_FPSR).
14033fsglmul_ovfl_tst:
14034
14035# save setting this until now because this is where fsglmul_may_ovfl may jump in
14036        or.l            &ovfl_inx_mask, USER_FPSR(%a6) # set ovfl/aovfl/ainex
14037
14038        mov.b           FPCR_ENABLE(%a6),%d1
14039        andi.b          &0x13,%d1               # is OVFL or INEX enabled?
14040        bne.b           fsglmul_ovfl_ena        # yes
14041
     # Build the default overflow result. d1 = sign parameter (set from the N
     # bit in FPSR_CC), d0 = L_SCR3 with only bits 0x30 of the low byte kept.
     # NOTE: d0 (the scale factor) is overwritten here, so fsglmul_ovfl_ena
     # must consume the scale factor before branching back to this label.
     # After ovf_res returns, d0 is ORed into FPSR_CC and fp0 is loaded from
     # the address left in a0.
14042fsglmul_ovfl_dis:
14043        btst            &neg_bit,FPSR_CC(%a6)   # is result negative?
14044        sne             %d1                     # set sign param accordingly
14045        mov.l           L_SCR3(%a6),%d0         # pass prec:rnd
14046        andi.b          &0x30,%d0               # force prec = ext
14047        bsr.l           ovf_res                 # calculate default result
14048        or.b            %d0,FPSR_CC(%a6)        # set INF,N if applicable
14049        fmovm.x         (%a0),&0x80             # return default result in fp0
14050        rts
14051
     # OVFL or INEX is enabled: also return an EXOP in fp1. The EXOP is the
     # product from fp0 with its exponent rewritten as
     # ((exponent - d0) - 0x6000) & 0x7fff, sign preserved. Control then
     # joins fsglmul_ovfl_dis to produce the default result in fp0.
14052fsglmul_ovfl_ena:
14053        fmovm.x         &0x80,FP_SCR0(%a6)      # move result to stack
14054
14055        mov.l           %d2,-(%sp)              # save d2
14056        mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
14057        mov.l           %d1,%d2                 # make a copy
14058        andi.l          &0x7fff,%d1             # strip sign
14059        sub.l           %d0,%d1                 # subtract scale factor (d0) from exp
14060        subi.l          &0x6000,%d1             # subtract bias
14061        andi.w          &0x7fff,%d1             # keep 15-bit exponent field
14062        andi.w          &0x8000,%d2             # keep old sign
14063        or.w            %d2,%d1                 # concat old sign,new exp
14064        mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
14065        mov.l           (%sp)+,%d2              # restore d2
14066        fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
14067        bra.b           fsglmul_ovfl_dis
14068
14069fsglmul_may_ovfl:
     # Borderline case: the scale factor equals the overflow threshold, so
     # whether the result overflows depends on the rounded product. Do the
     # multiply, merge its FPSR into USER_FPSR, then compare |product| with 2:
     # >= 2 is handled as overflow (fsglmul_ovfl_tst), anything else takes the
     # normal exit. fp1 is used as scratch for |product|.
14070        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
14071
14072        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
14073        fmov.l          &0x0,%fpsr              # clear FPSR
14074
14075        fsglmul.x       FP_SCR0(%a6),%fp0       # execute sgl multiply
14076
14077        fmov.l          %fpsr,%d1               # save status
14078        fmov.l          &0x0,%fpcr              # clear FPCR
14079
14080        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
14081
14082        fabs.x          %fp0,%fp1               # fp1 = |result| (fp0 left intact)
14083        fcmp.b          %fp1,&0x2               # is |result| >= 2.b?
14084        fbge.w          fsglmul_ovfl_tst        # yes; overflow has occurred
14085
14086# no, it didn't overflow; we have correct result
14087        bra.w           fsglmul_normal_exit
14088
14089fsglmul_unfl:
     # Underflow path. Sets the UNFL exception bit, then performs the multiply
     # with FPCR = rz_mode*0x10 (not the caller's L_SCR3) and merges the FPSR
     # into USER_FPSR. If UNFL or INEX is enabled, fsglmul_unfl_ena builds an
     # EXOP first; either way fsglmul_unfl_dis produces the default result.
14090        bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
14091
14092        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
14093
14094        fmov.l          &rz_mode*0x10,%fpcr     # set FPCR
14095        fmov.l          &0x0,%fpsr              # clear FPSR
14096
14097        fsglmul.x       FP_SCR0(%a6),%fp0       # execute sgl multiply
14098
14099        fmov.l          %fpsr,%d1               # save status
14100        fmov.l          &0x0,%fpcr              # clear FPCR
14101
14102        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
14103
14104        mov.b           FPCR_ENABLE(%a6),%d1
14105        andi.b          &0x0b,%d1               # is UNFL or INEX enabled?
14106        bne.b           fsglmul_unfl_ena        # yes
14107
     # Default underflow result: store fp0 in FP_SCR0, call unf_res4 with
     # a0 = &FP_SCR0 and d1 = L_SCR3, OR the returned d0 into FPSR_CC and
     # reload fp0 from FP_SCR0.
14108fsglmul_unfl_dis:
14109        fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
14110
14111        lea             FP_SCR0(%a6),%a0        # pass: result addr
14112        mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
14113        bsr.l           unf_res4                # calculate default result
14114        or.b            %d0,FPSR_CC(%a6)        # 'Z' bit may have been set
14115        fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
14116        rts
14117
14118#
14119# UNFL is enabled.
14120#
     # Recompute the product into fp1 using L_SCR3 as FPCR (the FPSR of this
     # second multiply is not saved), then rewrite its exponent as
     # ((exponent - d0) + 0x6000) & 0x7fff with the sign preserved and return
     # it as the EXOP in fp1. FP_SCR0 is reused as the staging area only after
     # it has been read as the source operand; fp0 still holds the product
     # from fsglmul_unfl for fsglmul_unfl_dis.
14121fsglmul_unfl_ena:
14122        fmovm.x         FP_SCR1(%a6),&0x40      # load dst op
14123
14124        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
14125        fmov.l          &0x0,%fpsr              # clear FPSR
14126
14127        fsglmul.x       FP_SCR0(%a6),%fp1       # execute sgl multiply
14128
14129        fmov.l          &0x0,%fpcr              # clear FPCR
14130
14131        fmovm.x         &0x40,FP_SCR0(%a6)      # save result to stack
14132        mov.l           %d2,-(%sp)              # save d2
14133        mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
14134        mov.l           %d1,%d2                 # make a copy
14135        andi.l          &0x7fff,%d1             # strip sign
14136        andi.w          &0x8000,%d2             # keep old sign
14137        sub.l           %d0,%d1                 # subtract scale factor (d0) from exp
14138        addi.l          &0x6000,%d1             # add bias
14139        andi.w          &0x7fff,%d1             # keep 15-bit exponent field
14140        or.w            %d2,%d1                 # concat old sign,new exp
14141        mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
14142        mov.l           (%sp)+,%d2              # restore d2
14143        fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
14144        bra.w           fsglmul_unfl_dis
14145
14146fsglmul_may_unfl:
     # Borderline case: the scale factor equals the underflow threshold.
     # Do the multiply, merge its FPSR into USER_FPSR, then classify by
     # |product| versus 2:
     #   > 2  -> no underflow, take fsglmul_normal_exit
     #   < 2  -> underflow, go to fsglmul_unfl
     #   == 2 -> undecided; redo the multiply in round-to-zero (below)
14147        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
14148
14149        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
14150        fmov.l          &0x0,%fpsr              # clear FPSR
14151
14152        fsglmul.x       FP_SCR0(%a6),%fp0       # execute sgl multiply
14153
14154        fmov.l          %fpsr,%d1               # save status
14155        fmov.l          &0x0,%fpcr              # clear FPCR
14156
14157        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
14158
14159        fabs.x          %fp0,%fp1               # fp1 = |result| (fp0 left intact)
14160        fcmp.b          %fp1,&0x2               # compare |result| with 2.b
14161        fbgt.w          fsglmul_normal_exit     # |result| > 2; no underflow occurred
14162        fblt.w          fsglmul_unfl            # |result| < 2; underflow occurred
14163
14164#
14165# we still don't know if underflow occurred. result is ~ equal to 2. but,
14166# we don't know if the result was an underflow that rounded up to a 2 or
14167# a normalized number that rounded down to a 2. so, redo the entire operation
14168# using RZ as the rounding mode to see what the pre-rounded result is.
14169# this case should be relatively rare.
14170#
     # The redo is done in fp1 so that fp0 keeps the product computed above,
     # which fsglmul_normal_exit returns. FPCR for the redo is L_SCR3 with
     # bits 0xc0 of the low byte kept and rz_mode*0x10 ORed in.
14171        fmovm.x         FP_SCR1(%a6),&0x40      # load dst op into fp1
14172
14173        mov.l           L_SCR3(%a6),%d1
14174        andi.b          &0xc0,%d1               # keep rnd prec
14175        ori.b           &rz_mode*0x10,%d1       # insert RZ
14176
14177        fmov.l          %d1,%fpcr               # set FPCR
14178        fmov.l          &0x0,%fpsr              # clear FPSR
14179
14180        fsglmul.x       FP_SCR0(%a6),%fp1       # execute sgl multiply
14181
14182        fmov.l          &0x0,%fpcr              # clear FPCR
14183        fabs.x          %fp1                    # make absolute value
14184        fcmp.b          %fp1,&0x2               # is |result| < 2.b?
14185        fbge.w          fsglmul_normal_exit     # no; no underflow occurred
14186        bra.w           fsglmul_unfl            # yes, underflow occurred
14187
14188##############################################################################
14189
14190#
14191# Single Precision Multiply: inputs are not both normalized; what are they?
14192#
14193fsglmul_not_norm:
     # Dispatch on the operand-type index in d1: fetch the 16-bit entry at
     # tbl_fsglmul_op + d1*2 and jump to tbl_fsglmul_op + that offset.
     # The table has 6 groups of 8 entries (48 words). Judging by the handlers
     # (fsglmul_inf_src in the INF column, fsglmul_inf_dst in the INF group),
     # the group is selected by the dst type and the column by the src type;
     # presumably d1 = (DTAG << 3) | STAG as in fsgldiv -- the code that sets
     # d1 is outside this view. The last two slots of each group hold offset 0.
14194        mov.w           (tbl_fsglmul_op.b,%pc,%d1.w*2),%d1
14195        jmp             (tbl_fsglmul_op.b,%pc,%d1.w*1)
14196
14197        swbeg           &48
14198tbl_fsglmul_op:
14199        short           fsglmul_norm            - tbl_fsglmul_op # NORM x NORM
14200        short           fsglmul_zero            - tbl_fsglmul_op # NORM x ZERO
14201        short           fsglmul_inf_src         - tbl_fsglmul_op # NORM x INF
14202        short           fsglmul_res_qnan        - tbl_fsglmul_op # NORM x QNAN
14203        short           fsglmul_norm            - tbl_fsglmul_op # NORM x DENORM
14204        short           fsglmul_res_snan        - tbl_fsglmul_op # NORM x SNAN
14205        short           tbl_fsglmul_op          - tbl_fsglmul_op #
14206        short           tbl_fsglmul_op          - tbl_fsglmul_op #
14207
14208        short           fsglmul_zero            - tbl_fsglmul_op # ZERO x NORM
14209        short           fsglmul_zero            - tbl_fsglmul_op # ZERO x ZERO
14210        short           fsglmul_res_operr       - tbl_fsglmul_op # ZERO x INF
14211        short           fsglmul_res_qnan        - tbl_fsglmul_op # ZERO x QNAN
14212        short           fsglmul_zero            - tbl_fsglmul_op # ZERO x DENORM
14213        short           fsglmul_res_snan        - tbl_fsglmul_op # ZERO x SNAN
14214        short           tbl_fsglmul_op          - tbl_fsglmul_op #
14215        short           tbl_fsglmul_op          - tbl_fsglmul_op #
14216
14217        short           fsglmul_inf_dst         - tbl_fsglmul_op # INF x NORM
14218        short           fsglmul_res_operr       - tbl_fsglmul_op # INF x ZERO
14219        short           fsglmul_inf_dst         - tbl_fsglmul_op # INF x INF
14220        short           fsglmul_res_qnan        - tbl_fsglmul_op # INF x QNAN
14221        short           fsglmul_inf_dst         - tbl_fsglmul_op # INF x DENORM
14222        short           fsglmul_res_snan        - tbl_fsglmul_op # INF x SNAN
14223        short           tbl_fsglmul_op          - tbl_fsglmul_op #
14224        short           tbl_fsglmul_op          - tbl_fsglmul_op #
14225
14226        short           fsglmul_res_qnan        - tbl_fsglmul_op # QNAN x NORM
14227        short           fsglmul_res_qnan        - tbl_fsglmul_op # QNAN x ZERO
14228        short           fsglmul_res_qnan        - tbl_fsglmul_op # QNAN x INF
14229        short           fsglmul_res_qnan        - tbl_fsglmul_op # QNAN x QNAN
14230        short           fsglmul_res_qnan        - tbl_fsglmul_op # QNAN x DENORM
14231        short           fsglmul_res_snan        - tbl_fsglmul_op # QNAN x SNAN
14232        short           tbl_fsglmul_op          - tbl_fsglmul_op #
14233        short           tbl_fsglmul_op          - tbl_fsglmul_op #
14234
14235        short           fsglmul_norm            - tbl_fsglmul_op # DENORM x NORM
14236        short           fsglmul_zero            - tbl_fsglmul_op # DENORM x ZERO
14237        short           fsglmul_inf_src         - tbl_fsglmul_op # DENORM x INF
14238        short           fsglmul_res_qnan        - tbl_fsglmul_op # DENORM x QNAN
14239        short           fsglmul_norm            - tbl_fsglmul_op # DENORM x DENORM
14240        short           fsglmul_res_snan        - tbl_fsglmul_op # DENORM x SNAN
14241        short           tbl_fsglmul_op          - tbl_fsglmul_op #
14242        short           tbl_fsglmul_op          - tbl_fsglmul_op #
14243
14244        short           fsglmul_res_snan        - tbl_fsglmul_op # SNAN x NORM
14245        short           fsglmul_res_snan        - tbl_fsglmul_op # SNAN x ZERO
14246        short           fsglmul_res_snan        - tbl_fsglmul_op # SNAN x INF
14247        short           fsglmul_res_snan        - tbl_fsglmul_op # SNAN x QNAN
14248        short           fsglmul_res_snan        - tbl_fsglmul_op # SNAN x DENORM
14249        short           fsglmul_res_snan        - tbl_fsglmul_op # SNAN x SNAN
14250        short           tbl_fsglmul_op          - tbl_fsglmul_op #
14251        short           tbl_fsglmul_op          - tbl_fsglmul_op #
14252
     # Local trampolines for tbl_fsglmul_op: the table stores 16-bit offsets
     # relative to itself, and each stub forwards with a long branch to the
     # shared handler (the zero/inf cases reuse the fmul_* routines).
14253fsglmul_res_operr:
14254        bra.l           res_operr
14255fsglmul_res_snan:
14256        bra.l           res_snan
14257fsglmul_res_qnan:
14258        bra.l           res_qnan
14259fsglmul_zero:
14260        bra.l           fmul_zero
14261fsglmul_inf_src:
14262        bra.l           fmul_inf_src
14263fsglmul_inf_dst:
14264        bra.l           fmul_inf_dst
14265
14266#########################################################################
14267# XDEF **************************************************************** #
14268#       fsgldiv(): emulates the fsgldiv instruction                     #
14269#                                                                       #
14270# XREF **************************************************************** #
14271#       scale_to_zero_src() - scale src exponent to zero                #
14272#       scale_to_zero_dst() - scale dst exponent to zero                #
14273#       unf_res4() - return default underflow result for sglop          #
14274#       ovf_res() - return default overflow result                      #
14275#       res_qnan() - return QNAN result                                 #
14276#       res_snan() - return SNAN result                                 #
14277#                                                                       #
14278# INPUT *************************************************************** #
14279#       a0 = pointer to extended precision source operand               #
14280#       a1 = pointer to extended precision destination operand          #
14281#       d0 = rnd prec,mode                                              #
14282#                                                                       #
14283# OUTPUT ************************************************************** #
14284#       fp0 = result                                                    #
14285#       fp1 = EXOP (if exception occurred)                              #
14286#                                                                       #
14287# ALGORITHM *********************************************************** #
14288#       Handle NANs, infinities, and zeroes as special cases. Divide    #
14289# norms/denorms into ext/sgl/dbl precision.                             #
14290#       For norms/denorms, scale the exponents such that a divide       #
14291# instruction won't cause an exception. Use the regular fsgldiv to      #
14292# compute a result. Check if the regular operands would have taken      #
14293# an exception. If so, return the default overflow/underflow result     #
14294# and return the EXOP if exceptions are enabled. Else, scale the        #
14295# result operand to the proper exponent.                                #
14296#                                                                       #
14297#########################################################################
14298
14299        global          fsgldiv
14300fsgldiv:
     # Entry: d0 = rnd prec,mode; a0 -> src operand; a1 -> dst operand.
     # d1 is built as (DTAG << 3) | STAG; a zero index means both tags are 0
     # (the NORM / NORM table slot) and falls through to fsgldiv_norm,
     # anything else is dispatched through tbl_fsgldiv_op.
14301        mov.l           %d0,L_SCR3(%a6)         # store rnd info
14302
14303        clr.w           %d1
14304        mov.b           DTAG(%a6),%d1
14305        lsl.b           &0x3,%d1
14306        or.b            STAG(%a6),%d1           # combine src tags
14307
14308        bne.w           fsgldiv_not_norm        # optimize on non-norm input
14309
14310#
14311# DIVIDE: NORMs and DENORMs ONLY!
14312#
     # Copy dst to FP_SCR1 and src to FP_SCR0, scale both, and combine the two
     # returned scale factors into d0 = scale2 - scale1, where scale1 is the
     # value returned by scale_to_zero_src and scale2 the value returned by
     # scale_to_zero_dst. d0 is then tested against the overflow and
     # underflow thresholds.
14313fsgldiv_norm:
14314        mov.w           DST_EX(%a1),FP_SCR1_EX(%a6)
14315        mov.l           DST_HI(%a1),FP_SCR1_HI(%a6)
14316        mov.l           DST_LO(%a1),FP_SCR1_LO(%a6)
14317
14318        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
14319        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
14320        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
14321
14322        bsr.l           scale_to_zero_src       # calculate scale factor 1
14323        mov.l           %d0,-(%sp)              # save scale factor 1
14324
14325        bsr.l           scale_to_zero_dst       # calculate scale factor 2
14326
14327        neg.l           (%sp)                   # S.F. = scale2 - scale1
14328        add.l           %d0,(%sp)
14329
     # NOTE(review): the value placed in d1 by the next two instructions is
     # overwritten (fmov.l %fpsr,%d1) before being read on every path that
     # follows (fsgldiv_normal, _may_ovfl, _may_unfl, _unfl).
14330        mov.w           2+L_SCR3(%a6),%d1       # fetch precision,mode
14331        lsr.b           &0x6,%d1
14332        mov.l           (%sp)+,%d0
14333        cmpi.l          %d0,&0x3fff-0x7ffe      # S.F. at or below ovfl threshold?
14334        ble.w           fsgldiv_may_ovfl        # yes; result may overflow
14335
14336        cmpi.l          %d0,&0x3fff-0x0000      # will result underflow?
14337        beq.w           fsgldiv_may_unfl        # maybe
14338        bgt.w           fsgldiv_unfl            # yes; go handle underflow
14339
     # No exception expected: divide the scaled operands with L_SCR3 as FPCR
     # and merge the resulting FPSR into USER_FPSR.
14340fsgldiv_normal:
14341        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
14342
14343        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
14344        fmov.l          &0x0,%fpsr              # clear FPSR
14345
14346        fsgldiv.x       FP_SCR0(%a6),%fp0       # perform sgl divide
14347
14348        fmov.l          %fpsr,%d1               # save FPSR
14349        fmov.l          &0x0,%fpcr              # clear FPCR
14350
14351        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
14352
     # Common exit, also reached from fsgldiv_may_ovfl and fsgldiv_may_unfl
     # with the quotient already in fp0: rewrite the {sign,exponent} word as
     # (exponent - d0) with the sign preserved and reload fp0. d2 is scratch
     # and is saved/restored on the stack.
14353fsgldiv_normal_exit:
14354        fmovm.x         &0x80,FP_SCR0(%a6)      # store result on stack
14355        mov.l           %d2,-(%sp)              # save d2
14356        mov.w           FP_SCR0_EX(%a6),%d1     # load {sgn,exp}
14357        mov.l           %d1,%d2                 # make a copy
14358        andi.l          &0x7fff,%d1             # strip sign
14359        andi.w          &0x8000,%d2             # keep old sign
14360        sub.l           %d0,%d1                 # subtract scale factor (d0) from exp
14361        or.w            %d2,%d1                 # concat old sign,new exp
14362        mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
14363        mov.l           (%sp)+,%d2              # restore d2
14364        fmovm.x         FP_SCR0(%a6),&0x80      # return result in fp0
14365        rts
14366
14367fsgldiv_may_ovfl:
     # The scale factor is at or below the overflow threshold. Perform the
     # divide, merge its FPSR into USER_FPSR, then compute the exponent the
     # final result would have: (exponent of fp0 & 0x7fff) - d0. If that is
     # below 0x7fff the result is representable and the normal exit is taken;
     # otherwise fall through to the overflow handling.
14368        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
14369
14370        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
14371        fmov.l          &0x0,%fpsr              # clear FPSR
14372
14373        fsgldiv.x       FP_SCR0(%a6),%fp0       # execute divide
14374
14375        fmov.l          %fpsr,%d1
14376        fmov.l          &0x0,%fpcr
14377
14378        or.l            %d1,USER_FPSR(%a6)      # save INEX,N
14379
     # fp0 is pushed only to read its {sign,exponent} word; the 12 bytes are
     # popped again immediately, fp0 itself is left unchanged.
14380        fmovm.x         &0x01,-(%sp)            # save result to stack
14381        mov.w           (%sp),%d1               # fetch new exponent
14382        add.l           &0xc,%sp                # clear result
14383        andi.l          &0x7fff,%d1             # strip sign
14384        sub.l           %d0,%d1                 # subtract scale factor (d0) from exp
14385        cmp.l           %d1,&0x7fff             # did divide overflow?
14386        blt.b           fsgldiv_normal_exit     # no; exponent still in range
14387
14388fsgldiv_ovfl_tst:
14389        or.w            &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
14390
14391        mov.b           FPCR_ENABLE(%a6),%d1
14392        andi.b          &0x13,%d1               # is OVFL or INEX enabled?
14393        bne.b           fsgldiv_ovfl_ena        # yes
14394
     # Build the default overflow result. d1 = sign parameter (from the N bit
     # in FPSR_CC), d0 = L_SCR3 with only bits 0x30 of the low byte kept.
     # NOTE: d0 (the scale factor) is overwritten here, so fsgldiv_ovfl_ena
     # must use the scale factor before branching back to this label.
     # After ovf_res returns, d0 is ORed into FPSR_CC and fp0 is loaded from
     # the address left in a0.
14395fsgldiv_ovfl_dis:
14396        btst            &neg_bit,FPSR_CC(%a6)   # is result negative
14397        sne             %d1                     # set sign param accordingly
14398        mov.l           L_SCR3(%a6),%d0         # pass prec:rnd
14399        andi.b          &0x30,%d0               # kill precision
14400        bsr.l           ovf_res                 # calculate default result
14401        or.b            %d0,FPSR_CC(%a6)        # set INF if applicable
14402        fmovm.x         (%a0),&0x80             # return default result in fp0
14403        rts
14404
     # OVFL or INEX is enabled: return an EXOP in fp1 built from the quotient
     # in fp0 with exponent ((exponent - d0) - 0x6000) & 0x7fff, sign
     # preserved; then join fsgldiv_ovfl_dis for the default result in fp0.
14405fsgldiv_ovfl_ena:
14406        fmovm.x         &0x80,FP_SCR0(%a6)      # move result to stack
14407
14408        mov.l           %d2,-(%sp)              # save d2
14409        mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
14410        mov.l           %d1,%d2                 # make a copy
14411        andi.l          &0x7fff,%d1             # strip sign
14412        andi.w          &0x8000,%d2             # keep old sign
14413        sub.l           %d0,%d1                 # subtract scale factor (d0) from exp
14414        subi.l          &0x6000,%d1             # subtract new bias
14415        andi.w          &0x7fff,%d1             # clear ms bit
14416        or.w            %d2,%d1                 # concat old sign,new exp
14417        mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
14418        mov.l           (%sp)+,%d2              # restore d2
14419        fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
14420        bra.b           fsgldiv_ovfl_dis
14421
14422fsgldiv_unfl:
     # Underflow path. Sets the UNFL exception bit, then performs the divide
     # with FPCR = rz_mode*0x10 (not the caller's L_SCR3) and merges the FPSR
     # into USER_FPSR. If UNFL or INEX is enabled, fsgldiv_unfl_ena builds an
     # EXOP first; either way fsgldiv_unfl_dis produces the default result.
14423        bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
14424
14425        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
14426
14427        fmov.l          &rz_mode*0x10,%fpcr     # set FPCR
14428        fmov.l          &0x0,%fpsr              # clear FPSR
14429
14430        fsgldiv.x       FP_SCR0(%a6),%fp0       # execute sgl divide
14431
14432        fmov.l          %fpsr,%d1               # save status
14433        fmov.l          &0x0,%fpcr              # clear FPCR
14434
14435        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
14436
14437        mov.b           FPCR_ENABLE(%a6),%d1
14438        andi.b          &0x0b,%d1               # is UNFL or INEX enabled?
14439        bne.b           fsgldiv_unfl_ena        # yes
14440
     # Default underflow result: store fp0 in FP_SCR0, call unf_res4 with
     # a0 = &FP_SCR0 and d1 = L_SCR3, OR the returned d0 into FPSR_CC and
     # reload fp0 from FP_SCR0.
14441fsgldiv_unfl_dis:
14442        fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
14443
14444        lea             FP_SCR0(%a6),%a0        # pass: result addr
14445        mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
14446        bsr.l           unf_res4                # calculate default result
14447        or.b            %d0,FPSR_CC(%a6)        # 'Z' bit may have been set
14448        fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
14449        rts
14450
14451#
14452# UNFL is enabled.
14453#
     # Recompute the quotient into fp1 using L_SCR3 as FPCR (the FPSR of this
     # second divide is not saved), rewrite its exponent as
     # ((exponent - d0) + 0x6000) & 0x7fff with the sign preserved, and return
     # it as the EXOP in fp1. FP_SCR0 is overwritten only after it has been
     # read as the source operand; fp0 still holds the quotient from
     # fsgldiv_unfl for fsgldiv_unfl_dis.
14454fsgldiv_unfl_ena:
14455        fmovm.x         FP_SCR1(%a6),&0x40      # load dst op
14456
14457        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
14458        fmov.l          &0x0,%fpsr              # clear FPSR
14459
14460        fsgldiv.x       FP_SCR0(%a6),%fp1       # execute sgl divide
14461
14462        fmov.l          &0x0,%fpcr              # clear FPCR
14463
14464        fmovm.x         &0x40,FP_SCR0(%a6)      # save result to stack
14465        mov.l           %d2,-(%sp)              # save d2
14466        mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
14467        mov.l           %d1,%d2                 # make a copy
14468        andi.l          &0x7fff,%d1             # strip sign
14469        andi.w          &0x8000,%d2             # keep old sign
14470        sub.l           %d0,%d1                 # subtract scale factor (d0) from exp
14471        addi.l          &0x6000,%d1             # add bias
14472        andi.w          &0x7fff,%d1             # clear top bit
14473        or.w            %d2,%d1                 # concat old sign, new exp
14474        mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
14475        mov.l           (%sp)+,%d2              # restore d2
14476        fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
14477        bra.b           fsgldiv_unfl_dis
14478
14479#
14480# the divide operation MAY underflow:
14481#
14482fsgldiv_may_unfl:
     # Borderline case: the scale factor equals the underflow threshold.
     # Do the divide, merge its FPSR into USER_FPSR, then classify by
     # |quotient| versus 1:
     #   > 1  -> no underflow, take fsgldiv_normal_exit
     #   < 1  -> underflow, go to fsgldiv_unfl
     #   == 1 -> undecided; redo the divide in round-to-zero (below)
14483        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
14484
14485        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
14486        fmov.l          &0x0,%fpsr              # clear FPSR
14487
14488        fsgldiv.x       FP_SCR0(%a6),%fp0       # execute sgl divide
14489
14490        fmov.l          %fpsr,%d1               # save status
14491        fmov.l          &0x0,%fpcr              # clear FPCR
14492
14493        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
14494
14495        fabs.x          %fp0,%fp1               # fp1 = |result| (fp0 left intact)
14496        fcmp.b          %fp1,&0x1               # compare |result| with 1.b
14497        fbgt.w          fsgldiv_normal_exit     # |result| > 1; no underflow occurred
14498        fblt.w          fsgldiv_unfl            # |result| < 1; underflow occurred
14499
14500#
14501# we still don't know if underflow occurred. result is ~ equal to 1. but,
14502# we don't know if the result was an underflow that rounded up to a 1
14503# or a normalized number that rounded down to a 1. so, redo the entire
14504# operation using RZ as the rounding mode to see what the pre-rounded
14505# result is. this case should be relatively rare.
14506#
     # The redo is done in fp1 so that fp0 keeps the quotient computed above,
     # which fsgldiv_normal_exit returns. FPCR for the redo is just
     # rz_mode*0x10; no bits from L_SCR3 are carried over.
14507        fmovm.x         FP_SCR1(%a6),&0x40      # load dst op into %fp1
14508
14509        clr.l           %d1                     # clear scratch register
14510        ori.b           &rz_mode*0x10,%d1       # force RZ rnd mode
14511
14512        fmov.l          %d1,%fpcr               # set FPCR
14513        fmov.l          &0x0,%fpsr              # clear FPSR
14514
14515        fsgldiv.x       FP_SCR0(%a6),%fp1       # execute sgl divide
14516
14517        fmov.l          &0x0,%fpcr              # clear FPCR
14518        fabs.x          %fp1                    # make absolute value
14519        fcmp.b          %fp1,&0x1               # is |result| < 1.b?
14520        fbge.w          fsgldiv_normal_exit     # no; no underflow occurred
14521        bra.w           fsgldiv_unfl            # yes; underflow occurred
14522
14523############################################################################
14524
14525#
14526# Divide: inputs are not both normalized; what are they?
14527#
14528fsgldiv_not_norm:
     # Dispatch on d1 = (DTAG << 3) | STAG as built at fsgldiv: fetch the
     # 16-bit entry at tbl_fsgldiv_op + d1*2 and jump to tbl_fsgldiv_op + that
     # offset. The table has 6 groups (dst type) of 8 entries (src type),
     # 48 words in all; entries are annotated "dst / src". The last two slots
     # of each group hold offset 0.
14529        mov.w           (tbl_fsgldiv_op.b,%pc,%d1.w*2),%d1
14530        jmp             (tbl_fsgldiv_op.b,%pc,%d1.w*1)
14531
14532        swbeg           &48
14533tbl_fsgldiv_op:
14534        short           fsgldiv_norm            - tbl_fsgldiv_op # NORM / NORM
14535        short           fsgldiv_inf_load        - tbl_fsgldiv_op # NORM / ZERO
14536        short           fsgldiv_zero_load       - tbl_fsgldiv_op # NORM / INF
14537        short           fsgldiv_res_qnan        - tbl_fsgldiv_op # NORM / QNAN
14538        short           fsgldiv_norm            - tbl_fsgldiv_op # NORM / DENORM
14539        short           fsgldiv_res_snan        - tbl_fsgldiv_op # NORM / SNAN
14540        short           tbl_fsgldiv_op          - tbl_fsgldiv_op #
14541        short           tbl_fsgldiv_op          - tbl_fsgldiv_op #
14542
14543        short           fsgldiv_zero_load       - tbl_fsgldiv_op # ZERO / NORM
14544        short           fsgldiv_res_operr       - tbl_fsgldiv_op # ZERO / ZERO
14545        short           fsgldiv_zero_load       - tbl_fsgldiv_op # ZERO / INF
14546        short           fsgldiv_res_qnan        - tbl_fsgldiv_op # ZERO / QNAN
14547        short           fsgldiv_zero_load       - tbl_fsgldiv_op # ZERO / DENORM
14548        short           fsgldiv_res_snan        - tbl_fsgldiv_op # ZERO / SNAN
14549        short           tbl_fsgldiv_op          - tbl_fsgldiv_op #
14550        short           tbl_fsgldiv_op          - tbl_fsgldiv_op #
14551
14552        short           fsgldiv_inf_dst         - tbl_fsgldiv_op # INF / NORM
14553        short           fsgldiv_inf_dst         - tbl_fsgldiv_op # INF / ZERO
14554        short           fsgldiv_res_operr       - tbl_fsgldiv_op # INF / INF
14555        short           fsgldiv_res_qnan        - tbl_fsgldiv_op # INF / QNAN
14556        short           fsgldiv_inf_dst         - tbl_fsgldiv_op # INF / DENORM
14557        short           fsgldiv_res_snan        - tbl_fsgldiv_op # INF / SNAN
14558        short           tbl_fsgldiv_op          - tbl_fsgldiv_op #
14559        short           tbl_fsgldiv_op          - tbl_fsgldiv_op #
14560
14561        short           fsgldiv_res_qnan        - tbl_fsgldiv_op # QNAN / NORM
14562        short           fsgldiv_res_qnan        - tbl_fsgldiv_op # QNAN / ZERO
14563        short           fsgldiv_res_qnan        - tbl_fsgldiv_op # QNAN / INF
14564        short           fsgldiv_res_qnan        - tbl_fsgldiv_op # QNAN / QNAN
14565        short           fsgldiv_res_qnan        - tbl_fsgldiv_op # QNAN / DENORM
14566        short           fsgldiv_res_snan        - tbl_fsgldiv_op # QNAN / SNAN
14567        short           tbl_fsgldiv_op          - tbl_fsgldiv_op #
14568        short           tbl_fsgldiv_op          - tbl_fsgldiv_op #
14569
14570        short           fsgldiv_norm            - tbl_fsgldiv_op # DENORM / NORM
14571        short           fsgldiv_inf_load        - tbl_fsgldiv_op # DENORM / ZERO
14572        short           fsgldiv_zero_load       - tbl_fsgldiv_op # DENORM / INF
14573        short           fsgldiv_res_qnan        - tbl_fsgldiv_op # DENORM / QNAN
14574        short           fsgldiv_norm            - tbl_fsgldiv_op # DENORM / DENORM
14575        short           fsgldiv_res_snan        - tbl_fsgldiv_op # DENORM / SNAN
14576        short           tbl_fsgldiv_op          - tbl_fsgldiv_op #
14577        short           tbl_fsgldiv_op          - tbl_fsgldiv_op #
14578
14579        short           fsgldiv_res_snan        - tbl_fsgldiv_op # SNAN / NORM
14580        short           fsgldiv_res_snan        - tbl_fsgldiv_op # SNAN / ZERO
14581        short           fsgldiv_res_snan        - tbl_fsgldiv_op # SNAN / INF
14582        short           fsgldiv_res_snan        - tbl_fsgldiv_op # SNAN / QNAN
14583        short           fsgldiv_res_snan        - tbl_fsgldiv_op # SNAN / DENORM
14584        short           fsgldiv_res_snan        - tbl_fsgldiv_op # SNAN / SNAN
14585        short           tbl_fsgldiv_op          - tbl_fsgldiv_op #
14586        short           tbl_fsgldiv_op          - tbl_fsgldiv_op #
14587
     # Local trampolines for tbl_fsgldiv_op: the table stores 16-bit offsets
     # relative to itself, and each stub forwards with a long branch to the
     # shared handler (the zero/inf cases reuse the fdiv_* routines).
14588fsgldiv_res_qnan:
14589        bra.l           res_qnan
14590fsgldiv_res_snan:
14591        bra.l           res_snan
14592fsgldiv_res_operr:
14593        bra.l           res_operr
14594fsgldiv_inf_load:
14595        bra.l           fdiv_inf_load
14596fsgldiv_zero_load:
14597        bra.l           fdiv_zero_load
14598fsgldiv_inf_dst:
14599        bra.l           fdiv_inf_dst
14600
14601#########################################################################
14602# XDEF **************************************************************** #
14603#       fadd(): emulates the fadd instruction                           #
14604#       fsadd(): emulates the fsadd instruction                         #
14605#       fdadd(): emulates the fdadd instruction                         #
14606#                                                                       #
14607# XREF **************************************************************** #
14608#       addsub_scaler2() - scale the operands so they won't take exc    #
14609#       ovf_res() - return default overflow result                      #
14610#       unf_res() - return default underflow result                     #
14611#       res_qnan() - set QNAN result                                    #
14612#       res_snan() - set SNAN result                                    #
14613#       res_operr() - set OPERR result                                  #
14614#       scale_to_zero_src() - set src operand exponent equal to zero    #
14615#       scale_to_zero_dst() - set dst operand exponent equal to zero    #
14616#                                                                       #
14617# INPUT *************************************************************** #
14618#       a0 = pointer to extended precision source operand               #
14619#       a1 = pointer to extended precision destination operand          #
14620#                                                                       #
14621# OUTPUT ************************************************************** #
14622#       fp0 = result                                                    #
14623#       fp1 = EXOP (if exception occurred)                              #
14624#                                                                       #
14625# ALGORITHM *********************************************************** #
14626#       Handle NANs, infinities, and zeroes as special cases. Divide    #
14627# norms into extended, single, and double precision.                    #
14628#       Do addition after scaling exponents such that exception won't   #
14629# occur. Then, check result exponent to see if exception would have     #
14630# occurred. If so, return default result and maybe EXOP. Else, insert   #
14631# the correct result exponent and return. Set FPSR bits as appropriate. #
14632#                                                                       #
14633#########################################################################
14634
# Entry points fsadd/fdadd/fadd and the main add path.
#   d0    = rounding precision/mode on entry; saved in L_SCR3
#   a0/a1 = src/dst operand pointers (see the routine header)
# fsadd and fdadd keep only the rounding-mode bits of d0 (mask 0x30),
# insert their fixed precision and continue in fadd (fdadd falls through).
14635        global          fsadd
14636fsadd:
14637        andi.b          &0x30,%d0               # clear rnd prec
14638        ori.b           &s_mode*0x10,%d0        # insert sgl prec
14639        bra.b           fadd
14640
14641        global          fdadd
14642fdadd:
14643        andi.b          &0x30,%d0               # clear rnd prec
14644        ori.b           &d_mode*0x10,%d0        # insert dbl prec
14645
14646        global          fadd
14647fadd:
14648        mov.l           %d0,L_SCR3(%a6)         # store rnd info
14649
# d1 = (DTAG << 3) | STAG; this is the index into tbl_fadd_op and is zero
# only for the NORM + NORM entry.
14650        clr.w           %d1
14651        mov.b           DTAG(%a6),%d1
14652        lsl.b           &0x3,%d1
14653        or.b            STAG(%a6),%d1           # combine src tags
14654
14655        bne.w           fadd_not_norm           # optimize on non-norm input
14656
14657#
14658# ADD: norms and denorms
14659#
14660fadd_norm:
14661        bsr.l           addsub_scaler2          # scale exponents
14662
# fadd_zero_entry is also reached from fadd_zero_dst/fadd_zero_src.
# d0 is used below as the scale factor; presumably it is returned by
# addsub_scaler2 / scale_to_zero_src / scale_to_zero_dst -- TODO confirm.
14663fadd_zero_entry:
14664        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
14665
14666        fmov.l          &0x0,%fpsr              # clear FPSR
14667        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
14668
14669        fadd.x          FP_SCR0(%a6),%fp0       # execute add
14670
14671        fmov.l          &0x0,%fpcr              # clear FPCR
14672        fmov.l          %fpsr,%d1               # fetch INEX2,N,Z
14673
14674        or.l            %d1,USER_FPSR(%a6)      # save exc and ccode bits
14675
14676        fbeq.w          fadd_zero_exit          # if result is zero, end now
14677
# From here on d2 is scratch (its old value is on the stack) and the 12-byte
# extended result sits on top of the stack. Every exit path must pop both.
14678        mov.l           %d2,-(%sp)              # save d2
14679
14680        fmovm.x         &0x01,-(%sp)            # save result to stack
14681
14682        mov.w           2+L_SCR3(%a6),%d1
14683        lsr.b           &0x6,%d1                # d1 = prec index into the tables (ext,sgl,dbl)
14684
14685        mov.w           (%sp),%d2               # fetch new sign, exp
14686        andi.l          &0x7fff,%d2             # strip sign
14687        sub.l           %d0,%d2                 # add scale factor
14688
14689        cmp.l           %d2,(tbl_fadd_ovfl.b,%pc,%d1.w*4) # is it an overflow?
14690        bge.b           fadd_ovfl               # yes
14691
14692        cmp.l           %d2,(tbl_fadd_unfl.b,%pc,%d1.w*4) # is it an underflow?
14693        blt.w           fadd_unfl               # yes
14694        beq.w           fadd_may_unfl           # maybe; go find out
14695
# No exception: patch the unscaled exponent (d2) into the saved result,
# keeping its sign, and return it in fp0.
14696fadd_normal:
14697        mov.w           (%sp),%d1
14698        andi.w          &0x8000,%d1             # keep sign
14699        or.w            %d2,%d1                 # concat sign,new exp
14700        mov.w           %d1,(%sp)               # insert new exponent
14701
14702        fmovm.x         (%sp)+,&0x80            # return result in fp0
14703
14704        mov.l           (%sp)+,%d2              # restore d2
14705        rts
14706
# Zero result: fp0 already holds the result of the add, so nothing is loaded
# here (the explicit load below is left commented out).
14707fadd_zero_exit:
14708#       fmov.s          &0x00000000,%fp0        # return zero in fp0
14709        rts
14710
# Exponent thresholds used by the fadd main path, one long per rounding
# precision in the order ext, sgl, dbl. Both tables are addressed
# PC-relative with a byte displacement, so they must stay close to the
# compare instructions that use them.
# A result exponent >= the tbl_fadd_ovfl entry is treated as an overflow.
14711tbl_fadd_ovfl:
14712        long            0x7fff                  # ext ovfl
14713        long            0x407f                  # sgl ovfl
14714        long            0x43ff                  # dbl ovfl
14715
# A result exponent < the tbl_fadd_unfl entry is an underflow; an exponent
# equal to the entry is checked further in fadd_may_unfl.
14716tbl_fadd_unfl:
14717        long            0x0000                  # ext unfl
14718        long            0x3f81                  # sgl unfl
14719        long            0x3c01                  # dbl unfl
14720
# fadd overflow handling.
# On entry the stack holds the 12-byte rounded result on top of the saved
# d2; d2 = result exponent with the scale factor removed; d0 = scale factor.
# Sets OVFL/AOVFL/AINEX in USER_FPSR, returns the default overflow result
# from ovf_res() in fp0 and, if OVFL or INEX is enabled, the EXOP in fp1.
14721fadd_ovfl:
14722        or.l            &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
14723
14724        mov.b           FPCR_ENABLE(%a6),%d1
14725        andi.b          &0x13,%d1               # is OVFL or INEX enabled?
14726        bne.b           fadd_ovfl_ena           # yes
14727
14728        add.l           &0xc,%sp                # discard saved result (12 bytes)
14729fadd_ovfl_dis:
14730        btst            &neg_bit,FPSR_CC(%a6)   # is result negative?
14731        sne             %d1                     # set sign param accordingly
14732        mov.l           L_SCR3(%a6),%d0         # pass prec:rnd
14733        bsr.l           ovf_res                 # calculate default result
14734        or.b            %d0,FPSR_CC(%a6)        # set INF,N if applicable
14735        fmovm.x         (%a0),&0x80             # return default result in fp0
14736        mov.l           (%sp)+,%d2              # restore d2
14737        rts
14738
# The prec/mode byte is the LOW byte of the L_SCR3 long (the rest of fadd
# reads it as the word at 2+L_SCR3 or the byte at 3+L_SCR3). A byte read at
# L_SCR3+0 fetches the most significant byte on this big-endian CPU, so the
# precision test would not see the precision bits. Load the whole long and
# test its low byte instead.
14739fadd_ovfl_ena:
14740        mov.l           L_SCR3(%a6),%d1
14741        andi.b          &0xc0,%d1               # is precision extended?
14742        bne.b           fadd_ovfl_ena_sd        # no; prec = sgl or dbl
14743
# Build the EXOP in place on the stack: keep the sign, rebias the unscaled
# exponent by -0x6000, and pop the value into fp1.
14744fadd_ovfl_ena_cont:
14745        mov.w           (%sp),%d1
14746        andi.w          &0x8000,%d1             # keep sign
14747        subi.l          &0x6000,%d2             # add extra bias
14748        andi.w          &0x7fff,%d2
14749        or.w            %d2,%d1                 # concat sign,new exp
14750        mov.w           %d1,(%sp)               # insert new exponent
14751
14752        fmovm.x         (%sp)+,&0x40            # return EXOP in fp1
14753        bra.b           fadd_ovfl_dis
14754
# Single/double precision: redo the add with only the rounding mode set in
# the FPCR (i.e. extended precision) and replace the saved result with it.
14755fadd_ovfl_ena_sd:
14756        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
14757
14758        mov.l           L_SCR3(%a6),%d1
14759        andi.b          &0x30,%d1               # keep rnd mode
14760        fmov.l          %d1,%fpcr               # set FPCR
14761
14762        fadd.x          FP_SCR0(%a6),%fp0       # execute add
14763
14764        fmov.l          &0x0,%fpcr              # clear FPCR
14765
14766        add.l           &0xc,%sp                # drop the sgl/dbl-rounded result
14767        fmovm.x         &0x01,-(%sp)            # push the recomputed result
# d2 was derived from the sgl/dbl-rounded result. Re-derive it from the
# recomputed value so a rounding carry in the first add cannot leave a
# stale exponent in the EXOP (d0 still holds the scale factor).
             mov.w           (%sp),%d2               # fetch new sign, exp
             andi.l          &0x7fff,%d2             # strip sign
             sub.l           %d0,%d2                 # add scale factor
14768        bra.b           fadd_ovfl_ena_cont
14769
# fadd underflow handling.
# On entry the stack holds the 12-byte rounded result on top of the saved
# d2; d0 = scale factor. The add is redone with round-to-zero, the default
# underflow result from unf_res() is returned in fp0 and, if UNFL or INEX is
# enabled, the EXOP is returned in fp1.
14770fadd_unfl:
14771        bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
14772
14773        add.l           &0xc,%sp                # discard saved result (12 bytes)
14774
14775        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
14776
14777        fmov.l          &rz_mode*0x10,%fpcr     # set FPCR
14778        fmov.l          &0x0,%fpsr              # clear FPSR
14779
14780        fadd.x          FP_SCR0(%a6),%fp0       # execute add
14781
14782        fmov.l          &0x0,%fpcr              # clear FPCR
14783        fmov.l          %fpsr,%d1               # save status
14784
14785        or.l            %d1,USER_FPSR(%a6)      # save INEX,N
14786
14787        mov.b           FPCR_ENABLE(%a6),%d1
14788        andi.b          &0x0b,%d1               # is UNFL or INEX enabled?
14789        bne.b           fadd_unfl_ena           # yes
14790
# Store the RZ result to FP_SCR0 and let unf_res() turn it into the default
# underflow result in place; unf_res() returns ccode bits in d0.
14791fadd_unfl_dis:
14792        fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
14793
14794        lea             FP_SCR0(%a6),%a0        # pass: result addr
14795        mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
14796        bsr.l           unf_res                 # calculate default result
14797        or.b            %d0,FPSR_CC(%a6)        # 'Z' bit may have been set
14798        fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
14799        mov.l           (%sp)+,%d2              # restore d2
14800        rts
14801
# Exception enabled: compute the EXOP in fp1. For extended precision the
# user's FPCR setting is used; for sgl/dbl only the rounding mode is kept.
14802fadd_unfl_ena:
14803        fmovm.x         FP_SCR1(%a6),&0x40      # load dst op
14804
14805        mov.l           L_SCR3(%a6),%d1
14806        andi.b          &0xc0,%d1               # is precision extended?
14807        bne.b           fadd_unfl_ena_sd        # no; sgl or dbl
14808
14809        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
14810
14811fadd_unfl_ena_cont:
14812        fmov.l          &0x0,%fpsr              # clear FPSR
14813
14814        fadd.x          FP_SCR0(%a6),%fp1       # execute add
14815
14816        fmov.l          &0x0,%fpcr              # clear FPCR
14817
# Rebuild the exponent: remove the scale factor (d0), add the 0x6000 bias,
# and re-attach the original sign.
14818        fmovm.x         &0x40,FP_SCR0(%a6)      # save result to stack
14819        mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
14820        mov.l           %d1,%d2                 # make a copy
14821        andi.l          &0x7fff,%d1             # strip sign
14822        andi.w          &0x8000,%d2             # keep old sign
14823        sub.l           %d0,%d1                 # add scale factor
14824        addi.l          &0x6000,%d1             # add new bias
14825        andi.w          &0x7fff,%d1             # clear top bit
14826        or.w            %d2,%d1                 # concat sign,new exp
14827        mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
14828        fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
14829        bra.w           fadd_unfl_dis
14830
14831fadd_unfl_ena_sd:
14832        mov.l           L_SCR3(%a6),%d1
14833        andi.b          &0x30,%d1               # use only rnd mode
14834        fmov.l          %d1,%fpcr               # set FPCR
14835
14836        bra.b           fadd_unfl_ena_cont
14837
14838#
14839# result is equal to the smallest normalized number in the selected precision.
14840# if the precision is extended, this result could not have come from an
14841# underflow that rounded up.
14842#
14843fadd_may_unfl:
14844        mov.l           L_SCR3(%a6),%d1
14845        andi.b          &0xc0,%d1               # is precision extended?
14846        beq.w           fadd_normal             # yes; no underflow occurred
14847
# the saved result is on top of the stack: 0x4(%sp) and 0x8(%sp) are the
# high and low mantissa longwords.
14848        mov.l           0x4(%sp),%d1            # extract hi(man)
14849        cmpi.l          %d1,&0x80000000         # is hi(man) = 0x80000000?
14850        bne.w           fadd_normal             # no; no underflow occurred
14851
14852        tst.l           0x8(%sp)                # is lo(man) = 0x0?
14853        bne.w           fadd_normal             # no; no underflow occurred
14854
14855        btst            &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
14856        beq.w           fadd_normal             # no; no underflow occurred
14857
14858#
14859# ok, so now the result has an exponent equal to the smallest normalized
14860# exponent for the selected precision. also, the mantissa is equal to
14861# 0x8000000000000000 and this mantissa is the result of rounding non-zero
14862# g,r,s.
14863# now, we must determine whether the pre-rounded result was an underflow
14864# rounded "up" or a normalized number rounded "down".
14865# so, we do this by re-executing the add using RZ as the rounding mode and
14866# seeing if the new result is smaller or equal to the current result.
14867#
14868        fmovm.x         FP_SCR1(%a6),&0x40      # load dst op into fp1
14869
14870        mov.l           L_SCR3(%a6),%d1
14871        andi.b          &0xc0,%d1               # keep rnd prec
14872        ori.b           &rz_mode*0x10,%d1       # insert rnd mode
14873        fmov.l          %d1,%fpcr               # set FPCR
14874        fmov.l          &0x0,%fpsr              # clear FPSR
14875
14876        fadd.x          FP_SCR0(%a6),%fp1       # execute add
14877
14878        fmov.l          &0x0,%fpcr              # clear FPCR
14879
# fp0 still holds the first (user-rounded) result; fp1 holds the RZ result.
14880        fabs.x          %fp0                    # compare absolute values
14881        fabs.x          %fp1
14882        fcmp.x          %fp0,%fp1               # is first result > second?
14883
14884        fbgt.w          fadd_unfl               # yes; it's an underflow
14885        bra.w           fadd_normal             # no; it's not an underflow
14886
14887##########################################################################
14888
14889#
14890# Add: inputs are not both normalized; what are they?
14891#
# Dispatch on the combined operand tags: d1 = (DTAG << 3) | STAG indexes
# tbl_fadd_op, whose entries are 16-bit offsets relative to the table.
# Entry comments read "DST + SRC". Judging by the entry order, the tag
# values are 0=NORM, 1=ZERO, 2=INF, 3=QNAN, 4=DENORM, 5=SNAN; slots 6 and 7
# of each row are placeholders that point back at the table itself.
14892fadd_not_norm:
14893        mov.w           (tbl_fadd_op.b,%pc,%d1.w*2),%d1
14894        jmp             (tbl_fadd_op.b,%pc,%d1.w*1)
14895
14896        swbeg           &48
14897tbl_fadd_op:
14898        short           fadd_norm       - tbl_fadd_op # NORM + NORM
14899        short           fadd_zero_src   - tbl_fadd_op # NORM + ZERO
14900        short           fadd_inf_src    - tbl_fadd_op # NORM + INF
14901        short           fadd_res_qnan   - tbl_fadd_op # NORM + QNAN
14902        short           fadd_norm       - tbl_fadd_op # NORM + DENORM
14903        short           fadd_res_snan   - tbl_fadd_op # NORM + SNAN
14904        short           tbl_fadd_op     - tbl_fadd_op #
14905        short           tbl_fadd_op     - tbl_fadd_op #
14906
14907        short           fadd_zero_dst   - tbl_fadd_op # ZERO + NORM
14908        short           fadd_zero_2     - tbl_fadd_op # ZERO + ZERO
14909        short           fadd_inf_src    - tbl_fadd_op # ZERO + INF
14910        short           fadd_res_qnan   - tbl_fadd_op # ZERO + QNAN
14911        short           fadd_zero_dst   - tbl_fadd_op # ZERO + DENORM
14912        short           fadd_res_snan   - tbl_fadd_op # ZERO + SNAN
14913        short           tbl_fadd_op     - tbl_fadd_op #
14914        short           tbl_fadd_op     - tbl_fadd_op #
14915
14916        short           fadd_inf_dst    - tbl_fadd_op # INF + NORM
14917        short           fadd_inf_dst    - tbl_fadd_op # INF + ZERO
14918        short           fadd_inf_2      - tbl_fadd_op # INF + INF
14919        short           fadd_res_qnan   - tbl_fadd_op # INF + QNAN
14920        short           fadd_inf_dst    - tbl_fadd_op # INF + DENORM
14921        short           fadd_res_snan   - tbl_fadd_op # INF + SNAN
14922        short           tbl_fadd_op     - tbl_fadd_op #
14923        short           tbl_fadd_op     - tbl_fadd_op #
14924
14925        short           fadd_res_qnan   - tbl_fadd_op # QNAN + NORM
14926        short           fadd_res_qnan   - tbl_fadd_op # QNAN + ZERO
14927        short           fadd_res_qnan   - tbl_fadd_op # QNAN + INF
14928        short           fadd_res_qnan   - tbl_fadd_op # QNAN + QNAN
14929        short           fadd_res_qnan   - tbl_fadd_op # QNAN + DENORM
14930        short           fadd_res_snan   - tbl_fadd_op # QNAN + SNAN
14931        short           tbl_fadd_op     - tbl_fadd_op #
14932        short           tbl_fadd_op     - tbl_fadd_op #
14933
14934        short           fadd_norm       - tbl_fadd_op # DENORM + NORM
14935        short           fadd_zero_src   - tbl_fadd_op # DENORM + ZERO
14936        short           fadd_inf_src    - tbl_fadd_op # DENORM + INF
14937        short           fadd_res_qnan   - tbl_fadd_op # DENORM + QNAN
14938        short           fadd_norm       - tbl_fadd_op # DENORM + DENORM
14939        short           fadd_res_snan   - tbl_fadd_op # DENORM + SNAN
14940        short           tbl_fadd_op     - tbl_fadd_op #
14941        short           tbl_fadd_op     - tbl_fadd_op #
14942
14943        short           fadd_res_snan   - tbl_fadd_op # SNAN + NORM
14944        short           fadd_res_snan   - tbl_fadd_op # SNAN + ZERO
14945        short           fadd_res_snan   - tbl_fadd_op # SNAN + INF
14946        short           fadd_res_snan   - tbl_fadd_op # SNAN + QNAN
14947        short           fadd_res_snan   - tbl_fadd_op # SNAN + DENORM
14948        short           fadd_res_snan   - tbl_fadd_op # SNAN + SNAN
14949        short           tbl_fadd_op     - tbl_fadd_op #
14950        short           tbl_fadd_op     - tbl_fadd_op #
14951
# Long-branch stubs referenced from tbl_fadd_op: forward NAN operands to
# the shared res_qnan/res_snan handlers.
14952fadd_res_qnan:
14953        bra.l           res_qnan
14954fadd_res_snan:
14955        bra.l           res_snan
14956
14957#
14958# both operands are ZEROes
14959#
# Returns a signed zero in fp0 and writes the matching condition codes
# (Z, or NEG+Z) to FPSR_CC. Only the sign bits (bit 7 of the first operand
# byte) of src and dst are examined.
14960fadd_zero_2:
14961        mov.b           SRC_EX(%a0),%d0         # are the signs opposite
14962        mov.b           DST_EX(%a1),%d1
14963        eor.b           %d0,%d1                 # bit 7 set if signs differ
14964        bmi.w           fadd_zero_2_chk_rm      # weed out (-ZERO)+(+ZERO)
14965
14966# the signs are the same. so determine whether they are positive or negative
14967# and return the appropriately signed zero.
14968        tst.b           %d0                     # are ZEROes positive or negative?
14969        bmi.b           fadd_zero_rm            # negative
14970        fmov.s          &0x00000000,%fp0        # return +ZERO
14971        mov.b           &z_bmask,FPSR_CC(%a6)   # set Z
14972        rts
14973
14974#
14975# the ZEROes have opposite signs:
14976# - Therefore, we return +ZERO if the rounding modes are RN,RZ, or RP.
14977# - -ZERO is returned in the case of RM.
14978#
14979fadd_zero_2_chk_rm:
14980        mov.b           3+L_SCR3(%a6),%d1       # low byte of L_SCR3 = prec/mode
14981        andi.b          &0x30,%d1               # extract rnd mode
14982        cmpi.b          %d1,&rm_mode*0x10       # is rnd mode == RM?
14983        beq.b           fadd_zero_rm            # yes
14984        fmov.s          &0x00000000,%fp0        # return +ZERO
14985        mov.b           &z_bmask,FPSR_CC(%a6)   # set Z
14986        rts
14987
# shared exit for both "-ZERO + -ZERO" and "opposite signs with RM".
14988fadd_zero_rm:
14989        fmov.s          &0x80000000,%fp0        # return -ZERO
14990        mov.b           &neg_bmask+z_bmask,FPSR_CC(%a6) # set NEG/Z
14991        rts
14992
14993#
14994# one operand is a ZERO and the other is a DENORM or NORM. scale
14995# the DENORM or NORM and jump to the regular fadd routine.
14996#
# dst is the ZERO: copy src into FP_SCR0, scale it, and use an all-zero
# FP_SCR1 as the dst operand.
# NOTE(review): the zeroed operand is always written as +0 (the exponent
# word is cleared), regardless of the original ZERO's sign -- confirm this
# is intended.
14997fadd_zero_dst:
14998        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
14999        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
15000        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
15001        bsr.l           scale_to_zero_src       # scale the operand
15002        clr.w           FP_SCR1_EX(%a6)
15003        clr.l           FP_SCR1_HI(%a6)
15004        clr.l           FP_SCR1_LO(%a6)
15005        bra.w           fadd_zero_entry         # go execute fadd
15006
# src is the ZERO: copy dst into FP_SCR1, scale it, and use an all-zero
# FP_SCR0 as the src operand.
15007fadd_zero_src:
15008        mov.w           DST_EX(%a1),FP_SCR1_EX(%a6)
15009        mov.l           DST_HI(%a1),FP_SCR1_HI(%a6)
15010        mov.l           DST_LO(%a1),FP_SCR1_LO(%a6)
15011        bsr.l           scale_to_zero_dst       # scale the operand
15012        clr.w           FP_SCR0_EX(%a6)
15013        clr.l           FP_SCR0_HI(%a6)
15014        clr.l           FP_SCR0_LO(%a6)
15015        bra.w           fadd_zero_entry         # go execute fadd
15016
15017#
15018# both operands are INFs. an OPERR will result if the INFs have
15019# different signs. else, an INF of the same sign is returned
15020#
15021fadd_inf_2:
15022        mov.b           SRC_EX(%a0),%d0         # exclusive or the signs
15023        mov.b           DST_EX(%a1),%d1
15024        eor.b           %d1,%d0                 # bit 7 set if signs differ
15025        bmi.l           res_operr               # weed out (-INF)+(+INF)
15026
15027# ok, so it's not an OPERR. but, we do have to remember to return the
15028# src INF since that's where the 881/882 gets the j-bit from...
# (execution falls through into fadd_inf_src below)
15029
15030#
15031# operands are INF and one of {ZERO, INF, DENORM, NORM}
15032#
# the src operand is the INF: return it in fp0 and set INF (and NEG if
# its sign bit is set) in FPSR_CC.
15033fadd_inf_src:
15034        fmovm.x         SRC(%a0),&0x80          # return src INF
15035        tst.b           SRC_EX(%a0)             # is INF positive?
15036        bpl.b           fadd_inf_done           # yes; we're done
15037        mov.b           &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
15038        rts
15039
15040#
15041# operands are INF and one of {ZERO, INF, DENORM, NORM}
15042#
# the dst operand is the INF: return it in fp0 and set INF (and NEG if
# its sign bit is set) in FPSR_CC.
15043fadd_inf_dst:
15044        fmovm.x         DST(%a1),&0x80          # return dst INF
15045        tst.b           DST_EX(%a1)             # is INF positive?
15046        bpl.b           fadd_inf_done           # yes; we're done
15047        mov.b           &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
15048        rts
15049
# shared exit for a positive INF result.
15050fadd_inf_done:
15051        mov.b           &inf_bmask,FPSR_CC(%a6) # set INF
15052        rts
15053
15054#########################################################################
15055# XDEF **************************************************************** #
15056#       fsub(): emulates the fsub instruction                           #
15057#       fssub(): emulates the fssub instruction                         #
15058#       fdsub(): emulates the fdsub instruction                         #
15059#                                                                       #
15060# XREF **************************************************************** #
15061#       addsub_scaler2() - scale the operands so they won't take exc    #
15062#       ovf_res() - return default overflow result                      #
15063#       unf_res() - return default underflow result                     #
15064#       res_qnan() - set QNAN result                                    #
15065#       res_snan() - set SNAN result                                    #
15066#       res_operr() - set OPERR result                                  #
15067#       scale_to_zero_src() - set src operand exponent equal to zero    #
15068#       scale_to_zero_dst() - set dst operand exponent equal to zero    #
15069#                                                                       #
15070# INPUT *************************************************************** #
15071#       a0 = pointer to extended precision source operand               #
15072#       a1 = pointer to extended precision destination operand          #
15073#                                                                       #
15074# OUTPUT ************************************************************** #
15075#       fp0 = result                                                    #
15076#       fp1 = EXOP (if exception occurred)                              #
15077#                                                                       #
15078# ALGORITHM *********************************************************** #
15079#       Handle NANs, infinities, and zeroes as special cases. Divide    #
15080# norms into extended, single, and double precision.                    #
15081#       Do subtraction after scaling exponents such that exception won't#
15082# occur. Then, check result exponent to see if exception would have     #
15083# occurred. If so, return default result and maybe EXOP. Else, insert   #
15084# the correct result exponent and return. Set FPSR bits as appropriate. #
15085#                                                                       #
15086#########################################################################
15087
# Entry points fssub/fdsub/fsub and the main subtract path.
#   d0    = rounding precision/mode on entry; saved in L_SCR3
#   a0/a1 = src/dst operand pointers (see the routine header)
# fssub and fdsub keep only the rounding-mode bits of d0 (mask 0x30),
# insert their fixed precision and continue in fsub (fdsub falls through).
15088        global          fssub
15089fssub:
15090        andi.b          &0x30,%d0               # clear rnd prec
15091        ori.b           &s_mode*0x10,%d0        # insert sgl prec
15092        bra.b           fsub
15093
15094        global          fdsub
15095fdsub:
15096        andi.b          &0x30,%d0               # clear rnd prec
15097        ori.b           &d_mode*0x10,%d0        # insert dbl prec
15098
15099        global          fsub
15100fsub:
15101        mov.l           %d0,L_SCR3(%a6)         # store rnd info
15102
# d1 = (DTAG << 3) | STAG; zero falls through to fsub_norm, anything else
# is dispatched by fsub_not_norm.
15103        clr.w           %d1
15104        mov.b           DTAG(%a6),%d1
15105        lsl.b           &0x3,%d1
15106        or.b            STAG(%a6),%d1           # combine src tags
15107
15108        bne.w           fsub_not_norm           # optimize on non-norm input
15109
15110#
15111# SUB: norms and denorms
15112#
15113fsub_norm:
15114        bsr.l           addsub_scaler2          # scale exponents
15115
# d0 is used below as the scale factor; presumably it is returned by
# addsub_scaler2 -- TODO confirm.
15116fsub_zero_entry:
15117        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
15118
15119        fmov.l          &0x0,%fpsr              # clear FPSR
15120        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
15121
15122        fsub.x          FP_SCR0(%a6),%fp0       # execute subtract
15123
15124        fmov.l          &0x0,%fpcr              # clear FPCR
15125        fmov.l          %fpsr,%d1               # fetch INEX2, N, Z
15126
15127        or.l            %d1,USER_FPSR(%a6)      # save exc and ccode bits
15128
15129        fbeq.w          fsub_zero_exit          # if result zero, end now
15130
# From here on d2 is scratch (its old value is on the stack) and the 12-byte
# extended result sits on top of the stack. Every exit path must pop both.
15131        mov.l           %d2,-(%sp)              # save d2
15132
15133        fmovm.x         &0x01,-(%sp)            # save result to stack
15134
15135        mov.w           2+L_SCR3(%a6),%d1
15136        lsr.b           &0x6,%d1                # d1 = prec index into the tables (ext,sgl,dbl)
15137
15138        mov.w           (%sp),%d2               # fetch new exponent
15139        andi.l          &0x7fff,%d2             # strip sign
15140        sub.l           %d0,%d2                 # add scale factor
15141
15142        cmp.l           %d2,(tbl_fsub_ovfl.b,%pc,%d1.w*4) # is it an overflow?
15143        bge.b           fsub_ovfl               # yes
15144
15145        cmp.l           %d2,(tbl_fsub_unfl.b,%pc,%d1.w*4) # is it an underflow?
15146        blt.w           fsub_unfl               # yes
15147        beq.w           fsub_may_unfl           # maybe; go find out
15148
# No exception: patch the unscaled exponent (d2) into the saved result,
# keeping its sign, and return it in fp0.
15149fsub_normal:
15150        mov.w           (%sp),%d1
15151        andi.w          &0x8000,%d1             # keep sign
15152        or.w            %d2,%d1                 # concat sign,new exp
15153        mov.w           %d1,(%sp)               # insert new exponent
15154
15155        fmovm.x         (%sp)+,&0x80            # return result in fp0
15156
15157        mov.l           (%sp)+,%d2              # restore d2
15158        rts
15159
# Zero result: fp0 already holds the result of the subtract, so nothing is
# loaded here (the explicit load below is left commented out).
15160fsub_zero_exit:
15161#       fmov.s          &0x00000000,%fp0        # return zero in fp0
15162        rts
15163
# Exponent thresholds used by the fsub main path, one long per rounding
# precision in the order ext, sgl, dbl. Both tables are addressed
# PC-relative with a byte displacement, so they must stay close to the
# compare instructions that use them.
# A result exponent >= the tbl_fsub_ovfl entry is treated as an overflow.
15164tbl_fsub_ovfl:
15165        long            0x7fff                  # ext ovfl
15166        long            0x407f                  # sgl ovfl
15167        long            0x43ff                  # dbl ovfl
15168
# A result exponent < the tbl_fsub_unfl entry is an underflow; an exponent
# equal to the entry is checked further in fsub_may_unfl.
15169tbl_fsub_unfl:
15170        long            0x0000                  # ext unfl
15171        long            0x3f81                  # sgl unfl
15172        long            0x3c01                  # dbl unfl
15173
# fsub overflow handling.
# On entry the stack holds the 12-byte rounded result on top of the saved
# d2; d2 = result exponent with the scale factor removed; d0 = scale factor.
# Sets OVFL/AOVFL/AINEX in USER_FPSR, returns the default overflow result
# from ovf_res() in fp0 and, if OVFL or INEX is enabled, the EXOP in fp1.
15174fsub_ovfl:
15175        or.l            &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
15176
15177        mov.b           FPCR_ENABLE(%a6),%d1
15178        andi.b          &0x13,%d1               # is OVFL or INEX enabled?
15179        bne.b           fsub_ovfl_ena           # yes
15180
15181        add.l           &0xc,%sp                # discard saved result (12 bytes)
15182fsub_ovfl_dis:
15183        btst            &neg_bit,FPSR_CC(%a6)   # is result negative?
15184        sne             %d1                     # set sign param accordingly
15185        mov.l           L_SCR3(%a6),%d0         # pass prec:rnd
15186        bsr.l           ovf_res                 # calculate default result
15187        or.b            %d0,FPSR_CC(%a6)        # set INF,N if applicable
15188        fmovm.x         (%a0),&0x80             # return default result in fp0
15189        mov.l           (%sp)+,%d2              # restore d2
15190        rts
15191
# The prec/mode byte is the LOW byte of the L_SCR3 long (the fsub main path
# reads it as the word at 2+L_SCR3). A byte read at L_SCR3+0 fetches the
# most significant byte on this big-endian CPU, so the precision test would
# not see the precision bits. Load the whole long and test its low byte.
15192fsub_ovfl_ena:
15193        mov.l           L_SCR3(%a6),%d1
15194        andi.b          &0xc0,%d1               # is precision extended?
15195        bne.b           fsub_ovfl_ena_sd        # no
15196
# Build the EXOP in place on the stack: keep the sign, rebias the unscaled
# exponent by -0x6000, and pop the value into fp1.
15197fsub_ovfl_ena_cont:
15198        mov.w           (%sp),%d1               # fetch {sgn,exp}
15199        andi.w          &0x8000,%d1             # keep sign
15200        subi.l          &0x6000,%d2             # subtract new bias
15201        andi.w          &0x7fff,%d2             # clear top bit
15202        or.w            %d2,%d1                 # concat sign,exp
15203        mov.w           %d1,(%sp)               # insert new exponent
15204
15205        fmovm.x         (%sp)+,&0x40            # return EXOP in fp1
15206        bra.b           fsub_ovfl_dis
15207
# Single/double precision: redo the subtract with only the rounding mode
# set in the FPCR (i.e. extended precision) and replace the saved result.
15208fsub_ovfl_ena_sd:
15209        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
15210
15211        mov.l           L_SCR3(%a6),%d1
15212        andi.b          &0x30,%d1               # clear rnd prec
15213        fmov.l          %d1,%fpcr               # set FPCR
15214
15215        fsub.x          FP_SCR0(%a6),%fp0       # execute subtract
15216
15217        fmov.l          &0x0,%fpcr              # clear FPCR
15218
15219        add.l           &0xc,%sp                # drop the sgl/dbl-rounded result
15220        fmovm.x         &0x01,-(%sp)            # push the recomputed result
# d2 was derived from the sgl/dbl-rounded result. Re-derive it from the
# recomputed value so a rounding carry in the first subtract cannot leave a
# stale exponent in the EXOP (d0 still holds the scale factor).
             mov.w           (%sp),%d2               # fetch new sign, exp
             andi.l          &0x7fff,%d2             # strip sign
             sub.l           %d0,%d2                 # add scale factor
15221        bra.b           fsub_ovfl_ena_cont
15222
# fsub underflow handling.
# On entry the stack holds the 12-byte rounded result on top of the saved
# d2; d0 = scale factor. The subtract is redone with round-to-zero, the
# default underflow result from unf_res() is returned in fp0 and, if UNFL or
# INEX is enabled, the EXOP is returned in fp1.
15223fsub_unfl:
15224        bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
15225
15226        add.l           &0xc,%sp                # discard saved result (12 bytes)
15227
15228        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
15229
15230        fmov.l          &rz_mode*0x10,%fpcr     # set FPCR
15231        fmov.l          &0x0,%fpsr              # clear FPSR
15232
15233        fsub.x          FP_SCR0(%a6),%fp0       # execute subtract
15234
15235        fmov.l          &0x0,%fpcr              # clear FPCR
15236        fmov.l          %fpsr,%d1               # save status
15237
15238        or.l            %d1,USER_FPSR(%a6)      # merge status into USER_FPSR
15239
15240        mov.b           FPCR_ENABLE(%a6),%d1
15241        andi.b          &0x0b,%d1               # is UNFL or INEX enabled?
15242        bne.b           fsub_unfl_ena           # yes
15243
# Store the RZ result to FP_SCR0 and let unf_res() turn it into the default
# underflow result in place; unf_res() returns ccode bits in d0.
15244fsub_unfl_dis:
15245        fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
15246
15247        lea             FP_SCR0(%a6),%a0        # pass: result addr
15248        mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
15249        bsr.l           unf_res                 # calculate default result
15250        or.b            %d0,FPSR_CC(%a6)        # 'Z' may have been set
15251        fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
15252        mov.l           (%sp)+,%d2              # restore d2
15253        rts
15254
# Exception enabled: compute the EXOP in fp1. For extended precision the
# user's FPCR setting is used; for sgl/dbl only the rounding mode is kept.
15255fsub_unfl_ena:
15256        fmovm.x         FP_SCR1(%a6),&0x40      # load dst op into fp1
15257
15258        mov.l           L_SCR3(%a6),%d1
15259        andi.b          &0xc0,%d1               # is precision extended?
15260        bne.b           fsub_unfl_ena_sd        # no
15261
15262        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
15263
15264fsub_unfl_ena_cont:
15265        fmov.l          &0x0,%fpsr              # clear FPSR
15266
15267        fsub.x          FP_SCR0(%a6),%fp1       # execute subtract
15268
15269        fmov.l          &0x0,%fpcr              # clear FPCR
15270
# Rebuild the exponent: remove the scale factor (d0), add the 0x6000 bias,
# and re-attach the original sign.
15271        fmovm.x         &0x40,FP_SCR0(%a6)      # store result to stack
15272        mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
15273        mov.l           %d1,%d2                 # make a copy
15274        andi.l          &0x7fff,%d1             # strip sign
15275        andi.w          &0x8000,%d2             # keep old sign
15276        sub.l           %d0,%d1                 # add scale factor
15277        addi.l          &0x6000,%d1             # add new bias
15278        andi.w          &0x7fff,%d1             # clear top bit
15279        or.w            %d2,%d1                 # concat sgn,exp
15280        mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
15281        fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
15282        bra.w           fsub_unfl_dis
15283
15284fsub_unfl_ena_sd:
15285        mov.l           L_SCR3(%a6),%d1
15286        andi.b          &0x30,%d1               # clear rnd prec
15287        fmov.l          %d1,%fpcr               # set FPCR
15288
15289        bra.b           fsub_unfl_ena_cont
15290
15291#
15292# result is equal to the smallest normalized number in the selected precision.
15293# if the precision is extended, this result could not have come from an
15294# underflow that rounded up, so it is handled as a normal result.
15295#
15296fsub_may_unfl:
15297        mov.l           L_SCR3(%a6),%d1
15298        andi.b          &0xc0,%d1               # is precision extended?
15299        beq.w           fsub_normal             # yes; no underflow occurred
15300
# NOTE(review): 0x4(%sp)/0x8(%sp) are read as the hi/lo mantissa longwords of
# the result; the stack layout is set up before this block -- confirm.
15301        mov.l           0x4(%sp),%d1
15302        cmpi.l          %d1,&0x80000000         # is hi(man) = 0x80000000?
15303        bne.w           fsub_normal             # no; no underflow occurred
15304
15305        tst.l           0x8(%sp)                # is lo(man) = 0x0?
15306        bne.w           fsub_normal             # no; no underflow occurred
15307
15308        btst            &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
15309        beq.w           fsub_normal             # no; no underflow occurred
15310
15311#
15312# ok, so now the result has an exponent equal to the smallest normalized
15313# exponent for the selected precision. also, the mantissa is equal to
15314# 0x8000000000000000 and this mantissa is the result of rounding non-zero
15315# g,r,s.
15316# now, we must determine whether the pre-rounded result was an underflow
15317# rounded "up" or a normalized number rounded "down".
15318# so, we do this by re-executing the subtract using RZ as the rounding mode
15319# and seeing if the new result is smaller or equal to the current result.
15320#
15321        fmovm.x         FP_SCR1(%a6),&0x40      # load dst op into fp1
15322
15323        mov.l           L_SCR3(%a6),%d1
15324        andi.b          &0xc0,%d1               # keep rnd prec
15325        ori.b           &rz_mode*0x10,%d1       # insert rnd mode
15326        fmov.l          %d1,%fpcr               # set FPCR
15327        fmov.l          &0x0,%fpsr              # clear FPSR
15328
15329        fsub.x          FP_SCR0(%a6),%fp1       # execute subtract
15330
15331        fmov.l          &0x0,%fpcr              # clear FPCR
15332
15333        fabs.x          %fp0                    # compare absolute values
15334        fabs.x          %fp1
15335        fcmp.x          %fp0,%fp1               # is first result > second?
15336
15337        fbgt.w          fsub_unfl               # yes; it's an underflow
15338        bra.w           fsub_normal             # no; it's not an underflow
15339
15340##########################################################################
15341
15342#
15343# Sub: inputs are not both normalized; what are they?
15344# d1 indexes tbl_fsub_op; each row below is 8 entries wide (6 used, 2 unused).
15345fsub_not_norm:
15346        mov.w           (tbl_fsub_op.b,%pc,%d1.w*2),%d1 # fetch 16-bit offset of handler
15347        jmp             (tbl_fsub_op.b,%pc,%d1.w*1) # jump to tbl_fsub_op + offset
15348
15349        swbeg           &48
# entries are labelled "dst type - src type"; offsets are relative to tbl_fsub_op.
15350tbl_fsub_op:
15351        short           fsub_norm       - tbl_fsub_op # NORM - NORM
15352        short           fsub_zero_src   - tbl_fsub_op # NORM - ZERO
15353        short           fsub_inf_src    - tbl_fsub_op # NORM - INF
15354        short           fsub_res_qnan   - tbl_fsub_op # NORM - QNAN
15355        short           fsub_norm       - tbl_fsub_op # NORM - DENORM
15356        short           fsub_res_snan   - tbl_fsub_op # NORM - SNAN
15357        short           tbl_fsub_op     - tbl_fsub_op # unused
15358        short           tbl_fsub_op     - tbl_fsub_op # unused
15359
15360        short           fsub_zero_dst   - tbl_fsub_op # ZERO - NORM
15361        short           fsub_zero_2     - tbl_fsub_op # ZERO - ZERO
15362        short           fsub_inf_src    - tbl_fsub_op # ZERO - INF
15363        short           fsub_res_qnan   - tbl_fsub_op # ZERO - QNAN
15364        short           fsub_zero_dst   - tbl_fsub_op # ZERO - DENORM
15365        short           fsub_res_snan   - tbl_fsub_op # ZERO - SNAN
15366        short           tbl_fsub_op     - tbl_fsub_op # unused
15367        short           tbl_fsub_op     - tbl_fsub_op # unused
15368
15369        short           fsub_inf_dst    - tbl_fsub_op # INF - NORM
15370        short           fsub_inf_dst    - tbl_fsub_op # INF - ZERO
15371        short           fsub_inf_2      - tbl_fsub_op # INF - INF
15372        short           fsub_res_qnan   - tbl_fsub_op # INF - QNAN
15373        short           fsub_inf_dst    - tbl_fsub_op # INF - DENORM
15374        short           fsub_res_snan   - tbl_fsub_op # INF - SNAN
15375        short           tbl_fsub_op     - tbl_fsub_op # unused
15376        short           tbl_fsub_op     - tbl_fsub_op # unused
15377
15378        short           fsub_res_qnan   - tbl_fsub_op # QNAN - NORM
15379        short           fsub_res_qnan   - tbl_fsub_op # QNAN - ZERO
15380        short           fsub_res_qnan   - tbl_fsub_op # QNAN - INF
15381        short           fsub_res_qnan   - tbl_fsub_op # QNAN - QNAN
15382        short           fsub_res_qnan   - tbl_fsub_op # QNAN - DENORM
15383        short           fsub_res_snan   - tbl_fsub_op # QNAN - SNAN
15384        short           tbl_fsub_op     - tbl_fsub_op # unused
15385        short           tbl_fsub_op     - tbl_fsub_op # unused
15386
15387        short           fsub_norm       - tbl_fsub_op # DENORM - NORM
15388        short           fsub_zero_src   - tbl_fsub_op # DENORM - ZERO
15389        short           fsub_inf_src    - tbl_fsub_op # DENORM - INF
15390        short           fsub_res_qnan   - tbl_fsub_op # DENORM - QNAN
15391        short           fsub_norm       - tbl_fsub_op # DENORM - DENORM
15392        short           fsub_res_snan   - tbl_fsub_op # DENORM - SNAN
15393        short           tbl_fsub_op     - tbl_fsub_op # unused
15394        short           tbl_fsub_op     - tbl_fsub_op # unused
15395
15396        short           fsub_res_snan   - tbl_fsub_op # SNAN - NORM
15397        short           fsub_res_snan   - tbl_fsub_op # SNAN - ZERO
15398        short           fsub_res_snan   - tbl_fsub_op # SNAN - INF
15399        short           fsub_res_snan   - tbl_fsub_op # SNAN - QNAN
15400        short           fsub_res_snan   - tbl_fsub_op # SNAN - DENORM
15401        short           fsub_res_snan   - tbl_fsub_op # SNAN - SNAN
15402        short           tbl_fsub_op     - tbl_fsub_op # unused
15403        short           tbl_fsub_op     - tbl_fsub_op # unused
15404
# Local trampolines: tbl_fsub_op holds 16-bit offsets, so the NAN cases land
# here and are forwarded with long branches to the shared NAN result routines.
15405fsub_res_qnan:
15406        bra.l           res_qnan                # at least one QNAN operand, no SNAN
15407fsub_res_snan:
15408        bra.l           res_snan                # at least one SNAN operand
15409
15410#
15411# both operands are ZEROes
15412# a0 = ptr to src operand, a1 = ptr to dst operand. fp0 = result ZERO.
15413fsub_zero_2:
15414        mov.b           SRC_EX(%a0),%d0         # d0 = src sign byte
15415        mov.b           DST_EX(%a1),%d1         # d1 = dst sign byte
15416        eor.b           %d1,%d0                 # d0 = src ^ dst (d1 is unchanged)
15417        bpl.b           fsub_zero_2_chk_rm      # same signs; go check rnd mode
15418
15419# the signs are opposite, so, return a ZERO w/ the sign of the dst ZERO
# d0 now holds the XOR of the signs, which is always negative on this path,
# so the dst sign must be tested through d1 (still the dst sign byte).
15420        tst.b           %d1                     # is dst negative?
15421        bmi.b           fsub_zero_2_rm          # yes
15422        fmov.s          &0x00000000,%fp0        # no; return +ZERO
15423        mov.b           &z_bmask,FPSR_CC(%a6)   # set Z
15424        rts
15425
15426#
15427# the ZEROes have the same signs:
15428# - Therefore, we return +ZERO if the rounding mode is RN,RZ, or RP
15429# - -ZERO is returned in the case of RM.
15430#
15431fsub_zero_2_chk_rm:
15432        mov.b           3+L_SCR3(%a6),%d1       # fetch low byte of saved rnd info
15433        andi.b          &0x30,%d1               # extract rnd mode
15434        cmpi.b          %d1,&rm_mode*0x10       # is rnd mode = RM?
15435        beq.b           fsub_zero_2_rm          # yes
15436        fmov.s          &0x00000000,%fp0        # no; return +ZERO
15437        mov.b           &z_bmask,FPSR_CC(%a6)   # set Z
15438        rts
15439
# shared exit that returns -ZERO; also used by fsub_zero_2.
15440fsub_zero_2_rm:
15441        fmov.s          &0x80000000,%fp0        # return -ZERO
15442        mov.b           &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/NEG
15443        rts
15444
15445#
15446# one operand is a ZERO and the other is a DENORM or a NORM.
15447# scale the DENORM or NORM and jump to the regular fsub routine.
15448# the ZERO operand is replaced by an all-zero (positive) scratch operand.
15449fsub_zero_dst:
15450        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6) # copy src operand to FP_SCR0
15451        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
15452        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
15453        bsr.l           scale_to_zero_src       # scale the operand
15454        clr.w           FP_SCR1_EX(%a6)         # dst scratch operand = zero
15455        clr.l           FP_SCR1_HI(%a6)
15456        clr.l           FP_SCR1_LO(%a6)
15457        bra.w           fsub_zero_entry         # go execute fsub
15458
15459fsub_zero_src:
15460        mov.w           DST_EX(%a1),FP_SCR1_EX(%a6) # copy dst operand to FP_SCR1
15461        mov.l           DST_HI(%a1),FP_SCR1_HI(%a6)
15462        mov.l           DST_LO(%a1),FP_SCR1_LO(%a6)
15463        bsr.l           scale_to_zero_dst       # scale the operand
15464        clr.w           FP_SCR0_EX(%a6)         # src scratch operand = zero
15465        clr.l           FP_SCR0_HI(%a6)
15466        clr.l           FP_SCR0_LO(%a6)
15467        bra.w           fsub_zero_entry         # go execute fsub
15468
15469#
15470# both operands are INFs. an OPERR will result if the INFs have the
15471# same signs. else, the result is the src INF with its sign inverted.
15472#
15473fsub_inf_2:
15474        mov.b           SRC_EX(%a0),%d0         # exclusive or the signs
15475        mov.b           DST_EX(%a1),%d1
15476        eor.b           %d1,%d0
15477        bpl.l           res_operr               # weed out same-sign INF - INF
15478
15479# ok, so it's not an OPERR. but we do have to remember to return
15480# the src INF since that's where the 881/882 gets the j-bit.
15481
15482fsub_inf_src:
15483        fmovm.x         SRC(%a0),&0x80          # return src INF
15484        fneg.x          %fp0                    # invert sign
15485        fbge.w          fsub_inf_done           # sign is now positive
15486        mov.b           &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
15487        rts
15488
# only the dst is an INF: the result is the dst INF with its sign unchanged.
15489fsub_inf_dst:
15490        fmovm.x         DST(%a1),&0x80          # return dst INF
15491        tst.b           DST_EX(%a1)             # is INF negative?
15492        bpl.b           fsub_inf_done           # no
15493        mov.b           &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
15494        rts
15495
# shared exit for a positive INF result.
15496fsub_inf_done:
15497        mov.b           &inf_bmask,FPSR_CC(%a6) # set INF
15498        rts
15499
15500#########################################################################
15501# XDEF **************************************************************** #
15502#       fsqrt(): emulates the fsqrt instruction                         #
15503#       fssqrt(): emulates the fssqrt instruction                       #
15504#       fdsqrt(): emulates the fdsqrt instruction                       #
15505#                                                                       #
15506# XREF **************************************************************** #
15507#       scale_sqrt() - scale the source operand                         #
15508#       unf_res() - return default underflow result                     #
15509#       ovf_res() - return default overflow result                      #
15510#       res_qnan_1op() - return QNAN result                             #
15511#       res_snan_1op() - return SNAN result                             #
15512#                                                                       #
15513# INPUT *************************************************************** #
15514#       a0 = pointer to extended precision source operand               #
15515#       d0 = rnd prec,mode                                              #
15516#                                                                       #
15517# OUTPUT ************************************************************** #
15518#       fp0 = result                                                    #
15519#       fp1 = EXOP (if exception occurred)                              #
15520#                                                                       #
15521# ALGORITHM *********************************************************** #
15522#       Handle NANs, infinities, and zeroes as special cases. Divide    #
15523# norms/denorms into ext/sgl/dbl precision.                             #
15524#       For norms/denorms, scale the exponents such that a sqrt         #
15525# instruction won't cause an exception. Use the regular fsqrt to        #
15526# compute a result. Check if the regular operands would have taken      #
15527# an exception. If so, return the default overflow/underflow result     #
15528# and return the EXOP if exceptions are enabled. Else, scale the        #
15529# result operand to the proper exponent.                                #
15530#                                                                       #
15531#########################################################################
15532
15533        global          fssqrt
# fssqrt: force single precision into the rnd info in d0, then join fsqrt.
15534fssqrt:
15535        andi.b          &0x30,%d0               # clear rnd prec
15536        ori.b           &s_mode*0x10,%d0        # insert sgl precision
15537        bra.b           fsqrt
15538
15539        global          fdsqrt
# fdsqrt: force double precision into the rnd info in d0, then fall into fsqrt.
15540fdsqrt:
15541        andi.b          &0x30,%d0               # clear rnd prec
15542        ori.b           &d_mode*0x10,%d0        # insert dbl precision
15543
15544        global          fsqrt
# fsqrt: common entry. d0 = rnd prec,mode; a0 = ptr to src operand.
15545fsqrt:
15546        mov.l           %d0,L_SCR3(%a6)         # store rnd info
15547        clr.w           %d1
15548        mov.b           STAG(%a6),%d1           # fetch src operand tag (zero = NORM)
15549        bne.w           fsqrt_not_norm          # optimize on non-norm input
15550
15551#
15552# SQUARE ROOT: norms and denorms ONLY!
15553#
15554fsqrt_norm:
15555        tst.b           SRC_EX(%a0)             # is operand negative?
15556        bmi.l           res_operr               # yes
15557
15558        andi.b          &0xc0,%d0               # is precision extended?
15559        bne.b           fsqrt_not_ext           # no; go handle sgl or dbl
15560
# extended precision NORM: no scaling is done; the square root is executed
# directly on the source operand.
# NOTE(review): unlike fsqrt_sd_normal, the FPCR is not cleared again before
# returning from this path -- confirm that this is intended.
15561        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
15562        fmov.l          &0x0,%fpsr              # clear FPSR
15563
15564        fsqrt.x         (%a0),%fp0              # execute square root
15565
15566        fmov.l          %fpsr,%d1               # fetch resulting status
15567        or.l            %d1,USER_FPSR(%a6)      # set N,INEX
15568
15569        rts
15570
# DENORM source operand: negative inputs are an OPERR; sgl/dbl precision is
# handled by fsqrt_not_ext; for extended precision the operand is copied to
# FP_SCR0, scaled, and then run through the common fsqrt_sd_normal path.
15571fsqrt_denorm:
15572        tst.b           SRC_EX(%a0)             # is operand negative?
15573        bmi.l           res_operr               # yes
15574
15575        andi.b          &0xc0,%d0               # is precision extended?
15576        bne.b           fsqrt_not_ext           # no; go handle sgl or dbl
15577
15578        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6) # copy src operand to FP_SCR0
15579        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
15580        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
15581
15582        bsr.l           scale_sqrt              # calculate scale factor
15583
15584        bra.w           fsqrt_sd_normal         # compute result and un-scale it
15585
15586#
15587# operand is either single or double
15588# d0 holds only the rnd prec bits here (masked with 0xc0 by the caller path).
15589fsqrt_not_ext:
15590        cmpi.b          %d0,&s_mode*0x10        # separate sgl/dbl prec
15591        bne.w           fsqrt_dbl
15592
15593#
15594# operand is to be rounded to single precision
15595#
15596fsqrt_sgl:
15597        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6) # copy src operand to FP_SCR0
15598        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
15599        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
15600
15601        bsr.l           scale_sqrt              # calculate scale factor
15602
15603        cmpi.l          %d0,&0x3fff-0x3f81      # will move in underflow?
15604        beq.w           fsqrt_sd_may_unfl       # maybe; go check
15605        bgt.w           fsqrt_sd_unfl           # yes; go handle underflow
15606        cmpi.l          %d0,&0x3fff-0x407f      # will move in overflow?
15607        beq.w           fsqrt_sd_may_ovfl       # maybe; go check
15608        blt.w           fsqrt_sd_ovfl           # yes; go handle overflow
15609
15610#
15611# operand will NOT overflow or underflow when moved in to the fp reg file
15612# FP_SCR0 = scaled operand, d0 = scale factor.
15613fsqrt_sd_normal:
15614        fmov.l          &0x0,%fpsr              # clear FPSR
15615        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
15616
15617        fsqrt.x         FP_SCR0(%a6),%fp0       # perform square root
15618
15619        fmov.l          %fpsr,%d1               # save FPSR
15620        fmov.l          &0x0,%fpcr              # clear FPCR
15621
15622        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
15623
# un-scale the result: subtract the scale factor in d0 from the exponent of
# the value in fp0 and return the adjusted value in fp0.
15624fsqrt_sd_normal_exit:
15625        mov.l           %d2,-(%sp)              # save d2
15626        fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
15627        mov.w           FP_SCR0_EX(%a6),%d1     # load sgn,exp
15628        mov.l           %d1,%d2                 # make a copy
15629        andi.l          &0x7fff,%d1             # strip sign
15630        sub.l           %d0,%d1                 # subtract scale factor
15631        andi.w          &0x8000,%d2             # keep old sign
15632        or.w            %d1,%d2                 # concat old sign,new exp
15633        mov.w           %d2,FP_SCR0_EX(%a6)     # insert new exponent
15634        mov.l           (%sp)+,%d2              # restore d2
15635        fmovm.x         FP_SCR0(%a6),&0x80      # return result in fp0
15636        rts
15637
15638#
15639# operand is to be rounded to double precision
15640#
15641fsqrt_dbl:
15642        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6) # copy src operand to FP_SCR0
15643        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
15644        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
15645
15646        bsr.l           scale_sqrt              # calculate scale factor
15647
15648        cmpi.l          %d0,&0x3fff-0x3c01      # will move in underflow?
15649        beq.w           fsqrt_sd_may_unfl       # maybe; go check
15650        bgt.b           fsqrt_sd_unfl           # yes; go handle underflow
15651        cmpi.l          %d0,&0x3fff-0x43ff      # will move in overflow?
15652        beq.w           fsqrt_sd_may_ovfl       # maybe; go check
15653        blt.w           fsqrt_sd_ovfl           # yes; go handle overflow
15654        bra.w           fsqrt_sd_normal         # no; go handle normalized op
15655
15656# we're on the line here and the distinguishing characteristic is whether
15657# the scaled exponent is 3fff or 3ffe. if it's 3fff (low bit set), then it's
15658# a safe number; elsewise fall through to underflow.
15659fsqrt_sd_may_unfl:
15660        btst            &0x0,1+FP_SCR0_EX(%a6)  # is exponent 0x3fff?
15661        bne.w           fsqrt_sd_normal         # yes, so no underflow
15662
15663#
15664# operand WILL underflow when moved in to the fp register file
15665#
15666fsqrt_sd_unfl:
15667        bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
15668
15669        fmov.l          &rz_mode*0x10,%fpcr     # set FPCR
15670        fmov.l          &0x0,%fpsr              # clear FPSR
15671
15672        fsqrt.x         FP_SCR0(%a6),%fp0       # execute square root
15673
15674        fmov.l          %fpsr,%d1               # save status
15675        fmov.l          &0x0,%fpcr              # clear FPCR
15676
15677        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
15678
15679# if underflow or inexact is enabled, go calculate EXOP first.
15680        mov.b           FPCR_ENABLE(%a6),%d1
15681        andi.b          &0x0b,%d1               # is UNFL or INEX enabled?
15682        bne.b           fsqrt_sd_unfl_ena       # yes
15683
# build the default underflow result from the value in fp0 with unf_res().
15684fsqrt_sd_unfl_dis:
15685        fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
15686
15687        lea             FP_SCR0(%a6),%a0        # pass: result addr
15688        mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
15689        bsr.l           unf_res                 # calculate default result
15690        or.b            %d0,FPSR_CC(%a6)        # set possible 'Z' ccode
15691        fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
15692        rts
15693
15694#
15695# operand will underflow AND underflow is enabled.
15696# Therefore, we must return the result rounded to extended precision.
15697# the EXOP is built in FP_SCR1 from the FP_SCR0 mantissa and a re-biased
# exponent, and is returned in fp1. d0 = scale factor.
15698fsqrt_sd_unfl_ena:
15699        mov.l           FP_SCR0_HI(%a6),FP_SCR1_HI(%a6) # copy mantissa to FP_SCR1
15700        mov.l           FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
15701        mov.w           FP_SCR0_EX(%a6),%d1     # load current exponent
15702
15703        mov.l           %d2,-(%sp)              # save d2
15704        mov.l           %d1,%d2                 # make a copy
15705        andi.l          &0x7fff,%d1             # strip sign
15706        andi.w          &0x8000,%d2             # keep old sign
15707        sub.l           %d0,%d1                 # subtract scale factor
15708        addi.l          &0x6000,%d1             # add new bias
15709        andi.w          &0x7fff,%d1             # clear top bit
15710        or.w            %d2,%d1                 # concat new sign,new exp
15711        mov.w           %d1,FP_SCR1_EX(%a6)     # insert new exp
15712        fmovm.x         FP_SCR1(%a6),&0x40      # return EXOP in fp1
15713        mov.l           (%sp)+,%d2              # restore d2
15714        bra.b           fsqrt_sd_unfl_dis       # go create the default result
15715
15716#
15717# operand WILL overflow.
15718#
15719fsqrt_sd_ovfl:
15720        fmov.l          &0x0,%fpsr              # clear FPSR
15721        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
15722
15723        fsqrt.x         FP_SCR0(%a6),%fp0       # perform square root
15724
15725        fmov.l          &0x0,%fpcr              # clear FPCR
15726        fmov.l          %fpsr,%d1               # save FPSR
15727
15728        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
15729
# also entered from fsqrt_sd_may_ovfl once an overflow has been detected.
15730fsqrt_sd_ovfl_tst:
15731        or.l            &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
15732
15733        mov.b           FPCR_ENABLE(%a6),%d1
15734        andi.b          &0x13,%d1               # is OVFL or INEX enabled?
15735        bne.b           fsqrt_sd_ovfl_ena       # yes
15736
15737#
15738# OVFL is not enabled; therefore, we must create the default result by
15739# calling ovf_res().
15740#
15741fsqrt_sd_ovfl_dis:
15742        btst            &neg_bit,FPSR_CC(%a6)   # is result negative?
15743        sne             %d1                     # set sign param accordingly
15744        mov.l           L_SCR3(%a6),%d0         # pass: prec,mode
15745        bsr.l           ovf_res                 # calculate default result
15746        or.b            %d0,FPSR_CC(%a6)        # set INF,N if applicable
15747        fmovm.x         (%a0),&0x80             # return default result in fp0
15748        rts
15749
15750#
15751# OVFL is enabled.
15752# the INEX2 bit has already been updated by the round to the correct precision.
15753# now, round to extended(and don't alter the FPSR).
15754# the EXOP exponent is un-scaled by d0 and re-biased by -0x6000.
15755fsqrt_sd_ovfl_ena:
15756        mov.l           %d2,-(%sp)              # save d2
15757        mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
15758        mov.l           %d1,%d2                 # make a copy
15759        andi.l          &0x7fff,%d1             # strip sign
15760        andi.w          &0x8000,%d2             # keep old sign
15761        sub.l           %d0,%d1                 # subtract scale factor
15762        subi.l          &0x6000,%d1             # subtract bias
15763        andi.w          &0x7fff,%d1             # clear top bit
15764        or.w            %d2,%d1                 # concat sign,exp
15765        mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
15766        fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
15767        mov.l           (%sp)+,%d2              # restore d2
15768        bra.b           fsqrt_sd_ovfl_dis       # go create the default result
15769
15770#
15771# the move in MAY overflow. so...
15772# an exponent of 0x3fff (low bit set) is a certain overflow; otherwise the
# square root is executed and the rounded result is compared against 1.
15773fsqrt_sd_may_ovfl:
15774        btst            &0x0,1+FP_SCR0_EX(%a6)  # is exponent 0x3fff?
15775        bne.w           fsqrt_sd_ovfl           # yes, so overflow
15776
15777        fmov.l          &0x0,%fpsr              # clear FPSR
15778        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
15779
15780        fsqrt.x         FP_SCR0(%a6),%fp0       # perform square root
15781
15782        fmov.l          %fpsr,%d1               # save status
15783        fmov.l          &0x0,%fpcr              # clear FPCR
15784
15785        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
15786
15787        fmov.x          %fp0,%fp1               # make a copy of result
15788        fcmp.b          %fp1,&0x1               # is |result| >= 1.b?
15789        fbge.w          fsqrt_sd_ovfl_tst       # yes; overflow has occurred
15790
15791# no, it didn't overflow; we have correct result
15792        bra.w           fsqrt_sd_normal_exit
15793
15794##########################################################################
15795
15796#
15797# input is not normalized; what is it?
15798# d1 = src operand tag (loaded from STAG at the fsqrt entry).
15799fsqrt_not_norm:
15800        cmpi.b          %d1,&DENORM             # weed out DENORM
15801        beq.w           fsqrt_denorm
15802        cmpi.b          %d1,&ZERO               # weed out ZERO
15803        beq.b           fsqrt_zero
15804        cmpi.b          %d1,&INF                # weed out INF
15805        beq.b           fsqrt_inf
15806        cmpi.b          %d1,&SNAN               # weed out SNAN
15807        beq.l           res_snan_1op
15808        bra.l           res_qnan_1op            # anything left is treated as a QNAN
15809
15810#
15811#       fsqrt(+0) = +0
15812#       fsqrt(-0) = -0
15813#       fsqrt(+INF) = +INF
15814#       fsqrt(-INF) = OPERR
15815#
15816fsqrt_zero:
15817        tst.b           SRC_EX(%a0)             # is ZERO positive or negative?
15818        bmi.b           fsqrt_zero_m            # negative
15819fsqrt_zero_p:
15820        fmov.s          &0x00000000,%fp0        # return +ZERO
15821        mov.b           &z_bmask,FPSR_CC(%a6)   # set 'Z' ccode bit
15822        rts
15823fsqrt_zero_m:
15824        fmov.s          &0x80000000,%fp0        # return -ZERO
15825        mov.b           &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
15826        rts
15827
15828fsqrt_inf:
15829        tst.b           SRC_EX(%a0)             # is INF positive or negative?
15830        bmi.l           res_operr               # negative
15831fsqrt_inf_p:
15832        fmovm.x         SRC(%a0),&0x80          # return +INF in fp0
15833        mov.b           &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
15834        rts
15835
15836##########################################################################
15837
15838#########################################################################
15839# XDEF **************************************************************** #
15840#       addsub_scaler2(): scale inputs to fadd/fsub such that no        #
15841#                         OVFL/UNFL exceptions will result              #
15842#                                                                       #
15843# XREF **************************************************************** #
15844#       norm() - normalize mantissa after adjusting exponent            #
15845#                                                                       #
15846# INPUT *************************************************************** #
15847#       a0          = pointer to fp op1(src)                            #
15848#       a1          = pointer to fp op2(dst)                            #
15849#                                                                       #
15850# OUTPUT ************************************************************** #
15851#       FP_SCR0(a6) = fp op1 scaled(src)                                #
15852#       FP_SCR1(a6) = fp op2 scaled(dst)                                #
15853#       d0         = scale amount                                       #
15854#                                                                       #
15855# ALGORITHM *********************************************************** #
15856#       If the DST exponent is > the SRC exponent, set the DST exponent #
15857# equal to 0x3fff and scale the SRC exponent by the value that the      #
15858# DST exponent was scaled by. If the SRC exponent is greater or equal,  #
15859# do the opposite. Return this scale factor in d0.                      #
15860#       If the two exponents differ by > the number of mantissa bits    #
15861# plus two, then set the smallest exponent to a very small value as a   #
15862# quick shortcut.                                                       #
15863#                                                                       #
15864#########################################################################
15865
15866        global          addsub_scaler2
# copy the src (via a0) and dst (via a1) operands to FP_SCR0/FP_SCR1, record
# both sign-stripped exponents in L_SCR1, and dispatch on which is larger.
15867addsub_scaler2:
15868        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
15869        mov.l           DST_HI(%a1),FP_SCR1_HI(%a6)
15870        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
15871        mov.l           DST_LO(%a1),FP_SCR1_LO(%a6)
15872        mov.w           SRC_EX(%a0),%d0
15873        mov.w           DST_EX(%a1),%d1
15874        mov.w           %d0,FP_SCR0_EX(%a6)
15875        mov.w           %d1,FP_SCR1_EX(%a6)
15876
15877        andi.w          &0x7fff,%d0             # strip src sign
15878        andi.w          &0x7fff,%d1             # strip dst sign
15879        mov.w           %d0,L_SCR1(%a6)         # store src exponent
15880        mov.w           %d1,2+L_SCR1(%a6)       # store dst exponent
15881
15882        cmp.w           %d0, %d1                # is src exp >= dst exp?
15883        bge.l           src_exp_ge2
15884
15885# dst exp is >  src exp; scale dst to exp = 0x3fff
15886dst_exp_gt2:
15887        bsr.l           scale_to_zero_dst
15888        mov.l           %d0,-(%sp)              # save scale factor
15889
15890        cmpi.b          STAG(%a6),&DENORM       # is src denormalized?
15891        bne.b           cmpexp12
15892
15893        lea             FP_SCR0(%a6),%a0        # pass: ptr to src scratch operand
15894        bsr.l           norm                    # normalize the denorm; result is new exp
15895        neg.w           %d0                     # new exp = -(shft val)
15896        mov.w           %d0,L_SCR1(%a6)         # insert new src exp
15897
15898cmpexp12:
15899        mov.w           2+L_SCR1(%a6),%d0       # fetch dst (larger) exponent
15900        subi.w          &mantissalen+2,%d0      # subtract mantissalen+2 from larger exp
15901
15902        cmp.w           %d0,L_SCR1(%a6)         # is difference >= len(mantissa)+2?
15903        bge.b           quick_scale12
15904
15905        mov.w           L_SCR1(%a6),%d0
15906        add.w           0x2(%sp),%d0            # scale src exponent by scale factor
15907        mov.w           FP_SCR0_EX(%a6),%d1
15908        and.w           &0x8000,%d1             # keep src sign
15909        or.w            %d1,%d0                 # concat {sgn,new exp}
15910        mov.w           %d0,FP_SCR0_EX(%a6)     # insert new src exponent
15911
15912        mov.l           (%sp)+,%d0              # return SCALE factor
15913        rts
15914
# exponents are too far apart: give the src the smallest exponent instead.
15915quick_scale12:
15916        andi.w          &0x8000,FP_SCR0_EX(%a6) # zero src exponent
15917        bset            &0x0,1+FP_SCR0_EX(%a6)  # set exp = 1
15918
15919        mov.l           (%sp)+,%d0              # return SCALE factor
15920        rts
15921
15922# src exp is >= dst exp; scale src to exp = 0x3fff
15923src_exp_ge2:
15924        bsr.l           scale_to_zero_src
15925        mov.l           %d0,-(%sp)              # save scale factor
15926
15927        cmpi.b          DTAG(%a6),&DENORM       # is dst denormalized?
15928        bne.b           cmpexp22
15929        lea             FP_SCR1(%a6),%a0        # pass: ptr to dst scratch operand
15930        bsr.l           norm                    # normalize the denorm; result is new exp
15931        neg.w           %d0                     # new exp = -(shft val)
15932        mov.w           %d0,2+L_SCR1(%a6)       # insert new dst exp
15933
15934cmpexp22:
15935        mov.w           L_SCR1(%a6),%d0         # fetch src (larger) exponent
15936        subi.w          &mantissalen+2,%d0      # subtract mantissalen+2 from larger exp
15937
15938        cmp.w           %d0,2+L_SCR1(%a6)       # is difference >= len(mantissa)+2?
15939        bge.b           quick_scale22
15940
15941        mov.w           2+L_SCR1(%a6),%d0
15942        add.w           0x2(%sp),%d0            # scale dst exponent by scale factor
15943        mov.w           FP_SCR1_EX(%a6),%d1
15944        andi.w          &0x8000,%d1             # keep dst sign
15945        or.w            %d1,%d0                 # concat {sgn,new exp}
15946        mov.w           %d0,FP_SCR1_EX(%a6)     # insert new dst exponent
15947
15948        mov.l           (%sp)+,%d0              # return SCALE factor
15949        rts
15950
# exponents are too far apart: give the dst the smallest exponent instead.
15951quick_scale22:
15952        andi.w          &0x8000,FP_SCR1_EX(%a6) # zero dst exponent
15953        bset            &0x0,1+FP_SCR1_EX(%a6)  # set exp = 1
15954
15955        mov.l           (%sp)+,%d0              # return SCALE factor
15956        rts
15957
15958##########################################################################
15959
15960#########################################################################
15961# XDEF **************************************************************** #
15962#       scale_to_zero_src(): scale the exponent of extended precision   #
15963#                            value at FP_SCR0(a6).                      #
15964#                                                                       #
15965# XREF **************************************************************** #
15966#       norm() - normalize the mantissa if the operand was a DENORM     #
15967#                                                                       #
15968# INPUT *************************************************************** #
15969#       FP_SCR0(a6) = extended precision operand to be scaled           #
15970#                                                                       #
15971# OUTPUT ************************************************************** #
15972#       FP_SCR0(a6) = scaled extended precision operand                 #
15973#       d0          = scale value                                       #
15974#                                                                       #
15975# ALGORITHM *********************************************************** #
15976#       Set the exponent of the input operand to 0x3fff. Save the value #
15977# of the difference between the original and new exponent. Then,        #
15978# normalize the operand if it was a DENORM. Add this normalization      #
15979# value to the previous value. Return the result.                       #
15980#                                                                       #
15981#########################################################################
15982
15983        global          scale_to_zero_src
15984scale_to_zero_src:
# Force the exponent field of FP_SCR0 to 0x3fff (sign preserved) and return
# in d0 the value 0x3fff - (original exponent). For a DENORM the "original
# exponent" is taken as -(value returned by norm()).
15985        mov.w           FP_SCR0_EX(%a6),%d1     # extract operand's {sgn,exp}
15986        mov.w           %d1,%d0                 # make a copy
15987
15988        andi.l          &0x7fff,%d1             # extract operand's exponent
15989
15990        andi.w          &0x8000,%d0             # extract operand's sgn
15991        or.w            &0x3fff,%d0             # insert new operand's exponent(=0)
15992
15993        mov.w           %d0,FP_SCR0_EX(%a6)     # insert biased exponent
15994
15995        cmpi.b          STAG(%a6),&DENORM       # is operand a DENORM?
15996        beq.b           stzs_denorm             # yes; normalize the DENORM
15997
15998stzs_norm:
# d1 = exponent to scale from (also entered from stzs_denorm)
15999        mov.l           &0x3fff,%d0
16000        sub.l           %d1,%d0                 # scale = BIAS + (-exp)
16001
16002        rts
16003
16004stzs_denorm:
16005        lea             FP_SCR0(%a6),%a0        # pass ptr to src op
16006        bsr.l           norm                    # normalize denorm
16007        neg.l           %d0                     # new exponent = -(shft val)
16008        mov.l           %d0,%d1                 # d1 = exponent consumed by stzs_norm
16009        bra.b           stzs_norm               # finish scaling
16010
16011###
16012
16013#########################################################################
16014# XDEF **************************************************************** #
16015#       scale_sqrt(): scale the input operand exponent so a subsequent  #
16016#                     fsqrt operation won't take an exception.          #
16017#                                                                       #
16018# XREF **************************************************************** #
16019#       norm() - normalize the mantissa if the operand was a DENORM     #
16020#                                                                       #
16021# INPUT *************************************************************** #
16022#       FP_SCR0(a6) = extended precision operand to be scaled           #
16023#                                                                       #
16024# OUTPUT ************************************************************** #
16025#       FP_SCR0(a6) = scaled extended precision operand                 #
16026#       d0          = scale value                                       #
16027#                                                                       #
16028# ALGORITHM *********************************************************** #
16029#       If the input operand is a DENORM, normalize it.                 #
16030#       If the exponent of the input operand is even, set the exponent  #
16031# to 0x3ffe and return a scale factor of "(exp-0x3ffe)/2". If the       #
16032# exponent of the input operand is odd, set the exponent to 0x3fff and  #
16033# return a scale factor of "(exp-0x3fff)/2".                            #
16034#                                                                       #
16035#########################################################################
16036
16037        global          scale_sqrt
16038scale_sqrt:
# Rewrite the exponent field of FP_SCR0 to 0x3fff (odd input exponent) or
# 0x3ffe (even input exponent), keeping the sign, and return in d0 half of
# the difference between the new and the original exponent.
16039        cmpi.b          STAG(%a6),&DENORM       # is operand a DENORM?
16040        beq.b           ss_denorm               # yes; normalize the DENORM
16041
16042        mov.w           FP_SCR0_EX(%a6),%d1     # extract operand's {sgn,exp}
16043        andi.l          &0x7fff,%d1             # extract operand's exponent
16044
16045        andi.w          &0x8000,FP_SCR0_EX(%a6) # keep only operand's sgn in memory
16046
16047        btst            &0x0,%d1                # is exp even or odd?
16048        beq.b           ss_norm_even
16049
# odd exponent: new exponent field is 0x3fff
16050        ori.w           &0x3fff,FP_SCR0_EX(%a6) # insert new operand's exponent(=0)
16051
16052        mov.l           &0x3fff,%d0
16053        sub.l           %d1,%d0                 # scale = BIAS + (-exp)
16054        asr.l           &0x1,%d0                # divide scale factor by 2
16055        rts
16056
16057ss_norm_even:
16058        ori.w           &0x3ffe,FP_SCR0_EX(%a6) # insert new biased exponent 0x3ffe
16059
16060        mov.l           &0x3ffe,%d0
16061        sub.l           %d1,%d0                 # scale = 0x3ffe + (-exp)
16062        asr.l           &0x1,%d0                # divide scale factor by 2
16063        rts
16064
16065ss_denorm:
16066        lea             FP_SCR0(%a6),%a0        # pass ptr to src op
16067        bsr.l           norm                    # normalize denorm
16068
# d0 = value returned by norm(); its low bit selects the odd/even path
16069        btst            &0x0,%d0                # is exp even or odd?
16070        beq.b           ss_denorm_even
16071
16072        ori.w           &0x3fff,FP_SCR0_EX(%a6) # insert new operand's exponent(=0)
16073
16074        add.l           &0x3fff,%d0             # scale = 0x3fff + norm() result
16075        asr.l           &0x1,%d0                # divide scale factor by 2
16076        rts
16077
16078ss_denorm_even:
16079        ori.w           &0x3ffe,FP_SCR0_EX(%a6) # insert new biased exponent 0x3ffe
16080
16081        add.l           &0x3ffe,%d0             # scale = 0x3ffe + norm() result
16082        asr.l           &0x1,%d0                # divide scale factor by 2
16083        rts
16084
16085###
16086
16087#########################################################################
16088# XDEF **************************************************************** #
16089#       scale_to_zero_dst(): scale the exponent of extended precision   #
16090#                            value at FP_SCR1(a6).                      #
16091#                                                                       #
16092# XREF **************************************************************** #
16093#       norm() - normalize the mantissa if the operand was a DENORM     #
16094#                                                                       #
16095# INPUT *************************************************************** #
16096#       FP_SCR1(a6) = extended precision operand to be scaled           #
16097#                                                                       #
16098# OUTPUT ************************************************************** #
16099#       FP_SCR1(a6) = scaled extended precision operand                 #
16100#       d0          = scale value                                       #
16101#                                                                       #
16102# ALGORITHM *********************************************************** #
16103#       Set the exponent of the input operand to 0x3fff. Save the value #
16104# of the difference between the original and new exponent. Then,        #
16105# normalize the operand if it was a DENORM. Add this normalization      #
16106# value to the previous value. Return the result.                       #
16107#                                                                       #
16108#########################################################################
16109
16110        global          scale_to_zero_dst
16111scale_to_zero_dst:
# Force the exponent field of FP_SCR1 to 0x3fff (sign preserved) and return
# in d0 the value 0x3fff - (original exponent). For a DENORM the "original
# exponent" is taken as -(value returned by norm()).
16112        mov.w           FP_SCR1_EX(%a6),%d1     # extract operand's {sgn,exp}
16113        mov.w           %d1,%d0                 # make a copy
16114
16115        andi.l          &0x7fff,%d1             # extract operand's exponent
16116
16117        andi.w          &0x8000,%d0             # extract operand's sgn
16118        or.w            &0x3fff,%d0             # insert new operand's exponent(=0)
16119
16120        mov.w           %d0,FP_SCR1_EX(%a6)     # insert biased exponent
16121
16122        cmpi.b          DTAG(%a6),&DENORM       # is operand a DENORM?
16123        beq.b           stzd_denorm             # yes; normalize the DENORM
16124
16125stzd_norm:
# d1 = exponent to scale from (also entered from stzd_denorm)
16126        mov.l           &0x3fff,%d0
16127        sub.l           %d1,%d0                 # scale = BIAS + (-exp)
16128        rts
16129
16130stzd_denorm:
16131        lea             FP_SCR1(%a6),%a0        # pass ptr to dst op
16132        bsr.l           norm                    # normalize denorm
16133        neg.l           %d0                     # new exponent = -(shft val)
16134        mov.l           %d0,%d1                 # d1 = exponent consumed by stzd_norm
16135        bra.b           stzd_norm               # finish scaling
16136
16137##########################################################################
16138
16139#########################################################################
16140# XDEF **************************************************************** #
16141#       res_qnan(): return default result w/ QNAN operand for dyadic    #
16142#       res_snan(): return default result w/ SNAN operand for dyadic    #
16143#       res_qnan_1op(): return dflt result w/ QNAN operand for monadic  #
16144#       res_snan_1op(): return dflt result w/ SNAN operand for monadic  #
16145#                                                                       #
16146# XREF **************************************************************** #
16147#       None                                                            #
16148#                                                                       #
16149# INPUT *************************************************************** #
16150#       FP_SRC(a6) = pointer to extended precision src operand          #
16151#       FP_DST(a6) = pointer to extended precision dst operand          #
16152#                                                                       #
16153# OUTPUT ************************************************************** #
16154#       fp0 = default result                                            #
16155#                                                                       #
16156# ALGORITHM *********************************************************** #
16157#       If either operand (but not both operands) of an operation is a  #
16158# nonsignalling NAN, then that NAN is returned as the result. If both   #
16159# operands are nonsignalling NANs, then the destination operand         #
16160# nonsignalling NAN is returned as the result.                          #
16161#       If either operand to an operation is a signalling NAN (SNAN),   #
16162# then, the SNAN bit is set in the FPSR EXC byte. If the SNAN trap      #
16163# enable bit is set in the FPCR, then the trap is taken and the         #
16164# destination is not modified. If the SNAN trap enable bit is not set,  #
16165# then the SNAN is converted to a nonsignalling NAN (by setting the     #
16166# SNAN bit in the operand to one), and the operation continues as       #
16167# described in the preceding paragraph, for nonsignalling NANs.         #
16168#       Make sure the appropriate FPSR bits are set before exiting.     #
16169#                                                                       #
16170#########################################################################
16171
16172        global          res_qnan
16173        global          res_snan
16174res_qnan:
16175res_snan:
# Dyadic entry: the dst tag is examined first, so a dst NAN takes priority
# over a src NAN. Every path ends at nan_comp with a0 pointing at the NAN
# that is returned.
16176        cmp.b           DTAG(%a6), &SNAN        # is the dst an SNAN?
16177        beq.b           dst_snan2
16178        cmp.b           DTAG(%a6), &QNAN        # is the dst a  QNAN?
16179        beq.b           dst_qnan2
16180src_nan:
16181        cmp.b           STAG(%a6), &QNAN        # is the src a  QNAN?
16182        beq.b           src_qnan2
16183        global          res_snan_1op
# monadic SNAN entry; also the fall-through when the src is not a QNAN
16184res_snan_1op:
16185src_snan2:
16186        bset            &0x6, FP_SRC_HI(%a6)    # set SNAN bit
16187        or.l            &nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
16188        lea             FP_SRC(%a6), %a0        # result is the src operand
16189        bra.b           nan_comp
16190        global          res_qnan_1op
# monadic QNAN entry; also reached when only the src is a QNAN
16191res_qnan_1op:
16192src_qnan2:
16193        or.l            &nan_mask, USER_FPSR(%a6)
16194        lea             FP_SRC(%a6), %a0        # result is the src operand
16195        bra.b           nan_comp
16196dst_snan2:
16197        or.l            &nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
16198        bset            &0x6, FP_DST_HI(%a6)    # set SNAN bit
16199        lea             FP_DST(%a6), %a0        # result is the dst operand
16200        bra.b           nan_comp
16201dst_qnan2:
16202        lea             FP_DST(%a6), %a0        # result is the dst operand
16203        cmp.b           STAG(%a6), &SNAN        # is the src an SNAN?
16204        bne             nan_done                # no; only the nan bit is needed
16205        or.l            &aiop_mask+snan_mask, USER_FPSR(%a6)
16206nan_done:
16207        or.l            &nan_mask, USER_FPSR(%a6)
16208nan_comp:
16209        btst            &0x7, FTEMP_EX(%a0)     # is NAN neg?
16210        beq.b           nan_not_neg
16211        or.l            &neg_mask, USER_FPSR(%a6)
16212nan_not_neg:
16213        fmovm.x         (%a0), &0x80            # load the chosen NAN as the result
16214        rts
16215
16216#########################################################################
16217# XDEF **************************************************************** #
16218#       res_operr(): return default result during operand error         #
16219#                                                                       #
16220# XREF **************************************************************** #
16221#       None                                                            #
16222#                                                                       #
16223# INPUT *************************************************************** #
16224#       None                                                            #
16225#                                                                       #
16226# OUTPUT ************************************************************** #
16227#       fp0 = default operand error result                              #
16228#                                                                       #
16229# ALGORITHM *********************************************************** #
16230#       A nonsignalling NAN is returned as the default result when      #
16231# an operand error occurs for the following cases:                      #
16232#                                                                       #
16233#       Multiply: (Infinity x Zero)                                     #
16234#       Divide  : (Zero / Zero) || (Infinity / Infinity)                #
16235#                                                                       #
16236#########################################################################
16237
16238        global          res_operr
16239res_operr:
# Flag nan/operr/aiop in the saved user FPSR and load the constant at
# nan_return as the result.
16240        or.l            &nan_mask+operr_mask+aiop_mask, USER_FPSR(%a6)
16241        fmovm.x         nan_return(%pc), &0x80  # load default NAN (pc-relative)
16242        rts
16243
# three-longword default NAN image loaded by res_operr
16244nan_return:
16245        long            0x7fff0000, 0xffffffff, 0xffffffff
16246
16247#########################################################################
16248# fdbcc(): routine to emulate the fdbcc instruction                     #
16249#                                                                       #
16250# XDEF **************************************************************** #
16251#       _fdbcc()                                                        #
16252#                                                                       #
16253# XREF **************************************************************** #
16254#       fetch_dreg() - fetch Dn value                                   #
16255#       store_dreg_l() - store updated Dn value                         #
16256#                                                                       #
16257# INPUT *************************************************************** #
16258#       d0 = displacement                                               #
16259#                                                                       #
16260# OUTPUT ************************************************************** #
16261#       none                                                            #
16262#                                                                       #
16263# ALGORITHM *********************************************************** #
16264#       This routine checks which conditional predicate is specified by #
16265# the stacked fdbcc instruction opcode and then branches to a routine   #
16266# for that predicate. The corresponding fbcc instruction is then used   #
16267# to see whether the condition (specified by the stacked FPSR) is true  #
16268# or false.                                                             #
16269#       If a BSUN exception should be indicated, the BSUN and ABSUN     #
16270# bits are set in the stacked FPSR. If the BSUN exception is enabled,   #
16271# the fbsun_flg is set in the SPCOND_FLG location on the stack. If an   #
16272# enabled BSUN should not be flagged and the predicate is false, then   #
16273# Dn is fetched and decremented by one. If Dn is not equal to -1, add   #
16274# the displacement value to the stacked PC so that when an "rte" is     #
16275# finally executed, the branch occurs.                                  #
16276#                                                                       #
16277#########################################################################
16278        global          _fdbcc
16279_fdbcc:
# Dispatch on the predicate word: the saved fp condition codes are loaded
# into the live FPSR so that each handler can test them with a real fbcc,
# then control jumps through tbl_fdbcc to the handler for the predicate.
16280        mov.l           %d0,L_SCR1(%a6)         # save displacement
16281
16282        mov.w           EXC_CMDREG(%a6),%d0     # fetch predicate
16283
16284        clr.l           %d1                     # clear scratch reg
16285        mov.b           FPSR_CC(%a6),%d1        # fetch fp ccodes
16286        ror.l           &0x8,%d1                # rotate to top byte
16287        fmov.l          %d1,%fpsr               # insert into FPSR
16288
16289        mov.w           (tbl_fdbcc.b,%pc,%d0.w*2),%d1 # d1 = table entry for predicate
16290        jmp             (tbl_fdbcc.b,%pc,%d1.w) # jump to fdbcc routine
16291
# One 16-bit entry per predicate; each entry is the handler's offset from
# tbl_fdbcc. The trailing number is the predicate (table index).
16292tbl_fdbcc:
16293        short           fdbcc_f         -       tbl_fdbcc       # 00
16294        short           fdbcc_eq        -       tbl_fdbcc       # 01
16295        short           fdbcc_ogt       -       tbl_fdbcc       # 02
16296        short           fdbcc_oge       -       tbl_fdbcc       # 03
16297        short           fdbcc_olt       -       tbl_fdbcc       # 04
16298        short           fdbcc_ole       -       tbl_fdbcc       # 05
16299        short           fdbcc_ogl       -       tbl_fdbcc       # 06
16300        short           fdbcc_or        -       tbl_fdbcc       # 07
16301        short           fdbcc_un        -       tbl_fdbcc       # 08
16302        short           fdbcc_ueq       -       tbl_fdbcc       # 09
16303        short           fdbcc_ugt       -       tbl_fdbcc       # 10
16304        short           fdbcc_uge       -       tbl_fdbcc       # 11
16305        short           fdbcc_ult       -       tbl_fdbcc       # 12
16306        short           fdbcc_ule       -       tbl_fdbcc       # 13
16307        short           fdbcc_neq       -       tbl_fdbcc       # 14
16308        short           fdbcc_t         -       tbl_fdbcc       # 15
16309        short           fdbcc_sf        -       tbl_fdbcc       # 16
16310        short           fdbcc_seq       -       tbl_fdbcc       # 17
16311        short           fdbcc_gt        -       tbl_fdbcc       # 18
16312        short           fdbcc_ge        -       tbl_fdbcc       # 19
16313        short           fdbcc_lt        -       tbl_fdbcc       # 20
16314        short           fdbcc_le        -       tbl_fdbcc       # 21
16315        short           fdbcc_gl        -       tbl_fdbcc       # 22
16316        short           fdbcc_gle       -       tbl_fdbcc       # 23
16317        short           fdbcc_ngle      -       tbl_fdbcc       # 24
16318        short           fdbcc_ngl       -       tbl_fdbcc       # 25
16319        short           fdbcc_nle       -       tbl_fdbcc       # 26
16320        short           fdbcc_nlt       -       tbl_fdbcc       # 27
16321        short           fdbcc_nge       -       tbl_fdbcc       # 28
16322        short           fdbcc_ngt       -       tbl_fdbcc       # 29
16323        short           fdbcc_sneq      -       tbl_fdbcc       # 30
16324        short           fdbcc_st        -       tbl_fdbcc       # 31
16325
16326#########################################################################
16327#                                                                       #
16328# IEEE Nonaware tests                                                   #
16329#                                                                       #
16330# For the IEEE nonaware tests, only the false branch changes the        #
16331# counter. However, the true branch may set bsun so we check to see     #
16332# if the NAN bit is set, in which case BSUN and AIOP will be set.       #
16333#                                                                       #
16334# The cases EQ and NE are shared by the Aware and Nonaware groups       #
16335# and are incapable of setting the BSUN exception bit.                  #
16336#                                                                       #
16337# Typically, only one of the two possible branch directions could       #
16338# have the NAN bit set.                                                 #
16339# (This is assuming the mutual exclusiveness of FPSR cc bit groupings   #
16340#  is preserved.)                                                       #
16341#                                                                       #
16342#########################################################################
16343
16344#
16345# equal:
16346#
16347#       Z
16348#
16349fdbcc_eq:
# EQ never raises BSUN: true returns at once, false goes to the counter path.
16350        fbeq.w          fdbcc_eq_yes            # equal?
16351fdbcc_eq_no:
16352        bra.w           fdbcc_false             # no; go handle counter
16353fdbcc_eq_yes:
16354        rts                                     # yes; do nothing
16355
16356#
16357# not equal:
16358#       _
16359#       Z
16360#
16361fdbcc_neq:
# NEQ never raises BSUN: true returns at once, false goes to the counter path.
16362        fbneq.w         fdbcc_neq_yes           # not equal?
16363fdbcc_neq_no:
16364        bra.w           fdbcc_false             # no; go handle counter
16365fdbcc_neq_yes:
16366        rts                                     # yes; do nothing
16367
16368#
16369# greater than:
16370#       _______
16371#       NANvZvN
16372#
16373fdbcc_gt:
# True: return at once. False: if the NAN cc bit is set, flag BSUN/AIOP and
# take fdbcc_bsun when BSUN is enabled; otherwise go to the counter path.
16374        fbgt.w          fdbcc_gt_yes            # greater than?
16375        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
16376        beq.w           fdbcc_false             # no;go handle counter
16377        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
16378        btst            &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16379        bne.w           fdbcc_bsun              # yes; we have an exception
16380        bra.w           fdbcc_false             # no; go handle counter
16381fdbcc_gt_yes:
16382        rts                                     # do nothing
16383
16384#
16385# not greater than:
16386#
16387#       NANvZvN
16388#
16389fdbcc_ngt:
# False: go to the counter path. True: if the NAN cc bit is set, flag
# BSUN/AIOP and take fdbcc_bsun when BSUN is enabled; otherwise return.
16390        fbngt.w         fdbcc_ngt_yes           # not greater than?
16391fdbcc_ngt_no:
16392        bra.w           fdbcc_false             # no; go handle counter
16393fdbcc_ngt_yes:
16394        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
16395        beq.b           fdbcc_ngt_done          # no;go finish
16396        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
16397        btst            &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16398        bne.w           fdbcc_bsun              # yes; we have an exception
16399fdbcc_ngt_done:
16400        rts                                     # no; do nothing
16401
16402#
16403# greater than or equal:
16404#          _____
16405#       Zv(NANvN)
16406#
16407fdbcc_ge:
# Both outcomes check the NAN cc bit: when it is set BSUN/AIOP are flagged
# and fdbcc_bsun is taken if BSUN is enabled. Otherwise false goes to the
# counter path and true returns.
16408        fbge.w          fdbcc_ge_yes            # greater than or equal?
16409fdbcc_ge_no:
16410        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
16411        beq.w           fdbcc_false             # no;go handle counter
16412        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
16413        btst            &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16414        bne.w           fdbcc_bsun              # yes; we have an exception
16415        bra.w           fdbcc_false             # no; go handle counter
16416fdbcc_ge_yes:
16417        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
16418        beq.b           fdbcc_ge_yes_done       # no;go do nothing
16419        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
16420        btst            &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16421        bne.w           fdbcc_bsun              # yes; we have an exception
16422fdbcc_ge_yes_done:
16423        rts                                     # do nothing
16424
16425#
16426# not (greater than or equal):
16427#              _
16428#       NANv(N^Z)
16429#
16430fdbcc_nge:
# False: go to the counter path. True: if the NAN cc bit is set, flag
# BSUN/AIOP and take fdbcc_bsun when BSUN is enabled; otherwise return.
16431        fbnge.w         fdbcc_nge_yes           # not (greater than or equal)?
16432fdbcc_nge_no:
16433        bra.w           fdbcc_false             # no; go handle counter
16434fdbcc_nge_yes:
16435        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
16436        beq.b           fdbcc_nge_done          # no;go finish
16437        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
16438        btst            &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16439        bne.w           fdbcc_bsun              # yes; we have an exception
16440fdbcc_nge_done:
16441        rts                                     # no; do nothing
16442
16443#
16444# less than:
16445#          _____
16446#       N^(NANvZ)
16447#
16448fdbcc_lt:
# True: return at once. False: if the NAN cc bit is set, flag BSUN/AIOP and
# take fdbcc_bsun when BSUN is enabled; otherwise go to the counter path.
16449        fblt.w          fdbcc_lt_yes            # less than?
16450fdbcc_lt_no:
16451        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
16452        beq.w           fdbcc_false             # no; go handle counter
16453        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
16454        btst            &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16455        bne.w           fdbcc_bsun              # yes; we have an exception
16456        bra.w           fdbcc_false             # no; go handle counter
16457fdbcc_lt_yes:
16458        rts                                     # do nothing
16459
16460#
16461# not less than:
16462#              _
16463#       NANv(ZvN)
16464#
16465fdbcc_nlt:
# False: go to the counter path. True: if the NAN cc bit is set, flag
# BSUN/AIOP and take fdbcc_bsun when BSUN is enabled; otherwise return.
16466        fbnlt.w         fdbcc_nlt_yes           # not less than?
16467fdbcc_nlt_no:
16468        bra.w           fdbcc_false             # no; go handle counter
16469fdbcc_nlt_yes:
16470        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
16471        beq.b           fdbcc_nlt_done          # no;go finish
16472        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
16473        btst            &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16474        bne.w           fdbcc_bsun              # yes; we have an exception
16475fdbcc_nlt_done:
16476        rts                                     # no; do nothing
16477
16478#
16479# less than or equal:
16480#            ___
16481#       Zv(N^NAN)
16482#
16483fdbcc_le:
# Both outcomes check the NAN cc bit: when it is set BSUN/AIOP are flagged
# and fdbcc_bsun is taken if BSUN is enabled. Otherwise false goes to the
# counter path and true returns.
16484        fble.w          fdbcc_le_yes            # less than or equal?
16485fdbcc_le_no:
16486        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
16487        beq.w           fdbcc_false             # no; go handle counter
16488        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
16489        btst            &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16490        bne.w           fdbcc_bsun              # yes; we have an exception
16491        bra.w           fdbcc_false             # no; go handle counter
16492fdbcc_le_yes:
16493        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
16494        beq.b           fdbcc_le_yes_done       # no; go do nothing
16495        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
16496        btst            &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16497        bne.w           fdbcc_bsun              # yes; we have an exception
16498fdbcc_le_yes_done:
16499        rts                                     # do nothing
16500
16501#
16502# not (less than or equal):
16503#            ___
16504#       NANv(NvZ)
16505#
16506fdbcc_nle:
# False: go to the counter path. True: if the NAN cc bit is set, flag
# BSUN/AIOP and take fdbcc_bsun when BSUN is enabled; otherwise return.
16507        fbnle.w         fdbcc_nle_yes           # not (less than or equal)?
16508fdbcc_nle_no:
16509        bra.w           fdbcc_false             # no; go handle counter
16510fdbcc_nle_yes:
16511        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
16512        beq.w           fdbcc_nle_done          # no; go finish
16513        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
16514        btst            &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16515        bne.w           fdbcc_bsun              # yes; we have an exception
16516fdbcc_nle_done:
16517        rts                                     # no; do nothing
16518
16519#
16520# greater or less than:
16521#       _____
16522#       NANvZ
16523#
16524fdbcc_gl:
# True: return at once. False: if the NAN cc bit is set, flag BSUN/AIOP and
# take fdbcc_bsun when BSUN is enabled; otherwise go to the counter path.
16525        fbgl.w          fdbcc_gl_yes            # greater or less than?
16526fdbcc_gl_no:
16527        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
16528        beq.w           fdbcc_false             # no; handle counter
16529        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
16530        btst            &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16531        bne.w           fdbcc_bsun              # yes; we have an exception
16532        bra.w           fdbcc_false             # no; go handle counter
16533fdbcc_gl_yes:
16534        rts                                     # do nothing
16535
16536#
16537# not (greater or less than):
16538#
16539#       NANvZ
16540#
16541fdbcc_ngl:
# False: go to the counter path. True: if the NAN cc bit is set, flag
# BSUN/AIOP and take fdbcc_bsun when BSUN is enabled; otherwise return.
16542        fbngl.w         fdbcc_ngl_yes           # not (greater or less than)?
16543fdbcc_ngl_no:
16544        bra.w           fdbcc_false             # no; go handle counter
16545fdbcc_ngl_yes:
16546        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
16547        beq.b           fdbcc_ngl_done          # no; go finish
16548        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
16549        btst            &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16550        bne.w           fdbcc_bsun              # yes; we have an exception
16551fdbcc_ngl_done:
16552        rts                                     # no; do nothing
16553
16554#
16555# greater, less, or equal:
16556#       ___
16557#       NAN
16558#
16559fdbcc_gle:
16560        fbgle.w         fdbcc_gle_yes           # greater, less, or equal?
16561fdbcc_gle_no:
16562        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # no: set BSUN and AIOP bits
16563        btst            &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16564        bne.w           fdbcc_bsun              # yes; we have an exception
16565        bra.w           fdbcc_false             # no; go handle counter
16566fdbcc_gle_yes:
16567        rts                                     # condition true; do nothing
16568
16569#
16570# not (greater, less, or equal):
16571#
16572#       NAN
16573#
16574fdbcc_ngle:
16575        fbngle.w        fdbcc_ngle_yes          # not (greater, less, or equal)?
16576fdbcc_ngle_no:
16577        bra.w           fdbcc_false             # no; go handle counter
16578fdbcc_ngle_yes:
16579        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # yes: set BSUN and AIOP bits
16580        btst            &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16581        bne.w           fdbcc_bsun              # yes; we have an exception
16582        rts                                     # no; condition true, do nothing
16583
16584#########################################################################
16585#                                                                       #
16586# Miscellaneous tests                                                   #
16587#                                                                       #
16588# For the IEEE miscellaneous tests, all but fdbf and fdbt can set bsun. #
16589#                                                                       #
16590#########################################################################
16591
16592#
16593# false:
16594#
16595#       False
16596#
16597fdbcc_f:                                        # no bsun possible
16598        bra.w           fdbcc_false             # always false; go handle counter
16599
16600#
16601# true:
16602#
16603#       True
16604#
16605fdbcc_t:                                        # no bsun possible
16606        rts                                     # always true; do nothing
16607
16608#
16609# signalling false:
16610#
16611#       False
16612#
16613fdbcc_sf:
16614        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set?
16615        beq.w           fdbcc_false             # no;go handle counter
16616        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
16617        btst            &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16618        bne.w           fdbcc_bsun              # yes; we have an exception
16619        bra.w           fdbcc_false             # no; go handle counter
16620
16621#
16622# signalling true:
16623#
16624#       True
16625#
16626fdbcc_st:
16627        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set?
16628        beq.b           fdbcc_st_done           # no;go finish
16629        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
16630        btst            &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16631        bne.w           fdbcc_bsun              # yes; we have an exception
16632fdbcc_st_done:
16633        rts                                     # always true; do nothing
16634
16635#
16636# signalling equal:
16637#
16638#       Z
16639#
16640fdbcc_seq:
16641        fbseq.w         fdbcc_seq_yes           # signalling equal?
16642fdbcc_seq_no:
16643        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set?
16644        beq.w           fdbcc_false             # no;go handle counter
16645        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
16646        btst            &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16647        bne.w           fdbcc_bsun              # yes; we have an exception
16648        bra.w           fdbcc_false             # no; go handle counter
16649fdbcc_seq_yes:
16650        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set?
16651        beq.b           fdbcc_seq_yes_done      # no;go do nothing
16652        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
16653        btst            &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16654        bne.w           fdbcc_bsun              # yes; we have an exception
16655fdbcc_seq_yes_done:
16656        rts                                     # condition true; do nothing
16657
16658#
16659# signalling not equal:
16660#       _
16661#       Z
16662#
16663fdbcc_sneq:
16664        fbsneq.w        fdbcc_sneq_yes          # signalling not equal?
16665fdbcc_sneq_no:
16666        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set?
16667        beq.w           fdbcc_false             # no;go handle counter
16668        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
16669        btst            &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16670        bne.w           fdbcc_bsun              # yes; we have an exception
16671        bra.w           fdbcc_false             # no; go handle counter
16672fdbcc_sneq_yes:
16673        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set?
16674        beq.w           fdbcc_sneq_done         # no;go finish
16675        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
16676        btst            &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16677        bne.w           fdbcc_bsun              # yes; we have an exception
16678fdbcc_sneq_done:
16679        rts                                     # condition true; do nothing
16680
16681#########################################################################
16682#                                                                       #
16683# IEEE Aware tests                                                      #
16684#                                                                       #
16685# For the IEEE aware tests, action is only taken if the result is false.#
16686# Therefore, the opposite branch type is used to jump to the decrement  #
16687# routine.                                                              #
16688# The BSUN exception will not be set for any of these tests.            #
16689#                                                                       #
16690#########################################################################
16691
16692#
16693# ordered greater than:
16694#       _______
16695#       NANvZvN
16696#
16697fdbcc_ogt:
16698        fbogt.w         fdbcc_ogt_yes           # ordered greater than?
16699fdbcc_ogt_no:
16700        bra.w           fdbcc_false             # no; go decrement the counter
16701fdbcc_ogt_yes:
16702        rts                                     # yes; condition true, do nothing
16703
16704#
16705# unordered or less or equal:
16706#
16707#       NANvZvN
16708#
16709fdbcc_ule:
16710        fbule.w         fdbcc_ule_yes           # unordered or less or equal?
16711fdbcc_ule_no:
16712        bra.w           fdbcc_false             # no; go decrement the counter
16713fdbcc_ule_yes:
16714        rts                                     # yes; condition true, do nothing
16715
16716#
16717# ordered greater than or equal:
16718#          _____
16719#       Zv(NANvN)
16720#
16721fdbcc_oge:
16722        fboge.w         fdbcc_oge_yes           # ordered greater than or equal?
16723fdbcc_oge_no:
16724        bra.w           fdbcc_false             # no; go decrement the counter
16725fdbcc_oge_yes:
16726        rts                                     # yes; condition true, do nothing
16727
16728#
16729# unordered or less than:
16730#              _
16731#       NANv(N^Z)
16732#
16733fdbcc_ult:
16734        fbult.w         fdbcc_ult_yes           # unordered or less than?
16735fdbcc_ult_no:
16736        bra.w           fdbcc_false             # no; go decrement the counter
16737fdbcc_ult_yes:
16738        rts                                     # yes; condition true, do nothing
16739
16740#
16741# ordered less than:
16742#          _____
16743#       N^(NANvZ)
16744#
16745fdbcc_olt:
16746        fbolt.w         fdbcc_olt_yes           # ordered less than?
16747fdbcc_olt_no:
16748        bra.w           fdbcc_false             # no; go decrement the counter
16749fdbcc_olt_yes:
16750        rts                                     # yes; condition true, do nothing
16751
16752#
16753# unordered or greater or equal:
16754#             _
16755#       NANvZvN
16756#
16757fdbcc_uge:
16758        fbuge.w         fdbcc_uge_yes           # unordered or greater or equal?
16759fdbcc_uge_no:
16760        bra.w           fdbcc_false             # no; go handle counter
16761fdbcc_uge_yes:
16762        rts                                     # yes; do nothing
16763
16764#
16765# ordered less than or equal:
16766#            ___
16767#       Zv(N^NAN)
16768#
16769fdbcc_ole:
16770        fbole.w         fdbcc_ole_yes           # ordered less than or equal?
16771fdbcc_ole_no:
16772        bra.w           fdbcc_false             # no; go handle counter
16773fdbcc_ole_yes:
16774        rts                                     # yes; do nothing
16775
16776#
16777# unordered or greater than:
16778#            ___
16779#       NANv(NvZ)
16780#
16781fdbcc_ugt:
16782        fbugt.w         fdbcc_ugt_yes           # unordered or greater than?
16783fdbcc_ugt_no:
16784        bra.w           fdbcc_false             # no; go decrement the counter
16785fdbcc_ugt_yes:
16786        rts                                     # yes; condition true, do nothing
16787
16788#
16789# ordered greater or less than:
16790#       _____
16791#       NANvZ
16792#
16793fdbcc_ogl:
16794        fbogl.w         fdbcc_ogl_yes           # ordered greater or less than?
16795fdbcc_ogl_no:
16796        bra.w           fdbcc_false             # no; go decrement the counter
16797fdbcc_ogl_yes:
16798        rts                                     # yes; condition true, do nothing
16799
16800#
16801# unordered or equal:
16802#
16803#       NANvZ
16804#
16805fdbcc_ueq:
16806        fbueq.w         fdbcc_ueq_yes           # unordered or equal?
16807fdbcc_ueq_no:
16808        bra.w           fdbcc_false             # no; go decrement the counter
16809fdbcc_ueq_yes:
16810        rts                                     # yes; condition true, do nothing
16811
16812#
16813# ordered:
16814#       ___
16815#       NAN
16816#
16817fdbcc_or:
16818        fbor.w          fdbcc_or_yes            # ordered?
16819fdbcc_or_no:
16820        bra.w           fdbcc_false             # no; go decrement the counter
16821fdbcc_or_yes:
16822        rts                                     # yes; condition true, do nothing
16823
16824#
16825# unordered:
16826#
16827#       NAN
16828#
16829fdbcc_un:
16830        fbun.w          fdbcc_un_yes            # unordered?
16831fdbcc_un_no:
16832        bra.w           fdbcc_false             # no; go decrement the counter
16833fdbcc_un_yes:
16834        rts                                     # yes; condition true, do nothing
16835
16836#######################################################################
16837
16838#
16839# the bsun exception bit was not set.
16840#
16841# (1) subtract 1 from the count register
16842# (2) if (cr == -1) then
16843#       pc = pc of next instruction
16844#     else
16845#       pc += sign_ext(16-bit displacement)
16846#
16847fdbcc_false:
16848        mov.b           1+EXC_OPWORD(%a6), %d1  # fetch low byte of opword
16849        andi.w          &0x7, %d1               # low 3 bits = count register number
16850
16851        bsr.l           fetch_dreg              # fetch count value (used in d0 below)
16852# make sure that d0 isn't corrupted between calls...
16853
16854        subq.w          &0x1, %d0               # Dn - 1 -> Dn (low word only)
16855
16856        bsr.l           store_dreg_l            # store new count value
16857
16858        cmpi.w          %d0, &-0x1              # is (Dn == -1)?
16859        bne.b           fdbcc_false_cont        # no; go compute the branch target
16860        rts                                     # yes; count expired, EXC_PC not changed
16861
16862fdbcc_false_cont:
16863        mov.l           L_SCR1(%a6),%d0         # fetch displacement
16864        add.l           USER_FPIAR(%a6),%d0     # add instruction PC
16865        addq.l          &0x4,%d0                # add instruction length
16866        mov.l           %d0,EXC_PC(%a6)         # set new PC
16867        rts                                     # done; EXC_PC now holds branch target
16868
16869# the emulation routine set bsun and BSUN was enabled. have to
16870# fix stack and jump to the bsun handler.
16871# let the caller of this routine shift the stack frame up to
16872# eliminate the effective address field.
16873fdbcc_bsun:
16874        mov.b           &fbsun_flg,SPCOND_FLG(%a6) # tell caller an enabled BSUN occurred
16875        rts                                     # caller fixes stack, enters bsun handler
16876
16877#########################################################################
16878# ftrapcc(): routine to emulate the ftrapcc instruction                 #
16879#                                                                       #
16880# XDEF **************************************************************** #
16881#       _ftrapcc()                                                      #
16882#                                                                       #
16883# XREF **************************************************************** #
16884#       none                                                            #
16885#                                                                       #
16886# INPUT *************************************************************** #
16887#       none                                                            #
16888#                                                                       #
16889# OUTPUT ************************************************************** #
16890#       none                                                            #
16891#                                                                       #
16892# ALGORITHM *********************************************************** #
16893#       This routine checks which conditional predicate is specified by #
16894# the stacked ftrapcc instruction opcode and then branches to a routine #
16895# for that predicate. The corresponding fbcc instruction is then used   #
16896# to see whether the condition (specified by the stacked FPSR) is true  #
16897# or false.                                                             #
16898#       If a BSUN exception should be indicated, the BSUN and AIOP      #
16899# bits are set in the stacked FPSR. If the BSUN exception is enabled,   #
16900# the fbsun_flg is set in the SPCOND_FLG location on the stack. If an   #
16901# enabled BSUN should not be flagged and the predicate is true, then    #
16902# the ftrapcc_flg is set in the SPCOND_FLG location. These special      #
16903# flags indicate to the calling routine to emulate the exceptional      #
16904# condition.                                                            #
16905#                                                                       #
16906#########################################################################
16907
16908        global          _ftrapcc
16909_ftrapcc:
16910        mov.w           EXC_CMDREG(%a6),%d0     # fetch predicate (table index)
16911
16912        clr.l           %d1                     # clear scratch reg
16913        mov.b           FPSR_CC(%a6),%d1        # fetch fp ccodes
16914        ror.l           &0x8,%d1                # rotate to top byte
16915        fmov.l          %d1,%fpsr               # insert into FPSR so fbcc can test them
16916
16917        mov.w           (tbl_ftrapcc.b,%pc,%d0.w*2), %d1 # load routine offset from table
16918        jmp             (tbl_ftrapcc.b,%pc,%d1.w) # jump to ftrapcc routine
16919
16920tbl_ftrapcc:
16921        short           ftrapcc_f       -       tbl_ftrapcc     # 00
16922        short           ftrapcc_eq      -       tbl_ftrapcc     # 01
16923        short           ftrapcc_ogt     -       tbl_ftrapcc     # 02
16924        short           ftrapcc_oge     -       tbl_ftrapcc     # 03
16925        short           ftrapcc_olt     -       tbl_ftrapcc     # 04
16926        short           ftrapcc_ole     -       tbl_ftrapcc     # 05
16927        short           ftrapcc_ogl     -       tbl_ftrapcc     # 06
16928        short           ftrapcc_or      -       tbl_ftrapcc     # 07
16929        short           ftrapcc_un      -       tbl_ftrapcc     # 08
16930        short           ftrapcc_ueq     -       tbl_ftrapcc     # 09
16931        short           ftrapcc_ugt     -       tbl_ftrapcc     # 10
16932        short           ftrapcc_uge     -       tbl_ftrapcc     # 11
16933        short           ftrapcc_ult     -       tbl_ftrapcc     # 12
16934        short           ftrapcc_ule     -       tbl_ftrapcc     # 13
16935        short           ftrapcc_neq     -       tbl_ftrapcc     # 14
16936        short           ftrapcc_t       -       tbl_ftrapcc     # 15
16937        short           ftrapcc_sf      -       tbl_ftrapcc     # 16
16938        short           ftrapcc_seq     -       tbl_ftrapcc     # 17
16939        short           ftrapcc_gt      -       tbl_ftrapcc     # 18
16940        short           ftrapcc_ge      -       tbl_ftrapcc     # 19
16941        short           ftrapcc_lt      -       tbl_ftrapcc     # 20
16942        short           ftrapcc_le      -       tbl_ftrapcc     # 21
16943        short           ftrapcc_gl      -       tbl_ftrapcc     # 22
16944        short           ftrapcc_gle     -       tbl_ftrapcc     # 23
16945        short           ftrapcc_ngle    -       tbl_ftrapcc     # 24
16946        short           ftrapcc_ngl     -       tbl_ftrapcc     # 25
16947        short           ftrapcc_nle     -       tbl_ftrapcc     # 26
16948        short           ftrapcc_nlt     -       tbl_ftrapcc     # 27
16949        short           ftrapcc_nge     -       tbl_ftrapcc     # 28
16950        short           ftrapcc_ngt     -       tbl_ftrapcc     # 29
16951        short           ftrapcc_sneq    -       tbl_ftrapcc     # 30
16952        short           ftrapcc_st      -       tbl_ftrapcc     # 31
16953
16954#########################################################################
16955#                                                                       #
16956# IEEE Nonaware tests                                                   #
16957#                                                                       #
16958# For the IEEE nonaware tests, we set the result based on the           #
16959# floating point condition codes. In addition, we check to see          #
16960# if the NAN bit is set, in which case BSUN and AIOP will be set.       #
16961#                                                                       #
16962# The cases EQ and NE are shared by the Aware and Nonaware groups       #
16963# and are incapable of setting the BSUN exception bit.                  #
16964#                                                                       #
16965# Typically, only one of the two possible branch directions could       #
16966# have the NAN bit set.                                                 #
16967#                                                                       #
16968#########################################################################
16969
16970#
16971# equal:
16972#
16973#       Z
16974#
16975ftrapcc_eq:
16976        fbeq.w          ftrapcc_trap            # equal? yes; go take trap
16977ftrapcc_eq_no:
16978        rts                                     # no; do nothing
16979
16980#
16981# not equal:
16982#       _
16983#       Z
16984#
16985ftrapcc_neq:
16986        fbneq.w         ftrapcc_trap            # not equal? yes; go take trap
16987ftrapcc_neq_no:
16988        rts                                     # no; do nothing
16989
16990#
16991# greater than:
16992#       _______
16993#       NANvZvN
16994#
16995ftrapcc_gt:
16996        fbgt.w          ftrapcc_trap            # greater than? yes; go take trap
16997ftrapcc_gt_no:
16998        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
16999        beq.b           ftrapcc_gt_done         # no
17000        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17001        btst            &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17002        bne.w           ftrapcc_bsun            # yes
17003ftrapcc_gt_done:
17004        rts                                     # no; do nothing
17005
17006#
17007# not greater than:
17008#
17009#       NANvZvN
17010#
17011ftrapcc_ngt:
17012        fbngt.w         ftrapcc_ngt_yes         # not greater than?
17013ftrapcc_ngt_no:
17014        rts                                     # no; do nothing
17015ftrapcc_ngt_yes:
17016        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
17017        beq.w           ftrapcc_trap            # no; go take trap
17018        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17019        btst            &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17020        bne.w           ftrapcc_bsun            # yes
17021        bra.w           ftrapcc_trap            # no; go take trap
17022
17023#
17024# greater than or equal:
17025#          _____
17026#       Zv(NANvN)
17027#
17028ftrapcc_ge:
17029        fbge.w          ftrapcc_ge_yes          # greater than or equal?
17030ftrapcc_ge_no:
17031        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
17032        beq.b           ftrapcc_ge_done         # no; go finish
17033        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17034        btst            &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17035        bne.w           ftrapcc_bsun            # yes
17036ftrapcc_ge_done:
17037        rts                                     # no; do nothing
17038ftrapcc_ge_yes:
17039        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
17040        beq.w           ftrapcc_trap            # no; go take trap
17041        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17042        btst            &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17043        bne.w           ftrapcc_bsun            # yes
17044        bra.w           ftrapcc_trap            # no; go take trap
17045
17046#
17047# not (greater than or equal):
17048#              _
17049#       NANv(N^Z)
17050#
17051ftrapcc_nge:
17052        fbnge.w         ftrapcc_nge_yes         # not (greater than or equal)?
17053ftrapcc_nge_no:
17054        rts                                     # no; do nothing
17055ftrapcc_nge_yes:
17056        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
17057        beq.w           ftrapcc_trap            # no; go take trap
17058        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17059        btst            &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17060        bne.w           ftrapcc_bsun            # yes
17061        bra.w           ftrapcc_trap            # no; go take trap
17062
17063#
17064# less than:
17065#          _____
17066#       N^(NANvZ)
17067#
17068ftrapcc_lt:
17069        fblt.w          ftrapcc_trap            # less than? yes; go take trap
17070ftrapcc_lt_no:
17071        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
17072        beq.b           ftrapcc_lt_done         # no; go finish
17073        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17074        btst            &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17075        bne.w           ftrapcc_bsun            # yes
17076ftrapcc_lt_done:
17077        rts                                     # no; do nothing
17078
17079#
17080# not less than:
17081#              _
17082#       NANv(ZvN)
17083#
17084ftrapcc_nlt:
17085        fbnlt.w         ftrapcc_nlt_yes         # not less than?
17086ftrapcc_nlt_no:
17087        rts                                     # no; do nothing
17088ftrapcc_nlt_yes:
17089        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
17090        beq.w           ftrapcc_trap            # no; go take trap
17091        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17092        btst            &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17093        bne.w           ftrapcc_bsun            # yes
17094        bra.w           ftrapcc_trap            # no; go take trap
17095
17096#
17097# less than or equal:
17098#            ___
17099#       Zv(N^NAN)
17100#
17101ftrapcc_le:
17102        fble.w          ftrapcc_le_yes          # less than or equal?
17103ftrapcc_le_no:
17104        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
17105        beq.b           ftrapcc_le_done         # no; go finish
17106        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17107        btst            &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17108        bne.w           ftrapcc_bsun            # yes
17109ftrapcc_le_done:
17110        rts                                     # no; do nothing
17111ftrapcc_le_yes:
17112        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
17113        beq.w           ftrapcc_trap            # no; go take trap
17114        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17115        btst            &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17116        bne.w           ftrapcc_bsun            # yes
17117        bra.w           ftrapcc_trap            # no; go take trap
17118
17119#
17120# not (less than or equal):
17121#            ___
17122#       NANv(NvZ)
17123#
17124ftrapcc_nle:
17125        fbnle.w         ftrapcc_nle_yes         # not (less than or equal)?
17126ftrapcc_nle_no:
17127        rts                                     # no; do nothing
17128ftrapcc_nle_yes:
17129        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
17130        beq.w           ftrapcc_trap            # no; go take trap
17131        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17132        btst            &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17133        bne.w           ftrapcc_bsun            # yes
17134        bra.w           ftrapcc_trap            # no; go take trap
17135
17136#
17137# greater or less than:
17138#       _____
17139#       NANvZ
17140#
17141ftrapcc_gl:
17142        fbgl.w          ftrapcc_trap            # greater or less than? yes; go take trap
17143ftrapcc_gl_no:
17144        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
17145        beq.b           ftrapcc_gl_done         # no; go finish
17146        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17147        btst            &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17148        bne.w           ftrapcc_bsun            # yes
17149ftrapcc_gl_done:
17150        rts                                     # no; do nothing
17151
17152#
17153# not (greater or less than):
17154#
17155#       NANvZ
17156#
17157ftrapcc_ngl:
17158        fbngl.w         ftrapcc_ngl_yes         # not (greater or less than)?
17159ftrapcc_ngl_no:
17160        rts                                     # no; do nothing
17161ftrapcc_ngl_yes:
17162        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
17163        beq.w           ftrapcc_trap            # no; go take trap
17164        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17165        btst            &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17166        bne.w           ftrapcc_bsun            # yes
17167        bra.w           ftrapcc_trap            # no; go take trap
17168
17169#
17170# greater, less, or equal:
17171#       ___
17172#       NAN
17173#
17174ftrapcc_gle:
17175        fbgle.w         ftrapcc_trap            # greater, less, or equal? yes; go take trap
17176ftrapcc_gle_no:
17177        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # no: set BSUN and AIOP bits
17178        btst            &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17179        bne.w           ftrapcc_bsun            # yes
17180        rts                                     # no; do nothing
17181
17182#
17183# not (greater, less, or equal):
17184#
17185#       NAN
17186#
17187ftrapcc_ngle:
17188        fbngle.w        ftrapcc_ngle_yes        # not (greater, less, or equal)?
17189ftrapcc_ngle_no:
17190        rts                                     # no; do nothing
17191ftrapcc_ngle_yes:
17192        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # yes: set BSUN and AIOP bits
17193        btst            &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17194        bne.w           ftrapcc_bsun            # yes
17195        bra.w           ftrapcc_trap            # no; go take trap
17196
17197#########################################################################
17198#                                                                       #
17199# Miscellaneous tests                                                   #
17200#                                                                       #
17201# For the IEEE miscellaneous tests, all but ftrapf and ftrapt can set   #
17202# BSUN: each one tests the NAN condition code bit and, if it is set,    #
17203# sets the BSUN and AIOP bits in the stacked FPSR.                      #
17204#                                                                       #
17205#########################################################################
17206
17207#
17208# false:
17209#
17210#       False
17211#
17212ftrapcc_f:
17213        rts                                     # always false; do nothing
17214
17215#
17216# true:
17217#
17218#       True
17219#
17220ftrapcc_t:
17221        bra.w           ftrapcc_trap            # always true; go take trap
17222
17223#
17224# signalling false:
17225#
17226#       False
17227#
17228ftrapcc_sf:
17229        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
17230        beq.b           ftrapcc_sf_done         # no; go finish
17231        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17232        btst            &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17233        bne.w           ftrapcc_bsun            # yes
17234ftrapcc_sf_done:
17235        rts                                     # no; do nothing
17236
17237#
17238# signalling true:
17239#
17240#       True
17241#
17242ftrapcc_st:
17243        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
17244        beq.w           ftrapcc_trap            # no; go take trap
17245        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17246        btst            &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17247        bne.w           ftrapcc_bsun            # yes
17248        bra.w           ftrapcc_trap            # no; go take trap
17249
17250#
17251# signalling equal:
17252#
17253#       Z
17254#
17255ftrapcc_seq:
17256        fbseq.w         ftrapcc_seq_yes         # signalling equal?
17257ftrapcc_seq_no:
17258        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
17259        beq.w           ftrapcc_seq_done        # no; go finish
17260        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17261        btst            &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17262        bne.w           ftrapcc_bsun            # yes
17263ftrapcc_seq_done:
17264        rts                                     # no; do nothing
17265ftrapcc_seq_yes:
17266        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
17267        beq.w           ftrapcc_trap            # no; go take trap
17268        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17269        btst            &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17270        bne.w           ftrapcc_bsun            # yes
17271        bra.w           ftrapcc_trap            # no; go take trap
17272
17273#
17274# signalling not equal:
17275#       _
17276#       Z
17277#
17278ftrapcc_sneq:
17279        fbsneq.w        ftrapcc_sneq_yes        # signalling not equal?
17280ftrapcc_sneq_no:
17281        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
17282        beq.w           ftrapcc_sneq_no_done    # no; go finish
17283        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17284        btst            &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17285        bne.w           ftrapcc_bsun            # yes
17286ftrapcc_sneq_no_done:
17287        rts                                     # do nothing
17288ftrapcc_sneq_yes:
17289        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
17290        beq.w           ftrapcc_trap            # no; go take trap
17291        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17292        btst            &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17293        bne.w           ftrapcc_bsun            # yes
17294        bra.w           ftrapcc_trap            # no; go take trap
17295
17296#########################################################################
17297#                                                                       #
17298# IEEE Aware tests                                                      #
17299#                                                                       #
17300# For the IEEE aware tests, we only have to set the result based on the #
17301# floating point condition codes. The BSUN exception will not be        #
17302# set for any of these tests.                                           #
17303#                                                                       #
17304#########################################################################
17305
17306#
17307# ordered greater than:
17308#       _______
17309#       NANvZvN
17310#
# Goes to ftrapcc_trap when OGT holds; otherwise returns with nothing changed.
#
17311ftrapcc_ogt:
17312        fbogt.w         ftrapcc_trap            # ordered greater than? yes; take trap
17313ftrapcc_ogt_no:
17314        rts                                     # no; do nothing
17315
17316#
17317# unordered or less or equal:
17318#
17319#       NANvZvN
17320#
# Goes to ftrapcc_trap when ULE holds; otherwise returns with nothing changed.
# (ULE is the complement of OGT, so the expression carries no overbar.)
#
17321ftrapcc_ule:
17322        fbule.w         ftrapcc_trap            # unordered or less or equal? yes; take trap
17323ftrapcc_ule_no:
17324        rts                                     # no; do nothing
17325
17326#
17327# ordered greater than or equal:
17328#          _____
17329#       Zv(NANvN)
17330#
# Goes to ftrapcc_trap when OGE holds; otherwise returns with nothing changed.
#
17331ftrapcc_oge:
17332        fboge.w         ftrapcc_trap            # ordered greater than or equal? yes; take trap
17333ftrapcc_oge_no:
17334        rts                                     # no; do nothing
17335
17336#
17337# unordered or less than:
17338#              _
17339#       NANv(N^Z)
17340#
# Goes to ftrapcc_trap when ULT holds; otherwise returns with nothing changed.
#
17341ftrapcc_ult:
17342        fbult.w         ftrapcc_trap            # unordered or less than? yes; take trap
17343ftrapcc_ult_no:
17344        rts                                     # no; do nothing
17345
17346#
17347# ordered less than:
17348#          _____
17349#       N^(NANvZ)
17350#
# Goes to ftrapcc_trap when OLT holds; otherwise returns with nothing changed.
#
17351ftrapcc_olt:
17352        fbolt.w         ftrapcc_trap            # ordered less than? yes; take trap
17353ftrapcc_olt_no:
17354        rts                                     # no; do nothing
17355
17356#
17357# unordered or greater or equal:
17358#             _
17359#       NANvZvN
17360#
# Goes to ftrapcc_trap when UGE holds; otherwise returns with nothing changed.
# (UGE is the complement of OLT, hence the overbar on N only.)
#
17361ftrapcc_uge:
17362        fbuge.w         ftrapcc_trap            # unordered or greater or equal? yes; take trap
17363ftrapcc_uge_no:
17364        rts                                     # no; do nothing
17365
17366#
17367# ordered less than or equal:
17368#            ___
17369#       Zv(N^NAN)
17370#
# Goes to ftrapcc_trap when OLE holds; otherwise returns with nothing changed.
#
17371ftrapcc_ole:
17372        fbole.w         ftrapcc_trap            # ordered less than or equal? yes; take trap
17373ftrapcc_ole_no:
17374        rts                                     # no; do nothing
17375
17376#
17377# unordered or greater than:
17378#            ___
17379#       NANv(NvZ)
17380#
# Goes to ftrapcc_trap when UGT holds; otherwise returns with nothing changed.
#
17381ftrapcc_ugt:
17382        fbugt.w         ftrapcc_trap            # unordered or greater than? yes; take trap
17383ftrapcc_ugt_no:
17384        rts                                     # no; do nothing
17385
17386#
17387# ordered greater or less than:
17388#       _____
17389#       NANvZ
17390#
# Goes to ftrapcc_trap when OGL holds; otherwise returns with nothing changed.
#
17391ftrapcc_ogl:
17392        fbogl.w         ftrapcc_trap            # ordered greater or less than? yes; take trap
17393ftrapcc_ogl_no:
17394        rts                                     # no; do nothing
17395
17396#
17397# unordered or equal:
17398#
17399#       NANvZ
17400#
# Goes to ftrapcc_trap when UEQ holds; otherwise returns with nothing changed.
#
17401ftrapcc_ueq:
17402        fbueq.w         ftrapcc_trap            # unordered or equal? yes; take trap
17403ftrapcc_ueq_no:
17404        rts                                     # no; do nothing
17405
17406#
17407# ordered:
17408#       ___
17409#       NAN
17410#
# Goes to ftrapcc_trap when the operands are ordered (NAN clear); otherwise
# returns with nothing changed.
#
17411ftrapcc_or:
17412        fbor.w          ftrapcc_trap            # ordered? yes; take trap
17413ftrapcc_or_no:
17414        rts                                     # no; do nothing
17415
17416#
17417# unordered:
17418#
17419#       NAN
17420#
# Goes to ftrapcc_trap when the operands are unordered (NAN set); otherwise
# returns with nothing changed.
#
17421ftrapcc_un:
17422        fbun.w          ftrapcc_trap            # unordered? yes; take trap
17423ftrapcc_un_no:
17424        rts                                     # no; do nothing
17425
17426#######################################################################
17427
17428# the predicate is true and no enabled BSUN is pending (BSUN was either
17429# not raised, or raised while disabled). the ftrapcc trap must be taken.
17430# the stack frame is the same size as that of the fp unimp instruction.
17431# the only difference is that the <ea> field should hold the PC
17432# of the ftrapcc instruction and the vector offset field
17433# should denote the ftrapcc trap.
# this routine only records the request in SPCOND_FLG and returns.
# NOTE(review): the frame fix-up and the jump to the vector are presumably
# done by the caller once it sees ftrapcc_flg - not visible here; confirm.
17434ftrapcc_trap:
17435        mov.b           &ftrapcc_flg,SPCOND_FLG(%a6) # tell caller: take ftrapcc trap
17436        rts
17437
17438# the emulation routine set bsun and BSUN was enabled. have to
17439# fix stack and jump to the bsun handler.
17440# let the caller of this routine shift the stack frame up to
17441# eliminate the effective address field.
# this routine itself only records the request in SPCOND_FLG and returns.
17442ftrapcc_bsun:
17443        mov.b           &fbsun_flg,SPCOND_FLG(%a6) # tell caller: enabled BSUN pending
17444        rts
17445
17446#########################################################################
17447# fscc(): routine to emulate the fscc instruction                       #
17448#                                                                       #
17449# XDEF **************************************************************** #
17450#       _fscc()                                                         #
17451#                                                                       #
17452# XREF **************************************************************** #
17453#       store_dreg_b() - store result to data register file             #
17454#       dec_areg() - decrement an areg for -(an) mode                   #
17455#       inc_areg() - increment an areg for (an)+ mode                   #
17456#       _dmem_write_byte() - store result to memory                     #
17457#                                                                       #
17458# INPUT *************************************************************** #
17459#       none                                                            #
17460#                                                                       #
17461# OUTPUT ************************************************************** #
17462#       none                                                            #
17463#                                                                       #
17464# ALGORITHM *********************************************************** #
17465#       This routine checks which conditional predicate is specified by #
17466# the stacked fscc instruction opcode and then branches to a routine    #
17467# for that predicate. The corresponding fbcc instruction is then used   #
17468# to see whether the condition (specified by the stacked FPSR) is true  #
17469# or false.                                                             #
17470#       If a BSUN exception should be indicated, the BSUN and AIOP      #
17471# bits are set in the stacked FPSR. If the BSUN exception is enabled,   #
17472# the fbsun_flg is set in the SPCOND_FLG location on the stack. If an   #
17473# enabled BSUN should not be flagged, the true/false result byte is     #
17474# stored to the data register file or memory.                           #
17475#                                                                       #
17476#########################################################################
17477
17478        global          _fscc
17479_fscc:
        # No register arguments: everything is read from the frame at %a6
        # (EXC_CMDREG, FPSR_CC here; EXC_OPWORD etc. further down).
17480        mov.w           EXC_CMDREG(%a6),%d0     # fetch predicate
        # NOTE(review): d0.w is used below as the table index without being
        # masked; this assumes EXC_CMDREG holds only a predicate in 0-31 -
        # confirm against the caller.
17481
        # Copy the saved condition-code byte into the top byte of the live
        # FPSR so that the fbcc instructions in the per-predicate routines
        # test it.
17482        clr.l           %d1                     # clear scratch reg
17483        mov.b           FPSR_CC(%a6),%d1        # fetch fp ccodes
17484        ror.l           &0x8,%d1                # rotate to top byte
17485        fmov.l          %d1,%fpsr               # insert into FPSR
17486
        # Two-step dispatch: tbl_fscc holds 16-bit offsets relative to
        # tbl_fscc itself, one per predicate.
17487        mov.w           (tbl_fscc.b,%pc,%d0.w*2),%d1 # load table offset for predicate
17488        jmp             (tbl_fscc.b,%pc,%d1.w)  # jump to fscc routine
17489
17490tbl_fscc:
17491        short           fscc_f          -       tbl_fscc        # 00
17492        short           fscc_eq         -       tbl_fscc        # 01
17493        short           fscc_ogt        -       tbl_fscc        # 02
17494        short           fscc_oge        -       tbl_fscc        # 03
17495        short           fscc_olt        -       tbl_fscc        # 04
17496        short           fscc_ole        -       tbl_fscc        # 05
17497        short           fscc_ogl        -       tbl_fscc        # 06
17498        short           fscc_or         -       tbl_fscc        # 07
17499        short           fscc_un         -       tbl_fscc        # 08
17500        short           fscc_ueq        -       tbl_fscc        # 09
17501        short           fscc_ugt        -       tbl_fscc        # 10
17502        short           fscc_uge        -       tbl_fscc        # 11
17503        short           fscc_ult        -       tbl_fscc        # 12
17504        short           fscc_ule        -       tbl_fscc        # 13
17505        short           fscc_neq        -       tbl_fscc        # 14
17506        short           fscc_t          -       tbl_fscc        # 15
17507        short           fscc_sf         -       tbl_fscc        # 16
17508        short           fscc_seq        -       tbl_fscc        # 17
17509        short           fscc_gt         -       tbl_fscc        # 18
17510        short           fscc_ge         -       tbl_fscc        # 19
17511        short           fscc_lt         -       tbl_fscc        # 20
17512        short           fscc_le         -       tbl_fscc        # 21
17513        short           fscc_gl         -       tbl_fscc        # 22
17514        short           fscc_gle        -       tbl_fscc        # 23
17515        short           fscc_ngle       -       tbl_fscc        # 24
17516        short           fscc_ngl        -       tbl_fscc        # 25
17517        short           fscc_nle        -       tbl_fscc        # 26
17518        short           fscc_nlt        -       tbl_fscc        # 27
17519        short           fscc_nge        -       tbl_fscc        # 28
17520        short           fscc_ngt        -       tbl_fscc        # 29
17521        short           fscc_sneq       -       tbl_fscc        # 30
17522        short           fscc_st         -       tbl_fscc        # 31
17523
17524#########################################################################
17525#                                                                       #
17526# IEEE Nonaware tests                                                   #
17527#                                                                       #
17528# For the IEEE nonaware tests, we set the result based on the           #
17529# floating point condition codes. In addition, we check to see          #
17530# if the NAN bit is set, in which case BSUN and AIOP will be set.       #
17531#                                                                       #
17532# The cases EQ and NE are shared by the Aware and Nonaware groups       #
17533# and are incapable of setting the BSUN exception bit.                  #
17534#                                                                       #
17535# Typically, only one of the two possible branch directions could       #
17536# have the NAN bit set.                                                 #
17537#                                                                       #
17538#########################################################################
17539
17540#
17541# equal:
17542#
17543#       Z
17544#
# Selects the result byte in d0 (true via st, false via clr.b) and goes to
# fscc_done to store it. No NAN/BSUN handling for this predicate.
#
17545fscc_eq:
17546        fbeq.w          fscc_eq_yes             # equal?
17547fscc_eq_no:
17548        clr.b           %d0                     # set false
17549        bra.w           fscc_done               # go finish
17550fscc_eq_yes:
17551        st              %d0                     # set true
17552        bra.w           fscc_done               # go finish
17553
17554#
17555# not equal:
17556#       _
17557#       Z
17558#
# Selects the result byte in d0 (true via st, false via clr.b) and goes to
# fscc_done to store it. No NAN/BSUN handling for this predicate.
#
17559fscc_neq:
17560        fbneq.w         fscc_neq_yes            # not equal?
17561fscc_neq_no:
17562        clr.b           %d0                     # set false
17563        bra.w           fscc_done               # go finish
17564fscc_neq_yes:
17565        st              %d0                     # set true
17566        bra.w           fscc_done               # go finish
17567
17568#
17569# greater than:
17570#       _______
17571#       NANvZvN
17572#
# Selects the result byte in d0. Only the false path can have NAN set; in
# that case BSUN and AIOP are recorded in USER_FPSR and fscc_chk_bsun
# decides whether the result is still stored.
#
17573fscc_gt:
17574        fbgt.w          fscc_gt_yes             # greater than?
17575fscc_gt_no:
17576        clr.b           %d0                     # set false
17577        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
17578        beq.w           fscc_done               # no;go finish
17579        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
17580        bra.w           fscc_chk_bsun           # go check BSUN enable
17581fscc_gt_yes:
17582        st              %d0                     # set true
17583        bra.w           fscc_done               # go finish
17584
17585#
17586# not greater than:
17587#
17588#       NANvZvN
17589#
# Selects the result byte in d0. Only the true path can have NAN set; in
# that case BSUN and AIOP are recorded in USER_FPSR and fscc_chk_bsun
# decides whether the result is still stored.
#
17590fscc_ngt:
17591        fbngt.w         fscc_ngt_yes            # not greater than?
17592fscc_ngt_no:
17593        clr.b           %d0                     # set false
17594        bra.w           fscc_done               # go finish
17595fscc_ngt_yes:
17596        st              %d0                     # set true
17597        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
17598        beq.w           fscc_done               # no;go finish
17599        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
17600        bra.w           fscc_chk_bsun           # go check BSUN enable
17601
17602#
17603# greater than or equal:
17604#          _____
17605#       Zv(NANvN)
17606#
# Selects the result byte in d0. Both paths test NAN; when it is set, BSUN
# and AIOP are recorded in USER_FPSR and fscc_chk_bsun decides whether the
# result is still stored.
#
17607fscc_ge:
17608        fbge.w          fscc_ge_yes             # greater than or equal?
17609fscc_ge_no:
17610        clr.b           %d0                     # set false
17611        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
17612        beq.w           fscc_done               # no;go finish
17613        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
17614        bra.w           fscc_chk_bsun           # go check BSUN enable
17615fscc_ge_yes:
17616        st              %d0                     # set true
17617        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
17618        beq.w           fscc_done               # no;go finish
17619        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
17620        bra.w           fscc_chk_bsun           # go check BSUN enable
17621
17622#
17623# not (greater than or equal):
17624#              _
17625#       NANv(N^Z)
17626#
# Selects the result byte in d0. Only the true path tests NAN; when it is
# set, BSUN and AIOP are recorded in USER_FPSR and fscc_chk_bsun decides
# whether the result is still stored.
#
17627fscc_nge:
17628        fbnge.w         fscc_nge_yes            # not (greater than or equal)?
17629fscc_nge_no:
17630        clr.b           %d0                     # set false
17631        bra.w           fscc_done               # go finish
17632fscc_nge_yes:
17633        st              %d0                     # set true
17634        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
17635        beq.w           fscc_done               # no;go finish
17636        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
17637        bra.w           fscc_chk_bsun           # go check BSUN enable
17638
17639#
17640# less than:
17641#          _____
17642#       N^(NANvZ)
17643#
# Selects the result byte in d0. Only the false path tests NAN; when it is
# set, BSUN and AIOP are recorded in USER_FPSR and fscc_chk_bsun decides
# whether the result is still stored.
#
17644fscc_lt:
17645        fblt.w          fscc_lt_yes             # less than?
17646fscc_lt_no:
17647        clr.b           %d0                     # set false
17648        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
17649        beq.w           fscc_done               # no;go finish
17650        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
17651        bra.w           fscc_chk_bsun           # go check BSUN enable
17652fscc_lt_yes:
17653        st              %d0                     # set true
17654        bra.w           fscc_done               # go finish
17655
17656#
17657# not less than:
17658#              _
17659#       NANv(ZvN)
17660#
# Selects the result byte in d0. Only the true path tests NAN; when it is
# set, BSUN and AIOP are recorded in USER_FPSR and fscc_chk_bsun decides
# whether the result is still stored.
#
17661fscc_nlt:
17662        fbnlt.w         fscc_nlt_yes            # not less than?
17663fscc_nlt_no:
17664        clr.b           %d0                     # set false
17665        bra.w           fscc_done               # go finish
17666fscc_nlt_yes:
17667        st              %d0                     # set true
17668        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
17669        beq.w           fscc_done               # no;go finish
17670        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
17671        bra.w           fscc_chk_bsun           # go check BSUN enable
17672
17673#
17674# less than or equal:
17675#            ___
17676#       Zv(N^NAN)
17677#
# Selects the result byte in d0. Both paths test NAN; when it is set, BSUN
# and AIOP are recorded in USER_FPSR and fscc_chk_bsun decides whether the
# result is still stored.
#
17678fscc_le:
17679        fble.w          fscc_le_yes             # less than or equal?
17680fscc_le_no:
17681        clr.b           %d0                     # set false
17682        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
17683        beq.w           fscc_done               # no;go finish
17684        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
17685        bra.w           fscc_chk_bsun           # go check BSUN enable
17686fscc_le_yes:
17687        st              %d0                     # set true
17688        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
17689        beq.w           fscc_done               # no;go finish
17690        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
17691        bra.w           fscc_chk_bsun           # go check BSUN enable
17692
17693#
17694# not (less than or equal):
17695#            ___
17696#       NANv(NvZ)
17697#
# Selects the result byte in d0. Only the true path tests NAN; when it is
# set, BSUN and AIOP are recorded in USER_FPSR and fscc_chk_bsun decides
# whether the result is still stored.
#
17698fscc_nle:
17699        fbnle.w         fscc_nle_yes            # not (less than or equal)?
17700fscc_nle_no:
17701        clr.b           %d0                     # set false
17702        bra.w           fscc_done               # go finish
17703fscc_nle_yes:
17704        st              %d0                     # set true
17705        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
17706        beq.w           fscc_done               # no;go finish
17707        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
17708        bra.w           fscc_chk_bsun           # go check BSUN enable
17709
17710#
17711# greater or less than:
17712#       _____
17713#       NANvZ
17714#
# Selects the result byte in d0. Only the false path tests NAN; when it is
# set, BSUN and AIOP are recorded in USER_FPSR and fscc_chk_bsun decides
# whether the result is still stored.
#
17715fscc_gl:
17716        fbgl.w          fscc_gl_yes             # greater or less than?
17717fscc_gl_no:
17718        clr.b           %d0                     # set false
17719        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
17720        beq.w           fscc_done               # no;go finish
17721        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
17722        bra.w           fscc_chk_bsun           # go check BSUN enable
17723fscc_gl_yes:
17724        st              %d0                     # set true
17725        bra.w           fscc_done               # go finish
17726
17727#
17728# not (greater or less than):
17729#
17730#       NANvZ
17731#
# Selects the result byte in d0. Only the true path tests NAN; when it is
# set, BSUN and AIOP are recorded in USER_FPSR and fscc_chk_bsun decides
# whether the result is still stored.
#
17732fscc_ngl:
17733        fbngl.w         fscc_ngl_yes            # not (greater or less than)?
17734fscc_ngl_no:
17735        clr.b           %d0                     # set false
17736        bra.w           fscc_done               # go finish
17737fscc_ngl_yes:
17738        st              %d0                     # set true
17739        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
17740        beq.w           fscc_done               # no;go finish
17741        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
17742        bra.w           fscc_chk_bsun           # go check BSUN enable
17743
17744#
17745# greater, less, or equal:
17746#       ___
17747#       NAN
17748#
# Selects the result byte in d0. The predicate is false exactly when NAN is
# set, so the false path records BSUN and AIOP in USER_FPSR without a
# separate NAN test and then goes through fscc_chk_bsun.
#
17749fscc_gle:
17750        fbgle.w         fscc_gle_yes            # greater, less, or equal?
17751fscc_gle_no:
17752        clr.b           %d0                     # set false
17753        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
17754        bra.w           fscc_chk_bsun           # go check BSUN enable
17755fscc_gle_yes:
17756        st              %d0                     # set true
17757        bra.w           fscc_done               # go finish
17758
17759#
17760# not (greater, less, or equal):
17761#
17762#       NAN
17763#
# Selects the result byte in d0. The predicate is true exactly when NAN is
# set, so the true path records BSUN and AIOP in USER_FPSR without a
# separate NAN test and then goes through fscc_chk_bsun.
#
17764fscc_ngle:
17765        fbngle.w                fscc_ngle_yes   # not (greater, less, or equal)?
17766fscc_ngle_no:
17767        clr.b           %d0                     # set false
17768        bra.w           fscc_done               # go finish
17769fscc_ngle_yes:
17770        st              %d0                     # set true
17771        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
17772        bra.w           fscc_chk_bsun           # go check BSUN enable
17773
17774#########################################################################
17775#                                                                       #
17776# Miscellaneous tests                                                   #
17777#                                                                       #
17778# The F and T tests produce a constant result. The signalling tests     #
17779# (SF, ST, SEQ, SNEQ) additionally check the NAN bit and, when it is    #
17780# set, BSUN and AIOP are set in the stacked FPSR.                       #
17781#                                                                       #
17782#########################################################################
17783
17784#
17785# false:
17786#
17787#       False
17788#
# Constant false result; the condition codes are not consulted.
#
17789fscc_f:
17790        clr.b           %d0                     # set false
17791        bra.w           fscc_done               # go finish
17792
17793#
17794# true:
17795#
17796#       True
17797#
# Constant true result; the condition codes are not consulted.
#
17798fscc_t:
17799        st              %d0                     # set true
17800        bra.w           fscc_done               # go finish
17801
17802#
17803# signalling false:
17804#
17805#       False
17806#
# Constant false result. If NAN is set, BSUN and AIOP are recorded in
# USER_FPSR and fscc_chk_bsun decides whether the result is still stored.
#
17807fscc_sf:
17808        clr.b           %d0                     # set false
17809        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
17810        beq.w           fscc_done               # no;go finish
17811        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
17812        bra.w           fscc_chk_bsun           # go check BSUN enable
17813
17814#
17815# signalling true:
17816#
17817#       True
17818#
# Constant true result. If NAN is set, BSUN and AIOP are recorded in
# USER_FPSR and fscc_chk_bsun decides whether the result is still stored.
#
17819fscc_st:
17820        st              %d0                     # set true
17821        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
17822        beq.w           fscc_done               # no;go finish
17823        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
17824        bra.w           fscc_chk_bsun           # go check BSUN enable
17825
17826#
17827# signalling equal:
17828#
17829#       Z
17830#
# Selects the result byte in d0. Both paths test NAN; when it is set, BSUN
# and AIOP are recorded in USER_FPSR and fscc_chk_bsun decides whether the
# result is still stored.
#
17831fscc_seq:
17832        fbseq.w         fscc_seq_yes            # signalling equal?
17833fscc_seq_no:
17834        clr.b           %d0                     # set false
17835        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
17836        beq.w           fscc_done               # no;go finish
17837        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
17838        bra.w           fscc_chk_bsun           # go check BSUN enable
17839fscc_seq_yes:
17840        st              %d0                     # set true
17841        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
17842        beq.w           fscc_done               # no;go finish
17843        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
17844        bra.w           fscc_chk_bsun           # go check BSUN enable
17845
17846#
17847# signalling not equal:
17848#       _
17849#       Z
17850#
# Selects the result byte in d0. Both paths test NAN; when it is set, BSUN
# and AIOP are recorded in USER_FPSR and fscc_chk_bsun decides whether the
# result is still stored.
#
17851fscc_sneq:
17852        fbsneq.w        fscc_sneq_yes           # signalling not equal?
17853fscc_sneq_no:
17854        clr.b           %d0                     # set false
17855        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
17856        beq.w           fscc_done               # no;go finish
17857        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
17858        bra.w           fscc_chk_bsun           # go check BSUN enable
17859fscc_sneq_yes:
17860        st              %d0                     # set true
17861        btst            &nan_bit, FPSR_CC(%a6)  # is NAN set in cc?
17862        beq.w           fscc_done               # no;go finish
17863        ori.l           &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP
17864        bra.w           fscc_chk_bsun           # go check BSUN enable
17865
17866#########################################################################
17867#                                                                       #
17868# IEEE Aware tests                                                      #
17869#                                                                       #
17870# For the IEEE aware tests, we only have to set the result based on the #
17871# floating point condition codes. The BSUN exception will not be        #
17872# set for any of these tests.                                           #
17873#                                                                       #
17874#########################################################################
17875
17876#
17877# ordered greater than:
17878#       _______
17879#       NANvZvN
17880#
# Selects the result byte in d0 and goes to fscc_done; no BSUN handling.
#
17881fscc_ogt:
17882        fbogt.w         fscc_ogt_yes            # ordered greater than?
17883fscc_ogt_no:
17884        clr.b           %d0                     # set false
17885        bra.w           fscc_done               # go finish
17886fscc_ogt_yes:
17887        st              %d0                     # set true
17888        bra.w           fscc_done               # go finish
17889
17890#
17891# unordered or less or equal:
17892#
17893#       NANvZvN
17894#
# Selects the result byte in d0 and goes to fscc_done; no BSUN handling.
# (ULE is the complement of OGT, so the expression carries no overbar.)
#
17895fscc_ule:
17896        fbule.w         fscc_ule_yes            # unordered or less or equal?
17897fscc_ule_no:
17898        clr.b           %d0                     # set false
17899        bra.w           fscc_done               # go finish
17900fscc_ule_yes:
17901        st              %d0                     # set true
17902        bra.w           fscc_done               # go finish
17903
17904#
17905# ordered greater than or equal:
17906#          _____
17907#       Zv(NANvN)
17908#
# Selects the result byte in d0 and goes to fscc_done; no BSUN handling.
#
17909fscc_oge:
17910        fboge.w         fscc_oge_yes            # ordered greater than or equal?
17911fscc_oge_no:
17912        clr.b           %d0                     # set false
17913        bra.w           fscc_done               # go finish
17914fscc_oge_yes:
17915        st              %d0                     # set true
17916        bra.w           fscc_done               # go finish
17917
17918#
17919# unordered or less than:
17920#              _
17921#       NANv(N^Z)
17922#
# Selects the result byte in d0 and goes to fscc_done; no BSUN handling.
#
17923fscc_ult:
17924        fbult.w         fscc_ult_yes            # unordered or less than?
17925fscc_ult_no:
17926        clr.b           %d0                     # set false
17927        bra.w           fscc_done               # go finish
17928fscc_ult_yes:
17929        st              %d0                     # set true
17930        bra.w           fscc_done               # go finish
17931
17932#
17933# ordered less than:
17934#          _____
17935#       N^(NANvZ)
17936#
# Selects the result byte in d0 and goes to fscc_done; no BSUN handling.
#
17937fscc_olt:
17938        fbolt.w         fscc_olt_yes            # ordered less than?
17939fscc_olt_no:
17940        clr.b           %d0                     # set false
17941        bra.w           fscc_done               # go finish
17942fscc_olt_yes:
17943        st              %d0                     # set true
17944        bra.w           fscc_done               # go finish
17945
17946#
17947# unordered or greater or equal:
17948#             _
17949#       NANvZvN
17950#
# Selects the result byte in d0 and goes to fscc_done; no BSUN handling.
# (UGE is the complement of OLT, hence the overbar on N only.)
#
17951fscc_uge:
17952        fbuge.w         fscc_uge_yes            # unordered or greater or equal?
17953fscc_uge_no:
17954        clr.b           %d0                     # set false
17955        bra.w           fscc_done               # go finish
17956fscc_uge_yes:
17957        st              %d0                     # set true
17958        bra.w           fscc_done               # go finish
17959
17960#
17961# ordered less than or equal:
17962#            ___
17963#       Zv(N^NAN)
17964#
# Selects the result byte in d0 and goes to fscc_done; no BSUN handling.
#
17965fscc_ole:
17966        fbole.w         fscc_ole_yes            # ordered less than or equal?
17967fscc_ole_no:
17968        clr.b           %d0                     # set false
17969        bra.w           fscc_done               # go finish
17970fscc_ole_yes:
17971        st              %d0                     # set true
17972        bra.w           fscc_done               # go finish
17973
17974#
17975# unordered or greater than:
17976#            ___
17977#       NANv(NvZ)
17978#
# Selects the result byte in d0 and goes to fscc_done; no BSUN handling.
#
17979fscc_ugt:
17980        fbugt.w         fscc_ugt_yes            # unordered or greater than?
17981fscc_ugt_no:
17982        clr.b           %d0                     # set false
17983        bra.w           fscc_done               # go finish
17984fscc_ugt_yes:
17985        st              %d0                     # set true
17986        bra.w           fscc_done               # go finish
17987
17988#
17989# ordered greater or less than:
17990#       _____
17991#       NANvZ
17992#
# Selects the result byte in d0 and goes to fscc_done; no BSUN handling.
#
17993fscc_ogl:
17994        fbogl.w         fscc_ogl_yes            # ordered greater or less than?
17995fscc_ogl_no:
17996        clr.b           %d0                     # set false
17997        bra.w           fscc_done               # go finish
17998fscc_ogl_yes:
17999        st              %d0                     # set true
18000        bra.w           fscc_done               # go finish
18001
18002#
18003# unordered or equal:
18004#
18005#       NANvZ
18006#
# Selects the result byte in d0 and goes to fscc_done; no BSUN handling.
#
18007fscc_ueq:
18008        fbueq.w         fscc_ueq_yes            # unordered or equal?
18009fscc_ueq_no:
18010        clr.b           %d0                     # set false
18011        bra.w           fscc_done               # go finish
18012fscc_ueq_yes:
18013        st              %d0                     # set true
18014        bra.w           fscc_done               # go finish
18015
18016#
18017# ordered:
18018#       ___
18019#       NAN
18020#
# Selects the result byte in d0 and goes to fscc_done; no BSUN handling.
#
18021fscc_or:
18022        fbor.w          fscc_or_yes             # ordered?
18023fscc_or_no:
18024        clr.b           %d0                     # set false
18025        bra.w           fscc_done               # go finish
18026fscc_or_yes:
18027        st              %d0                     # set true
18028        bra.w           fscc_done               # go finish
18029
18030#
18031# unordered:
18032#
18033#       NAN
18034#
# Selects the result byte in d0 and goes to fscc_done; no BSUN handling.
#
18035fscc_un:
18036        fbun.w          fscc_un_yes             # unordered?
18037fscc_un_no:
18038        clr.b           %d0                     # set false
18039        bra.w           fscc_done               # go finish
18040fscc_un_yes:
18041        st              %d0                     # set true
18042        bra.w           fscc_done               # go finish
18043
18044#######################################################################
18045
18046#
18047# the bsun exception bit was set. now, check to see if BSUN
18048# is enabled. if so, don't store result and correct stack frame
18049# for a bsun exception.
18050#
18051fscc_chk_bsun:
18052        btst            &bsun_bit,FPCR_ENABLE(%a6) # is BSUN enabled?
18053        bne.w           fscc_bsun               # yes: flag it and skip the store
18054
18055#
18056# either the bsun exception bit was not set, or it was set but BSUN
18057# is not enabled (fall-through from above). the result is in d0.b.
18058# now, check to see if the result is to be stored in the data register
18059# file or in memory.
18060#
18061fscc_done:
18062        mov.l           %d0,%a0                 # park result in a0; d0 is reused below
18063
18064        mov.b           1+EXC_OPWORD(%a6),%d1   # fetch lo opword
18065        mov.l           %d1,%d0                 # make a copy
18066        andi.b          &0x38,%d1               # extract <ea> mode bits (5-3)
18067
18068        bne.b           fscc_mem_op             # mode != 000: it's a memory operation
18069
18070        mov.l           %d0,%d1                 # mode == 000 (Dn): recover lo opword
18071        andi.w          &0x7,%d1                # pass index in d1
18072        mov.l           %a0,%d0                 # pass result in d0
18073        bsr.l           store_dreg_b            # save result in regfile
18074        rts
18075
18076#
18077# the stacked <ea> is correct with the exception of:
18078#       -> Dn : <ea> is garbage
18079#
18080# if the addressing mode is post-increment or pre-decrement,
18081# then the address registers have not been updated.
18082# on entry: d1.b = lo opword & 0x38 (mode bits), a0 = result byte.
18083fscc_mem_op:
18084        cmpi.b          %d1,&0x18               # is <ea> (An)+ ? (mode 011)
18085        beq.b           fscc_mem_inc            # yes
18086        cmpi.b          %d1,&0x20               # is <ea> -(An) ? (mode 100)
18087        beq.b           fscc_mem_dec            # yes
18088
18089        mov.l           %a0,%d0                 # pass result in d0
18090        mov.l           EXC_EA(%a6),%a0         # fetch <ea>
18091        bsr.l           _dmem_write_byte        # write result byte
18092
18093        tst.l           %d1                     # did dstore fail? (d1 != 0)
18094        bne.w           fscc_err                # yes
18095
18096        rts
18097
18098# addressing mode is post-increment. write the result byte. if the write
18099# fails then don't update the address register. if write passes then
18100# call inc_areg() to update the address register by one byte.
18101fscc_mem_inc:
18102        mov.l           %a0,%d0                 # pass result in d0
18103        mov.l           EXC_EA(%a6),%a0         # fetch <ea>
18104        bsr.l           _dmem_write_byte        # write result byte
18105
18106        tst.l           %d1                     # did dstore fail? (d1 != 0)
18107        bne.w           fscc_err                # yes: leave An untouched
18108
18109        mov.b           0x1+EXC_OPWORD(%a6),%d1 # refetch lo opword (d1 held status)
18110        andi.w          &0x7,%d1                # pass index in d1
18111        movq.l          &0x1,%d0                # pass amt to inc by
18112        bsr.l           inc_areg                # increment address register
18113
18114        rts
18115
18116# addressing mode is pre-decrement. write the result byte. if the write
18117# fails then don't update the address register. if the write passes then
18118# call dec_areg() to update the address register by one byte.
18119fscc_mem_dec:
18120        mov.l           %a0,%d0                 # pass result in d0
18121        mov.l           EXC_EA(%a6),%a0         # fetch <ea>
18122        bsr.l           _dmem_write_byte        # write result byte
18123
18124        tst.l           %d1                     # did dstore fail? (d1 != 0)
18125        bne.w           fscc_err                # yes: leave An untouched
18126
18127        mov.b           0x1+EXC_OPWORD(%a6),%d1 # refetch lo opword (d1 held status)
18128        andi.w          &0x7,%d1                # pass index in d1
18129        movq.l          &0x1,%d0                # pass amt to dec by
18130        bsr.l           dec_areg                # decrement address register
18131
18132        rts
18133
18134# the emulation routine set bsun and BSUN was enabled. no result is
18135# stored; only the special-condition flag is set here.
18136# the caller of this routine is left to shift the stack frame up to
18137# eliminate the effective address field and jump to the bsun handler.
18138fscc_bsun:
18139        mov.b           &fbsun_flg,SPCOND_FLG(%a6) # tell the caller: bsun case
18140        rts
18141
18142# the byte write to memory has failed. store 0x00a1 in EXC_VOFF and exit via
18143# facc_finish. NOTE(review): no FSLW/funimp_dacc() use is visible here - confirm.
18144fscc_err:
18145        mov.w           &0x00a1,EXC_VOFF(%a6)
18146        bra.l           facc_finish
18147
18148#########################################################################
18149# XDEF **************************************************************** #
18150#       fmovm_dynamic(): emulate "fmovm" dynamic instruction            #
18151#                                                                       #
18152# XREF **************************************************************** #
18153#       fetch_dreg() - fetch data register                              #
18154#       {i,d,}mem_read() - fetch data from memory                       #
18155#       _mem_write() - write data to memory                             #
18156#       iea_iacc() - instruction memory access error occurred           #
18157#       iea_dacc() - data memory access error occurred                  #
18158#       restore() - restore An index regs if access error occurred      #
18159#                                                                       #
18160# INPUT *************************************************************** #
18161#       None                                                            #
18162#                                                                       #
18163# OUTPUT ************************************************************** #
18164#       If instr is "fmovm Dn,-(A7)" from supervisor mode,              #
18165#               d0 = size of dump                                       #
18166#               d1 = Dn                                                 #
18167#       Else if instruction access error,                               #
18168#               d0 = FSLW                                               #
18169#       Else if data access error,                                      #
18170#               d0 = FSLW                                               #
18171#               a0 = address of fault                                   #
18172#       Else                                                            #
18173#               none.                                                   #
18174#                                                                       #
18175# ALGORITHM *********************************************************** #
18176#       The effective address must be calculated since this is entered  #
18177# from an "Unimplemented Effective Address" exception handler. So, we   #
18178# have our own fcalc_ea() routine here. If an access error is flagged   #
18179# by a _{i,d,}mem_read() call, we must exit through the special         #
18180# handler.                                                              #
18181#       The data register is determined and its value loaded to get the #
18182# string of FP registers affected. This value is used as an index into  #
18183# a lookup table such that we can determine the number of bytes         #
18184# involved.                                                             #
18185#       If the instruction is "fmovm.x <ea>,Dn", a _mem_read() is used  #
18186# to read in all FP values. Again, _mem_read() may fail and require a   #
18187# special exit.                                                         #
18188#       If the instruction is "fmovm.x DN,<ea>", a _mem_write() is used #
18189# to write all FP values. _mem_write() may also fail.                   #
18190#       If the instruction is "fmovm.x DN,-(a7)" from supervisor mode,  #
18191# then we return the size of the dump and the string to the caller      #
18192# so that the move can occur outside of this routine. This special      #
18193# case is required so that moves to the system stack are handled        #
18194# correctly.                                                            #
18195#                                                                       #
18196# DYNAMIC:                                                              #
18197#       fmovm.x dn, <ea>                                                #
18198#       fmovm.x <ea>, dn                                                #
18199#                                                                       #
18200#             <WORD 1>                <WORD2>                           #
18201#       1111 0010 00 |<ea>|     11@& 1000 0$$$ 0000                     #
18202#                                                                       #
18203#       & = (0): predecrement addressing mode                           #
18204#           (1): postincrement or control addressing mode               #
18205#       @ = (0): move listed regs from memory to the FPU                #
18206#           (1): move listed regs from the FPU to memory                #
18207#       $$$    : index of data register holding reg select mask         #
18208#                                                                       #
18209# NOTES:                                                                #
18210#       If the data register holds a zero, then the                     #
18211#       instruction is a nop.                                           #
18212#                                                                       #
18213#########################################################################
18214
18215        global          fmovm_dynamic
18216fmovm_dynamic:
18217
18218# extract the data register in which the bit string resides...
18219        mov.b           1+EXC_EXTWORD(%a6),%d1  # fetch lo byte of extword
18220        andi.w          &0x70,%d1               # extract reg bits ($$$ field)
18221        lsr.b           &0x4,%d1                # shift into lo bits: d1 = Dn index
18222
18223# fetch the bit string into d0...
18224        bsr.l           fetch_dreg              # fetch reg string
18225
18226        andi.l          &0x000000ff,%d0         # keep only lo byte
18227
18228        mov.l           %d0,-(%sp)              # save strg
18229        mov.b           (tbl_fmovm_size.w,%pc,%d0),%d0 # d0 = # of bytes to move
18230        mov.l           %d0,-(%sp)              # save size
18231        bsr.l           fmovm_calc_ea           # calculate <ea> (returned in a0)
18232        mov.l           (%sp)+,%d0              # restore size
18233        mov.l           (%sp)+,%d1              # restore strg (sets Z if strg == 0)
18234
18235# if the bit string is a zero, then the operation is a no-op
18236# but, make sure that we've calculated ea and advanced the opword pointer.
18237        beq.w           fmovm_data_done         # uses flags from the strg pop above
18238
18239# separate move ins from move outs...
18240        btst            &0x5,EXC_EXTWORD(%a6)   # is it a move in or out?
18241        beq.w           fmovm_data_in           # bit clear: it's a move in
18242
18243#############
18244# MOVE OUT: #
18245#############
18246fmovm_data_out:
18247        btst            &0x4,EXC_EXTWORD(%a6)   # control or predecrement?
18248        bne.w           fmovm_out_ctrl          # bit set: control or postincrement
18249
18250############################
18251fmovm_out_predec:
18252# for predecrement mode, the bit string is the opposite of both control
18253# operations and postincrement mode. (bit7 = FP7 ... bit0 = FP0)
18254# here, we convert it to be just like the others...
18255        mov.b           (tbl_fmovm_convert.w,%pc,%d1.w*1),%d1 # reverse the bit order
18256
18257        btst            &0x5,EXC_SR(%a6)        # user or supervisor mode?
18258        beq.b           fmovm_out_ctrl          # user
18259
18260fmovm_out_predec_s:
18261        cmpi.b          SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
18262        bne.b           fmovm_out_ctrl          # no: handle like any other move out
18263
18264# the operation was unfortunately an: fmovm.x dn,-(sp)
18265# called from supervisor mode.
18266# we're also passing "size" (d0) and the converted "strg" (d1) back to the caller
18267        rts
18268
18269############################
18270fmovm_out_ctrl:
18271        mov.l           %a0,%a1                 # a1 = dst <ea>; a0 is reused as a cursor
18272
18273        sub.l           %d0,%sp                 # reserve "size" bytes of stack as a buffer
18274        lea             (%sp),%a0               # a0 = next free slot in the buffer
18275
18276        tst.b           %d1                     # should FP0 be moved? (strg bit 7)
18277        bpl.b           fmovm_out_ctrl_fp1      # no
18278
18279        mov.l           0x0+EXC_FP0(%a6),(%a0)+ # yes: copy 12 bytes from the saved FP0
18280        mov.l           0x4+EXC_FP0(%a6),(%a0)+
18281        mov.l           0x8+EXC_FP0(%a6),(%a0)+
18282
18283fmovm_out_ctrl_fp1:
18284        lsl.b           &0x1,%d1                # should FP1 be moved? (next bit -> bit 7)
18285        bpl.b           fmovm_out_ctrl_fp2      # no
18286
18287        mov.l           0x0+EXC_FP1(%a6),(%a0)+ # yes: copy 12 bytes from the saved FP1
18288        mov.l           0x4+EXC_FP1(%a6),(%a0)+
18289        mov.l           0x8+EXC_FP1(%a6),(%a0)+
18290
18291fmovm_out_ctrl_fp2:
18292        lsl.b           &0x1,%d1                # should FP2 be moved?
18293        bpl.b           fmovm_out_ctrl_fp3      # no
18294
18295        fmovm.x         &0x20,(%a0)             # yes: store straight from the FPU
18296        add.l           &0xc,%a0                # advance cursor by 12 bytes
18297
18298fmovm_out_ctrl_fp3:
18299        lsl.b           &0x1,%d1                # should FP3 be moved?
18300        bpl.b           fmovm_out_ctrl_fp4      # no
18301
18302        fmovm.x         &0x10,(%a0)             # yes
18303        add.l           &0xc,%a0                # advance cursor by 12 bytes
18304
18305fmovm_out_ctrl_fp4:
18306        lsl.b           &0x1,%d1                # should FP4 be moved?
18307        bpl.b           fmovm_out_ctrl_fp5      # no
18308
18309        fmovm.x         &0x08,(%a0)             # yes
18310        add.l           &0xc,%a0                # advance cursor by 12 bytes
18311
18312fmovm_out_ctrl_fp5:
18313        lsl.b           &0x1,%d1                # should FP5 be moved?
18314        bpl.b           fmovm_out_ctrl_fp6      # no
18315
18316        fmovm.x         &0x04,(%a0)             # yes
18317        add.l           &0xc,%a0                # advance cursor by 12 bytes
18318
18319fmovm_out_ctrl_fp6:
18320        lsl.b           &0x1,%d1                # should FP6 be moved?
18321        bpl.b           fmovm_out_ctrl_fp7      # no
18322
18323        fmovm.x         &0x02,(%a0)             # yes
18324        add.l           &0xc,%a0                # advance cursor by 12 bytes
18325
18326fmovm_out_ctrl_fp7:
18327        lsl.b           &0x1,%d1                # should FP7 be moved?
18328        bpl.b           fmovm_out_ctrl_done     # no
18329
18330        fmovm.x         &0x01,(%a0)             # yes
18331        add.l           &0xc,%a0                # advance cursor by 12 bytes
18332
18333fmovm_out_ctrl_done:
18334        mov.l           %a1,L_SCR1(%a6)         # remember dst <ea> (presumably for the error path)
18335
18336        lea             (%sp),%a0               # pass: supervisor src
18337        mov.l           %d0,-(%sp)              # save size
18338        bsr.l           _dmem_write             # copy data to user mem
18339
18340        mov.l           (%sp)+,%d0              # restore size
18341        add.l           %d0,%sp                 # clear fpreg data from stack
18342
18343        tst.l           %d1                     # did dstore err? (d1 != 0)
18344        bne.w           fmovm_out_err           # yes
18345
18346        rts
18347
18348############
18349# MOVE IN: #
18350############
18351fmovm_data_in:
18352        mov.l           %a0,L_SCR1(%a6)         # remember src <ea> (presumably for the error path)
18353
18354        sub.l           %d0,%sp                 # make room for fpregs
18355        lea             (%sp),%a1               # a1 = buffer that receives the data
18356
18357        mov.l           %d1,-(%sp)              # save bit string for later
18358        mov.l           %d0,-(%sp)              # save # of bytes
18359
18360        bsr.l           _dmem_read              # copy data from user mem
18361
18362        mov.l           (%sp)+,%d0              # retrieve # of bytes
18363
18364        tst.l           %d1                     # did dfetch fail? (d1 != 0)
18365        bne.w           fmovm_in_err            # yes (bit string + buffer still on stack)
18366
18367        mov.l           (%sp)+,%d1              # load bit string
18368
18369        lea             (%sp),%a0               # addr of stack (start of buffer)
18370
18371        tst.b           %d1                     # should FP0 be moved? (strg bit 7)
18372        bpl.b           fmovm_data_in_fp1       # no
18373
18374        mov.l           (%a0)+,0x0+EXC_FP0(%a6) # yes: update the saved FP0 (12 bytes)
18375        mov.l           (%a0)+,0x4+EXC_FP0(%a6)
18376        mov.l           (%a0)+,0x8+EXC_FP0(%a6)
18377
18378fmovm_data_in_fp1:
18379        lsl.b           &0x1,%d1                # should FP1 be moved? (next bit -> bit 7)
18380        bpl.b           fmovm_data_in_fp2       # no
18381
18382        mov.l           (%a0)+,0x0+EXC_FP1(%a6) # yes: update the saved FP1 (12 bytes)
18383        mov.l           (%a0)+,0x4+EXC_FP1(%a6)
18384        mov.l           (%a0)+,0x8+EXC_FP1(%a6)
18385
18386fmovm_data_in_fp2:
18387        lsl.b           &0x1,%d1                # should FP2 be moved?
18388        bpl.b           fmovm_data_in_fp3       # no
18389
18390        fmovm.x         (%a0)+,&0x20            # yes: load straight into the FPU
18391
18392fmovm_data_in_fp3:
18393        lsl.b           &0x1,%d1                # should FP3 be moved?
18394        bpl.b           fmovm_data_in_fp4       # no
18395
18396        fmovm.x         (%a0)+,&0x10            # yes
18397
18398fmovm_data_in_fp4:
18399        lsl.b           &0x1,%d1                # should FP4 be moved?
18400        bpl.b           fmovm_data_in_fp5       # no
18401
18402        fmovm.x         (%a0)+,&0x08            # yes
18403
18404fmovm_data_in_fp5:
18405        lsl.b           &0x1,%d1                # should FP5 be moved?
18406        bpl.b           fmovm_data_in_fp6       # no
18407
18408        fmovm.x         (%a0)+,&0x04            # yes
18409
18410fmovm_data_in_fp6:
18411        lsl.b           &0x1,%d1                # should FP6 be moved?
18412        bpl.b           fmovm_data_in_fp7       # no
18413
18414        fmovm.x         (%a0)+,&0x02            # yes
18415
18416fmovm_data_in_fp7:
18417        lsl.b           &0x1,%d1                # should FP7 be moved?
18418        bpl.b           fmovm_data_in_done      # no
18419
18420        fmovm.x         (%a0)+,&0x01            # yes
18421
18422fmovm_data_in_done:
18423        add.l           %d0,%sp                 # remove fpregs from stack
18424        rts
18425
18426#####################################
18427
18428fmovm_data_done:
18429        rts                                     # bit string was zero: nothing to move
18430
18431##############################################################################
18432
18433#
18434# table indexed by the operation's bit string (0x00-0xff) that gives
18435# the number of bytes that will be moved. one byte per entry.
18436#
18437# number of bytes = (# of 1's in bit string) * 12(bytes/fpreg)
18438#
18439tbl_fmovm_size:
18440        byte    0x00,0x0c,0x0c,0x18,0x0c,0x18,0x18,0x24    # 0x00-0x07
18441        byte    0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30    # 0x08-0x0f
18442        byte    0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30    # 0x10-0x17
18443        byte    0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c    # 0x18-0x1f
18444        byte    0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30    # 0x20-0x27
18445        byte    0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c    # 0x28-0x2f
18446        byte    0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c    # 0x30-0x37
18447        byte    0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48    # 0x38-0x3f
18448        byte    0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30    # 0x40-0x47
18449        byte    0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c    # 0x48-0x4f
18450        byte    0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c    # 0x50-0x57
18451        byte    0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48    # 0x58-0x5f
18452        byte    0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c    # 0x60-0x67
18453        byte    0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48    # 0x68-0x6f
18454        byte    0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48    # 0x70-0x77
18455        byte    0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54    # 0x78-0x7f
18456        byte    0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30    # 0x80-0x87
18457        byte    0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c    # 0x88-0x8f
18458        byte    0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c    # 0x90-0x97
18459        byte    0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48    # 0x98-0x9f
18460        byte    0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c    # 0xa0-0xa7
18461        byte    0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48    # 0xa8-0xaf
18462        byte    0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48    # 0xb0-0xb7
18463        byte    0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54    # 0xb8-0xbf
18464        byte    0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c    # 0xc0-0xc7
18465        byte    0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48    # 0xc8-0xcf
18466        byte    0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48    # 0xd0-0xd7
18467        byte    0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54    # 0xd8-0xdf
18468        byte    0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48    # 0xe0-0xe7
18469        byte    0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54    # 0xe8-0xef
18470        byte    0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54    # 0xf0-0xf7
18471        byte    0x3c,0x48,0x48,0x54,0x48,0x54,0x54,0x60    # 0xf8-0xff
18472
18473#
18474# table to convert a pre-decrement bit string into a post-increment
18475# or control bit string. each entry is its index with the bit order reversed.
18476# ex:   0x00    ==>     0x00
18477#       0x01    ==>     0x80
18478#       0x02    ==>     0x40
18479#               .
18480#               .
18481#       0xfd    ==>     0xbf
18482#       0xfe    ==>     0x7f
18483#       0xff    ==>     0xff
18484#
18485tbl_fmovm_convert:
18486        byte    0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0    # 0x00-0x07
18487        byte    0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0    # 0x08-0x0f
18488        byte    0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8    # 0x10-0x17
18489        byte    0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8    # 0x18-0x1f
18490        byte    0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4    # 0x20-0x27
18491        byte    0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4    # 0x28-0x2f
18492        byte    0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec    # 0x30-0x37
18493        byte    0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc    # 0x38-0x3f
18494        byte    0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2    # 0x40-0x47
18495        byte    0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2    # 0x48-0x4f
18496        byte    0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea    # 0x50-0x57
18497        byte    0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa    # 0x58-0x5f
18498        byte    0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6    # 0x60-0x67
18499        byte    0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6    # 0x68-0x6f
18500        byte    0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee    # 0x70-0x77
18501        byte    0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe    # 0x78-0x7f
18502        byte    0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1    # 0x80-0x87
18503        byte    0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1    # 0x88-0x8f
18504        byte    0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9    # 0x90-0x97
18505        byte    0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9    # 0x98-0x9f
18506        byte    0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5    # 0xa0-0xa7
18507        byte    0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5    # 0xa8-0xaf
18508        byte    0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed    # 0xb0-0xb7
18509        byte    0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd    # 0xb8-0xbf
18510        byte    0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3    # 0xc0-0xc7
18511        byte    0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3    # 0xc8-0xcf
18512        byte    0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb    # 0xd0-0xd7
18513        byte    0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb    # 0xd8-0xdf
18514        byte    0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7    # 0xe0-0xe7
18515        byte    0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7    # 0xe8-0xef
18516        byte    0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef    # 0xf0-0xf7
18517        byte    0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff    # 0xf8-0xff
18518
18519        global          fmovm_calc_ea
18520###############################################
18521# _fmovm_calc_ea: calculate effective address #
18522###############################################
18523fmovm_calc_ea:
18524        mov.l           %d0,%a0                 # move # bytes to a0 (used by (An)+ / -(An))
18525
18526# currently, MODE and REG are taken from the EXC_OPWORD. this could be
18527# easily changed if they were inputs passed in registers.
18528        mov.w           EXC_OPWORD(%a6),%d0     # fetch opcode word
18529        mov.w           %d0,%d1                 # make a copy
18530
18531        andi.w          &0x3f,%d0               # extract 6-bit {mode,reg} field
18532        andi.l          &0x7,%d1                # extract reg  field
18533
18534# jump to the corresponding function for each {MODE,REG} pair.
18535        mov.w           (tbl_fea_mode.b,%pc,%d0.w*2),%d0 # fetch jmp distance
18536        jmp             (tbl_fea_mode.b,%pc,%d0.w*1) # jmp to correct ea mode
18537
18538        swbeg           &64
18539tbl_fea_mode:
18540        short           tbl_fea_mode    -       tbl_fea_mode    # 0x00-0x07: mode 000 (Dn), offset 0: no handler
18541        short           tbl_fea_mode    -       tbl_fea_mode
18542        short           tbl_fea_mode    -       tbl_fea_mode
18543        short           tbl_fea_mode    -       tbl_fea_mode
18544        short           tbl_fea_mode    -       tbl_fea_mode
18545        short           tbl_fea_mode    -       tbl_fea_mode
18546        short           tbl_fea_mode    -       tbl_fea_mode
18547        short           tbl_fea_mode    -       tbl_fea_mode
18548
18549        short           tbl_fea_mode    -       tbl_fea_mode    # 0x08-0x0f: mode 001 (An), offset 0: no handler
18550        short           tbl_fea_mode    -       tbl_fea_mode
18551        short           tbl_fea_mode    -       tbl_fea_mode
18552        short           tbl_fea_mode    -       tbl_fea_mode
18553        short           tbl_fea_mode    -       tbl_fea_mode
18554        short           tbl_fea_mode    -       tbl_fea_mode
18555        short           tbl_fea_mode    -       tbl_fea_mode
18556        short           tbl_fea_mode    -       tbl_fea_mode
18557
18558        short           faddr_ind_a0    -       tbl_fea_mode    # 0x10-0x17: mode 010, (An)
18559        short           faddr_ind_a1    -       tbl_fea_mode
18560        short           faddr_ind_a2    -       tbl_fea_mode
18561        short           faddr_ind_a3    -       tbl_fea_mode
18562        short           faddr_ind_a4    -       tbl_fea_mode
18563        short           faddr_ind_a5    -       tbl_fea_mode
18564        short           faddr_ind_a6    -       tbl_fea_mode
18565        short           faddr_ind_a7    -       tbl_fea_mode
18566
18567        short           faddr_ind_p_a0  -       tbl_fea_mode    # 0x18-0x1f: mode 011, (An)+
18568        short           faddr_ind_p_a1  -       tbl_fea_mode
18569        short           faddr_ind_p_a2  -       tbl_fea_mode
18570        short           faddr_ind_p_a3  -       tbl_fea_mode
18571        short           faddr_ind_p_a4  -       tbl_fea_mode
18572        short           faddr_ind_p_a5  -       tbl_fea_mode
18573        short           faddr_ind_p_a6  -       tbl_fea_mode
18574        short           faddr_ind_p_a7  -       tbl_fea_mode
18575
18576        short           faddr_ind_m_a0  -       tbl_fea_mode    # 0x20-0x27: mode 100, -(An)
18577        short           faddr_ind_m_a1  -       tbl_fea_mode
18578        short           faddr_ind_m_a2  -       tbl_fea_mode
18579        short           faddr_ind_m_a3  -       tbl_fea_mode
18580        short           faddr_ind_m_a4  -       tbl_fea_mode
18581        short           faddr_ind_m_a5  -       tbl_fea_mode
18582        short           faddr_ind_m_a6  -       tbl_fea_mode
18583        short           faddr_ind_m_a7  -       tbl_fea_mode
18584
18585        short           faddr_ind_disp_a0       -       tbl_fea_mode    # 0x28-0x2f: mode 101, (d16,An)
18586        short           faddr_ind_disp_a1       -       tbl_fea_mode
18587        short           faddr_ind_disp_a2       -       tbl_fea_mode
18588        short           faddr_ind_disp_a3       -       tbl_fea_mode
18589        short           faddr_ind_disp_a4       -       tbl_fea_mode
18590        short           faddr_ind_disp_a5       -       tbl_fea_mode
18591        short           faddr_ind_disp_a6       -       tbl_fea_mode
18592        short           faddr_ind_disp_a7       -       tbl_fea_mode
18593
18594        short           faddr_ind_ext   -       tbl_fea_mode    # 0x30-0x37: mode 110, one handler; reg index is in d1
18595        short           faddr_ind_ext   -       tbl_fea_mode
18596        short           faddr_ind_ext   -       tbl_fea_mode
18597        short           faddr_ind_ext   -       tbl_fea_mode
18598        short           faddr_ind_ext   -       tbl_fea_mode
18599        short           faddr_ind_ext   -       tbl_fea_mode
18600        short           faddr_ind_ext   -       tbl_fea_mode
18601        short           faddr_ind_ext   -       tbl_fea_mode
18602
18603        short           fabs_short      -       tbl_fea_mode    # 0x38: mode 111 reg 000, absolute short
18604        short           fabs_long       -       tbl_fea_mode    # 0x39: mode 111 reg 001, absolute long
18605        short           fpc_ind         -       tbl_fea_mode    # 0x3a: mode 111 reg 010, pc-relative
18606        short           fpc_ind_ext     -       tbl_fea_mode    # 0x3b: mode 111 reg 011, pc-relative w/ extension
18607        short           tbl_fea_mode    -       tbl_fea_mode    # 0x3c-0x3f: offset 0: no handler
18608        short           tbl_fea_mode    -       tbl_fea_mode
18609        short           tbl_fea_mode    -       tbl_fea_mode
18610        short           tbl_fea_mode    -       tbl_fea_mode
18611
18612###################################
18613# Address register indirect: (An) #
18614###################################
18615faddr_ind_a0:
18616        mov.l           EXC_DREGS+0x8(%a6),%a0  # <ea> = a0 as saved in the exception frame
18617        rts
18618
18619faddr_ind_a1:
18620        mov.l           EXC_DREGS+0xc(%a6),%a0  # <ea> = a1 as saved in the exception frame
18621        rts
18622
18623faddr_ind_a2:
18624        mov.l           %a2,%a0                 # <ea> = a2, read from the live register
18625        rts
18626
18627faddr_ind_a3:
18628        mov.l           %a3,%a0                 # <ea> = a3, read from the live register
18629        rts
18630
18631faddr_ind_a4:
18632        mov.l           %a4,%a0                 # <ea> = a4, read from the live register
18633        rts
18634
18635faddr_ind_a5:
18636        mov.l           %a5,%a0                 # <ea> = a5, read from the live register
18637        rts
18638
18639faddr_ind_a6:
18640        mov.l           (%a6),%a0               # <ea> = a6 value stored at (%a6) - presumably the old frame pointer
18641        rts
18642
18643faddr_ind_a7:
18644        mov.l           EXC_A7(%a6),%a0         # <ea> = a7 as saved in EXC_A7
18645        rts
18646
18647#####################################################
18648# Address register indirect w/ postincrement: (An)+ #
18649#####################################################
18650faddr_ind_p_a0:
18651        mov.l           EXC_DREGS+0x8(%a6),%d0  # Get current a0
18652        mov.l           %d0,%d1                 # d1 = copy to be incremented
18653        add.l           %a0,%d1                 # Increment by # of bytes (passed in a0)
18654        mov.l           %d1,EXC_DREGS+0x8(%a6)  # Save incr value
18655        mov.l           %d0,%a0                 # <ea> = value before the increment
18656        rts
18657
18658faddr_ind_p_a1:
18659        mov.l           EXC_DREGS+0xc(%a6),%d0  # Get current a1
18660        mov.l           %d0,%d1                 # d1 = copy to be incremented
18661        add.l           %a0,%d1                 # Increment by # of bytes (passed in a0)
18662        mov.l           %d1,EXC_DREGS+0xc(%a6)  # Save incr value
18663        mov.l           %d0,%a0                 # <ea> = value before the increment
18664        rts
18665
18666faddr_ind_p_a2:
18667        mov.l           %a2,%d0                 # Get current a2
18668        mov.l           %d0,%d1                 # d1 = copy to be incremented
18669        add.l           %a0,%d1                 # Increment by # of bytes (passed in a0)
18670        mov.l           %d1,%a2                 # Save incr value
18671        mov.l           %d0,%a0                 # <ea> = value before the increment
18672        rts
18673
18674faddr_ind_p_a3:
18675        mov.l           %a3,%d0                 # Get current a3
18676        mov.l           %d0,%d1                 # d1 = copy to be incremented
18677        add.l           %a0,%d1                 # Increment by # of bytes (passed in a0)
18678        mov.l           %d1,%a3                 # Save incr value
18679        mov.l           %d0,%a0                 # <ea> = value before the increment
18680        rts
18681
18682faddr_ind_p_a4:
18683        mov.l           %a4,%d0                 # Get current a4
18684        mov.l           %d0,%d1                 # d1 = copy to be incremented
18685        add.l           %a0,%d1                 # Increment by # of bytes (passed in a0)
18686        mov.l           %d1,%a4                 # Save incr value
18687        mov.l           %d0,%a0                 # <ea> = value before the increment
18688        rts
18689
18690faddr_ind_p_a5:
18691        mov.l           %a5,%d0                 # Get current a5
18692        mov.l           %d0,%d1                 # d1 = copy to be incremented
18693        add.l           %a0,%d1                 # Increment by # of bytes (passed in a0)
18694        mov.l           %d1,%a5                 # Save incr value
18695        mov.l           %d0,%a0                 # <ea> = value before the increment
18696        rts
18697
18698faddr_ind_p_a6:
18699        mov.l           (%a6),%d0               # Get current a6
18700        mov.l           %d0,%d1                 # d1 = copy to be incremented
18701        add.l           %a0,%d1                 # Increment by # of bytes (passed in a0)
18702        mov.l           %d1,(%a6)               # Save incr value
18703        mov.l           %d0,%a0                 # <ea> = value before the increment
18704        rts
18705
18706faddr_ind_p_a7:
18707        mov.b           &mia7_flg,SPCOND_FLG(%a6) # set "special case" flag: <ea> is (a7)+
18708
18709        mov.l           EXC_A7(%a6),%d0         # Get current a7
18710        mov.l           %d0,%d1                 # d1 = copy to be incremented
18711        add.l           %a0,%d1                 # Increment by # of bytes (passed in a0)
18712        mov.l           %d1,EXC_A7(%a6)         # Save incr value
18713        mov.l           %d0,%a0                 # <ea> = value before the increment
18714        rts
18715
18716####################################################
18717# Address register indirect w/ predecrement: -(An) #
18718####################################################
# faddr_ind_m_a0: effective address for -(a0).
#   The a0 value is the copy saved at EXC_DREGS+0x8(a6); it is updated there.
#   In:  a0 = number of bytes to pre-decrement by
#   Out: a0 = d0 = decremented a0 value (the <ea>)
18719faddr_ind_m_a0:
18720        mov.l           EXC_DREGS+0x8(%a6),%d0  # Get current a0
18721        sub.l           %a0,%d0                 # Decrement
18722        mov.l           %d0,EXC_DREGS+0x8(%a6)  # Save decr value
18723        mov.l           %d0,%a0                 # return decremented value
18724        rts
18725
# faddr_ind_m_a1: effective address for -(a1).
#   The a1 value is the copy saved at EXC_DREGS+0xc(a6); it is updated there.
#   In:  a0 = number of bytes to pre-decrement by
#   Out: a0 = d0 = decremented a1 value (the <ea>)
18726faddr_ind_m_a1:
18727        mov.l           EXC_DREGS+0xc(%a6),%d0  # Get current a1
18728        sub.l           %a0,%d0                 # Decrement
18729        mov.l           %d0,EXC_DREGS+0xc(%a6)  # Save decr value
18730        mov.l           %d0,%a0                 # return decremented value
18731        rts
18732
# faddr_ind_m_a2: effective address for -(a2).
#   In:  a0 = number of bytes to pre-decrement by
#   Out: a0 = d0 = a2 = old a2 - decrement (the live register is updated)
18733faddr_ind_m_a2:
18734        mov.l           %a2,%d0                 # Get current a2
18735        sub.l           %a0,%d0                 # Decrement
18736        mov.l           %d0,%a2                 # Save decr value
18737        mov.l           %d0,%a0                 # return decremented value
18738        rts
18739
# faddr_ind_m_a3: effective address for -(a3).
#   In:  a0 = number of bytes to pre-decrement by
#   Out: a0 = d0 = a3 = old a3 - decrement (the live register is updated)
18740faddr_ind_m_a3:
18741        mov.l           %a3,%d0                 # Get current a3
18742        sub.l           %a0,%d0                 # Decrement
18743        mov.l           %d0,%a3                 # Save decr value
18744        mov.l           %d0,%a0                 # return decremented value
18745        rts
18746
# faddr_ind_m_a4: effective address for -(a4).
#   In:  a0 = number of bytes to pre-decrement by
#   Out: a0 = d0 = a4 = old a4 - decrement (the live register is updated)
18747faddr_ind_m_a4:
18748        mov.l           %a4,%d0                 # Get current a4
18749        sub.l           %a0,%d0                 # Decrement
18750        mov.l           %d0,%a4                 # Save decr value
18751        mov.l           %d0,%a0                 # return decremented value
18752        rts
18753
# faddr_ind_m_a5: effective address for -(a5).
#   In:  a0 = number of bytes to pre-decrement by
#   Out: a0 = d0 = a5 = old a5 - decrement (the live register is updated)
18754faddr_ind_m_a5:
18755        mov.l           %a5,%d0                 # Get current a5
18756        sub.l           %a0,%d0                 # Decrement
18757        mov.l           %d0,%a5                 # Save decr value
18758        mov.l           %d0,%a0                 # return decremented value
18759        rts
18760
# faddr_ind_m_a6: effective address for -(a6).
#   The value treated as a6 is the longword stored at (a6), not the live
#   register; it is read from and written back to that location.
#   In:  a0 = number of bytes to pre-decrement by
#   Out: a0 = d0 = decremented value (the <ea>), also stored at (a6)
18761faddr_ind_m_a6:
18762        mov.l           (%a6),%d0               # Get current a6
18763        sub.l           %a0,%d0                 # Decrement
18764        mov.l           %d0,(%a6)               # Save decr value
18765        mov.l           %d0,%a0                 # return decremented value
18766        rts
18767
# faddr_ind_m_a7: effective address for -(a7).
#   The a7 value is kept in EXC_A7(a6) and is updated there. SPCOND_FLG(a6)
#   is set to mda7_flg to record that this special case occurred
#   (presumably examined by the caller on exit -- not visible here).
#   In:  a0 = number of bytes to pre-decrement by
#   Out: a0 = d0 = decremented a7 value (the <ea>), also stored in EXC_A7(a6)
18768faddr_ind_m_a7:
18769        mov.b           &mda7_flg,SPCOND_FLG(%a6) # set "special case" flag
18770
18771        mov.l           EXC_A7(%a6),%d0         # Get current a7
18772        sub.l           %a0,%d0                 # Decrement
18773        mov.l           %d0,EXC_A7(%a6)         # Save decr value
18774        mov.l           %d0,%a0                 # return decremented value
18775        rts
18776
18777########################################################
18778# Address register indirect w/ displacement: (d16, An) #
18779########################################################
# faddr_ind_disp_a0: effective address for (d16,a0).
#   Reads the 16-bit displacement word at EXC_EXTWPTR(a6) with
#   _imem_read_word (address passed in a0, word returned in d0, status in d1)
#   and advances EXC_EXTWPTR(a6) by 2. A nonzero d1 is treated as a failed
#   instruction fetch and control leaves through iea_iacc.
#   Out: a0 = a0 value saved at EXC_DREGS+0x8(a6) + sign-extended d16
18780faddr_ind_disp_a0:
18781        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
18782        addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
18783        bsr.l           _imem_read_word
18784
18785        tst.l           %d1                     # did ifetch fail?
18786        bne.l           iea_iacc                # yes
18787
18788        mov.w           %d0,%a0                 # sign extend displacement
18789
18790        add.l           EXC_DREGS+0x8(%a6),%a0  # a0 + d16
18791        rts
18792
# faddr_ind_disp_a1: effective address for (d16,a1).
#   Reads the 16-bit displacement word at EXC_EXTWPTR(a6) with
#   _imem_read_word (address passed in a0, word returned in d0, status in d1)
#   and advances EXC_EXTWPTR(a6) by 2. A nonzero d1 is treated as a failed
#   instruction fetch and control leaves through iea_iacc.
#   Out: a0 = a1 value saved at EXC_DREGS+0xc(a6) + sign-extended d16
18793faddr_ind_disp_a1:
18794        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
18795        addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
18796        bsr.l           _imem_read_word
18797
18798        tst.l           %d1                     # did ifetch fail?
18799        bne.l           iea_iacc                # yes
18800
18801        mov.w           %d0,%a0                 # sign extend displacement
18802
18803        add.l           EXC_DREGS+0xc(%a6),%a0  # a1 + d16
18804        rts
18805
# faddr_ind_disp_a2: effective address for (d16,a2).
#   Reads the 16-bit displacement word at EXC_EXTWPTR(a6) with
#   _imem_read_word (address passed in a0, word returned in d0, status in d1)
#   and advances EXC_EXTWPTR(a6) by 2. A nonzero d1 is treated as a failed
#   instruction fetch and control leaves through iea_iacc.
#   Out: a0 = live a2 + sign-extended d16
18806faddr_ind_disp_a2:
18807        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
18808        addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
18809        bsr.l           _imem_read_word
18810
18811        tst.l           %d1                     # did ifetch fail?
18812        bne.l           iea_iacc                # yes
18813
18814        mov.w           %d0,%a0                 # sign extend displacement
18815
18816        add.l           %a2,%a0                 # a2 + d16
18817        rts
18818
# faddr_ind_disp_a3: effective address for (d16,a3).
#   Reads the 16-bit displacement word at EXC_EXTWPTR(a6) with
#   _imem_read_word (address passed in a0, word returned in d0, status in d1)
#   and advances EXC_EXTWPTR(a6) by 2. A nonzero d1 is treated as a failed
#   instruction fetch and control leaves through iea_iacc.
#   Out: a0 = live a3 + sign-extended d16
18819faddr_ind_disp_a3:
18820        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
18821        addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
18822        bsr.l           _imem_read_word
18823
18824        tst.l           %d1                     # did ifetch fail?
18825        bne.l           iea_iacc                # yes
18826
18827        mov.w           %d0,%a0                 # sign extend displacement
18828
18829        add.l           %a3,%a0                 # a3 + d16
18830        rts
18831
# faddr_ind_disp_a4: effective address for (d16,a4).
#   Reads the 16-bit displacement word at EXC_EXTWPTR(a6) with
#   _imem_read_word (address passed in a0, word returned in d0, status in d1)
#   and advances EXC_EXTWPTR(a6) by 2. A nonzero d1 is treated as a failed
#   instruction fetch and control leaves through iea_iacc.
#   Out: a0 = live a4 + sign-extended d16
18832faddr_ind_disp_a4:
18833        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
18834        addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
18835        bsr.l           _imem_read_word
18836
18837        tst.l           %d1                     # did ifetch fail?
18838        bne.l           iea_iacc                # yes
18839
18840        mov.w           %d0,%a0                 # sign extend displacement
18841
18842        add.l           %a4,%a0                 # a4 + d16
18843        rts
18844
# faddr_ind_disp_a5: effective address for (d16,a5).
#   Reads the 16-bit displacement word at EXC_EXTWPTR(a6) with
#   _imem_read_word (address passed in a0, word returned in d0, status in d1)
#   and advances EXC_EXTWPTR(a6) by 2. A nonzero d1 is treated as a failed
#   instruction fetch and control leaves through iea_iacc.
#   Out: a0 = live a5 + sign-extended d16
18845faddr_ind_disp_a5:
18846        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
18847        addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
18848        bsr.l           _imem_read_word
18849
18850        tst.l           %d1                     # did ifetch fail?
18851        bne.l           iea_iacc                # yes
18852
18853        mov.w           %d0,%a0                 # sign extend displacement
18854
18855        add.l           %a5,%a0                 # a5 + d16
18856        rts
18857
# faddr_ind_disp_a6: effective address for (d16,a6).
#   Reads the 16-bit displacement word at EXC_EXTWPTR(a6) with
#   _imem_read_word (address passed in a0, word returned in d0, status in d1)
#   and advances EXC_EXTWPTR(a6) by 2. A nonzero d1 is treated as a failed
#   instruction fetch and control leaves through iea_iacc.
#   The value treated as a6 is the longword stored at (a6), not the live
#   register.
#   Out: a0 = that a6 value + sign-extended d16
18858faddr_ind_disp_a6:
18859        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
18860        addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
18861        bsr.l           _imem_read_word
18862
18863        tst.l           %d1                     # did ifetch fail?
18864        bne.l           iea_iacc                # yes
18865
18866        mov.w           %d0,%a0                 # sign extend displacement
18867
18868        add.l           (%a6),%a0               # a6 + d16
18869        rts
18870
# faddr_ind_disp_a7: effective address for (d16,a7).
#   Reads the 16-bit displacement word at EXC_EXTWPTR(a6) with
#   _imem_read_word (address passed in a0, word returned in d0, status in d1)
#   and advances EXC_EXTWPTR(a6) by 2. A nonzero d1 is treated as a failed
#   instruction fetch and control leaves through iea_iacc.
#   Out: a0 = a7 value kept in EXC_A7(a6) + sign-extended d16
18871faddr_ind_disp_a7:
18872        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
18873        addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
18874        bsr.l           _imem_read_word
18875
18876        tst.l           %d1                     # did ifetch fail?
18877        bne.l           iea_iacc                # yes
18878
18879        mov.w           %d0,%a0                 # sign extend displacement
18880
18881        add.l           EXC_A7(%a6),%a0         # a7 + d16
18882        rts
18883
18884########################################################################
18885# Address register indirect w/ index(8-bit displacement): (d8, An, Xn) #
18886#    "       "         "    w/   "  (base displacement): (bd, An, Xn)  #
18887# Memory indirect postindexed: ([bd, An], Xn, od)                      #
18888# Memory indirect preindexed: ([bd, An, Xn], od)                       #
18889########################################################################
# faddr_ind_ext: effective address for the An-based indexed and memory
# indirect modes listed in the banner above.
#   In:  d1 = base address register number; 8 is added before the call to
#             fetch_dreg (presumably selecting a0-a7 in its 0-15 numbering
#             -- confirm against fetch_dreg).
#   Out: a0 = effective address
#   The extension word is read from EXC_EXTWPTR(a6), which is advanced by 2.
#   If bit 8 of the extension word is set (full format), the work is handed
#   to fcalc_mem_ind with a0 = base register value and d0 = extension word.
#   Otherwise (brief format):
#        a0 = An + (index << scale) + sign-extended 8-bit displacement
#   d2 is used as a temporary and is preserved.
#   NOTE(review): the base value pushed on the stack is not popped on the
#   iea_iacc path; presumably iea_iacc rebuilds the stack pointer -- confirm.
18890faddr_ind_ext:
18891        addq.l          &0x8,%d1
18892        bsr.l           fetch_dreg              # fetch base areg
18893        mov.l           %d0,-(%sp)              # hold base across the ifetch
18894
18895        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
18896        addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
18897        bsr.l           _imem_read_word         # fetch extword in d0
18898
18899        tst.l           %d1                     # did ifetch fail?
18900        bne.l           iea_iacc                # yes
18901
18902        mov.l           (%sp)+,%a0              # a0 = base register value
18903
18904        btst            &0x8,%d0                # full-format extension word?
18905        bne.w           fcalc_mem_ind           # yes
18906
18907        mov.l           %d0,L_SCR1(%a6)         # hold opword
18908
18909        mov.l           %d0,%d1
18910        rol.w           &0x4,%d1                # bits 15:12 -> bits 3:0
18911        andi.w          &0xf,%d1                # extract index regno
18912
18913# count on fetch_dreg() not to alter a0...
18914        bsr.l           fetch_dreg              # fetch index
18915
18916        mov.l           %d2,-(%sp)              # save d2
18917        mov.l           L_SCR1(%a6),%d2         # fetch opword
18918
18919        btst            &0xb,%d2                # is it word or long?
18920        bne.b           faii8_long
18921        ext.l           %d0                     # sign extend word index
18922faii8_long:
18923        mov.l           %d2,%d1
18924        rol.w           &0x7,%d1                # bits 10:9 -> bits 1:0
18925        andi.l          &0x3,%d1                # extract scale value
18926
18927        lsl.l           %d1,%d0                 # shift index by scale
18928
18929        extb.l          %d2                     # sign extend displacement
18930        add.l           %d2,%d0                 # index + disp
18931        add.l           %d0,%a0                 # An + (index + disp)
18932
18933        mov.l           (%sp)+,%d2              # restore old d2
18934        rts
18935
18936###########################
18937# Absolute short: (XXX).W #
18938###########################
# fabs_short: effective address for absolute short, (XXX).W.
#   Reads the 16-bit address word at EXC_EXTWPTR(a6) with _imem_read_word and
#   advances EXC_EXTWPTR(a6) by 2. A nonzero d1 after the read is treated as
#   a failed instruction fetch and control leaves through iea_iacc.
#   Out: a0 = the word, sign-extended to 32 bits by the word move into a0
18939fabs_short:
18940        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
18941        addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
18942        bsr.l           _imem_read_word         # fetch short address
18943
18944        tst.l           %d1                     # did ifetch fail?
18945        bne.l           iea_iacc                # yes
18946
18947        mov.w           %d0,%a0                 # return <ea> in a0
18948        rts
18949
18950##########################
18951# Absolute long: (XXX).L #
18952##########################
# fabs_long: effective address for absolute long, (XXX).L.
#   Reads the 32-bit address at EXC_EXTWPTR(a6) with _imem_read_long and
#   advances EXC_EXTWPTR(a6) by 4. A nonzero d1 after the read is treated as
#   a failed instruction fetch and control leaves through iea_iacc.
#   Out: a0 = the longword read
18953fabs_long:
18954        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
18955        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
18956        bsr.l           _imem_read_long         # fetch long address
18957
18958        tst.l           %d1                     # did ifetch fail?
18959        bne.l           iea_iacc                # yes
18960
18961        mov.l           %d0,%a0                 # return <ea> in a0
18962        rts
18963
18964#######################################################
18965# Program counter indirect w/ displacement: (d16, PC) #
18966#######################################################
# fpc_ind: effective address for (d16,PC).
#   Reads the 16-bit displacement word at EXC_EXTWPTR(a6) with
#   _imem_read_word and advances EXC_EXTWPTR(a6) by 2. A nonzero d1 after
#   the read is treated as a failed instruction fetch and control leaves
#   through iea_iacc.
#   Out: a0 = address of the displacement word + sign-extended d16
18967fpc_ind:
18968        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
18969        addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
18970        bsr.l           _imem_read_word         # fetch word displacement
18971
18972        tst.l           %d1                     # did ifetch fail?
18973        bne.l           iea_iacc                # yes
18974
18975        mov.w           %d0,%a0                 # sign extend displacement
18976
18977        add.l           EXC_EXTWPTR(%a6),%a0    # pc + d16
18978
18979# EXC_EXTWPTR was advanced by 2 above (past the displacement word), so back
# the result up by 2 to make it relative to the displacement word's address.
18980        subq.l          &0x2,%a0                # adjust <ea>
18981        rts
18982
18983##########################################################
18984# PC indirect w/ index(8-bit displacement): (d8, PC, Xn) #
18985# "     "     w/   "  (base displacement): (bd, PC, Xn)  #
18986# PC memory indirect postindexed: ([bd, PC], Xn, od)     #
18987# PC memory indirect preindexed: ([bd, PC, Xn], od)      #
18988##########################################################
# fpc_ind_ext: effective address for the PC-based indexed and memory
# indirect modes listed in the banner above.
#   Out: a0 = effective address
#   The extension word is read from EXC_EXTWPTR(a6), which is advanced by 2.
#   The base used for the PC is the address of that extension word
#   (EXC_EXTWPTR after the increment, minus 2).
#   If bit 8 of the extension word is set (full format), the work is handed
#   to fcalc_mem_ind with a0 = base and d0 = extension word.
#   Otherwise (brief format):
#        a0 = base + (index << scale) + sign-extended 8-bit displacement
#   d2 is used as a temporary and is preserved.
18989fpc_ind_ext:
18990        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
18991        addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
18992        bsr.l           _imem_read_word         # fetch ext word
18993
18994        tst.l           %d1                     # did ifetch fail?
18995        bne.l           iea_iacc                # yes
18996
18997        mov.l           EXC_EXTWPTR(%a6),%a0    # put base in a0
18998        subq.l          &0x2,%a0                # adjust base
18999
19000        btst            &0x8,%d0                # is disp only 8 bits?
19001        bne.w           fcalc_mem_ind           # calc memory indirect
19002
19003        mov.l           %d0,L_SCR1(%a6)         # store opword
19004
19005        mov.l           %d0,%d1                 # make extword copy
19006        rol.w           &0x4,%d1                # rotate reg num into place
19007        andi.w          &0xf,%d1                # extract register number
19008
19009# count on fetch_dreg() not to alter a0...
19010        bsr.l           fetch_dreg              # fetch index
19011
19012        mov.l           %d2,-(%sp)              # save d2
19013        mov.l           L_SCR1(%a6),%d2         # fetch opword
19014
19015        btst            &0xb,%d2                # is index word or long?
19016        bne.b           fpii8_long              # long
19017        ext.l           %d0                     # sign extend word index
19018fpii8_long:
19019        mov.l           %d2,%d1
19020        rol.w           &0x7,%d1                # rotate scale value into place
19021        andi.l          &0x3,%d1                # extract scale value
19022
19023        lsl.l           %d1,%d0                 # shift index by scale
19024
19025        extb.l          %d2                     # sign extend displacement
19026        add.l           %d2,%d0                 # disp + index
19027        add.l           %d0,%a0                 # PC base + (index + disp)
19028
19029        mov.l           (%sp)+,%d2              # restore temp register
19030        rts
19031
19032# d2 = index
19033# d3 = base
19034# d4 = od
19035# d5 = extword
# fcalc_mem_ind: effective address for a full-format extension word
# (reached from faddr_ind_ext / fpc_ind_ext when extension word bit 8 is set).
#   In:  d0 = extension word
#        a0 = base register value (An or PC base)
#   Out: a0 = effective address; d2-d5 are saved on entry and restored.
#   Extension word fields as decoded below:
#        bit  6     : index suppressed (index taken as 0)
#        bits 15:12 : index register number, bit 11: long index, bits 10:9: scale
#        bit  7     : base register suppressed (base taken as 0)
#        bits 5:4   : base displacement size (0/1 = none, 2 = word, 3 = long)
#        bits 1:0   : 0 = no memory indirection, 1 = null od, 2 = word od,
#                     3 = long od;  bit 2 set = postindexed, clear = preindexed
#   Displacement words are fetched with _imem_read_word/_imem_read_long from
#   EXC_EXTWPTR(a6), which is advanced accordingly; a failed fetch leaves
#   through fcea_iacc. The intermediate pointer is read with _dmem_read_long;
#   a failed read leaves through fcea_err.
19036fcalc_mem_ind:
19037        btst            &0x6,%d0                # is the index suppressed?
19038        beq.b           fcalc_index
19039
19040        movm.l          &0x3c00,-(%sp)          # save d2-d5
19041
19042        mov.l           %d0,%d5                 # put extword in d5
19043        mov.l           %a0,%d3                 # put base in d3
19044
19045        clr.l           %d2                     # yes, so index = 0
19046        bra.b           fbase_supp_ck
19047
19048# index:
19049fcalc_index:
19050        mov.l           %d0,L_SCR1(%a6)         # save d0 (opword)
19051        bfextu          %d0{&16:&4},%d1         # fetch dreg index
19052        bsr.l           fetch_dreg
19053
19054        movm.l          &0x3c00,-(%sp)          # save d2-d5
19055        mov.l           %d0,%d2                 # put index in d2
19056        mov.l           L_SCR1(%a6),%d5         # put extword in d5
19057        mov.l           %a0,%d3                 # put base in d3
19058
19059        btst            &0xb,%d5                # is index word or long?
19060        bne.b           fno_ext
19061        ext.l           %d2                     # sign extend word index
19062
19063fno_ext:
19064        bfextu          %d5{&21:&2},%d0         # extract scale (bits 10:9)
19065        lsl.l           %d0,%d2                 # shift index by scale
19066
19067# base address (passed as parameter in d3):
19068# we clear the value here if it should actually be suppressed.
19069fbase_supp_ck:
19070        btst            &0x7,%d5                # is the base reg suppressed?
19071        beq.b           fno_base_sup
19072        clr.l           %d3
19073
19074# base displacement:
19075fno_base_sup:
19076        bfextu          %d5{&26:&2},%d0         # get bd size
19077#       beq.l           fmovm_error             # if (size == 0) it's reserved
19078
# size 0 or 1: no base displacement is fetched; 2: word; 3: long
19079        cmpi.b          %d0,&0x2
19080        blt.b           fno_bd
19081        beq.b           fget_word_bd
19082
19083        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
19084        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
19085        bsr.l           _imem_read_long
19086
19087        tst.l           %d1                     # did ifetch fail?
19088        bne.l           fcea_iacc               # yes
19089
19090        bra.b           fchk_ind
19091
19092fget_word_bd:
19093        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
19094        addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
19095        bsr.l           _imem_read_word
19096
19097        tst.l           %d1                     # did ifetch fail?
19098        bne.l           fcea_iacc               # yes
19099
19100        ext.l           %d0                     # sign extend bd
19101
19102fchk_ind:
19103        add.l           %d0,%d3                 # base += bd
19104
19105# outer displacement:
19106fno_bd:
19107        bfextu          %d5{&30:&2},%d0         # is od suppressed?
19108        beq.w           faii_bd
19109
# field value 1: null od; 2: word od; 3: long od
19110        cmpi.b          %d0,&0x2
19111        blt.b           fnull_od
19112        beq.b           fword_od
19113
19114        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
19115        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
19116        bsr.l           _imem_read_long
19117
19118        tst.l           %d1                     # did ifetch fail?
19119        bne.l           fcea_iacc               # yes
19120
19121        bra.b           fadd_them
19122
19123fword_od:
19124        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
19125        addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
19126        bsr.l           _imem_read_word
19127
19128        tst.l           %d1                     # did ifetch fail?
19129        bne.l           fcea_iacc               # yes
19130
19131        ext.l           %d0                     # sign extend od
19132        bra.b           fadd_them
19133
19134fnull_od:
19135        clr.l           %d0
19136
19137fadd_them:
19138        mov.l           %d0,%d4                 # put od in d4
19139
19140        btst            &0x2,%d5                # pre or post indexing?
19141        beq.b           fpre_indexed
19142
# postindexed: <ea> = mem[base + bd] + index + od
19143        mov.l           %d3,%a0
19144        bsr.l           _dmem_read_long
19145
19146        tst.l           %d1                     # did dfetch fail?
19147        bne.w           fcea_err                # yes
19148
19149        add.l           %d2,%d0                 # <ea> += index
19150        add.l           %d4,%d0                 # <ea> += od
19151        bra.b           fdone_ea
19152
# preindexed: <ea> = mem[base + bd + index] + od
19153fpre_indexed:
19154        add.l           %d2,%d3                 # preindexing
19155        mov.l           %d3,%a0
19156        bsr.l           _dmem_read_long
19157
19158        tst.l           %d1                     # did dfetch fail?
19159        bne.w           fcea_err                # yes
19160
19161        add.l           %d4,%d0                 # ea += od
19162        bra.b           fdone_ea
19163
# no memory indirection: <ea> = base + bd + index
19164faii_bd:
19165        add.l           %d2,%d3                 # ea = (base + bd) + index
19166        mov.l           %d3,%d0
19167fdone_ea:
19168        mov.l           %d0,%a0                 # return <ea> in a0
19169
19170        movm.l          (%sp)+,&0x003c          # restore d2-d5
19171        rts
19172
19173#########################################################
# fcea_err: exit taken by fcalc_mem_ind when _dmem_read_long fails.
#   Passes a0 = d3 (the address that was being read), restores d2-d5 from
#   the stack, loads d0 with 0x0101 and branches to iea_dacc.
#   NOTE(review): 0x0101 is presumably a fault status code consumed by
#   iea_dacc -- confirm against that handler.
19174fcea_err:
19175        mov.l           %d3,%a0
19176
19177        movm.l          (%sp)+,&0x003c          # restore d2-d5
19178        mov.w           &0x0101,%d0
19179        bra.l           iea_dacc
19180
# fcea_iacc: exit taken by fcalc_mem_ind when an instruction-stream read
# (_imem_read_word/_imem_read_long) fails. Restores d2-d5, which
# fcalc_mem_ind pushed on entry, then continues in iea_iacc.
19181fcea_iacc:
19182        movm.l          (%sp)+,&0x003c          # restore d2-d5
19183        bra.l           iea_iacc
19184
# fmovm_out_err / fmovm_in_err / fmovm_err: data access error exits.
#   Both entry points call restore (defined elsewhere in this file) and then
#   load d0 with a code that differs by direction: 0x00e1 for the "out" entry,
#   0x0161 for the "in" entry. fmovm_err then loads a0 from L_SCR1(a6) and
#   branches to iea_dacc.
#   NOTE(review): the codes are presumably fault status values and L_SCR1
#   presumably holds the faulting address at this point -- confirm against
#   the callers and iea_dacc.
19185fmovm_out_err:
19186        bsr.l           restore
19187        mov.w           &0x00e1,%d0
19188        bra.b           fmovm_err
19189
19190fmovm_in_err:
19191        bsr.l           restore
19192        mov.w           &0x0161,%d0
19193
19194fmovm_err:
19195        mov.l           L_SCR1(%a6),%a0
19196        bra.l           iea_dacc
19197
19198#########################################################################
19199# XDEF **************************************************************** #
19200#       fmovm_ctrl(): emulate fmovm.l of control registers instr        #
19201#                                                                       #
19202# XREF **************************************************************** #
19203#       _imem_read_long() - read longword from memory                   #
19204#       iea_iacc() - _imem_read_long() failed; error recovery           #
19205#                                                                       #
19206# INPUT *************************************************************** #
19207#       None                                                            #
19208#                                                                       #
19209# OUTPUT ************************************************************** #
19210#       If _imem_read_long() doesn't fail:                              #
19211#               USER_FPCR(a6)  = new FPCR value                         #
19212#               USER_FPSR(a6)  = new FPSR value                         #
19213#               USER_FPIAR(a6) = new FPIAR value                        #
19214#                                                                       #
19215# ALGORITHM *********************************************************** #
19216#       Decode the instruction type by looking at the extension word    #
19217# in order to see how many control registers to fetch from memory.      #
19218# Fetch them using _imem_read_long(). If this fetch fails, exit through #
19219# the special access error exit handler iea_iacc().                     #
19220#                                                                       #
19221# Instruction word decoding:                                            #
19222#                                                                       #
19223#       fmovem.l #<data>, {FPIAR&|FPCR&|FPSR}                           #
19224#                                                                       #
19225#               WORD1                   WORD2                           #
19226#       1111 0010 00 111100     100$ $$00 0000 0000                     #
19227#                                                                       #
19228#       $$$ (100): FPCR                                                 #
19229#           (010): FPSR                                                 #
19230#           (001): FPIAR                                                #
19231#           (000): FPIAR                                                #
19232#                                                                       #
19233#########################################################################
19234
19235        global          fmovm_ctrl
# Dispatch on the high byte of the extension word (EXC_EXTWORD(a6)):
#   0x9c -> fctrl_in_7 (FPCR, FPSR, FPIAR)
#   0x98 -> fctrl_in_6 (FPCR, FPSR)
#   0x94 -> fctrl_in_5 (FPCR, FPIAR)
#   anything else falls through to fctrl_in_3 (FPSR, FPIAR)
# Each case reads one longword per selected register from the instruction
# stream at EXC_EXTWPTR(a6), advancing it by 4 each time, and stores the
# value in the matching USER_FPCR/USER_FPSR/USER_FPIAR slot. A nonzero d1
# after any read leaves through iea_iacc.
# NOTE(review): single-register selections would also land in fctrl_in_3;
# presumably they never reach this routine -- confirm against the caller.
19236fmovm_ctrl:
19237        mov.b           EXC_EXTWORD(%a6),%d0    # fetch reg select bits
19238        cmpi.b          %d0,&0x9c               # fpcr & fpsr & fpiar ?
19239        beq.w           fctrl_in_7              # yes
19240        cmpi.b          %d0,&0x98               # fpcr & fpsr ?
19241        beq.w           fctrl_in_6              # yes
19242        cmpi.b          %d0,&0x94               # fpcr & fpiar ?
19243        beq.b           fctrl_in_5              # yes
19244
19245# fmovem.l #<data>, fpsr/fpiar
# (default case: reached by fall-through when none of the compares match)
19246fctrl_in_3:
19247        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
19248        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
19249        bsr.l           _imem_read_long         # fetch FPSR from mem
19250
19251        tst.l           %d1                     # did ifetch fail?
19252        bne.l           iea_iacc                # yes
19253
19254        mov.l           %d0,USER_FPSR(%a6)      # store new FPSR to stack
19255        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
19256        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
19257        bsr.l           _imem_read_long         # fetch FPIAR from mem
19258
19259        tst.l           %d1                     # did ifetch fail?
19260        bne.l           iea_iacc                # yes
19261
19262        mov.l           %d0,USER_FPIAR(%a6)     # store new FPIAR to stack
19263        rts
19264
19265# fmovem.l #<data>, fpcr/fpiar
19266fctrl_in_5:
19267        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
19268        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
19269        bsr.l           _imem_read_long         # fetch FPCR from mem
19270
19271        tst.l           %d1                     # did ifetch fail?
19272        bne.l           iea_iacc                # yes
19273
19274        mov.l           %d0,USER_FPCR(%a6)      # store new FPCR to stack
19275        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
19276        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
19277        bsr.l           _imem_read_long         # fetch FPIAR from mem
19278
19279        tst.l           %d1                     # did ifetch fail?
19280        bne.l           iea_iacc                # yes
19281
19282        mov.l           %d0,USER_FPIAR(%a6)     # store new FPIAR to stack
19283        rts
19284
19285# fmovem.l #<data>, fpcr/fpsr
19286fctrl_in_6:
19287        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
19288        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
19289        bsr.l           _imem_read_long         # fetch FPCR from mem
19290
19291        tst.l           %d1                     # did ifetch fail?
19292        bne.l           iea_iacc                # yes
19293
19294        mov.l           %d0,USER_FPCR(%a6)      # store new FPCR to stack
19295        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
19296        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
19297        bsr.l           _imem_read_long         # fetch FPSR from mem
19298
19299        tst.l           %d1                     # did ifetch fail?
19300        bne.l           iea_iacc                # yes
19301
19302        mov.l           %d0,USER_FPSR(%a6)      # store new FPSR to stack
19303        rts
19304
19305# fmovem.l #<data>, fpcr/fpsr/fpiar
19306fctrl_in_7:
19307        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
19308        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
19309        bsr.l           _imem_read_long         # fetch FPCR from mem
19310
19311        tst.l           %d1                     # did ifetch fail?
19312        bne.l           iea_iacc                # yes
19313
19314        mov.l           %d0,USER_FPCR(%a6)      # store new FPCR to stack
19315        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
19316        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
19317        bsr.l           _imem_read_long         # fetch FPSR from mem
19318
19319        tst.l           %d1                     # did ifetch fail?
19320        bne.l           iea_iacc                # yes
19321
19322        mov.l           %d0,USER_FPSR(%a6)      # store new FPSR to stack
19323        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
19324        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
19325        bsr.l           _imem_read_long         # fetch FPIAR from mem
19326
19327        tst.l           %d1                     # did ifetch fail?
19328        bne.l           iea_iacc                # yes
19329
19330        mov.l           %d0,USER_FPIAR(%a6)     # store new FPIAR to stack
19331        rts
19332
19333#########################################################################
19334# XDEF **************************************************************** #
19335#       _dcalc_ea(): calc correct <ea> from <ea> stacked on exception   #
19336#                                                                       #
19337# XREF **************************************************************** #
19338#       inc_areg() - increment an address register                      #
19339#       dec_areg() - decrement an address register                      #
19340#                                                                       #
19341# INPUT *************************************************************** #
19342#       d0 = number of bytes to adjust <ea> by                          #
19343#                                                                       #
19344# OUTPUT ************************************************************** #
19345#       a0 = corrected <ea>                                             #
19346#                                                                       #
19347# ALGORITHM *********************************************************** #
19348# "Dummy" CALCulate Effective Address:                                  #
19349#       The stacked <ea> for FP unimplemented instructions and opclass  #
19350#       two packed instructions is correct with the exception of...     #
19351#                                                                       #
19352#       1) -(An)   : The register is not updated regardless of size.    #
19353#                    Also, for extended precision and packed, the       #
19354#                    stacked <ea> value is 8 bytes too big              #
19355#       2) (An)+   : The register is not updated.                       #
19356#       3) #<data> : The upper longword of the immediate operand is     #
19357#                    stacked b,w,l and s sizes are completely stacked.  #
19358#                    d,x, and p are not.                                #
19359#                                                                       #
19360#########################################################################
19361
19362        global          _dcalc_ea
# In:  d0 = number of bytes to adjust the address register by
# Out: a0 = effective address to use
# The mode/register fields are taken from the low byte of the opword
# (1+EXC_OPWORD(a6)). For every mode except (An)+, -(An) and #<data> the
# stacked EXC_EA(a6) is returned unchanged.
19363_dcalc_ea:
19364        mov.l           %d0, %a0                # move # bytes to %a0
19365
19366        mov.b           1+EXC_OPWORD(%a6), %d0  # fetch opcode word
19367        mov.l           %d0, %d1                # make a copy
19368
19369        andi.w          &0x38, %d0              # extract mode field
19370        andi.l          &0x7, %d1               # extract reg  field
19371
19372        cmpi.b          %d0,&0x18               # is mode (An)+ ?
19373        beq.b           dcea_pi                 # yes
19374
19375        cmpi.b          %d0,&0x20               # is mode -(An) ?
19376        beq.b           dcea_pd                 # yes
19377
19378        or.w            %d1,%d0                 # concat mode,reg
19379        cmpi.b          %d0,&0x3c               # is mode #<data>?
19380
19381        beq.b           dcea_imm                # yes
19382
19383        mov.l           EXC_EA(%a6),%a0         # return <ea>
19384        rts
19385
19386# need to set immediate data flag here since we'll need to do
19387# an imem_read to fetch this later.
# the returned address is the longword stored at USER_FPIAR(a6) plus 4
# (presumably skipping the opword and extension word -- confirm).
19388dcea_imm:
19389        mov.b           &immed_flg,SPCOND_FLG(%a6)
19390        lea             ([USER_FPIAR,%a6],0x4),%a0 # <ea> = stacked FPIAR value + 4
19391        rts
19392
19393# here, the <ea> is stacked correctly. however, we must update the
19394# address register...
# d1 still holds the register field here; inc_areg is defined elsewhere
# (presumably it takes the register number in d1 -- confirm).
19395dcea_pi:
19396        mov.l           %a0,%d0                 # pass amt to inc by
19397        bsr.l           inc_areg                # inc addr register
19398
19399        mov.l           EXC_EA(%a6),%a0         # stacked <ea> is correct
19400        rts
19401
19402# the <ea> is stacked correctly for all but extended and packed which
19403# the <ea>s are 8 bytes too large.
19404# it would make no sense to have a pre-decrement to a7 in supervisor
19405# mode so we don't even worry about this tricky case here : )
# NOTE(review): the compare against 0xc after the call relies on dec_areg
# returning with d0 still equal to the byte count -- confirm.
19406dcea_pd:
19407        mov.l           %a0,%d0                 # pass amt to dec by
19408        bsr.l           dec_areg                # dec addr register
19409
19410        mov.l           EXC_EA(%a6),%a0         # stacked <ea> is correct
19411
19412        cmpi.b          %d0,&0xc                # is opsize ext or packed?
19413        beq.b           dcea_pd2                # yes
19414        rts
19415dcea_pd2:
19416        sub.l           &0x8,%a0                # correct <ea>
19417        mov.l           %a0,EXC_EA(%a6)         # put correct <ea> on stack
19418        rts
19419
19420#########################################################################
19421# XDEF **************************************************************** #
19422#       _calc_ea_fout(): calculate correct stacked <ea> for extended    #
19423#                        and packed data opclass 3 operations.          #
19424#                                                                       #
19425# XREF **************************************************************** #
19426#       None                                                            #
19427#                                                                       #
19428# INPUT *************************************************************** #
19429#       None                                                            #
19430#                                                                       #
19431# OUTPUT ************************************************************** #
19432#       a0 = return correct effective address                           #
19433#                                                                       #
19434# ALGORITHM *********************************************************** #
19435#       For opclass 3 extended and packed data operations, the <ea>     #
19436# stacked for the exception is incorrect for -(an) and (an)+ addressing #
19437# modes. Also, while we're at it, the address register itself must get  #
19438# updated.                                                              #
19439#       So, for -(an), we must subtract 8 off of the stacked <ea> value #
19440# and return that value as the correct <ea> and store that value in An. #
19441# For (an)+, the stacked <ea> is correct but we must adjust An by +12.  #
19442#                                                                       #
19443#########################################################################
19444
19445# This calc_ea is currently used to retrieve the correct <ea>
19446# for fmove outs of type extended and packed.
# Returns the <ea> in %a0. For (An)+ the selected An is advanced by 12;
# for -(An) both %a0 and EXC_EA(%a6) are lowered by 8 and An is set to the
# corrected <ea>. a0/a1 are updated in the EXC_DREGS save area, a6/a7 via
# EXC_A6/EXC_A7, and a2-a5 directly in the live registers.
19447        global          _calc_ea_fout
19448_calc_ea_fout:
19449        mov.b           1+EXC_OPWORD(%a6),%d0   # fetch low byte of opcode word
19450        mov.l           %d0,%d1                 # make a copy
19451
19452        andi.w          &0x38,%d0               # extract mode field
19453        andi.l          &0x7,%d1                # extract reg  field
19454
19455        cmpi.b          %d0,&0x18               # is mode (An)+ ?
19456        beq.b           ceaf_pi                 # yes
19457
19458        cmpi.b          %d0,&0x20               # is mode -(An) ?
19459        beq.w           ceaf_pd                 # yes
19460
19461        mov.l           EXC_EA(%a6),%a0         # stacked <ea> is correct
19462        rts
19463
19464# (An)+ : extended and packed fmove out
19465#       : stacked <ea> is correct
19466#       : "An" not updated
19467ceaf_pi:
19468        mov.w           (tbl_ceaf_pi.b,%pc,%d1.w*2),%d1 # %d1 = table offset for reg #
19469        mov.l           EXC_EA(%a6),%a0         # return value: stacked <ea>
19470        jmp             (tbl_ceaf_pi.b,%pc,%d1.w*1) # dispatch to per-register handler
19471
19472        swbeg           &0x8
19473tbl_ceaf_pi:
19474        short           ceaf_pi0 - tbl_ceaf_pi
19475        short           ceaf_pi1 - tbl_ceaf_pi
19476        short           ceaf_pi2 - tbl_ceaf_pi
19477        short           ceaf_pi3 - tbl_ceaf_pi
19478        short           ceaf_pi4 - tbl_ceaf_pi
19479        short           ceaf_pi5 - tbl_ceaf_pi
19480        short           ceaf_pi6 - tbl_ceaf_pi
19481        short           ceaf_pi7 - tbl_ceaf_pi
19482
19483ceaf_pi0:
19484        addi.l          &0xc,EXC_DREGS+0x8(%a6) # a0 += 12 (in save area)
19485        rts
19486ceaf_pi1:
19487        addi.l          &0xc,EXC_DREGS+0xc(%a6) # a1 += 12 (in save area)
19488        rts
19489ceaf_pi2:
19490        add.l           &0xc,%a2                # a2 += 12 (live register)
19491        rts
19492ceaf_pi3:
19493        add.l           &0xc,%a3                # a3 += 12 (live register)
19494        rts
19495ceaf_pi4:
19496        add.l           &0xc,%a4                # a4 += 12 (live register)
19497        rts
19498ceaf_pi5:
19499        add.l           &0xc,%a5                # a5 += 12 (live register)
19500        rts
19501ceaf_pi6:
19502        addi.l          &0xc,EXC_A6(%a6)        # a6 += 12 (in save area)
19503        rts
19504ceaf_pi7:
19505        mov.b           &mia7_flg,SPCOND_FLG(%a6) # flag the (a7)+ special case
19506        addi.l          &0xc,EXC_A7(%a6)        # a7 += 12 (in save area)
19507        rts
19508
19509# -(An) : extended and packed fmove out
19510#       : stacked <ea> = actual <ea> + 8
19511#       : "An" not updated
19512ceaf_pd:
19513        mov.w           (tbl_ceaf_pd.b,%pc,%d1.w*2),%d1 # %d1 = table offset for reg #
19514        mov.l           EXC_EA(%a6),%a0
19515        sub.l           &0x8,%a0                # %a0 = corrected <ea>
19516        sub.l           &0x8,EXC_EA(%a6)        # fix the stacked copy as well
19517        jmp             (tbl_ceaf_pd.b,%pc,%d1.w*1) # dispatch to per-register handler
19518
19519        swbeg           &0x8
19520tbl_ceaf_pd:
19521        short           ceaf_pd0 - tbl_ceaf_pd
19522        short           ceaf_pd1 - tbl_ceaf_pd
19523        short           ceaf_pd2 - tbl_ceaf_pd
19524        short           ceaf_pd3 - tbl_ceaf_pd
19525        short           ceaf_pd4 - tbl_ceaf_pd
19526        short           ceaf_pd5 - tbl_ceaf_pd
19527        short           ceaf_pd6 - tbl_ceaf_pd
19528        short           ceaf_pd7 - tbl_ceaf_pd
19529
# each handler below sets An to the corrected <ea> held in %a0
19530ceaf_pd0:
19531        mov.l           %a0,EXC_DREGS+0x8(%a6)
19532        rts
19533ceaf_pd1:
19534        mov.l           %a0,EXC_DREGS+0xc(%a6)
19535        rts
19536ceaf_pd2:
19537        mov.l           %a0,%a2
19538        rts
19539ceaf_pd3:
19540        mov.l           %a0,%a3
19541        rts
19542ceaf_pd4:
19543        mov.l           %a0,%a4
19544        rts
19545ceaf_pd5:
19546        mov.l           %a0,%a5
19547        rts
19548ceaf_pd6:
19549        mov.l           %a0,EXC_A6(%a6)
19550        rts
19551ceaf_pd7:
19552        mov.l           %a0,EXC_A7(%a6)
19553        mov.b           &mda7_flg,SPCOND_FLG(%a6) # flag the -(a7) special case
19554        rts
19555
19556#########################################################################
19557# XDEF **************************************************************** #
19558#       _load_fop(): load operand for unimplemented FP exception        #
19559#                                                                       #
19560# XREF **************************************************************** #
19561#       set_tag_x() - determine ext prec optype tag                     #
19562#       set_tag_s() - determine sgl prec optype tag                     #
19563#       set_tag_d() - determine dbl prec optype tag                     #
19564#       unnorm_fix() - convert unnormalized number to denorm or zero    #
19565#       norm() - normalize a denormalized number                        #
19566#       get_packed() - fetch a packed operand from memory               #
19567#       _dcalc_ea() - calculate <ea>, fixing An in process              #
19568#                                                                       #
19569#       _imem_read_{word,long}() - read from instruction memory         #
19570#       _dmem_read() - read from data memory                            #
19571#       _dmem_read_{byte,word,long}() - read from data memory           #
19572#                                                                       #
19573#       facc_in_{b,w,l,d,x}() - mem read failed; special exit point     #
19574#                                                                       #
19575# INPUT *************************************************************** #
19576#       None                                                            #
19577#                                                                       #
19578# OUTPUT ************************************************************** #
19579#       If memory access doesn't fail:                                  #
19580#               FP_SRC(a6) = source operand in extended precision       #
19581#               FP_DST(a6) = destination operand in extended precision  #
19582#                                                                       #
19583# ALGORITHM *********************************************************** #
19584#       This is called from the Unimplemented FP exception handler in   #
19585# order to load the source and maybe destination operand into           #
19586# FP_SRC(a6) and FP_DST(a6). If the instruction was opclass zero, load  #
19587# the source and destination from the FP register file. Set the optype  #
19588# tags for both if dyadic, one for monadic. If a number is an UNNORM,   #
19589# convert it to a DENORM or a ZERO.                                     #
19590#       If the instruction is opclass two (memory->reg), then fetch     #
19591# the destination from the register file and the source operand from    #
19592# memory. Tag and fix both as above w/ opclass zero instructions.       #
19593#       If the source operand is byte,word,long, or single, it may be   #
19594# in the data register file. If it's actually out in memory, use one of #
19595# the mem_read() routines to fetch it. If the mem_read() access returns #
19596# a failing value, exit through the special facc_in() routine which     #
19597# will create an access error exception frame from the current exception #
19598# frame.                                                                #
19599#       Immediate data and regular data accesses are separated because  #
19600# if an immediate data access fails, the resulting fault status         #
19601# longword stacked for the access error exception must have the         #
19602# instruction bit set.                                                  #
19603#                                                                       #
19604#########################################################################
19605
19606        global          _load_fop
19607_load_fop:
19608
19609#  15     13 12 10  9 7  6       0
19610# /        \ /   \ /  \ /         \
19611# ---------------------------------
19612# | opclass | RX  | RY | EXTENSION |  (2nd word of general FP instruction)
19613# ---------------------------------
19614#
19615
19616#       bfextu          EXC_CMDREG(%a6){&0:&3}, %d0 # extract opclass
19617#       cmpi.b          %d0, &0x2               # which class is it? ('000,'010,'011)
19618#       beq.w           op010                   # handle <ea> -> fpn
19619#       bgt.w           op011                   # handle fpn -> <ea>
19620
19621# we're not using op011 for now...
# bit 6 of the high byte of the command word is bit 14 of the word, i.e. the
# middle opclass bit: set => take the <ea> -> reg path, clear => reg -> reg.
19622        btst            &0x6,EXC_CMDREG(%a6)
19623        bne.b           op010
19624
19625############################
19626# OPCLASS '000: reg -> reg #
19627############################
# The extension bits decide whether a dst operand is loaded as well:
#   bit 5 == 0             -> src only
#   bit 5 == 1, bit 4 == 0 -> dst and src
#   bit 5 == 1, bit 4 == 1 -> dst and src only for extension 0x38 (fcmp)
19628op000:
19629        mov.b           1+EXC_CMDREG(%a6),%d0   # fetch extension word lo
19630        btst            &0x5,%d0                # testing extension bits
19631        beq.b           op000_src               # (bit 5 == 0) => monadic
19632        btst            &0x4,%d0                # (bit 5 == 1)
19633        beq.b           op000_dst               # (bit 4 == 0) => dyadic
19634        and.w           &0x007f,%d0             # extract extension bits {6:0}
19635        cmpi.w          %d0,&0x0038             # is it an fcmp (dyadic) ?
19636        bne.b           op000_src               # no; load the src operand only
19637
19638op000_dst:
19639        bfextu          EXC_CMDREG(%a6){&6:&3}, %d0 # extract dst field
19640        bsr.l           load_fpn2               # fetch dst fpreg into FP_DST
19641
19642        bsr.l           set_tag_x               # get dst optype tag
19643
19644        cmpi.b          %d0, &UNNORM            # is dst fpreg an UNNORM?
19645        beq.b           op000_dst_unnorm        # yes
19646op000_dst_cont:
19647        mov.b           %d0, DTAG(%a6)          # store the dst optype tag
19648
19649op000_src:
19650        bfextu          EXC_CMDREG(%a6){&3:&3}, %d0 # extract src field
19651        bsr.l           load_fpn1               # fetch src fpreg into FP_SRC
19652
19653        bsr.l           set_tag_x               # get src optype tag
19654
19655        cmpi.b          %d0, &UNNORM            # is src fpreg an UNNORM?
19656        beq.b           op000_src_unnorm        # yes
19657op000_src_cont:
19658        mov.b           %d0, STAG(%a6)          # store the src optype tag
19659        rts
19660
# the tag left in %d0 by unnorm_fix is what gets stored at the *_cont labels
19661op000_dst_unnorm:
19662        bsr.l           unnorm_fix              # fix the dst UNNORM
19663        bra.b           op000_dst_cont
19664op000_src_unnorm:
19665        bsr.l           unnorm_fix              # fix the src UNNORM
19666        bra.b           op000_src_cont
19667
19668#############################
19669# OPCLASS '010: <ea> -> reg #
19670#############################
# Same monadic/dyadic decision as op000; the dst comes from the FP register
# file, the src from a data register (<ea> mode field == 0) or from memory.
19671op010:
19672        mov.w           EXC_CMDREG(%a6),%d0     # fetch extension word
19673        btst            &0x5,%d0                # testing extension bits
19674        beq.b           op010_src               # (bit 5 == 0) => monadic
19675        btst            &0x4,%d0                # (bit 5 == 1)
19676        beq.b           op010_dst               # (bit 4 == 0) => dyadic
19677        and.w           &0x007f,%d0             # extract extension bits {6:0}
19678        cmpi.w          %d0,&0x0038             # is it an fcmp (dyadic) ?
19679        bne.b           op010_src               # no; load the src operand only
19680
19681op010_dst:
19682        bfextu          EXC_CMDREG(%a6){&6:&3}, %d0 # extract dst field
19683        bsr.l           load_fpn2               # fetch dst fpreg ptr
19684
19685        bsr.l           set_tag_x               # get dst type tag
19686
19687        cmpi.b          %d0, &UNNORM            # is dst fpreg an UNNORM?
19688        beq.b           op010_dst_unnorm        # yes
19689op010_dst_cont:
19690        mov.b           %d0, DTAG(%a6)          # store the dst optype tag
19691
19692op010_src:
19693        bfextu          EXC_CMDREG(%a6){&3:&3}, %d0 # extract src type field
19694
# the branch below uses the Z flag set by this bfextu: mode field != 0 means
# the source is not a data register.
19695        bfextu          EXC_OPWORD(%a6){&10:&3}, %d1 # extract <ea> mode field
19696        bne.w           fetch_from_mem          # src op is in memory
19697
19698op010_dreg:
19699        clr.b           STAG(%a6)               # either NORM or ZERO
19700        bfextu          EXC_OPWORD(%a6){&13:&3}, %d1 # extract src reg field
19701
19702        mov.w           (tbl_op010_dreg.b,%pc,%d0.w*2), %d0 # jmp based on optype
19703        jmp             (tbl_op010_dreg.b,%pc,%d0.w*1) # fetch src from dreg
19704
19705op010_dst_unnorm:
19706        bsr.l           unnorm_fix              # fix the dst UNNORM
19707        bra.b           op010_dst_cont
19708
# indexed by the src type field; the order matches tbl_fp_type (long, sgl,
# ext, packed, word, dbl, byte, unused). Entries with offset 0 (indices
# 2,3,5,7) have no data-register handler.
19709        swbeg           &0x8
19710tbl_op010_dreg:
19711        short           opd_long        - tbl_op010_dreg
19712        short           opd_sgl         - tbl_op010_dreg
19713        short           tbl_op010_dreg  - tbl_op010_dreg
19714        short           tbl_op010_dreg  - tbl_op010_dreg
19715        short           opd_word        - tbl_op010_dreg
19716        short           tbl_op010_dreg  - tbl_op010_dreg
19717        short           opd_byte        - tbl_op010_dreg
19718        short           tbl_op010_dreg  - tbl_op010_dreg
19719
19720#
19721# LONG: can be either NORM or ZERO...
19722#
# STAG was cleared by op010_dreg before dispatch; it is only overwritten
# here when the converted value is zero.
19723opd_long:
19724        bsr.l           fetch_dreg              # fetch long in d0
19725        fmov.l          %d0, %fp0               # load a long
19726        fmovm.x         &0x80, FP_SRC(%a6)      # return src op in FP_SRC
# NOTE(review): fbeq relies on the condition codes from the fmov.l above
# surviving the fmovm.x store -- confirm against the FMOVEM description.
19727        fbeq.w          opd_long_zero           # long is a ZERO
19728        rts
19729opd_long_zero:
19730        mov.b           &ZERO, STAG(%a6)        # set ZERO optype flag
19731        rts
19732
19733#
19734# WORD: can be either NORM or ZERO...
19735#
# STAG was cleared by op010_dreg before dispatch; it is only overwritten
# here when the converted value is zero.
19736opd_word:
19737        bsr.l           fetch_dreg              # fetch word in d0
19738        fmov.w          %d0, %fp0               # load a word
19739        fmovm.x         &0x80, FP_SRC(%a6)      # return src op in FP_SRC
# NOTE(review): fbeq relies on the condition codes from the fmov.w above
# surviving the fmovm.x store -- confirm against the FMOVEM description.
19740        fbeq.w          opd_word_zero           # WORD is a ZERO
19741        rts
19742opd_word_zero:
19743        mov.b           &ZERO, STAG(%a6)        # set ZERO optype flag
19744        rts
19745
19746#
19747# BYTE: can be either NORM or ZERO...
19748#
# STAG was cleared by op010_dreg before dispatch; it is only overwritten
# here when the converted value is zero.
19749opd_byte:
19750        bsr.l           fetch_dreg              # fetch byte in d0
19751        fmov.b          %d0, %fp0               # load a byte
19752        fmovm.x         &0x80, FP_SRC(%a6)      # return src op in FP_SRC
# NOTE(review): fbeq relies on the condition codes from the fmov.b above
# surviving the fmovm.x store -- confirm against the FMOVEM description.
19753        fbeq.w          opd_byte_zero           # byte is a ZERO
19754        rts
19755opd_byte_zero:
19756        mov.b           &ZERO, STAG(%a6)        # set ZERO optype flag
19757        rts
19758
19759#
19760# SGL: can be either NORM, DENORM, ZERO, INF, QNAN or SNAN but not UNNORM
19761#
19762# separate SNANs and DENORMs so they can be loaded w/ special care.
19763# all others can simply be moved "in" using fmove.
19764#
# The register value is copied to L_SCR1 so it can be tagged and loaded
# through a pointer. The tag is stored in STAG before dispatching, so
# get_sgl_snan returns with STAG = SNAN while get_sgl_denorm replaces it
# with NORM.
19765opd_sgl:
19766        bsr.l           fetch_dreg              # fetch sgl in d0
19767        mov.l           %d0,L_SCR1(%a6)         # stage it in scratch memory
19768
19769        lea             L_SCR1(%a6), %a0        # pass: ptr to the sgl
19770        bsr.l           set_tag_s               # determine sgl type
19771        mov.b           %d0, STAG(%a6)          # save the src tag
19772
19773        cmpi.b          %d0, &SNAN              # is it an SNAN?
19774        beq.w           get_sgl_snan            # yes
19775
19776        cmpi.b          %d0, &DENORM            # is it a DENORM?
19777        beq.w           get_sgl_denorm          # yes
19778
# NOTE(review): (%a0) below assumes set_tag_s leaves %a0 pointing at L_SCR1.
19779        fmov.s          (%a0), %fp0             # no, so can load it regular
19780        fmovm.x         &0x80, FP_SRC(%a6)      # return src op in FP_SRC
19781        rts
19782
19783##############################################################################
19784
19785#########################################################################
19786# fetch_from_mem():                                                     #
19787# - src is out in memory. must:                                         #
19788#       (1) calc ea - must read AFTER you know the src type since       #
19789#                     if the ea is -() or ()+, need to know # of bytes. #
19790#       (2) read it in from either user or supervisor space             #
19791#       (3) if (b || w || l) then simply read in                        #
19792#           if (s || d || x) then check for SNAN,UNNORM,DENORM          #
19793#           if (packed) then punt for now                               #
19794# INPUT:                                                                #
19795#       %d0 : src type field                                            #
19796#########################################################################
19797fetch_from_mem:
19798        clr.b           STAG(%a6)               # either NORM or ZERO
19799
# two-step dispatch: fetch the 16-bit handler offset for this src type, then
# jump relative to the table base.
19800        mov.w           (tbl_fp_type.b,%pc,%d0.w*2), %d0 # index by src type field
19801        jmp             (tbl_fp_type.b,%pc,%d0.w*1)
19802
19803        swbeg           &0x8
19804tbl_fp_type:
19805        short           load_long       - tbl_fp_type
19806        short           load_sgl        - tbl_fp_type
19807        short           load_ext        - tbl_fp_type
19808        short           load_packed     - tbl_fp_type
19809        short           load_word       - tbl_fp_type
19810        short           load_dbl        - tbl_fp_type
19811        short           load_byte       - tbl_fp_type
19812        short           tbl_fp_type     - tbl_fp_type
19813
19814#########################################
19815# load a LONG into %fp0:                #
19816#       -number can't fault             #
19817#       (1) calc ea                     #
19818#       (2) read 4 bytes into %d0       #
19819#       (3) fmov.l into %fp0            #
19820#########################################
# STAG was cleared by fetch_from_mem; it is only changed to ZERO here.
# A failed data read exits through facc_in_l, a failed immediate fetch
# through funimp_iacc.
19821load_long:
19822        movq.l          &0x4, %d0               # pass: 4 (bytes)
19823        bsr.l           _dcalc_ea               # calc <ea>; <ea> in %a0
19824
19825        cmpi.b          SPCOND_FLG(%a6),&immed_flg # did _dcalc_ea flag #<data>?
19826        beq.b           load_long_immed         # yes; read from instruction memory
19827
19828        bsr.l           _dmem_read_long         # fetch src operand from memory
19829
19830        tst.l           %d1                     # did dfetch fail?
19831        bne.l           facc_in_l               # yes
19832
19833load_long_cont:
19834        fmov.l          %d0, %fp0               # read into %fp0;convert to xprec
19835        fmovm.x         &0x80, FP_SRC(%a6)      # return src op in FP_SRC
19836
19837        fbeq.w          load_long_zero          # src op is a ZERO
19838        rts
19839load_long_zero:
19840        mov.b           &ZERO, STAG(%a6)        # set optype tag to ZERO
19841        rts
19842
19843load_long_immed:
19844        bsr.l           _imem_read_long         # fetch src operand immed data
19845
19846        tst.l           %d1                     # did ifetch fail?
19847        bne.l           funimp_iacc             # yes
19848        bra.b           load_long_cont
19849
19850#########################################
19851# load a WORD into %fp0:                #
19852#       -number can't fault             #
19853#       (1) calc ea                     #
19854#       (2) read 2 bytes into %d0       #
19855#       (3) fmov.w into %fp0            #
19856#########################################
# STAG was cleared by fetch_from_mem; it is only changed to ZERO here.
# A failed data read exits through facc_in_w, a failed immediate fetch
# through funimp_iacc.
19857load_word:
19858        movq.l          &0x2, %d0               # pass: 2 (bytes)
19859        bsr.l           _dcalc_ea               # calc <ea>; <ea> in %a0
19860
19861        cmpi.b          SPCOND_FLG(%a6),&immed_flg # did _dcalc_ea flag #<data>?
19862        beq.b           load_word_immed         # yes; read from instruction memory
19863
19864        bsr.l           _dmem_read_word         # fetch src operand from memory
19865
19866        tst.l           %d1                     # did dfetch fail?
19867        bne.l           facc_in_w               # yes
19868
19869load_word_cont:
19870        fmov.w          %d0, %fp0               # read into %fp0;convert to xprec
19871        fmovm.x         &0x80, FP_SRC(%a6)      # return src op in FP_SRC
19872
19873        fbeq.w          load_word_zero          # src op is a ZERO
19874        rts
19875load_word_zero:
19876        mov.b           &ZERO, STAG(%a6)        # set optype tag to ZERO
19877        rts
19878
19879load_word_immed:
19880        bsr.l           _imem_read_word         # fetch src operand immed data
19881
19882        tst.l           %d1                     # did ifetch fail?
19883        bne.l           funimp_iacc             # yes
19884        bra.b           load_word_cont
19885
19886#########################################
19887# load a BYTE into %fp0:                #
19888#       -number can't fault             #
19889#       (1) calc ea                     #
19890#       (2) read 1 byte into %d0        #
19891#       (3) fmov.b into %fp0            #
19892#########################################
# STAG was cleared by fetch_from_mem; it is only changed to ZERO here.
# A failed data read exits through facc_in_b, a failed immediate fetch
# through funimp_iacc.
19893load_byte:
19894        movq.l          &0x1, %d0               # pass: 1 (byte)
19895        bsr.l           _dcalc_ea               # calc <ea>; <ea> in %a0
19896
19897        cmpi.b          SPCOND_FLG(%a6),&immed_flg # did _dcalc_ea flag #<data>?
19898        beq.b           load_byte_immed         # yes; read from instruction memory
19899
19900        bsr.l           _dmem_read_byte         # fetch src operand from memory
19901
19902        tst.l           %d1                     # did dfetch fail?
19903        bne.l           facc_in_b               # yes
19904
19905load_byte_cont:
19906        fmov.b          %d0, %fp0               # read into %fp0;convert to xprec
19907        fmovm.x         &0x80, FP_SRC(%a6)      # return src op in FP_SRC
19908
19909        fbeq.w          load_byte_zero          # src op is a ZERO
19910        rts
19911load_byte_zero:
19912        mov.b           &ZERO, STAG(%a6)        # set optype tag to ZERO
19913        rts
19914
# the immediate is fetched as a whole word; the fmov.b at load_byte_cont then
# converts only the low byte of %d0 (presumably because a byte immediate
# occupies the low half of a 16-bit extension word -- confirm).
19915load_byte_immed:
19916        bsr.l           _imem_read_word         # fetch src operand immed data
19917
19918        tst.l           %d1                     # did ifetch fail?
19919        bne.l           funimp_iacc             # yes
19920        bra.b           load_byte_cont
19921
19922#########################################
19923# load a SGL into %fp0:                 #
19924#       -number can't fault             #
19925#       (1) calc ea                     #
19926#       (2) read 4 bytes into L_SCR1    #
19927#       (3) fmov.s into %fp0            #
19928#########################################
# The operand is staged in L_SCR1(%a6) on BOTH the data-memory and the
# immediate path, because load_sgl_cont tags and loads it from there.
# DENORMs and SNANs are handed to get_sgl_denorm/get_sgl_snan, which build
# FP_SRC by hand; everything else goes through fmov.s.
# A failed data read exits through facc_in_l, a failed immediate fetch
# through funimp_iacc.
19929load_sgl:
19930        movq.l          &0x4, %d0               # pass: 4 (bytes)
19931        bsr.l           _dcalc_ea               # calc <ea>; <ea> in %a0
19932
19933        cmpi.b          SPCOND_FLG(%a6),&immed_flg # did _dcalc_ea flag #<data>?
19934        beq.b           load_sgl_immed          # yes; read from instruction memory
19935
19936        bsr.l           _dmem_read_long         # fetch src operand from memory
19937        mov.l           %d0, L_SCR1(%a6)        # store src op on stack
19938
19939        tst.l           %d1                     # did dfetch fail?
19940        bne.l           facc_in_l               # yes
19941
19942load_sgl_cont:
19943        lea             L_SCR1(%a6), %a0        # pass: ptr to sgl src op
19944        bsr.l           set_tag_s               # determine src type tag
19945        mov.b           %d0, STAG(%a6)          # save src optype tag on stack
19946
19947        cmpi.b          %d0, &DENORM            # is it a sgl DENORM?
19948        beq.w           get_sgl_denorm          # yes
19949
19950        cmpi.b          %d0, &SNAN              # is it a sgl SNAN?
19951        beq.w           get_sgl_snan            # yes
19952
19953        fmov.s          L_SCR1(%a6), %fp0       # read into %fp0;convert to xprec
19954        fmovm.x         &0x80, FP_SRC(%a6)      # return src op in FP_SRC
19955        rts
19956
19957load_sgl_immed:
19958        bsr.l           _imem_read_long         # fetch src operand immed data
19959
19960        tst.l           %d1                     # did ifetch fail?
19961        bne.l           funimp_iacc             # yes
# the immediate arrives in %d0 only; without this store load_sgl_cont would
# tag and load whatever stale data was left in L_SCR1.
             mov.l           %d0, L_SCR1(%a6)        # store src op on stack
19962        bra.b           load_sgl_cont
19963
19964# must convert sgl denorm format to an Xprec denorm fmt suitable for
19965# normalization...
19966# %a0 : points to sgl denorm
# Builds FP_SRC by hand: exponent word cleared, the 23 fraction bits placed
# in bits 30..8 of the hi mantissa longword, lo mantissa zero. The value is
# then normalized and given exponent 0x3f81 - shift count, and the src tag
# is changed to NORM.
19967get_sgl_denorm:
19968        clr.w           FP_SRC_EX(%a6)
19969        bfextu          (%a0){&9:&23}, %d0      # fetch sgl hi(_mantissa)
19970        lsl.l           &0x8, %d0               # left-align below bit 31
19971        mov.l           %d0, FP_SRC_HI(%a6)     # set ext hi(_mantissa)
19972        clr.l           FP_SRC_LO(%a6)          # set ext lo(_mantissa)
19973
19974        clr.w           FP_SRC_EX(%a6)          # (repeats the clr.w above)
19975        btst            &0x7, (%a0)             # is sgn bit set?
19976        beq.b           sgl_dnrm_norm
19977        bset            &0x7, FP_SRC_EX(%a6)    # set sgn of xprec value
19978
19979sgl_dnrm_norm:
19980        lea             FP_SRC(%a6), %a0
19981        bsr.l           norm                    # normalize number; shift amt in %d0
19982        mov.w           &0x3f81, %d1            # xprec exp = 0x3f81
19983        sub.w           %d0, %d1                # exp = 0x3f81 - shft amt.
19984        or.w            %d1, FP_SRC_EX(%a6)     # {sgn,exp}
19985
19986        mov.b           &NORM, STAG(%a6)        # fix src type tag
19987        rts
19988
19989# convert sgl to ext SNAN
19990# %a0 : points to sgl SNAN
# Builds FP_SRC by hand: exponent 0x7fff, the 23 fraction bits placed in
# bits 30..8 of the hi mantissa longword, lo mantissa zero, sign copied
# from the source. STAG is not touched here; both callers in this file
# store the SNAN tag before branching to this routine.
19991get_sgl_snan:
19992        mov.w           &0x7fff, FP_SRC_EX(%a6) # set exp of SNAN
19993        bfextu          (%a0){&9:&23}, %d0
19994        lsl.l           &0x8, %d0               # extract and insert hi(man)
19995        mov.l           %d0, FP_SRC_HI(%a6)
19996        clr.l           FP_SRC_LO(%a6)
19997
19998        btst            &0x7, (%a0)             # see if sign of SNAN is set
19999        beq.b           no_sgl_snan_sgn
20000        bset            &0x7, FP_SRC_EX(%a6)    # copy sign into xprec value
20001no_sgl_snan_sgn:
20002        rts
20003
20004#########################################
20005# load a DBL into %fp0:                 #
20006#       -number can't fault             #
20007#       (1) calc ea                     #
20008#       (2) read 8 bytes into L_SCR(1,2)#
20009#       (3) fmov.d into %fp0            #
20010#########################################
# The 8 bytes are read into the buffer starting at L_SCR1(%a6) on both the
# data-memory and the immediate path. DENORMs and SNANs are handed to
# get_dbl_denorm/get_dbl_snan; everything else goes through fmov.d.
# A failed data read exits through facc_in_d, a failed immediate fetch
# through funimp_iacc.
20011load_dbl:
20012        movq.l          &0x8, %d0               # pass: 8 (bytes)
20013        bsr.l           _dcalc_ea               # calc <ea>; <ea> in %a0
20014
20015        cmpi.b          SPCOND_FLG(%a6),&immed_flg # did _dcalc_ea flag #<data>?
20016        beq.b           load_dbl_immed          # yes; read from instruction memory
20017
20018        lea             L_SCR1(%a6), %a1        # pass: ptr to input dbl tmp space
20019        movq.l          &0x8, %d0               # pass: # bytes to read
20020        bsr.l           _dmem_read              # fetch src operand from memory
20021
20022        tst.l           %d1                     # did dfetch fail?
20023        bne.l           facc_in_d               # yes
20024
20025load_dbl_cont:
20026        lea             L_SCR1(%a6), %a0        # pass: ptr to input dbl
20027        bsr.l           set_tag_d               # determine src type tag
20028        mov.b           %d0, STAG(%a6)          # set src optype tag
20029
20030        cmpi.b          %d0, &DENORM            # is it a dbl DENORM?
20031        beq.w           get_dbl_denorm          # yes
20032
20033        cmpi.b          %d0, &SNAN              # is it a dbl SNAN?
20034        beq.w           get_dbl_snan            # yes
20035
20036        fmov.d          L_SCR1(%a6), %fp0       # read into %fp0;convert to xprec
20037        fmovm.x         &0x80, FP_SRC(%a6)      # return src op in FP_SRC
20038        rts
20039
20040load_dbl_immed:
20041        lea             L_SCR1(%a6), %a1        # pass: ptr to input dbl tmp space
20042        movq.l          &0x8, %d0               # pass: # bytes to read
20043        bsr.l           _imem_read              # fetch src operand immed data
20044
20045        tst.l           %d1                     # did ifetch fail?
20046        bne.l           funimp_iacc             # yes
20047        bra.b           load_dbl_cont
20048
20049# must convert dbl denorm format to an Xprec denorm fmt suitable for
20050# normalization...
20051# %a0 : loc. of dbl denorm
# Builds FP_SRC by hand: the first 31 fraction bits go to bits 30..0 of the
# hi mantissa longword (bit 31 stays clear), the remaining 21 fraction bits
# are left-aligned in the lo longword. The value is then normalized and
# given exponent 0x3c01 - shift count, and the src tag is changed to NORM.
20052get_dbl_denorm:
20053        clr.w           FP_SRC_EX(%a6)
20054        bfextu          (%a0){&12:&31}, %d0     # fetch hi(_mantissa)
20055        mov.l           %d0, FP_SRC_HI(%a6)
20056        bfextu          4(%a0){&11:&21}, %d0    # fetch lo(_mantissa)
20057        mov.l           &0xb, %d1
20058        lsl.l           %d1, %d0                # left-align the 21 bits (shift by 11)
20059        mov.l           %d0, FP_SRC_LO(%a6)
20060
20061        btst            &0x7, (%a0)             # is sgn bit set?
20062        beq.b           dbl_dnrm_norm
20063        bset            &0x7, FP_SRC_EX(%a6)    # set sgn of xprec value
20064
20065dbl_dnrm_norm:
20066        lea             FP_SRC(%a6), %a0
20067        bsr.l           norm                    # normalize number; shift amt in %d0
20068        mov.w           &0x3c01, %d1            # xprec exp = 0x3c01
20069        sub.w           %d0, %d1                # exp = 0x3c01 - shft amt.
20070        or.w            %d1, FP_SRC_EX(%a6)     # {sgn,exp}
20071
20072        mov.b           &NORM, STAG(%a6)        # fix src type tag
20073        rts
20074
20075# convert dbl to ext SNAN
20076# %a0 : points to dbl SNAN
# Builds FP_SRC by hand: exponent 0x7fff, the first 31 fraction bits in
# bits 30..0 of the hi mantissa longword, the remaining 21 left-aligned in
# the lo longword, sign copied from the source. STAG is not touched here;
# load_dbl_cont stores the SNAN tag before branching to this routine.
20077get_dbl_snan:
20078        mov.w           &0x7fff, FP_SRC_EX(%a6) # set exp of SNAN
20079
20080        bfextu          (%a0){&12:&31}, %d0     # fetch hi(_mantissa)
20081        mov.l           %d0, FP_SRC_HI(%a6)
20082        bfextu          4(%a0){&11:&21}, %d0    # fetch lo(_mantissa)
20083        mov.l           &0xb, %d1
20084        lsl.l           %d1, %d0                # left-align the 21 bits (shift by 11)
20085        mov.l           %d0, FP_SRC_LO(%a6)
20086
20087        btst            &0x7, (%a0)             # see if sign of SNAN is set
20088        beq.b           no_dbl_snan_sgn
20089        bset            &0x7, FP_SRC_EX(%a6)    # copy sign into xprec value
20090no_dbl_snan_sgn:
20091        rts
20092
#########################################################################
# load_ext(): fetch an extended precision source operand from memory.
#
#       (1) _dcalc_ea() is called with the operand size (12) in d0
#       (2) _dmem_read() copies 12 bytes into FP_SRC(%a6);
#           a non-zero d1 on return exits through facc_in_x
#       (3) set_tag_x() classifies the operand (tag returned in d0)
#       (4) an UNNORM is handed to unnorm_fix(), whose d0 result
#           replaces the tag
#       (5) the final tag is stored in STAG(%a6)
#########################################################################
load_ext:
        mov.l           &0xc,%d0                # pass: operand size is 12 bytes
        bsr.l           _dcalc_ea               # work out the operand <ea>

        lea             FP_SRC(%a6),%a1         # pass: where to put the operand
        mov.l           &0xc,%d0                # pass: byte count
        bsr.l           _dmem_read              # copy the operand in from memory

        tst.l           %d1                     # non-zero => the read failed
        bne.l           facc_in_x

        lea             FP_SRC(%a6),%a0         # pass: ptr to the fetched operand
        bsr.l           set_tag_x               # d0 = source type tag

        cmpi.b          %d0,&UNNORM             # anything but an UNNORM is
        bne.b           load_ext_tag            # tagged as-is

load_ext_unnorm:
        bsr.l           unnorm_fix              # d0 = tag after fixing the UNNORM

load_ext_tag:
        mov.b           %d0,STAG(%a6)           # record the source type tag
        rts
20124
#########################################################################
# load_packed(): fetch a packed decimal source operand.
#
#       (1) get_packed() is called to obtain the operand
#       (2) set_tag_x() classifies the value at FP_SRC(%a6)
#           (tag returned in d0)
#       (3) an UNNORM result is handed to unnorm_fix(), whose d0
#           result replaces the tag
#       (4) the final tag is stored in STAG(%a6)
#########################################################################
load_packed:
        bsr.l           get_packed

        lea             FP_SRC(%a6),%a0         # pass: ptr to the source operand
        bsr.l           set_tag_x               # d0 = source type tag

        cmpi.b          %d0,&UNNORM             # anything but an UNNORM is
        bne.b           load_packed_tag         # tagged as-is

load_packed_unnorm:
        bsr.l           unnorm_fix              # d0 = tag after fixing the UNNORM

load_packed_tag:
        mov.b           %d0,STAG(%a6)           # record the source type tag
        rts
20147
20148#########################################################################
20149# XDEF **************************************************************** #
20150#       fout(): move from fp register to memory or data register        #
20151#                                                                       #
20152# XREF **************************************************************** #
20153#       _round() - needed to create EXOP for sgl/dbl precision          #
20154#       norm() - needed to create EXOP for extended precision           #
20155#       ovf_res() - create default overflow result for sgl/dbl precision#
20156#       unf_res() - create default underflow result for sgl/dbl prec.   #
20157#       dst_dbl() - create rounded dbl precision result.                #
20158#       dst_sgl() - create rounded sgl precision result.                #
20159#       fetch_dreg() - fetch dynamic k-factor reg for packed.           #
20160#       bindec() - convert FP binary number to packed number.           #
20161#       _mem_write() - write data to memory.                            #
20162#       _mem_write2() - write data to memory unless supv mode -(a7) exc.#
20163#       _dmem_write_{byte,word,long}() - write data to memory.          #
20164#       store_dreg_{b,w,l}() - store data to data register file.        #
20165#       facc_out_{b,w,l,d,x}() - data access error occurred.            #
20166#                                                                       #
20167# INPUT *************************************************************** #
20168#       a0 = pointer to extended precision source operand               #
20169#       d0 = round prec,mode                                            #
20170#                                                                       #
20171# OUTPUT ************************************************************** #
20172#       fp0 : intermediate underflow or overflow result if              #
20173#             OVFL/UNFL occurred for a sgl or dbl operand               #
20174#                                                                       #
20175# ALGORITHM *********************************************************** #
20176#       This routine is accessed by many handlers that need to do an    #
20177# opclass three move of an operand out to memory.                       #
20178#       Decode an fmove out (opclass 3) instruction to determine if     #
20179# it's b,w,l,s,d,x, or p in size. b,w,l can be stored to either a data  #
20180# register or memory. The algorithm uses a standard "fmove" to create   #
20181# the rounded result. Also, since exceptions are disabled, this also    #
20182# creates the correct OPERR default result if appropriate.              #
20183#       For sgl or dbl precision, overflow or underflow can occur. If   #
20184# either occurs and is enabled, an EXOP is also created in fp1.           #
20185#       For extended precision, the stacked <ea> must be fixed along    #
20186# w/ the address index register as appropriate w/ _calc_ea_fout(). If   #
20187# the source is a denorm and if underflow is enabled, an EXOP must be   #
20188# created.                                                              #
20189#       For packed, the k-factor must be fetched from the instruction   #
20190# word or a data register. The <ea> must be fixed as w/ extended        #
20191# precision. Then, bindec() is called to create the appropriate         #
20192# packed result.                                                        #
20193#       If at any time an access error is flagged by one of the move-   #
20194# to-memory routines, then a special exit must be made so that the      #
20195# access error can be handled properly.                                 #
20196#                                                                       #
20197#########################################################################
20198
# fout: dispatch on the 3-bit destination format field extracted from
# EXC_CMDREG(%a6). tbl_fout holds one 16-bit offset per format value,
# each relative to tbl_fout itself:
#       0 = long, 1 = single, 2 = extended, 3 = packed,
#       4 = word, 5 = double, 6 = byte,     7 = packed
# %a0 and %d0 are not touched here, so the selected routine receives
# them exactly as the caller passed them. %d1 and %a1 are overwritten.
20199        global          fout
20200fout:
20201        bfextu          EXC_CMDREG(%a6){&3:&3},%d1 # extract dst fmt
20202        mov.w           (tbl_fout.b,%pc,%d1.w*2),%a1 # use as index
20203        jmp             (tbl_fout.b,%pc,%a1)    # jump to routine
20204
# NOTE(review): swbeg presumably announces the 8-entry table that
# follows -- confirm against the assembler in use.
20205        swbeg           &0x8
20206tbl_fout:
20207        short           fout_long       -       tbl_fout
20208        short           fout_sgl        -       tbl_fout
20209        short           fout_ext        -       tbl_fout
20210        short           fout_pack       -       tbl_fout
20211        short           fout_word       -       tbl_fout
20212        short           fout_dbl        -       tbl_fout
20213        short           fout_byte       -       tbl_fout
20214        short           fout_pack       -       tbl_fout
20215
20216#################################################################
20217# fmove.b out ###################################################
20218#################################################################
20219
20220# Only "Unimplemented Data Type" exceptions enter here. The operand
20221# is either a DENORM or a NORM.
# fout_byte: emulate a byte-sized fmove out.
#   in:  %a0 = ptr to the extended precision source operand
#        %d0 = rnd prec,mode value to load into the FPCR
#   The hardware "fmov.b" does the conversion under the requested
#   rounding mode; the FPSR it produces is OR'ed into the low word of
#   USER_FPSR(%a6). The result goes to the data register named in the
#   opword when the <ea> mode field is zero, else to memory at EXC_EA.
#   A non-zero %d1 from _dmem_write_byte() exits through facc_out_b.
20222fout_byte:
20223        tst.b           STAG(%a6)               # is operand normalized?
20224        bne.b           fout_byte_denorm        # no
20225
20226        fmovm.x         SRC(%a0),&0x80          # load value
20227
20228fout_byte_norm:
20229        fmov.l          %d0,%fpcr               # insert rnd prec,mode
20230
20231        fmov.b          %fp0,%d0                # exec move out w/ correct rnd mode
20232
20233        fmov.l          &0x0,%fpcr              # clear FPCR
20234        fmov.l          %fpsr,%d1               # fetch FPSR
20235        or.w            %d1,2+USER_FPSR(%a6)    # save new exc,accrued bits
20236
20237        mov.b           1+EXC_OPWORD(%a6),%d1   # extract dst mode
20238        andi.b          &0x38,%d1               # is mode == 0? (Dreg dst)
20239        beq.b           fout_byte_dn            # must save to integer regfile
20240
20241        mov.l           EXC_EA(%a6),%a0         # stacked <ea> is correct
20242        bsr.l           _dmem_write_byte        # write byte
20243
20244        tst.l           %d1                     # did dstore fail?
20245        bne.l           facc_out_b              # yes
20246
20247        rts
20248
# destination is a data register: the register number is the low three
# bits of the opword and is passed to store_dreg_b() in %d1.
20249fout_byte_dn:
20250        mov.b           1+EXC_OPWORD(%a6),%d1   # extract Dn
20251        andi.w          &0x7,%d1
20252        bsr.l           store_dreg_b
20253        rts
20254
# DENORM source: instead of loading the operand itself, load a single
# precision value made of the operand's sign and the pattern 0x00800000
# (the smallest normalized single), then convert that.
20255fout_byte_denorm:
20256        mov.l           SRC_EX(%a0),%d1
20257        andi.l          &0x80000000,%d1         # keep DENORM sign
20258        ori.l           &0x00800000,%d1         # make smallest sgl
20259        fmov.s          %d1,%fp0
20260        bra.b           fout_byte_norm
20261
20262#################################################################
20263# fmove.w out ###################################################
20264#################################################################
20265
20266# Only "Unimplemented Data Type" exceptions enter here. The operand
20267# is either a DENORM or a NORM.
# fout_word: emulate a word-sized fmove out.
#   in:  %a0 = ptr to the extended precision source operand
#        %d0 = rnd prec,mode value to load into the FPCR
#   The hardware "fmov.w" does the conversion under the requested
#   rounding mode; the FPSR it produces is OR'ed into the low word of
#   USER_FPSR(%a6). The result goes to the data register named in the
#   opword when the <ea> mode field is zero, else to memory at EXC_EA.
#   A non-zero %d1 from _dmem_write_word() exits through facc_out_w.
20268fout_word:
20269        tst.b           STAG(%a6)               # is operand normalized?
20270        bne.b           fout_word_denorm        # no
20271
20272        fmovm.x         SRC(%a0),&0x80          # load value
20273
20274fout_word_norm:
20275        fmov.l          %d0,%fpcr               # insert rnd prec:mode
20276
20277        fmov.w          %fp0,%d0                # exec move out w/ correct rnd mode
20278
20279        fmov.l          &0x0,%fpcr              # clear FPCR
20280        fmov.l          %fpsr,%d1               # fetch FPSR
20281        or.w            %d1,2+USER_FPSR(%a6)    # save new exc,accrued bits
20282
20283        mov.b           1+EXC_OPWORD(%a6),%d1   # extract dst mode
20284        andi.b          &0x38,%d1               # is mode == 0? (Dreg dst)
20285        beq.b           fout_word_dn            # must save to integer regfile
20286
20287        mov.l           EXC_EA(%a6),%a0         # stacked <ea> is correct
20288        bsr.l           _dmem_write_word        # write word
20289
20290        tst.l           %d1                     # did dstore fail?
20291        bne.l           facc_out_w              # yes
20292
20293        rts
20294
# destination is a data register: the register number is the low three
# bits of the opword and is passed to store_dreg_w() in %d1.
20295fout_word_dn:
20296        mov.b           1+EXC_OPWORD(%a6),%d1   # extract Dn
20297        andi.w          &0x7,%d1
20298        bsr.l           store_dreg_w
20299        rts
20300
# DENORM source: instead of loading the operand itself, load a single
# precision value made of the operand's sign and the pattern 0x00800000
# (the smallest normalized single), then convert that.
20301fout_word_denorm:
20302        mov.l           SRC_EX(%a0),%d1
20303        andi.l          &0x80000000,%d1         # keep DENORM sign
20304        ori.l           &0x00800000,%d1         # make smallest sgl
20305        fmov.s          %d1,%fp0
20306        bra.b           fout_word_norm
20307
20308#################################################################
20309# fmove.l out ###################################################
20310#################################################################
20311
20312# Only "Unimplemented Data Type" exceptions enter here. The operand
20313# is either a DENORM or a NORM.
# fout_long: emulate a long-sized fmove out.
#   in:  %a0 = ptr to the extended precision source operand
#        %d0 = rnd prec,mode value to load into the FPCR
#   The hardware "fmov.l" does the conversion under the requested
#   rounding mode; the FPSR it produces is OR'ed into the low word of
#   USER_FPSR(%a6). The result goes to the data register named in the
#   opword when the <ea> mode field is zero, else to memory at EXC_EA.
#   A non-zero %d1 from _dmem_write_long() exits through facc_out_l.
20314fout_long:
20315        tst.b           STAG(%a6)               # is operand normalized?
20316        bne.b           fout_long_denorm        # no
20317
20318        fmovm.x         SRC(%a0),&0x80          # load value
20319
20320fout_long_norm:
20321        fmov.l          %d0,%fpcr               # insert rnd prec:mode
20322
20323        fmov.l          %fp0,%d0                # exec move out w/ correct rnd mode
20324
20325        fmov.l          &0x0,%fpcr              # clear FPCR
20326        fmov.l          %fpsr,%d1               # fetch FPSR
20327        or.w            %d1,2+USER_FPSR(%a6)    # save new exc,accrued bits
20328
# write the longword in %d0 to its destination
20329fout_long_write:
20330        mov.b           1+EXC_OPWORD(%a6),%d1   # extract dst mode
20331        andi.b          &0x38,%d1               # is mode == 0? (Dreg dst)
20332        beq.b           fout_long_dn            # must save to integer regfile
20333
20334        mov.l           EXC_EA(%a6),%a0         # stacked <ea> is correct
20335        bsr.l           _dmem_write_long        # write long
20336
20337        tst.l           %d1                     # did dstore fail?
20338        bne.l           facc_out_l              # yes
20339
20340        rts
20341
# destination is a data register: the register number is the low three
# bits of the opword and is passed to store_dreg_l() in %d1.
20342fout_long_dn:
20343        mov.b           1+EXC_OPWORD(%a6),%d1   # extract Dn
20344        andi.w          &0x7,%d1
20345        bsr.l           store_dreg_l
20346        rts
20347
# DENORM source: instead of loading the operand itself, load a single
# precision value made of the operand's sign and the pattern 0x00800000
# (the smallest normalized single), then convert that.
20348fout_long_denorm:
20349        mov.l           SRC_EX(%a0),%d1
20350        andi.l          &0x80000000,%d1         # keep DENORM sign
20351        ori.l           &0x00800000,%d1         # make smallest sgl
20352        fmov.s          %d1,%fp0
20353        bra.b           fout_long_norm
20354
20355#################################################################
20356# fmove.x out ###################################################
20357#################################################################
20358
20359# Only "Unimplemented Data Type" exceptions enter here. The operand
20360# is either a DENORM or a NORM.
20361# The DENORM causes an Underflow exception.
# fout_ext: emulate an extended precision fmove out.
#   in:  %a0 = ptr to the extended precision source operand
#   out: fp0 = the source operand
#        fp1 = EXOP, only when the source is a DENORM and UNFL or INEX2
#              is enabled (fout_ext_exc)
#   The 12-byte image is written through _dmem_write(), or through
#   _mem_write2() when SPCOND_FLG(%a6) equals mda7_flg. A non-zero %d1
#   from either writer exits through fout_ext_err.
20362fout_ext:
20363
20364# we copy the extended precision result to FP_SCR0 so that the reserved
20365# 16-bit field gets zeroed. we do this since we promise not to disturb
20366# what's at SRC(a0).
20367        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
20368        clr.w           2+FP_SCR0_EX(%a6)       # clear reserved field
20369        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
20370        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
20371
20372        fmovm.x         SRC(%a0),&0x80          # return result
20373
20374        bsr.l           _calc_ea_fout           # fix stacked <ea>
20375
20376        mov.l           %a0,%a1                 # pass: dst addr
20377        lea             FP_SCR0(%a6),%a0        # pass: src addr
20378        mov.l           &0xc,%d0                # pass: opsize is 12 bytes
20379
20380# we must not yet write the extended precision data to the stack
20381# in the pre-decrement case from supervisor mode or else we'll corrupt
20382# the stack frame. so, leave it in FP_SRC for now and deal with it later...
# NOTE(review): at this point the image being passed is FP_SCR0(%a6), not
# FP_SRC -- confirm which buffer the deferred store actually uses.
20383        cmpi.b          SPCOND_FLG(%a6),&mda7_flg
20384        beq.b           fout_ext_a7
20385
20386        bsr.l           _dmem_write             # write ext prec number to memory
20387
20388        tst.l           %d1                     # did dstore fail?
20389        bne.w           fout_ext_err            # yes
20390
20391        tst.b           STAG(%a6)               # is operand normalized?
20392        bne.b           fout_ext_denorm         # no
20393        rts
20394
20395# the number is a DENORM. must set the underflow exception bit
20396fout_ext_denorm:
20397        bset            &unfl_bit,FPSR_EXCEPT(%a6) # set underflow exc bit
20398
20399        mov.b           FPCR_ENABLE(%a6),%d0
20400        andi.b          &0x0a,%d0               # is UNFL or INEX enabled?
20401        bne.b           fout_ext_exc            # yes
20402        rts
20403
20404# we don't want to do the write if the exception occurred in supervisor mode
20405# so _mem_write2() handles this for us.
20406fout_ext_a7:
20407        bsr.l           _mem_write2             # write ext prec number to memory
20408
20409        tst.l           %d1                     # did dstore fail?
20410        bne.w           fout_ext_err            # yes
20411
20412        tst.b           STAG(%a6)               # is operand normalized?
20413        bne.b           fout_ext_denorm         # no
20414        rts
20415
# build the EXOP for an enabled UNFL/INEX2 on a DENORM source: normalize
# the copy in FP_SCR0, use the negated shift count (15 bits) as the new
# exponent, keep the original sign, and return the result in fp1.
20416fout_ext_exc:
20417        lea             FP_SCR0(%a6),%a0
20418        bsr.l           norm                    # normalize the mantissa
20419        neg.w           %d0                     # new exp = -(shft amt)
20420        andi.w          &0x7fff,%d0
20421        andi.w          &0x8000,FP_SCR0_EX(%a6) # keep only old sign
20422        or.w            %d0,FP_SCR0_EX(%a6)     # insert new exponent
20423        fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
20424        rts
20425
# the store failed: restore the stacked a6 from EXC_A6 before handing
# control to the extended precision access error exit.
20426fout_ext_err:
20427        mov.l           EXC_A6(%a6),(%a6)       # fix stacked a6
20428        bra.l           facc_out_x
20429
20430#########################################################################
20431# fmove.s out ###########################################################
20432#########################################################################
# fout_sgl: emulate a single precision fmove out.
#   in:  %a0 = ptr to the extended precision source operand
#        %d0 = rnd prec,mode
#   The rounding mode bits of %d0 are kept, the precision field is
#   replaced with single precision, and the result is parked in
#   L_SCR3(%a6) for the sub-cases. The biased exponent is then compared
#   with SGL_HI/SGL_LO to choose between the overflow, possible
#   overflow, underflow and in-range paths.
20433fout_sgl:
20434        andi.b          &0x30,%d0               # clear rnd prec
20435        ori.b           &s_mode*0x10,%d0        # insert sgl prec
20436        mov.l           %d0,L_SCR3(%a6)         # save rnd prec,mode on stack
20437
20438#
20439# operand is a normalized number. first, we check to see if the move out
20440# would cause either an underflow or overflow. these cases are handled
20441# separately. otherwise, set the FPCR to the proper rounding mode and
20442# execute the move.
20443#
20444        mov.w           SRC_EX(%a0),%d0         # extract exponent
20445        andi.w          &0x7fff,%d0             # strip sign
20446
20447        cmpi.w          %d0,&SGL_HI             # will operand overflow?
20448        bgt.w           fout_sgl_ovfl           # yes; go handle OVFL
20449        beq.w           fout_sgl_may_ovfl       # maybe; go handle possible OVFL
20450        cmpi.w          %d0,&SGL_LO             # will operand underflow?
20451        blt.w           fout_sgl_unfl           # yes; go handle underflow
20452
20453#
20454# NORMs(in range) can be stored out by a simple "fmov.s"
20455# Unnormalized inputs can come through this point.
20456#
# The FPSR is cleared before the store so that only the status produced
# by this conversion is OR'ed into USER_FPSR(%a6).
20457fout_sgl_exg:
20458        fmovm.x         SRC(%a0),&0x80          # fetch fop from stack
20459
20460        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
20461        fmov.l          &0x0,%fpsr              # clear FPSR
20462
20463        fmov.s          %fp0,%d0                # store does convert and round
20464
20465        fmov.l          &0x0,%fpcr              # clear FPCR
20466        fmov.l          %fpsr,%d1               # save FPSR
20467
20468        or.w            %d1,2+USER_FPSR(%a6)    # set possible inex2/ainex
20469
# write the single precision image in %d0 to a data register (opword
# mode field == 0) or to memory at the stacked <ea>.
20470fout_sgl_exg_write:
20471        mov.b           1+EXC_OPWORD(%a6),%d1   # extract dst mode
20472        andi.b          &0x38,%d1               # is mode == 0? (Dreg dst)
20473        beq.b           fout_sgl_exg_write_dn   # must save to integer regfile
20474
20475        mov.l           EXC_EA(%a6),%a0         # stacked <ea> is correct
20476        bsr.l           _dmem_write_long        # write long
20477
20478        tst.l           %d1                     # did dstore fail?
20479        bne.l           facc_out_l              # yes
20480
20481        rts
20482
20483fout_sgl_exg_write_dn:
20484        mov.b           1+EXC_OPWORD(%a6),%d1   # extract Dn
20485        andi.w          &0x7,%d1
20486        bsr.l           store_dreg_l
20487        rts
20488
20489#
20490# here, we know that the operand would UNFL if moved out to single prec,
20491# so, denorm and round and then use generic store single routine to
20492# write the value to memory.
20493#
# fout_sgl_unfl: single precision move out of an operand that underflows.
#   in:  %a0 = ptr to the extended precision source operand
#        L_SCR3(%a6) = sgl prec + rnd mode
#   The operand is copied to FP_SCR0, a DENORM source is normalized
#   first, unf_res() produces the default underflow result and
#   dst_sgl() converts it for the store.
#   The original %a0 is pushed on the stack: it is popped again by
#   fout_sd_exc_unfl when an EXOP is needed, otherwise discarded here.
# NOTE(review): the facc_out_l exit is taken with that longword still on
# the stack -- confirm the access error path does not rely on %sp.
20494fout_sgl_unfl:
20495        bset            &unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
20496
20497        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
20498        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
20499        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
20500        mov.l           %a0,-(%sp)              # save src ptr for the EXOP path
20501
20502        clr.l           %d0                     # pass: S.F. = 0
20503
20504        cmpi.b          STAG(%a6),&DENORM       # fetch src optype tag
20505        bne.b           fout_sgl_unfl_cont      # let DENORMs fall through
20506
# for a DENORM, %d0 is whatever norm() returns rather than the zero
# loaded above.
20507        lea             FP_SCR0(%a6),%a0
20508        bsr.l           norm                    # normalize the DENORM
20509
20510fout_sgl_unfl_cont:
20511        lea             FP_SCR0(%a6),%a0        # pass: ptr to operand
20512        mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
20513        bsr.l           unf_res                 # calc default underflow result
20514
20515        lea             FP_SCR0(%a6),%a0        # pass: ptr to fop
20516        bsr.l           dst_sgl                 # convert to single prec
20517
# presumably dst_sgl() leaves the single precision image in %d0, which is
# what the two writers below consume -- verify against dst_sgl().
20518        mov.b           1+EXC_OPWORD(%a6),%d1   # extract dst mode
20519        andi.b          &0x38,%d1               # is mode == 0? (Dreg dst)
20520        beq.b           fout_sgl_unfl_dn        # must save to integer regfile
20521
20522        mov.l           EXC_EA(%a6),%a0         # stacked <ea> is correct
20523        bsr.l           _dmem_write_long        # write long
20524
20525        tst.l           %d1                     # did dstore fail?
20526        bne.l           facc_out_l              # yes
20527
20528        bra.b           fout_sgl_unfl_chkexc
20529
20530fout_sgl_unfl_dn:
20531        mov.b           1+EXC_OPWORD(%a6),%d1   # extract Dn
20532        andi.w          &0x7,%d1
20533        bsr.l           store_dreg_l
20534
20535fout_sgl_unfl_chkexc:
20536        mov.b           FPCR_ENABLE(%a6),%d1
20537        andi.b          &0x0a,%d1               # is UNFL or INEX enabled?
20538        bne.w           fout_sd_exc_unfl        # yes
20539        addq.l          &0x4,%sp                # no; drop the saved src ptr
20540        rts
20541
20542#
20543# it's definitely an overflow so call ovf_res to get the correct answer
20544#
# fout_sgl_ovfl: single precision move out of an operand that overflows.
#   in:  %a0 = ptr to the extended precision source operand
#        L_SCR3(%a6) = sgl prec + rnd mode
#   out: default overflow result stored to the destination;
#        fp0 = default overflow result;
#        fp1 = EXOP (via fout_sd_exc_ovfl) when OVFL or INEX2 is enabled
#   USER_FPSR gets ovfl/aovfl/ainex, plus inex2 when mantissa bits below
#   single precision are set (low byte of hi(man) or any of lo(man)).
#   The original %a0 is pushed on the stack: it is popped again by
#   fout_sd_exc_ovfl when an EXOP is needed, otherwise discarded here.
fout_sgl_ovfl:
        tst.b           3+SRC_HI(%a0)           # is result inexact?
        bne.b           fout_sgl_ovfl_inex2
        tst.l           SRC_LO(%a0)             # is result inexact?
        bne.b           fout_sgl_ovfl_inex2
        ori.w           &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
        bra.b           fout_sgl_ovfl_cont
fout_sgl_ovfl_inex2:
        ori.w           &ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2

fout_sgl_ovfl_cont:
        mov.l           %a0,-(%sp)              # save src ptr for the EXOP path

# call ovf_res() w/ sgl prec and the correct rnd mode to create the default
# overflow result. DON'T save the returned ccodes from ovf_res() since
# fmove out doesn't alter them.
        tst.b           SRC_EX(%a0)             # is operand negative?
        smi             %d1                     # set if so
        mov.l           L_SCR3(%a6),%d0         # pass: sgl prec,rnd mode
        bsr.l           ovf_res                 # calc OVFL result
        fmovm.x         (%a0),&0x80             # load default overflow result
        fmov.s          %fp0,%d0                # store to single

        mov.b           1+EXC_OPWORD(%a6),%d1   # extract dst mode
        andi.b          &0x38,%d1               # is mode == 0? (Dreg dst)
        beq.b           fout_sgl_ovfl_dn        # must save to integer regfile

        mov.l           EXC_EA(%a6),%a0         # stacked <ea> is correct
        bsr.l           _dmem_write_long        # write long

        tst.l           %d1                     # did dstore fail?
        bne.l           facc_out_l              # yes

        bra.b           fout_sgl_ovfl_chkexc

fout_sgl_ovfl_dn:
        mov.b           1+EXC_OPWORD(%a6),%d1   # extract Dn
        andi.w          &0x7,%d1
        bsr.l           store_dreg_l

# The EXOP is needed when the exception this path raises is enabled. For
# an overflow that means the OVFL enable bit (0x10) or the INEX2 enable
# bit (0x02) of the FPCR enable byte. The previous mask, 0x0a, tested
# UNFL|INEX2 (it was copied from the underflow path), so with only OVFL
# enabled no EXOP was ever built in fp1.
fout_sgl_ovfl_chkexc:
        mov.b           FPCR_ENABLE(%a6),%d1
        andi.b          &0x12,%d1               # is OVFL or INEX2 enabled?
        bne.w           fout_sd_exc_ovfl        # yes; go build the EXOP
        addq.l          &0x4,%sp                # no; drop the saved src ptr
        rts
20591
20592#
20593# move out MAY overflow:
20594# (1) force the exp to 0x3fff
20595# (2) do a move w/ appropriate rnd mode
20596# (3) if the rounded magnitude is still below 2.0, rounding did not carry
20597#       into the exponent: store the original operand via fout_sgl_exg.
20598#     otherwise rounding bumped the exponent, so treat it as an overflow.
20599#
# %a0 still points at the untouched source operand on both exits; only
# the scaled copy in FP_SCR0(%a6) and fp0 are modified here.
20600fout_sgl_may_ovfl:
20601        mov.w           SRC_EX(%a0),%d1         # fetch current sign
20602        andi.w          &0x8000,%d1             # keep it,clear exp
20603        ori.w           &0x3fff,%d1             # insert exp = 0
20604        mov.w           %d1,FP_SCR0_EX(%a6)     # insert scaled exp
20605        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
20606        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
20607
20608        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
20609
20610        fmov.x          FP_SCR0(%a6),%fp0       # force fop to be rounded
20611        fmov.l          &0x0,%fpcr              # clear FPCR
20612
20613        fabs.x          %fp0                    # need absolute value
20614        fcmp.b          %fp0,&0x2               # did exponent increase?
20615        fblt.w          fout_sgl_exg            # no; go finish NORM
20616        bra.w           fout_sgl_ovfl           # yes; go handle overflow
20617
20618################
20619
# fout_sd_exc_unfl / fout_sd_exc_ovfl: build the EXOP for an enabled
# exception on a single or double precision move out and return it in
# fp1.
#   in:  (%sp) = source operand pointer pushed by the unfl/ovfl path
#                (popped here)
#        L_SCR3(%a6) = rnd prec,mode saved by fout_sgl/fout_dbl
#   The source is copied to FP_SCR0. On the underflow entry a DENORM is
#   normalized and given the negated shift count as its 15-bit exponent.
#   The value is then rounded by _round() with the sign held aside in
#   the byte at 2+FP_SCR0_EX and re-inserted afterwards.
20620fout_sd_exc_unfl:
20621        mov.l           (%sp)+,%a0              # restore a0
20622
20623        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
20624        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
20625        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
20626
20627        cmpi.b          STAG(%a6),&DENORM       # was src a DENORM?
20628        bne.b           fout_sd_exc_cont        # no
20629
20630        lea             FP_SCR0(%a6),%a0
20631        bsr.l           norm                    # %d0 = shift count
20632        neg.l           %d0                     # new exp = -(shft amt)
20633        andi.w          &0x7fff,%d0
20634        bfins           %d0,FP_SCR0_EX(%a6){&1:&15} # insert exp, keep sign
20635        bra.b           fout_sd_exc_cont
20636
20637fout_sd_exc:
20638fout_sd_exc_ovfl:
20639        mov.l           (%sp)+,%a0              # restore a0
20640
20641        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
20642        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
20643        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
20644
20645fout_sd_exc_cont:
20646        bclr            &0x7,FP_SCR0_EX(%a6)    # clear sign bit
20647        sne.b           2+FP_SCR0_EX(%a6)       # set internal sign bit
20648        lea             FP_SCR0(%a6),%a0        # pass: ptr to DENORM
20649
# split the low byte of L_SCR3 into the two halves of %d1: after the
# shift right by 4, mask 0x0c selects the precision field (upper word)
# and mask 0x03 the rounding mode field (lower word).
# NOTE(review): the exact encoding _round() expects is not visible here.
20650        mov.b           3+L_SCR3(%a6),%d1
20651        lsr.b           &0x4,%d1
20652        andi.w          &0x0c,%d1
20653        swap            %d1
20654        mov.b           3+L_SCR3(%a6),%d1
20655        lsr.b           &0x4,%d1
20656        andi.w          &0x03,%d1
20657        clr.l           %d0                     # pass: zero g,r,s
20658        bsr.l           _round                  # round the DENORM
20659
20660        tst.b           2+FP_SCR0_EX(%a6)       # is EXOP negative?
20661        beq.b           fout_sd_exc_done        # no
20662        bset            &0x7,FP_SCR0_EX(%a6)    # yes
20663
20664fout_sd_exc_done:
20665        fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
20666        rts
20667
20668#################################################################
20669# fmove.d out ###################################################
20670#################################################################
# fout_dbl: emulate a double precision fmove out.
#   in:  %a0 = ptr to the extended precision source operand
#        %d0 = rnd prec,mode
#   The rounding mode bits of %d0 are kept, the precision field is
#   replaced with double precision, and the result is parked in
#   L_SCR3(%a6) for the sub-cases. The biased exponent is then compared
#   with DBL_HI/DBL_LO to choose between the overflow, possible
#   overflow, underflow and in-range paths.
20671fout_dbl:
20672        andi.b          &0x30,%d0               # clear rnd prec
20673        ori.b           &d_mode*0x10,%d0        # insert dbl prec
20674        mov.l           %d0,L_SCR3(%a6)         # save rnd prec,mode on stack
20675
20676#
20677# operand is a normalized number. first, we check to see if the move out
20678# would cause either an underflow or overflow. these cases are handled
20679# separately. otherwise, set the FPCR to the proper rounding mode and
20680# execute the move.
20681#
20682        mov.w           SRC_EX(%a0),%d0         # extract exponent
20683        andi.w          &0x7fff,%d0             # strip sign
20684
20685        cmpi.w          %d0,&DBL_HI             # will operand overflow?
20686        bgt.w           fout_dbl_ovfl           # yes; go handle OVFL
20687        beq.w           fout_dbl_may_ovfl       # maybe; go handle possible OVFL
20688        cmpi.w          %d0,&DBL_LO             # will operand underflow?
20689        blt.w           fout_dbl_unfl           # yes; go handle underflow
20690
20691#
20692# NORMs(in range) can be stored out by a simple "fmov.d"
20693# Unnormalized inputs can come through this point.
20694#
# The FPSR is cleared before the store so that only the status produced
# by this conversion is OR'ed into USER_FPSR(%a6). The 8-byte result is
# staged at L_SCR1(%a6) and then copied to the stacked <ea> by
# _dmem_write(); a non-zero %d1 from it exits through facc_out_d.
20695fout_dbl_exg:
20696        fmovm.x         SRC(%a0),&0x80          # fetch fop from stack
20697
20698        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
20699        fmov.l          &0x0,%fpsr              # clear FPSR
20700
20701        fmov.d          %fp0,L_SCR1(%a6)        # store does convert and round
20702
20703        fmov.l          &0x0,%fpcr              # clear FPCR
20704        fmov.l          %fpsr,%d0               # save FPSR
20705
20706        or.w            %d0,2+USER_FPSR(%a6)    # set possible inex2/ainex
20707
20708        mov.l           EXC_EA(%a6),%a1         # pass: dst addr
20709        lea             L_SCR1(%a6),%a0         # pass: src addr
20710        movq.l          &0x8,%d0                # pass: opsize is 8 bytes
20711        bsr.l           _dmem_write             # store dbl fop to memory
20712
20713        tst.l           %d1                     # did dstore fail?
20714        bne.l           facc_out_d              # yes
20715
20716        rts                                     # no; so we're finished
20717
20718#
20719# here, we know that the operand would UNFL if moved out to double prec,
20720# so, denorm and round and then use generic store double routine to
20721# write the value to memory.
20722#
# fout_dbl_unfl: double precision move out of an operand that underflows.
#   in:  %a0 = ptr to the extended precision source operand
#        L_SCR3(%a6) = dbl prec + rnd mode
#   The operand is copied to FP_SCR0, a DENORM source is normalized
#   first, unf_res() produces the default underflow result and
#   dst_dbl() converts it. The two longwords returned in %d0/%d1 are
#   staged at L_SCR1/L_SCR2 and written to the stacked <ea>.
#   The original %a0 is pushed on the stack: it is popped again by
#   fout_sd_exc_unfl when an EXOP is needed, otherwise discarded here.
# NOTE(review): the 8-byte write from L_SCR1 assumes L_SCR2 directly
# follows L_SCR1 in the frame -- confirm against the frame layout. The
# facc_out_d exit is taken with the pushed longword still on the stack.
20723fout_dbl_unfl:
20724        bset            &unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
20725
20726        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
20727        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
20728        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
20729        mov.l           %a0,-(%sp)              # save src ptr for the EXOP path
20730
20731        clr.l           %d0                     # pass: S.F. = 0
20732
20733        cmpi.b          STAG(%a6),&DENORM       # fetch src optype tag
20734        bne.b           fout_dbl_unfl_cont      # let DENORMs fall through
20735
# for a DENORM, %d0 is whatever norm() returns rather than the zero
# loaded above.
20736        lea             FP_SCR0(%a6),%a0
20737        bsr.l           norm                    # normalize the DENORM
20738
20739fout_dbl_unfl_cont:
20740        lea             FP_SCR0(%a6),%a0        # pass: ptr to operand
20741        mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
20742        bsr.l           unf_res                 # calc default underflow result
20743
20744        lea             FP_SCR0(%a6),%a0        # pass: ptr to fop
20745        bsr.l           dst_dbl                 # convert to double prec
20746        mov.l           %d0,L_SCR1(%a6)         # stage first longword of result
20747        mov.l           %d1,L_SCR2(%a6)         # stage second longword of result
20748
20749        mov.l           EXC_EA(%a6),%a1         # pass: dst addr
20750        lea             L_SCR1(%a6),%a0         # pass: src addr
20751        movq.l          &0x8,%d0                # pass: opsize is 8 bytes
20752        bsr.l           _dmem_write             # store dbl fop to memory
20753
20754        tst.l           %d1                     # did dstore fail?
20755        bne.l           facc_out_d              # yes
20756
20757        mov.b           FPCR_ENABLE(%a6),%d1
20758        andi.b          &0x0a,%d1               # is UNFL or INEX enabled?
20759        bne.w           fout_sd_exc_unfl        # yes
20760        addq.l          &0x4,%sp                # no; drop the saved src ptr
20761        rts
20762
20763#
20764# it's definitely an overflow so call ovf_res to get the correct answer
20765#
20766fout_dbl_ovfl:
20767        mov.w           2+SRC_LO(%a0),%d0       # fetch low word of lo(man)
20768        andi.w          &0x7ff,%d0              # any of the low 11 bits set?
20769        bne.b           fout_dbl_ovfl_inex2     # yes; result is also inexact
20770
20771        ori.w           &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
20772        bra.b           fout_dbl_ovfl_cont
20773fout_dbl_ovfl_inex2:
20774        ori.w           &ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
20775
20776fout_dbl_ovfl_cont:
20777        mov.l           %a0,-(%sp)              # save src operand ptr on stack
20778
20779# call ovf_res() w/ dbl prec and the correct rnd mode to create the default
20780# overflow result. DON'T save the returned ccodes from ovf_res() since
20781# fmove out doesn't alter them.
20782        tst.b           SRC_EX(%a0)             # is operand negative?
20783        smi             %d1                     # set if so
20784        mov.l           L_SCR3(%a6),%d0         # pass: dbl prec,rnd mode
20785        bsr.l           ovf_res                 # calc OVFL result
20786        fmovm.x         (%a0),&0x80             # load default overflow result
20787        fmov.d          %fp0,L_SCR1(%a6)        # store to double
20788
20789        mov.l           EXC_EA(%a6),%a1         # pass: dst addr
20790        lea             L_SCR1(%a6),%a0         # pass: src addr
20791        movq.l          &0x8,%d0                # pass: opsize is 8 bytes
20792        bsr.l           _dmem_write             # store dbl fop to memory
20793
20794        tst.l           %d1                     # did dstore fail?
20795        bne.l           facc_out_d              # yes
20796# overflow path: test the OVFL (0x10) and INEX2 (0x02) enables, not UNFL (0x08).
20797        mov.b           FPCR_ENABLE(%a6),%d1    # fetch exception enable byte
20798        andi.b          &0x12,%d1               # is OVFL or INEX2 enabled?
20799        bne.w           fout_sd_exc_ovfl        # yes
20800        addq.l          &0x4,%sp                # no; discard saved src ptr
20801        rts
20802
20803#
20804# move out MAY overflow:
20805# (1) force the exp to 0x3fff
20806# (2) do a move w/ appropriate rnd mode
20807# (3) if exp still equals zero, then insert original exponent
20808#       for the correct result.
20809#     if exp now equals one, then it overflowed so call ovf_res.
20810#
20811fout_dbl_may_ovfl:
20812        mov.w           SRC_EX(%a0),%d1         # fetch current sign,exp word
20813        andi.w          &0x8000,%d1             # keep it,clear exp
20814        ori.w           &0x3fff,%d1             # insert exp = 0 (biased $3fff)
20815        mov.w           %d1,FP_SCR0_EX(%a6)     # insert scaled exp
20816        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
20817        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
20818
20819        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR from L_SCR3
20820
20821        fmov.x          FP_SCR0(%a6),%fp0       # force fop to be rounded
20822        fmov.l          &0x0,%fpcr              # clear FPCR
20823
20824        fabs.x          %fp0                    # need absolute value
20825        fcmp.b          %fp0,&0x2               # is |fop| >= 2 (did exponent increase)?
20826        fblt.w          fout_dbl_exg            # no; go finish NORM
20827        bra.w           fout_dbl_ovfl           # yes; go handle overflow
20828
20829#########################################################################
20830# XDEF **************************************************************** #
20831#       dst_dbl(): create double precision value from extended prec.    #
20832#                                                                       #
20833# XREF **************************************************************** #
20834#       None                                                            #
20835#                                                                       #
20836# INPUT *************************************************************** #
20837#       a0 = pointer to source operand in extended precision            #
20838#                                                                       #
20839# OUTPUT ************************************************************** #
20840#       d0 = hi(double precision result)                                #
20841#       d1 = lo(double precision result)                                #
20842#                                                                       #
20843# ALGORITHM *********************************************************** #
20844#                                                                       #
20845#  Changes extended precision to double precision.                      #
20846#  Note: no attempt is made to round the extended value to double.      #
20847#       dbl_sign = ext_sign                                             #
20848#       dbl_exp = ext_exp - $3fff(ext bias) + $3ff(dbl bias)            #
20849#       get rid of ext integer bit                                      #
20850#       dbl_mant = ext_mant{62:11}                                      #
20851#                                                                       #
20852#               ---------------   ---------------    ---------------    #
20853#  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |    #
20854#               ---------------   ---------------    ---------------    #
20855#                95         64    63 62       32      31     11   0     #
20856#                                    |                       |          #
20857#                                    |                       |          #
20858#                                    |                       |          #
20859#                                    v                       v          #
20860#                             ---------------   ---------------         #
20861#  double   ->                |s|exp| mant  |   |  mant       |         #
20862#                             ---------------   ---------------         #
20863#                             63     51   32   31              0        #
20864#                                                                       #
20865#########################################################################
20866
20867dst_dbl:
20868        clr.l           %d0                     # clear d0 (exp is built in low word)
20869        mov.w           FTEMP_EX(%a0),%d0       # get sign,exponent word
20870        subi.w          &EXT_BIAS,%d0           # subtract extended precision bias
20871        addi.w          &DBL_BIAS,%d0           # add double precision bias
20872        tst.b           FTEMP_HI(%a0)           # is number a denorm (msb of mantissa clear)?
20873        bmi.b           dst_get_dupper          # no
20874        subq.w          &0x1,%d0                # yes; denorm bias = DBL_BIAS - 1
20875dst_get_dupper:
20876        swap            %d0                     # d0 now in upper word
20877        lsl.l           &0x4,%d0                # d0 in proper place for dbl prec exp
20878        tst.b           FTEMP_EX(%a0)           # test sign
20879        bpl.b           dst_get_dman            # if positive, go process mantissa
20880        bset            &0x1f,%d0               # if negative, set sign
20881dst_get_dman:
20882        mov.l           FTEMP_HI(%a0),%d1       # get ms mantissa
20883        bfextu          %d1{&1:&20},%d1         # skip integer bit; get next 20 bits of ms
20884        or.l            %d1,%d0                 # put these bits in ms word of double
20885        mov.l           %d0,L_SCR1(%a6)         # save hi lword of double in L_SCR1
20886        mov.l           FTEMP_HI(%a0),%d1       # get ms mantissa
20887        mov.l           &21,%d0                 # load shift count
20888        lsl.l           %d0,%d1                 # put lower 11 bits in upper bits
20889        mov.l           %d1,L_SCR2(%a6)         # build lower lword in memory
20890        mov.l           FTEMP_LO(%a0),%d1       # get ls mantissa
20891        bfextu          %d1{&0:&21},%d0         # get ls 21 bits of double
20892        mov.l           L_SCR2(%a6),%d1         # reload upper 11 bits of lo lword
20893        or.l            %d0,%d1                 # put them in double result
20894        mov.l           L_SCR1(%a6),%d0         # reload hi lword; d0/d1 = hi/lo result
20895        rts
20896
20897#########################################################################
20898# XDEF **************************************************************** #
20899#       dst_sgl(): create single precision value from extended prec     #
20900#                                                                       #
20901# XREF **************************************************************** #
20902#                                                                       #
20903# INPUT *************************************************************** #
20904#       a0 = pointer to source operand in extended precision            #
20905#                                                                       #
20906# OUTPUT ************************************************************** #
20907#       d0 = single precision result                                    #
20908#                                                                       #
20909# ALGORITHM *********************************************************** #
20910#                                                                       #
20911# Changes extended precision to single precision.                       #
20912#       sgl_sign = ext_sign                                             #
20913#       sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias)             #
20914#       get rid of ext integer bit                                      #
20915#       sgl_mant = ext_mant{62:40}                                      #
20916#                                                                       #
20917#               ---------------   ---------------    ---------------    #
20918#  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |    #
20919#               ---------------   ---------------    ---------------    #
20920#                95         64    63 62    40 32      31     12   0     #
20921#                                    |     |                            #
20922#                                    |     |                            #
20923#                                    |     |                            #
20924#                                    v     v                            #
20925#                             ---------------                           #
20926#  single   ->                |s|exp| mant  |                           #
20927#                             ---------------                           #
20928#                             31     22     0                           #
20929#                                                                       #
20930#########################################################################
20931
20932dst_sgl:
20933        clr.l           %d0                     # clear d0 (exp is built in low word)
20934        mov.w           FTEMP_EX(%a0),%d0       # get sign,exponent word
20935        subi.w          &EXT_BIAS,%d0           # subtract extended precision bias
20936        addi.w          &SGL_BIAS,%d0           # add single precision bias
20937        tst.b           FTEMP_HI(%a0)           # is number a denorm (msb of mantissa clear)?
20938        bmi.b           dst_get_supper          # no
20939        subq.w          &0x1,%d0                # yes; denorm bias = SGL_BIAS - 1
20940dst_get_supper:
20941        swap            %d0                     # put exp in upper word of d0
20942        lsl.l           &0x7,%d0                # shift it into single exp bits
20943        tst.b           FTEMP_EX(%a0)           # test sign
20944        bpl.b           dst_get_sman            # if positive, continue
20945        bset            &0x1f,%d0               # if negative, put in sign first
20946dst_get_sman:
20947        mov.l           FTEMP_HI(%a0),%d1       # get ms mantissa
20948        andi.l          &0x7fffff00,%d1         # drop integer bit; keep upper 23 bits of ms
20949        lsr.l           &0x8,%d1                # and put them flush right
20950        or.l            %d1,%d0                 # merge mantissa into single result
20951        rts
20952
20953##############################################################################
20954fout_pack:
20955        bsr.l           _calc_ea_fout           # fetch the <ea>
20956        mov.l           %a0,-(%sp)              # save it; popped as dst addr below
20957
20958        mov.b           STAG(%a6),%d0           # fetch input type
20959        bne.w           fout_pack_not_norm      # input is not NORM
20960
20961fout_pack_norm:
20962        btst            &0x4,EXC_CMDREG(%a6)    # static or dynamic?
20963        beq.b           fout_pack_s             # static
20964
20965fout_pack_d:
20966        mov.b           1+EXC_CMDREG(%a6),%d1   # fetch dynamic reg
20967        lsr.b           &0x4,%d1                # move reg field down to bit 0
20968        andi.w          &0x7,%d1                # keep 3-bit register index
20969
20970        bsr.l           fetch_dreg              # fetch Dn w/ k-factor
20971
20972        bra.b           fout_pack_type
20973fout_pack_s:
20974        mov.b           1+EXC_CMDREG(%a6),%d0   # fetch static field
20975
20976fout_pack_type:
20977        bfexts          %d0{&25:&7},%d0         # extract k-factor (low 7 bits, sign-extended)
20978        mov.l   %d0,-(%sp)                      # save k-factor on stack
20979
20980        lea             FP_SRC(%a6),%a0         # pass: ptr to input
20981
20982# bindec is currently scrambling FP_SRC for denorm inputs.
20983# we'll have to change this, but for now, tough luck!!!
20984        bsr.l           bindec                  # convert xprec to packed
20985
20986#       andi.l          &0xcfff000f,FP_SCR0(%a6) # clear unused fields
20987        andi.l          &0xcffff00f,FP_SCR0(%a6) # clear unused fields
20988
20989        mov.l   (%sp)+,%d0                      # restore k-factor
20990
20991        tst.b           3+FP_SCR0_EX(%a6)       # low byte of 1st lword nonzero?
20992        bne.b           fout_pack_set           # yes; result is not zero
20993        tst.l           FP_SCR0_HI(%a6)         # 2nd lword nonzero?
20994        bne.b           fout_pack_set           # yes; result is not zero
20995        tst.l           FP_SCR0_LO(%a6)         # 3rd lword nonzero?
20996        bne.b           fout_pack_set           # yes; result is not zero
20997
20998# add the extra condition that only if the k-factor was zero, too, should
20999# we zero the exponent
21000        tst.l           %d0                     # was k-factor zero?
21001        bne.b           fout_pack_set           # no; leave exponent alone
21002# "mantissa" is all zero which means that the answer is zero. but, the '040
21003# algorithm allows the exponent to be non-zero. the 881/2 do not. Therefore,
21004# if the mantissa is zero, I will zero the exponent, too.
21005# the question now is whether the exponents sign bit is allowed to be non-zero
21006# for a zero, also...
21007        andi.w          &0xf000,FP_SCR0(%a6)    # clear low 12 bits of 1st word (exponent)
21008
21009fout_pack_set:
21010
21011        lea             FP_SCR0(%a6),%a0        # pass: src addr
21012
21013fout_pack_write:
21014        mov.l           (%sp)+,%a1              # pass: dst addr
21015        mov.l           &0xc,%d0                # pass: opsize is 12 bytes
21016
21017        cmpi.b          SPCOND_FLG(%a6),&mda7_flg # is the mda7 special case flagged?
21018        beq.b           fout_pack_a7            # yes; use _mem_write2
21019
21020        bsr.l           _dmem_write             # write packed number to memory
21021
21022        tst.l           %d1                     # did dstore fail?
21023        bne.w           fout_ext_err            # yes
21024
21025        rts
21026
21027# we don't want to do the write if the exception occurred in supervisor mode
21028# so _mem_write2() handles this for us.
21029fout_pack_a7:
21030        bsr.l           _mem_write2             # write packed number to memory
21031
21032        tst.l           %d1                     # did dstore fail?
21033        bne.w           fout_ext_err            # yes
21034
21035        rts
21036
21037fout_pack_not_norm:
21038        cmpi.b          %d0,&DENORM             # is it a DENORM?
21039        beq.w           fout_pack_norm          # yes
21040        lea             FP_SRC(%a6),%a0         # pass: src addr (FP_SRC written as is)
21041        clr.w           2+FP_SRC_EX(%a6)        # zero the word following sign,exp
21042        cmpi.b          %d0,&SNAN               # is it an SNAN?
21043        beq.b           fout_pack_snan          # yes
21044        bra.b           fout_pack_write         # no
21045
21046fout_pack_snan:
21047        ori.w           &snaniop2_mask,FPSR_EXCEPT(%a6) # set SNAN/AIOP
21048        bset            &0x6,FP_SRC_HI(%a6)     # set snan bit
21049        bra.b           fout_pack_write
21050
21051#########################################################################
21052# XDEF **************************************************************** #
21053#       fetch_dreg(): fetch register according to index in d1           #
21054#                                                                       #
21055# XREF **************************************************************** #
21056#       None                                                            #
21057#                                                                       #
21058# INPUT *************************************************************** #
21059#       d1 = index of register to fetch from                            #
21060#                                                                       #
21061# OUTPUT ************************************************************** #
21062#       d0 = value of register fetched                                  #
21063#                                                                       #
21064# ALGORITHM *********************************************************** #
21065#       According to the index value in d1 which can range from zero    #
21066# to fifteen, load the corresponding register file value (where         #
21067# address register indexes start at 8). D0/D1/A0/A1/A6/A7 are on the    #
21068# stack. The rest should still be in their original places.             #
21069#                                                                       #
21070#########################################################################
21071
21072# this routine leaves d1 intact for subsequent store_dreg calls.
21073        global          fetch_dreg
21074fetch_dreg:
21075        mov.w           (tbl_fdreg.b,%pc,%d1.w*2),%d0 # d0 = table offset for index d1
21076        jmp             (tbl_fdreg.b,%pc,%d0.w*1) # jump to that register's fetch code
21077
21078tbl_fdreg:
21079        short           fdreg0 - tbl_fdreg      # index 0x0: d0
21080        short           fdreg1 - tbl_fdreg      # index 0x1: d1
21081        short           fdreg2 - tbl_fdreg      # index 0x2: d2
21082        short           fdreg3 - tbl_fdreg      # index 0x3: d3
21083        short           fdreg4 - tbl_fdreg      # index 0x4: d4
21084        short           fdreg5 - tbl_fdreg      # index 0x5: d5
21085        short           fdreg6 - tbl_fdreg      # index 0x6: d6
21086        short           fdreg7 - tbl_fdreg      # index 0x7: d7
21087        short           fdreg8 - tbl_fdreg      # index 0x8: a0
21088        short           fdreg9 - tbl_fdreg      # index 0x9: a1
21089        short           fdrega - tbl_fdreg      # index 0xa: a2
21090        short           fdregb - tbl_fdreg      # index 0xb: a3
21091        short           fdregc - tbl_fdreg      # index 0xc: a4
21092        short           fdregd - tbl_fdreg      # index 0xd: a5
21093        short           fdrege - tbl_fdreg      # index 0xe: a6
21094        short           fdregf - tbl_fdreg      # index 0xf: a7
21095
21096fdreg0:
21097        mov.l           EXC_DREGS+0x0(%a6),%d0  # d0 is on the stack
21098        rts
21099fdreg1:
21100        mov.l           EXC_DREGS+0x4(%a6),%d0  # d1 is on the stack
21101        rts
21102fdreg2:
21103        mov.l           %d2,%d0                 # d2 is still live
21104        rts
21105fdreg3:
21106        mov.l           %d3,%d0                 # d3 is still live
21107        rts
21108fdreg4:
21109        mov.l           %d4,%d0                 # d4 is still live
21110        rts
21111fdreg5:
21112        mov.l           %d5,%d0                 # d5 is still live
21113        rts
21114fdreg6:
21115        mov.l           %d6,%d0                 # d6 is still live
21116        rts
21117fdreg7:
21118        mov.l           %d7,%d0                 # d7 is still live
21119        rts
21120fdreg8:
21121        mov.l           EXC_DREGS+0x8(%a6),%d0  # a0 is on the stack
21122        rts
21123fdreg9:
21124        mov.l           EXC_DREGS+0xc(%a6),%d0  # a1 is on the stack
21125        rts
21126fdrega:
21127        mov.l           %a2,%d0                 # a2 is still live
21128        rts
21129fdregb:
21130        mov.l           %a3,%d0                 # a3 is still live
21131        rts
21132fdregc:
21133        mov.l           %a4,%d0                 # a4 is still live
21134        rts
21135fdregd:
21136        mov.l           %a5,%d0                 # a5 is still live
21137        rts
21138fdrege:
21139        mov.l           (%a6),%d0               # a6 is on the stack, at (a6)
21140        rts
21141fdregf:
21142        mov.l           EXC_A7(%a6),%d0         # a7 is on the stack
21143        rts
21144
21145#########################################################################
21146# XDEF **************************************************************** #
21147#       store_dreg_l(): store longword to data register specified by d1 #
21148#                                                                       #
21149# XREF **************************************************************** #
21150#       None                                                            #
21151#                                                                       #
21152# INPUT *************************************************************** #
21153#       d0 = longword value to store                                    #
21154#       d1 = index of register to store into                            #
21155#                                                                       #
21156# OUTPUT ************************************************************** #
21157#       (data register is updated)                                      #
21158#                                                                       #
21159# ALGORITHM *********************************************************** #
21160#       According to the index value in d1, store the longword value    #
21161# in d0 to the corresponding data register. D0/D1 are on the stack      #
21162# while the rest are in their initial places.                           #
21163#                                                                       #
21164#########################################################################
21165
21166        global          store_dreg_l
21167store_dreg_l:
21168        mov.w           (tbl_sdregl.b,%pc,%d1.w*2),%d1 # d1 = table offset (index is overwritten)
21169        jmp             (tbl_sdregl.b,%pc,%d1.w*1) # jump to that register's store code
21170
21171tbl_sdregl:
21172        short           sdregl0 - tbl_sdregl
21173        short           sdregl1 - tbl_sdregl
21174        short           sdregl2 - tbl_sdregl
21175        short           sdregl3 - tbl_sdregl
21176        short           sdregl4 - tbl_sdregl
21177        short           sdregl5 - tbl_sdregl
21178        short           sdregl6 - tbl_sdregl
21179        short           sdregl7 - tbl_sdregl
21180
21181sdregl0:
21182        mov.l           %d0,EXC_DREGS+0x0(%a6)  # d0 is on the stack
21183        rts
21184sdregl1:
21185        mov.l           %d0,EXC_DREGS+0x4(%a6)  # d1 is on the stack
21186        rts
21187sdregl2:
21188        mov.l           %d0,%d2                 # d2-d7 are updated in place
21189        rts
21190sdregl3:
21191        mov.l           %d0,%d3
21192        rts
21193sdregl4:
21194        mov.l           %d0,%d4
21195        rts
21196sdregl5:
21197        mov.l           %d0,%d5
21198        rts
21199sdregl6:
21200        mov.l           %d0,%d6
21201        rts
21202sdregl7:
21203        mov.l           %d0,%d7
21204        rts
21205
21206#########################################################################
21207# XDEF **************************************************************** #
21208#       store_dreg_w(): store word to data register specified by d1     #
21209#                                                                       #
21210# XREF **************************************************************** #
21211#       None                                                            #
21212#                                                                       #
21213# INPUT *************************************************************** #
21214#       d0 = word value to store                                        #
21215#       d1 = index of register to store into                            #
21216#                                                                       #
21217# OUTPUT ************************************************************** #
21218#       (data register is updated)                                      #
21219#                                                                       #
21220# ALGORITHM *********************************************************** #
21221#       According to the index value in d1, store the word value        #
21222# in d0 to the corresponding data register. D0/D1 are on the stack      #
21223# while the rest are in their initial places.                           #
21224#                                                                       #
21225#########################################################################
21226
21227        global          store_dreg_w
21228store_dreg_w:
21229        mov.w           (tbl_sdregw.b,%pc,%d1.w*2),%d1 # d1 = table offset (index is overwritten)
21230        jmp             (tbl_sdregw.b,%pc,%d1.w*1) # jump to that register's store code
21231
21232tbl_sdregw:
21233        short           sdregw0 - tbl_sdregw
21234        short           sdregw1 - tbl_sdregw
21235        short           sdregw2 - tbl_sdregw
21236        short           sdregw3 - tbl_sdregw
21237        short           sdregw4 - tbl_sdregw
21238        short           sdregw5 - tbl_sdregw
21239        short           sdregw6 - tbl_sdregw
21240        short           sdregw7 - tbl_sdregw
21241
21242sdregw0:
21243        mov.w           %d0,2+EXC_DREGS+0x0(%a6) # low word of stacked d0
21244        rts
21245sdregw1:
21246        mov.w           %d0,2+EXC_DREGS+0x4(%a6) # low word of stacked d1
21247        rts
21248sdregw2:
21249        mov.w           %d0,%d2                 # d2-d7: low word updated in place
21250        rts
21251sdregw3:
21252        mov.w           %d0,%d3
21253        rts
21254sdregw4:
21255        mov.w           %d0,%d4
21256        rts
21257sdregw5:
21258        mov.w           %d0,%d5
21259        rts
21260sdregw6:
21261        mov.w           %d0,%d6
21262        rts
21263sdregw7:
21264        mov.w           %d0,%d7
21265        rts
21266
21267#########################################################################
21268# XDEF **************************************************************** #
21269#       store_dreg_b(): store byte to data register specified by d1     #
21270#                                                                       #
21271# XREF **************************************************************** #
21272#       None                                                            #
21273#                                                                       #
21274# INPUT *************************************************************** #
21275#       d0 = byte value to store                                        #
21276#       d1 = index of register to store into                            #
21277#                                                                       #
21278# OUTPUT ************************************************************** #
21279#       (data register is updated)                                      #
21280#                                                                       #
21281# ALGORITHM *********************************************************** #
21282#       According to the index value in d1, store the byte value        #
21283# in d0 to the corresponding data register. D0/D1 are on the stack      #
21284# while the rest are in their initial places.                           #
21285#                                                                       #
21286#########################################################################
21287
21288        global          store_dreg_b
21289store_dreg_b:
21290        mov.w           (tbl_sdregb.b,%pc,%d1.w*2),%d1 # d1 = table offset (index is overwritten)
21291        jmp             (tbl_sdregb.b,%pc,%d1.w*1) # jump to that register's store code
21292
21293tbl_sdregb:
21294        short           sdregb0 - tbl_sdregb
21295        short           sdregb1 - tbl_sdregb
21296        short           sdregb2 - tbl_sdregb
21297        short           sdregb3 - tbl_sdregb
21298        short           sdregb4 - tbl_sdregb
21299        short           sdregb5 - tbl_sdregb
21300        short           sdregb6 - tbl_sdregb
21301        short           sdregb7 - tbl_sdregb
21302
21303sdregb0:
21304        mov.b           %d0,3+EXC_DREGS+0x0(%a6) # low byte of stacked d0
21305        rts
21306sdregb1:
21307        mov.b           %d0,3+EXC_DREGS+0x4(%a6) # low byte of stacked d1
21308        rts
21309sdregb2:
21310        mov.b           %d0,%d2                 # d2-d7: low byte updated in place
21311        rts
21312sdregb3:
21313        mov.b           %d0,%d3
21314        rts
21315sdregb4:
21316        mov.b           %d0,%d4
21317        rts
21318sdregb5:
21319        mov.b           %d0,%d5
21320        rts
21321sdregb6:
21322        mov.b           %d0,%d6
21323        rts
21324sdregb7:
21325        mov.b           %d0,%d7
21326        rts
21327
21328#########################################################################
21329# XDEF **************************************************************** #
21330#       inc_areg(): increment an address register by the value in d0    #
21331#                                                                       #
21332# XREF **************************************************************** #
21333#       None                                                            #
21334#                                                                       #
21335# INPUT *************************************************************** #
21336#       d0 = amount to increment by                                     #
21337#       d1 = index of address register to increment                     #
21338#                                                                       #
21339# OUTPUT ************************************************************** #
21340#       (address register is updated)                                   #
21341#                                                                       #
21342# ALGORITHM *********************************************************** #
21343#       Typically used for an instruction w/ a post-increment <ea>,     #
21344# this routine adds the increment value in d0 to the address register   #
21345# specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside     #
21346# in their original places.                                             #
21347#       For a7, if the increment amount is one, then we have to         #
21348# increment by two. For any a7 update, set the mia7_flg so that if      #
21349# an access error exception occurs later in emulation, this address     #
21350# register update can be undone.                                        #
21351#                                                                       #
21352#########################################################################
21353
21354        global          inc_areg
21355inc_areg:
21356        mov.w           (tbl_iareg.b,%pc,%d1.w*2),%d1 # d1 = table offset (index is overwritten)
21357        jmp             (tbl_iareg.b,%pc,%d1.w*1) # jump to that register's update code
21358
21359tbl_iareg:
21360        short           iareg0 - tbl_iareg
21361        short           iareg1 - tbl_iareg
21362        short           iareg2 - tbl_iareg
21363        short           iareg3 - tbl_iareg
21364        short           iareg4 - tbl_iareg
21365        short           iareg5 - tbl_iareg
21366        short           iareg6 - tbl_iareg
21367        short           iareg7 - tbl_iareg
21368
21369iareg0: add.l           %d0,EXC_DREGS+0x8(%a6)  # a0 is on the stack
21370        rts
21371iareg1: add.l           %d0,EXC_DREGS+0xc(%a6)  # a1 is on the stack
21372        rts
21373iareg2: add.l           %d0,%a2                 # a2-a5 are updated in place
21374        rts
21375iareg3: add.l           %d0,%a3
21376        rts
21377iareg4: add.l           %d0,%a4
21378        rts
21379iareg5: add.l           %d0,%a5
21380        rts
21381iareg6: add.l           %d0,(%a6)               # a6 is on the stack, at (a6)
21382        rts
21383iareg7: mov.b           &mia7_flg,SPCOND_FLG(%a6) # flag the a7 update so it can be undone
21384        cmpi.b          %d0,&0x1                # is increment amount one?
21385        beq.b           iareg7b                 # yes; a7 must move by two
21386        add.l           %d0,EXC_A7(%a6)         # no; add amount to stacked a7
21387        rts
21388iareg7b:
21389        addq.l          &0x2,EXC_A7(%a6)        # increment stacked a7 by two
21390        rts
21391
21392#########################################################################
21393# XDEF **************************************************************** #
21394#       dec_areg(): decrement an address register by the value in d0    #
21395#                                                                       #
21396# XREF **************************************************************** #
21397#       None                                                            #
21398#                                                                       #
21399# INPUT *************************************************************** #
21400#       d0 = amount to decrement by                                     #
21401#       d1 = index of address register to decrement                     #
21402#                                                                       #
21403# OUTPUT ************************************************************** #
21404#       (address register is updated)                                   #
21405#                                                                       #
21406# ALGORITHM *********************************************************** #
#       Typically used for an instruction w/ a pre-decrement <ea>,      #
# this routine subtracts the decrement value in d0 from the address     #
# register specified by d1. A0/A1/A6/A7 reside on the stack. The rest   #
# reside in their original places.                                      #
#       For a7, if the decrement amount is one, then we have to         #
# decrement by two. For any a7 update, set the mda7_flg so that if      #
# an access error exception occurs later in emulation, this address     #
# register update can be undone.                                        #
21415#                                                                       #
21416#########################################################################
21417
21418        global          dec_areg
dec_areg:
# d1 holds the address register index on entry. Fetch the 16-bit offset of
# the matching handler from tbl_dareg (this overwrites d1) and jump to it.
        mov.w           (tbl_dareg.b,%pc,%d1.w*2),%d1   # d1 = handler offset
        jmp             (tbl_dareg.b,%pc,%d1.w*1)       # dispatch to daregN

# handler offsets relative to tbl_dareg, one entry per address register
tbl_dareg:
        short           dareg0 - tbl_dareg
        short           dareg1 - tbl_dareg
        short           dareg2 - tbl_dareg
        short           dareg3 - tbl_dareg
        short           dareg4 - tbl_dareg
        short           dareg5 - tbl_dareg
        short           dareg6 - tbl_dareg
        short           dareg7 - tbl_dareg

# a0/a1: the update is applied to the copies kept in the a6 frame
dareg0: sub.l           %d0,EXC_DREGS+0x8(%a6)
        rts
dareg1: sub.l           %d0,EXC_DREGS+0xc(%a6)
        rts

# a2-a5: the update is applied to the registers themselves
dareg2: sub.l           %d0,%a2
        rts
dareg3: sub.l           %d0,%a3
        rts
dareg4: sub.l           %d0,%a4
        rts
dareg5: sub.l           %d0,%a5
        rts

# a6: the update is applied to the longword that a6 points at
dareg6: sub.l           %d0,(%a6)
        rts

# a7: note the update in SPCOND_FLG, then adjust the EXC_A7 copy.
# Only the low byte of d0 is compared; a decrement of one becomes two.
dareg7: mov.b           &mda7_flg,SPCOND_FLG(%a6)
        cmpi.b          %d0,&0x1                # decrement of one?
        bne.b           dareg7_sub              # no; subtract d0 unchanged
dareg7b:
        subq.l          &0x2,EXC_A7(%a6)        # yes; step a7 by two
        rts
dareg7_sub:
        sub.l           %d0,EXC_A7(%a6)
        rts
21455
21456##############################################################################
21457
21458#########################################################################
21459# XDEF **************************************************************** #
21460#       load_fpn1(): load FP register value into FP_SRC(a6).            #
21461#                                                                       #
21462# XREF **************************************************************** #
21463#       None                                                            #
21464#                                                                       #
21465# INPUT *************************************************************** #
21466#       d0 = index of FP register to load                               #
21467#                                                                       #
21468# OUTPUT ************************************************************** #
21469#       FP_SRC(a6) = value loaded from FP register file                 #
21470#                                                                       #
21471# ALGORITHM *********************************************************** #
21472#       Using the index in d0, load FP_SRC(a6) with a number from the   #
21473# FP register file.                                                     #
21474#                                                                       #
21475#########################################################################
21476
21477        global          load_fpn1
load_fpn1:
# d0 holds the FP register index on entry. It is replaced by the handler
# offset read from tbl_load_fpn1, which is then used for the jump.
# Every handler returns with a0 pointing at FP_SRC(a6).
        mov.w           (tbl_load_fpn1.b,%pc,%d0.w*2), %d0
        jmp             (tbl_load_fpn1.b,%pc,%d0.w*1)

# handler offsets relative to tbl_load_fpn1, one entry per FP register
tbl_load_fpn1:
        short           load_fpn1_0 - tbl_load_fpn1
        short           load_fpn1_1 - tbl_load_fpn1
        short           load_fpn1_2 - tbl_load_fpn1
        short           load_fpn1_3 - tbl_load_fpn1
        short           load_fpn1_4 - tbl_load_fpn1
        short           load_fpn1_5 - tbl_load_fpn1
        short           load_fpn1_6 - tbl_load_fpn1
        short           load_fpn1_7 - tbl_load_fpn1

# fp0/fp1: copy the three longwords held in the EXC_FP0/EXC_FP1 frame slots
load_fpn1_0:
        lea             FP_SRC(%a6), %a0        # a0 -> FP_SRC
        mov.l           0+EXC_FP0(%a6), (%a0)
        mov.l           4+EXC_FP0(%a6), 0x4(%a0)
        mov.l           8+EXC_FP0(%a6), 0x8(%a0)
        rts
load_fpn1_1:
        lea             FP_SRC(%a6), %a0        # a0 -> FP_SRC
        mov.l           0+EXC_FP1(%a6), (%a0)
        mov.l           4+EXC_FP1(%a6), 0x4(%a0)
        mov.l           8+EXC_FP1(%a6), 0x8(%a0)
        rts

# fp2-fp7: store the register itself with fmovm (one mask bit per register)
load_fpn1_2:
        lea             FP_SRC(%a6), %a0        # a0 -> FP_SRC
        fmovm.x         &0x20, (%a0)            # fp2
        rts
load_fpn1_3:
        lea             FP_SRC(%a6), %a0        # a0 -> FP_SRC
        fmovm.x         &0x10, (%a0)            # fp3
        rts
load_fpn1_4:
        lea             FP_SRC(%a6), %a0        # a0 -> FP_SRC
        fmovm.x         &0x08, (%a0)            # fp4
        rts
load_fpn1_5:
        lea             FP_SRC(%a6), %a0        # a0 -> FP_SRC
        fmovm.x         &0x04, (%a0)            # fp5
        rts
load_fpn1_6:
        lea             FP_SRC(%a6), %a0        # a0 -> FP_SRC
        fmovm.x         &0x02, (%a0)            # fp6
        rts
load_fpn1_7:
        lea             FP_SRC(%a6), %a0        # a0 -> FP_SRC
        fmovm.x         &0x01, (%a0)            # fp7
        rts
21528
21529#############################################################################
21530
21531#########################################################################
21532# XDEF **************************************************************** #
21533#       load_fpn2(): load FP register value into FP_DST(a6).            #
21534#                                                                       #
21535# XREF **************************************************************** #
21536#       None                                                            #
21537#                                                                       #
21538# INPUT *************************************************************** #
21539#       d0 = index of FP register to load                               #
21540#                                                                       #
21541# OUTPUT ************************************************************** #
21542#       FP_DST(a6) = value loaded from FP register file                 #
21543#                                                                       #
21544# ALGORITHM *********************************************************** #
21545#       Using the index in d0, load FP_DST(a6) with a number from the   #
21546# FP register file.                                                     #
21547#                                                                       #
21548#########################################################################
21549
21550        global          load_fpn2
load_fpn2:
# d0 holds the FP register index on entry. It is replaced by the handler
# offset read from tbl_load_fpn2, which is then used for the jump.
# Every handler returns with a0 pointing at FP_DST(a6).
        mov.w           (tbl_load_fpn2.b,%pc,%d0.w*2), %d0
        jmp             (tbl_load_fpn2.b,%pc,%d0.w*1)

# handler offsets relative to tbl_load_fpn2, one entry per FP register
tbl_load_fpn2:
        short           load_fpn2_0 - tbl_load_fpn2
        short           load_fpn2_1 - tbl_load_fpn2
        short           load_fpn2_2 - tbl_load_fpn2
        short           load_fpn2_3 - tbl_load_fpn2
        short           load_fpn2_4 - tbl_load_fpn2
        short           load_fpn2_5 - tbl_load_fpn2
        short           load_fpn2_6 - tbl_load_fpn2
        short           load_fpn2_7 - tbl_load_fpn2

# fp0/fp1: copy the three longwords held in the EXC_FP0/EXC_FP1 frame slots
load_fpn2_0:
        lea             FP_DST(%a6), %a0        # a0 -> FP_DST
        mov.l           0+EXC_FP0(%a6), (%a0)
        mov.l           4+EXC_FP0(%a6), 0x4(%a0)
        mov.l           8+EXC_FP0(%a6), 0x8(%a0)
        rts
load_fpn2_1:
        lea             FP_DST(%a6), %a0        # a0 -> FP_DST
        mov.l           0+EXC_FP1(%a6), (%a0)
        mov.l           4+EXC_FP1(%a6), 0x4(%a0)
        mov.l           8+EXC_FP1(%a6), 0x8(%a0)
        rts

# fp2-fp7: store the register itself with fmovm (one mask bit per register)
load_fpn2_2:
        lea             FP_DST(%a6), %a0        # a0 -> FP_DST
        fmovm.x         &0x20, (%a0)            # fp2
        rts
load_fpn2_3:
        lea             FP_DST(%a6), %a0        # a0 -> FP_DST
        fmovm.x         &0x10, (%a0)            # fp3
        rts
load_fpn2_4:
        lea             FP_DST(%a6), %a0        # a0 -> FP_DST
        fmovm.x         &0x08, (%a0)            # fp4
        rts
load_fpn2_5:
        lea             FP_DST(%a6), %a0        # a0 -> FP_DST
        fmovm.x         &0x04, (%a0)            # fp5
        rts
load_fpn2_6:
        lea             FP_DST(%a6), %a0        # a0 -> FP_DST
        fmovm.x         &0x02, (%a0)            # fp6
        rts
load_fpn2_7:
        lea             FP_DST(%a6), %a0        # a0 -> FP_DST
        fmovm.x         &0x01, (%a0)            # fp7
        rts
21601
21602#############################################################################
21603
21604#########################################################################
21605# XDEF **************************************************************** #
#       store_fpreg(): store an fp value to the fpreg designated by d0. #
21607#                                                                       #
21608# XREF **************************************************************** #
21609#       None                                                            #
21610#                                                                       #
21611# INPUT *************************************************************** #
21612#       fp0 = extended precision value to store                         #
21613#       d0  = index of floating-point register                          #
21614#                                                                       #
21615# OUTPUT ************************************************************** #
21616#       None                                                            #
21617#                                                                       #
21618# ALGORITHM *********************************************************** #
21619#       Store the value in fp0 to the FP register designated by the     #
21620# value in d0. The FP number can be DENORM or SNAN so we have to be     #
21621# careful that we don't take an exception here.                         #
21622#                                                                       #
21623#########################################################################
21624
21625        global          store_fpreg
store_fpreg:
# d0 holds the destination FP register index on entry. It is replaced by
# the handler offset read from tbl_store_fpreg, which is used for the jump.
# All transfers use fmovm, as the routine header requires that storing a
# DENORM or SNAN must not raise an exception here.
        mov.w           (tbl_store_fpreg.b,%pc,%d0.w*2), %d0
        jmp             (tbl_store_fpreg.b,%pc,%d0.w*1)

# handler offsets relative to tbl_store_fpreg, one entry per FP register
tbl_store_fpreg:
        short           store_fpreg_0 - tbl_store_fpreg
        short           store_fpreg_1 - tbl_store_fpreg
        short           store_fpreg_2 - tbl_store_fpreg
        short           store_fpreg_3 - tbl_store_fpreg
        short           store_fpreg_4 - tbl_store_fpreg
        short           store_fpreg_5 - tbl_store_fpreg
        short           store_fpreg_6 - tbl_store_fpreg
        short           store_fpreg_7 - tbl_store_fpreg

#
# fp0/fp1: the destination is the EXC_FP0/EXC_FP1 slot in the a6 frame,
# so fp0 is written straight to memory.
#
store_fpreg_0:
        fmovm.x         &0x80, EXC_FP0(%a6)     # fp0 -> frame copy of fp0
        rts
store_fpreg_1:
        fmovm.x         &0x80, EXC_FP1(%a6)     # fp0 -> frame copy of fp1
        rts

#
# fp2-fp7: the destination is a live FP register. fp0 is pushed on the
# stack and popped into the target register. The register-list mask is
# bit-reversed for the predecrement form, so &0x01 selects fp0 on the push.
#
store_fpreg_2:
        fmovm.x         &0x01, -(%sp)           # push fp0
        fmovm.x         (%sp)+, &0x20           # pop into fp2
        rts
store_fpreg_3:
        fmovm.x         &0x01, -(%sp)           # push fp0
        fmovm.x         (%sp)+, &0x10           # pop into fp3
        rts
store_fpreg_4:
        fmovm.x         &0x01, -(%sp)           # push fp0
        fmovm.x         (%sp)+, &0x08           # pop into fp4
        rts
store_fpreg_5:
        fmovm.x         &0x01, -(%sp)           # push fp0
        fmovm.x         (%sp)+, &0x04           # pop into fp5
        rts
store_fpreg_6:
        fmovm.x         &0x01, -(%sp)           # push fp0
        fmovm.x         (%sp)+, &0x02           # pop into fp6
        rts
store_fpreg_7:
        fmovm.x         &0x01, -(%sp)           # push fp0
        fmovm.x         (%sp)+, &0x01           # pop into fp7
        rts
21670
21671#########################################################################
21672# XDEF **************************************************************** #
21673#       _denorm(): denormalize an intermediate result                   #
21674#                                                                       #
21675# XREF **************************************************************** #
21676#       None                                                            #
21677#                                                                       #
21678# INPUT *************************************************************** #
21679#       a0 = points to the operand to be denormalized                   #
21680#               (in the internal extended format)                       #
21681#                                                                       #
21682#       d0 = rounding precision                                         #
21683#                                                                       #
21684# OUTPUT ************************************************************** #
21685#       a0 = pointer to the denormalized result                         #
21686#               (in the internal extended format)                       #
21687#                                                                       #
21688#       d0 = guard,round,sticky                                         #
21689#                                                                       #
21690# ALGORITHM *********************************************************** #
#       According to the exponent underflow threshold for the given     #
# precision, shift the mantissa bits to the right in order to raise     #
# the exponent of the operand to the threshold value. While shifting    #
# the mantissa bits right, maintain the value of the guard, round,      #
# and sticky bits.                                                      #
21696# other notes:                                                          #
21697#       (1) _denorm() is called by the underflow routines               #
21698#       (2) _denorm() does NOT affect the status register               #
21699#                                                                       #
21700#########################################################################
21701
21702#
21703# table of exponent threshold values for each precision
21704#
tbl_thresh:
        short           0x0                     # index 0 (presumably extended)
        short           sgl_thresh              # index 1: single
        short           dbl_thresh              # index 2: double

        global          _denorm
_denorm:
#
# Look up the exponent threshold for the selected precision and form
# diff = threshold - exponent. When diff >= 66 the whole mantissa plus
# the guard and round positions would be shifted out, so the answer is
# simply a zero mantissa with sticky set. Otherwise dnrm_lp does the
# actual shifting.
#
        lsr.b           &0x2, %d0               # precision -> table index
        mov.w           (tbl_thresh.b,%pc,%d0.w*2), %d1 # d1 = threshold
        mov.w           %d1, %d0                # work on a copy
        sub.w           FTEMP_EX(%a0), %d0      # d0 = threshold - exponent
        cmpi.w          %d0, &66                # is diff < 66?
        bmi.b           denorm_shift            # yes; bits survive, go shift

#
# diff >= 66: every bit would have been shifted off during the denorm,
# so set the sticky bit and clear the entire mantissa.
#
denorm_set_stky:
        mov.w           %d1, FTEMP_EX(%a0)      # exponent = threshold
        mov.l           &0x20000000, %d0        # return value: sticky only
        clr.l           FTEMP_HI(%a0)           # hi(mantissa) = 0
        clr.l           FTEMP_LO(%a0)           # lo(mantissa) = 0
        rts

#
# diff < 66: start from g,r,s = 0, with sticky pre-set only when INEX2 is
# already recorded in FPSR_EXCEPT, then denormalize via dnrm_lp.
#
denorm_shift:
        clr.l           %d0                     # clear g,r,s
        btst            &inex2_bit, FPSR_EXCEPT(%a6) # was INEX2 set?
        beq.b           denorm_call             # no; leave g,r,s clear
        bset            &29, %d0                # yes; set sticky bit

denorm_call:
        bsr.l           dnrm_lp                 # denormalize the number
        rts
21744
21745#                                                                       #
# dnrm_lp(): denormalize exponent/mantissa to specified threshold       #
21747#                                                                       #
21748# INPUT:                                                                #
21749#       %a0        : points to the operand to be denormalized           #
21750#       %d0{31:29} : initial guard,round,sticky                         #
21751#       %d1{15:0}  : denormalization threshold                          #
21752# OUTPUT:                                                               #
21753#       %a0        : points to the denormalized operand                 #
21754#       %d0{31:29} : final guard,round,sticky                           #
21755#                                                                       #
21756
21757# *** Local Equates *** #
set     GRS,            L_SCR2                  # scratch longword for g,r,s
set     FTEMP_LO2,      L_SCR1                  # scratch copy of FTEMP_LO

        global          dnrm_lp
dnrm_lp:

#
# Save the incoming g,r,s and a copy of FTEMP_LO in the a6 frame. The
# g,r,s longword is meant to sit directly after the FTEMP_LO copy so the
# bitfield extractions in the shift cases can run across both.
#
        mov.l           %d0, GRS(%a6)           # stash g,r,s
        mov.l           FTEMP_LO(%a0), FTEMP_LO2(%a6) # stash FTEMP_LO copy

#
# Work out the shift amount n = threshold - exponent and pick a handler:
#       n <= 0          nothing to do
#       0 <  n < 32     case_1
#       32 <= n < 64    case_2
#       n >= 64         case_3
# d0 keeps the threshold for the handlers; d1 carries n.
#
        mov.l           %d1, %d0                # d0 = denorm threshold
        sub.w           FTEMP_EX(%a0), %d1      # d1 = threshold - exponent
        ble.b           dnrm_no_lp              # n <= 0
        cmpi.w          %d1, &32                # n < 32?
        blt.b           case_1                  # yes
        cmpi.w          %d1, &64                # n < 64?
        blt.b           case_2                  # yes
        bra.w           case_3                  # n >= 64

#
# Exponent already at or above the threshold: hand back the g,r,s that
# was passed in and leave the operand untouched.
#
dnrm_no_lp:
        mov.l           GRS(%a6), %d0           # restore original g,r,s
        rts
21790
21791#
21792# case (0<d1<32)
21793#
21794# %d0 = denorm threshold
21795# %d1 = "n" = amt to shift
21796#
21797#       ---------------------------------------------------------
21798#       |     FTEMP_HI    |     FTEMP_LO     |grs000.........000|
21799#       ---------------------------------------------------------
21800#       <-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
21801#       \          \                  \                  \
21802#        \          \                  \                  \
21803#         \          \                  \                  \
21804#          \          \                  \                  \
21805#           \          \                  \                  \
21806#            \          \                  \                  \
21807#             \          \                  \                  \
21808#              \          \                  \                  \
21809#       <-(n)-><-(32 - n)-><------(32)-------><------(32)------->
21810#       ---------------------------------------------------------
21811#       |0.....0| NEW_HI  |  NEW_FTEMP_LO     |grs              |
21812#       ---------------------------------------------------------
21813#
case_1:
# In:  d0 = denorm threshold, d1 = n = shift amount (0 < n < 32)
# Out: operand at a0 shifted right by n, d0{31:29} = new g,r,s
        mov.w           %d0, FTEMP_EX(%a0)      # exponent = denorm threshold
        mov.l           %d2, -(%sp)             # d2 is needed as a temp

        mov.l           &32, %d0
        sub.w           %d1, %d0                # d0 = 32 - n

#
# For n >= 29 the 32-bit window pulled out of FTEMP_LO2 below reaches at
# most three bits into the saved g,r,s longword. OR the top byte of that
# longword into the low byte of the FTEMP_LO copy so the old g,r,s still
# take part in the sticky test.
#
        cmpi.w          %d1, &29                # is n >= 29?
        blt.b           case1_extract           # no; window is wide enough
        mov.b           GRS(%a6), %d2
        or.b            %d2, 3+FTEMP_LO2(%a6)

case1_extract:
        bfextu          FTEMP_HI(%a0){&0:%d0}, %d2 # d2 = new FTEMP_HI
        bfextu          FTEMP_HI(%a0){%d0:&32}, %d1 # d1 = new FTEMP_LO
        bfextu          FTEMP_LO2(%a6){%d0:&32}, %d0 # d0 = new g,r,s word

        mov.l           %d2, FTEMP_HI(%a0)      # write back hi(mantissa)
        mov.l           %d1, FTEMP_LO(%a0)      # write back lo(mantissa)

        bftst           %d0{&2:&30}             # anything below g,r?
        beq.b           case1_sticky_clear      # no; sticky stays clear
        bset            &rnd_stky_bit, %d0      # yes; set sticky bit

case1_sticky_clear:
        and.l           &0xe0000000, %d0        # keep only g,r,s
        mov.l           (%sp)+, %d2             # give d2 back
        rts
21842
21843#
21844# case (32<=d1<64)
21845#
21846# %d0 = denorm threshold
21847# %d1 = "n" = amt to shift
21848#
21849#       ---------------------------------------------------------
21850#       |     FTEMP_HI    |     FTEMP_LO     |grs000.........000|
21851#       ---------------------------------------------------------
21852#       <-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
21853#       \          \                  \
21854#        \          \                  \
21855#         \          \                  -------------------
21856#          \          --------------------                 \
21857#           -------------------           \                 \
21858#                              \           \                 \
21859#                               \           \                 \
21860#                                \           \                 \
21861#       <-------(32)------><-(n)-><-(32 - n)-><------(32)------->
21862#       ---------------------------------------------------------
21863#       |0...............0|0....0| NEW_LO     |grs              |
21864#       ---------------------------------------------------------
21865#
case_2:
# In:  d0 = denorm threshold, d1 = n = shift amount (32 <= n < 64)
# Out: FTEMP_HI = 0, FTEMP_LO = shifted bits, d0{31:29} = new g,r,s
        mov.w           %d0, FTEMP_EX(%a0)      # exponent = denorm threshold
        mov.l           %d2, -(%sp)             # d2 is needed as a temp

        subi.w          &32, %d1                # d1 = n - 32
        mov.l           &32, %d0
        sub.w           %d1, %d0                # d0 = 32 - (n - 32)

#
# Fold the old g,r,s into the bottom of the FTEMP_LO copy so that fewer
# bits need checking for the sticky detect. Per the original note this
# only matters for shift amounts of 61-63.
#
        mov.b           GRS(%a6), %d2
        or.b            %d2, 3+FTEMP_LO2(%a6)

        bfextu          FTEMP_HI(%a0){&0:%d0}, %d2 # d2 = new FTEMP_LO
        bfextu          FTEMP_HI(%a0){%d0:&32}, %d1 # d1 = new g,r,s word

#
# Sticky is needed when anything below g,r in the new word is set, or
# when the tested remainder of the FTEMP_LO copy is non-zero.
#
        bftst           %d1{&2:&30}             # bits below g,r set?
        bne.b           case2_set_sticky        # yes; set sticky bit
        bftst           FTEMP_LO2(%a6){%d0:&31} # remaining low bits set?
        bne.b           case2_set_sticky        # yes; set sticky bit

        mov.l           %d1, %d0                # d0 = new g,r,s, no sticky
        bra.b           case2_end

case2_set_sticky:
        mov.l           %d1, %d0                # d0 = new g,r,s
        bset            &rnd_stky_bit, %d0      # plus sticky

case2_end:
        clr.l           FTEMP_HI(%a0)           # hi(mantissa) = 0
        mov.l           %d2, FTEMP_LO(%a0)      # lo(mantissa) = shifted bits
        and.l           &0xe0000000, %d0        # keep only g,r,s

        mov.l           (%sp)+, %d2             # give d2 back
        rts
21902
21903#
21904# case (d1>=64)
21905#
21906# %d0 = denorm threshold
21907# %d1 = amt to shift
21908#
case_3:
# In:  d0 = denorm threshold, d1 = shift amount (>= 64)
# Out: mantissa at a0 cleared, exponent = threshold, d0{31:29} = new g,r,s
        mov.w           %d0, FTEMP_EX(%a0)      # insert denorm threshold

        cmpi.w          %d1, &65                # is shift amt > 65?
        blt.b           case3_64                # no; it's == 64
        beq.b           case3_65                # no; it's == 65

#
# case (d1 > 65)
#
# Shift value is > 65 and out of range. All bits are shifted off.
# Return a zero mantissa with the sticky bit set.
#
        clr.l           FTEMP_HI(%a0)           # clear hi(mantissa)
        clr.l           FTEMP_LO(%a0)           # clear lo(mantissa)
        mov.l           &0x20000000, %d0        # set sticky bit
        rts

#
# case (d1 == 64)
#
#       ---------------------------------------------------------
#       |     FTEMP_HI    |     FTEMP_LO     |grs000.........000|
#       ---------------------------------------------------------
#                               becomes
#       ---------------------------------------------------------
#       |0...............0|0................0|grs               |
#       ---------------------------------------------------------
#
# The top two bits of FTEMP_HI become the new G,R. The remaining 30 bits
# of FTEMP_HI are left in d1 for the sticky test in case3_complete.
#
case3_64:
        mov.l           FTEMP_HI(%a0), %d0      # fetch hi(mantissa)
        mov.l           %d0, %d1                # make a copy
        and.l           &0xc0000000, %d0        # extract G,R
        and.l           &0x3fffffff, %d1        # other bits; sets Z for below

        bra.b           case3_complete

#
# case (d1 == 65)
#
#       ---------------------------------------------------------
#       |     FTEMP_HI    |     FTEMP_LO     |grs000.........000|
#       ---------------------------------------------------------
#                               becomes
#       ---------------------------------------------------------
#       |0...............0|0................0|0rs               |
#       ---------------------------------------------------------
#
# Only the top bit of FTEMP_HI survives, as the new R. The remaining 31
# bits of FTEMP_HI are left in d1 for the sticky test in case3_complete.
#
# NOTE: the copy into d1 was missing here. d1 still held the shift count
# (65), so the "other bits" test below was always non-zero and sticky was
# set unconditionally. This now mirrors case3_64.
#
case3_65:
        mov.l           FTEMP_HI(%a0), %d0      # fetch hi(mantissa)
        mov.l           %d0, %d1                # make a copy
        and.l           &0x80000000, %d0        # extract R bit
        lsr.l           &0x1, %d0               # shift high bit into R bit
        and.l           &0x7fffffff, %d1        # other bits; sets Z for below

case3_complete:
# The last operation on both paths above is the "and" that isolates the
# shifted-off bits of FTEMP_HI, so the condition codes already say
# whether any of them were set.
        bne.b           case3_set_sticky        # yes; go set new sticky
        tst.l           FTEMP_LO(%a0)           # were any bits shifted off?
        bne.b           case3_set_sticky        # yes; go set new sticky
        tst.b           GRS(%a6)                # were any bits shifted off?
        bne.b           case3_set_sticky        # yes; go set new sticky

#
# no bits were shifted off so don't set the sticky bit.
# d0 keeps the new guard/round bits and the entire mantissa becomes zero.
#
        clr.l           FTEMP_HI(%a0)           # clear hi(mantissa)
        clr.l           FTEMP_LO(%a0)           # clear lo(mantissa)
        rts

#
# some bits were shifted off so set the sticky bit.
# the entire mantissa is zero.
#
case3_set_sticky:
        bset            &rnd_stky_bit,%d0       # set new sticky bit
        clr.l           FTEMP_HI(%a0)           # clear hi(mantissa)
        clr.l           FTEMP_LO(%a0)           # clear lo(mantissa)
        rts
22008
22009#########################################################################
22010# XDEF **************************************************************** #
22011#       _round(): round result according to precision/mode              #
22012#                                                                       #
22013# XREF **************************************************************** #
22014#       None                                                            #
22015#                                                                       #
22016# INPUT *************************************************************** #
22017#       a0        = ptr to input operand in internal extended format    #
22018#       d1(hi)    = contains rounding precision:                        #
22019#                       ext = $0000xxxx                                 #
22020#                       sgl = $0004xxxx                                 #
22021#                       dbl = $0008xxxx                                 #
22022#       d1(lo)    = contains rounding mode:                             #
22023#                       RN  = $xxxx0000                                 #
22024#                       RZ  = $xxxx0001                                 #
22025#                       RM  = $xxxx0002                                 #
22026#                       RP  = $xxxx0003                                 #
22027#       d0{31:29} = contains the g,r,s bits (extended)                  #
22028#                                                                       #
22029# OUTPUT ************************************************************** #
22030#       a0 = pointer to rounded result                                  #
22031#                                                                       #
22032# ALGORITHM *********************************************************** #
22033#       On return the value pointed to by a0 is correctly rounded,      #
22034#       a0 is preserved and the g-r-s bits in d0 are cleared.           #
22035#       The result is not typed - the tag field is invalid.  The        #
22036#       result is still in the internal extended format.                #
22037#                                                                       #
22038#       The INEX bit of USER_FPSR will be set if the rounded result was #
22039#       inexact (i.e. if any of the g-r-s bits were set).               #
22040#                                                                       #
22041#########################################################################
22042
22043        global          _round
22044_round:
22045#
22046# ext_grs() reduces the extended g,r,s in d0 to the G,R,S bits that
22047# apply to the selected rounding precision (d1 is left unchanged).
22048# If (G,R,S == 0) then result is exact and no increment is needed, else
22049# set the inex flags in USER_FPSR and dispatch on the rounding mode.
22050#
22051        bsr.l           ext_grs                 # extract G,R,S for this prec
22052
22053        tst.l           %d0                     # are G,R,S zero?
22054        beq.w           truncate                # yes; exact, nothing to add
22055
22056        or.w            &inx2a_mask, 2+USER_FPSR(%a6) # set inex2/ainex
22057
22058#
22059# Use rounding mode (d1.w) as an index into a table of 16-bit offsets,
22060# each relative to tbl_mode. All of the following assumes grs != 0.
22061#
22062        mov.w           (tbl_mode.b,%pc,%d1.w*2), %a1 # load handler offset for mode
22063        jmp             (tbl_mode.b,%pc,%a1)    # jmp to rnd mode handler
22064
22065tbl_mode:
22066        short           rnd_near - tbl_mode     # mode 0: RN
22067        short           truncate - tbl_mode     # mode 1: RZ always truncates
22068        short           rnd_mnus - tbl_mode     # mode 2: RM
22069        short           rnd_plus - tbl_mode     # mode 3: RP
22070
22071#################################################################
22072#       ROUND PLUS INFINITY                                     #
22073#                                                               #
22074#       If sign of fp number = 0 (positive), then add 1 to l.   #
22075#################################################################
22076rnd_plus:
22077        tst.b           FTEMP_SGN(%a0)          # check for sign
22078        bmi.w           truncate                # if negative then truncate
22079
22080        mov.l           &0xffffffff, %d0        # d0 != 0 so add_* never clears l
22081        swap            %d1                     # set up d1 for round prec.
22082
22083        cmpi.b          %d1, &s_mode            # is prec = sgl?
22084        beq.w           add_sgl                 # yes
22085        bgt.w           add_dbl                 # no; it's dbl
22086        bra.w           add_ext                 # no; it's ext
22087
22088#################################################################
22089#       ROUND MINUS INFINITY                                    #
22090#                                                               #
22091#       If sign of fp number = 1 (negative), then add 1 to l.   #
22092#################################################################
22093rnd_mnus:
22094        tst.b           FTEMP_SGN(%a0)          # check for sign
22095        bpl.w           truncate                # if positive then truncate
22096
22097        mov.l           &0xffffffff, %d0        # d0 != 0 so add_* never clears l
22098        swap            %d1                     # set up d1 for round prec.
22099
22100        cmpi.b          %d1, &s_mode            # is prec = sgl?
22101        beq.w           add_sgl                 # yes
22102        bgt.w           add_dbl                 # no; it's dbl
22103        bra.w           add_ext                 # no; it's ext
22104
22105#################################################################
22106#       ROUND NEAREST                                           #
22107#                                                               #
22108#       If (g=1), then add 1 to l and if (r=s=0), then clear l  #
22109#       Note that this will round to even in case of a tie.     #
22110#################################################################
22111rnd_near:
22112        asl.l           &0x1, %d0               # g-bit -> c-bit; d0 now holds r,s
22113        bcc.w           truncate                # g=0: truncate; g=1: add 1 to l
22114
22115        swap            %d1                     # set up d1 for round prec.
22116
22117        cmpi.b          %d1, &s_mode            # is prec = sgl?
22118        beq.w           add_sgl                 # yes
22119        bgt.w           add_dbl                 # no; it's dbl
22120        bra.w           add_ext                 # no; it's ext
22121
22122# *** LOCAL EQUATES (increments used by add_sgl / add_dbl) ***
22123set     ad_1_sgl,       0x00000100      # sgl l-bit: bit 8 of hi(mantissa)
22124set     ad_1_dbl,       0x00000800      # dbl l-bit: bit 11 of lo(mantissa)
22125
22126#########################
22127#       ADD SINGLE      #
22128#########################
22129add_sgl:
22130        add.l           &ad_1_sgl, FTEMP_HI(%a0) # add 1 to the sgl l-bit
22131        bcc.b           scc_clr                 # no mantissa overflow
22132        roxr.w          FTEMP_HI(%a0)           # overflow: rotate carry back in
22133        roxr.w          FTEMP_HI+2(%a0)         # ...and through the lower word
22134        add.w           &0x1, FTEMP_EX(%a0)     # and incr exponent
22135scc_clr:
22136        tst.l           %d0                     # test for rs = 0
22137        bne.b           sgl_done                # r or s set; keep the l-bit
22138        and.w           &0xfe00, FTEMP_HI+2(%a0) # rs = 0: clear the l-bit
22139sgl_done:                                        # also entered from truncate
22140        and.l           &0xffffff00, FTEMP_HI(%a0) # truncate bits beyond sgl limit
22141        clr.l           FTEMP_LO(%a0)           # clear lo(mantissa)
22142        rts
22143
22144#########################
22145#       ADD EXTENDED    #
22146#########################
22147add_ext:
22148        addq.l          &1,FTEMP_LO(%a0)        # add 1 to l-bit
22149        bcc.b           xcc_clr                 # no carry out of lo(man)
22150        addq.l          &1,FTEMP_HI(%a0)        # propagate carry
22151        bcc.b           xcc_clr                 # no carry out of hi(man)
22152        roxr.w          FTEMP_HI(%a0)           # mant is 0 so rotate carry back in
22153        roxr.w          FTEMP_HI+2(%a0)         # ...through rest of hi(man)
22154        roxr.w          FTEMP_LO(%a0)           # ...and on through lo(man)
22155        roxr.w          FTEMP_LO+2(%a0)         # ...one word at a time
22156        add.w           &0x1,FTEMP_EX(%a0)      # and inc exp
22157xcc_clr:
22158        tst.l           %d0                     # test rs = 0
22159        bne.b           add_ext_done            # r or s set; keep the l-bit
22160        and.b           &0xfe,FTEMP_LO+3(%a0)   # rs = 0: clear the l bit
22161add_ext_done:
22162        rts
22163
22164#########################
22165#       ADD DOUBLE      #
22166#########################
22167add_dbl:
22168        add.l           &ad_1_dbl, FTEMP_LO(%a0) # add 1 to the dbl l-bit
22169        bcc.b           dcc_clr                 # no carry
22170        addq.l          &0x1, FTEMP_HI(%a0)     # propagate carry
22171        bcc.b           dcc_clr                 # no carry
22172
22173        roxr.w          FTEMP_HI(%a0)           # mant is 0 so rotate carry back in
22174        roxr.w          FTEMP_HI+2(%a0)         # ...through rest of hi(man)
22175        roxr.w          FTEMP_LO(%a0)           # ...and on through lo(man)
22176        roxr.w          FTEMP_LO+2(%a0)         # ...one word at a time
22177        addq.w          &0x1, FTEMP_EX(%a0)     # incr exponent
22178dcc_clr:
22179        tst.l           %d0                     # test for rs = 0
22180        bne.b           dbl_done                # r or s set; keep the l-bit
22181        and.w           &0xf000, FTEMP_LO+2(%a0) # rs = 0: clear the l-bit
22182
22183dbl_done:                                        # also entered from truncate
22184        and.l           &0xfffff800,FTEMP_LO(%a0) # truncate bits beyond dbl limit
22185        rts
22186
22187###########################
22188# Truncate all other bits #
22189###########################
22190truncate:
22191        swap            %d1                     # select rnd prec (d1 stays swapped)
22192
22193        cmpi.b          %d1, &s_mode            # is prec sgl?
22194        beq.w           sgl_done                # yes; mask off bits beyond sgl
22195        bgt.b           dbl_done                # no; it's dbl: mask beyond dbl
22196        rts                                     # no; it's ext: nothing to mask
22197
22198
22199#
22200# ext_grs(): extract guard, round and sticky bits according to
22201#            rounding precision.
22202#
22203# INPUT
22204#       d0         = extended precision g,r,s (in d0{31:29})
22205#       d1         = {PREC,ROUND}
22206# OUTPUT
22207#       d0{31:29}  = guard, round, sticky
22208#
22209# ext_grs() extracts the guard/round/sticky bits according to the
22210# selected rounding precision. It is called by the round subroutine
22211# only.  All registers except d0 are kept intact. d0 becomes an
22212# updated guard,round,sticky in d0{31:29}.
22213#
22214# Notes: the ext_grs uses the round PREC, and therefore has to swap d1
22215#        prior to usage, and needs to restore d1 to original. this
22216#        routine is tightly tied to the round routine and not meant to
22217#        uphold standard subroutine calling practices.
22218#
22219
22220ext_grs:
22221        swap            %d1                     # have d1.w point to round precision
22222        tst.b           %d1                     # is rnd prec = extended?
22223        bne.b           ext_grs_not_ext         # no; go handle sgl or dbl
22224
22225#
22226# %d0 already holds the extended g,r,s since _round() had it before calling
22227# this function. so, as long as we don't disturb it, we are "returning" it.
22228#
22229ext_grs_ext:
22230        swap            %d1                     # yes; return to correct positions
22231        rts
22232
22233ext_grs_not_ext:
22234        movm.l          &0x3000, -(%sp)         # make some temp registers {d2/d3}
22235
22236        cmpi.b          %d1, &s_mode            # is rnd prec = sgl?
22237        bne.b           ext_grs_dbl             # no; go handle dbl
22238
22239#
22240# sgl:
22241#       96              64        40    32              0
22242#       -----------------------------------------------------
22243#       | EXP   |XXXXXXX|         |xx   |               |grs|
22244#       -----------------------------------------------------
22245#                       <--(24)--->nn\                     /
22246#                                  ee ---------------------
22247#                                  ww           |
22248#                                               v
22249#                                  gr      new sticky
22250#
22251ext_grs_sgl:
22252        bfextu          FTEMP_HI(%a0){&24:&2}, %d3 # new g,r = the 2 bits just below
22253        mov.l           &30, %d2                # the 24-bit sgl mantissa
22254        lsl.l           %d2, %d3                # shift g-r bits to MSB of d3
22255        mov.l           FTEMP_HI(%a0), %d2      # get hi(mantissa) for s-bit test
22256        and.l           &0x0000003f, %d2        # s bit is the or of all other
22257        bne.b           ext_grs_st_stky         # bits to the right of g-r
22258        tst.l           FTEMP_LO(%a0)           # test lower mantissa
22259        bne.b           ext_grs_st_stky         # if any are set, set sticky
22260        tst.l           %d0                     # test original g,r,s
22261        bne.b           ext_grs_st_stky         # if any are set, set sticky
22262        bra.b           ext_grs_end_sd          # nothing below g-r is set; exit
22263
22264#
22265# dbl:
22266#       96              64              32       11     0
22267#       -----------------------------------------------------
22268#       | EXP   |XXXXXXX|               |        |xx    |grs|
22269#       -----------------------------------------------------
22270#                                                 nn\       /
22271#                                                 ee -------
22272#                                                 ww    |
22273#                                                       v
22274#                                                 gr    new sticky
22275#
22276ext_grs_dbl:
22277        bfextu          FTEMP_LO(%a0){&21:&2}, %d3 # new g,r = the 2 bits just below
22278        mov.l           &30, %d2                # the dbl l-bit in lo(mantissa)
22279        lsl.l           %d2, %d3                # shift g-r bits to the MSB of d3
22280        mov.l           FTEMP_LO(%a0), %d2      # get lower mantissa  for s-bit test
22281        and.l           &0x000001ff, %d2        # s bit is the or-ing of all
22282        bne.b           ext_grs_st_stky         # other bits to the right of g-r
22283        tst.l           %d0                     # test original g,r,s
22284        bne.b           ext_grs_st_stky         # if any are set, set sticky
22285        bra.b           ext_grs_end_sd          # if clear, exit
22286
22287ext_grs_st_stky:
22288        bset            &rnd_stky_bit, %d3      # set sticky bit
22289ext_grs_end_sd:
22290        mov.l           %d3, %d0                # return grs to d0
22291
22292        movm.l          (%sp)+, &0xc            # restore scratch registers {d2/d3}
22293
22294        swap            %d1                     # restore d1 to original
22295        rts
22296
22297#########################################################################
22298# norm(): normalize the mantissa of an extended precision input. the    #
22299#         input operand should not be normalized already.               #
22300#                                                                       #
22301# XDEF **************************************************************** #
22302#       norm()                                                          #
22303#                                                                       #
22304# XREF **************************************************************** #
22305#       none                                                            #
22306#                                                                       #
22307# INPUT *************************************************************** #
22308#       a0 = pointer fp extended precision operand to normalize         #
22309#                                                                       #
22310# OUTPUT ************************************************************** #
22311#       d0 = number of bit positions the mantissa was shifted           #
22312#       a0 = the input operand's mantissa is normalized; the exponent   #
22313#            is unchanged.                                              #
22314#                                                                       #
22315#########################################################################
22316        global          norm
22317norm:
22318        mov.l           %d2, -(%sp)             # save d2/d3; used as temps below
22319        mov.l           %d3, -(%sp)
22320
22321        mov.l           FTEMP_HI(%a0), %d0      # load hi(mantissa)
22322        mov.l           FTEMP_LO(%a0), %d1      # load lo(mantissa)
22323
22324        bfffo           %d0{&0:&32}, %d2        # d2 = # of leading zeroes in hi(man)
22325        beq.b           norm_lo                 # hi(man) is all zeroes!
22326
22327norm_hi:
22328        lsl.l           %d2, %d0                # left shift hi(man)
22329        bfextu          %d1{&0:%d2}, %d3        # top d2 bits of lo(man) move up
22330
22331        or.l            %d3, %d0                # create hi(man)
22332        lsl.l           %d2, %d1                # create lo(man)
22333
22334        mov.l           %d0, FTEMP_HI(%a0)      # store new hi(man)
22335        mov.l           %d1, FTEMP_LO(%a0)      # store new lo(man)
22336
22337        mov.l           %d2, %d0                # return shift amount
22338
22339        mov.l           (%sp)+, %d3             # restore temp regs
22340        mov.l           (%sp)+, %d2
22341
22342        rts
22343
22344norm_lo:
22345        bfffo           %d1{&0:&32}, %d2        # d2 = # of leading zeroes in lo(man)
22346        lsl.l           %d2, %d1                # shift lo(man)
22347        add.l           &32, %d2                # add 32 for the empty hi(man)
22348
22349        mov.l           %d1, FTEMP_HI(%a0)      # shifted lo(man) becomes hi(man)
22350        clr.l           FTEMP_LO(%a0)           # lo(man) is now zero
22351
22352        mov.l           %d2, %d0                # return shift amount
22353
22354        mov.l           (%sp)+, %d3             # restore temp regs
22355        mov.l           (%sp)+, %d2
22356
22357        rts
22358
22359#########################################################################
22360# unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO     #
22361#               - returns corresponding optype tag                      #
22362#                                                                       #
22363# XDEF **************************************************************** #
22364#       unnorm_fix()                                                    #
22365#                                                                       #
22366# XREF **************************************************************** #
22367#       norm() - normalize the mantissa                                 #
22368#                                                                       #
22369# INPUT *************************************************************** #
22370#       a0 = pointer to unnormalized extended precision number          #
22371#                                                                       #
22372# OUTPUT ************************************************************** #
22373#       d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO  #
22374#       a0 = input operand has been converted to a norm, denorm, or     #
22375#            zero; both the exponent and mantissa are changed.          #
22376#                                                                       #
22377#########################################################################
22378
22379        global          unnorm_fix
22380unnorm_fix:
22381        bfffo           FTEMP_HI(%a0){&0:&32}, %d0 # how many shifts are needed?
22382        bne.b           unnorm_shift            # hi(man) is not all zeroes
22383
22384#
22385# hi(man) is all zeroes so see if any bits in lo(man) are set
22386#
22387unnorm_chk_lo:
22388        bfffo           FTEMP_LO(%a0){&0:&32}, %d0 # is operand really a zero?
22389        beq.w           unnorm_zero             # yes
22390
22391        add.w           &32, %d0                # no; count the 32 zeroes of hi(man)
22392
22393#
22394# d0 = # shifts needed for complete normalization
22395#
22396unnorm_shift:
22397        clr.l           %d1                     # clear top word
22398        mov.w           FTEMP_EX(%a0), %d1      # extract exponent
22399        and.w           &0x7fff, %d1            # strip off sgn
22400
22401        cmp.w           %d0, %d1                # would normalizing push exp < 0?
22402        bgt.b           unnorm_nrm_zero         # yes; shift only until exp = 0
22403
22404#
22405# exponent would not go < 0. Therefore, the number can be fully normalized
22406#
22407        sub.w           %d0, %d1                # shift exponent value
22408        mov.w           FTEMP_EX(%a0), %d0      # load old exponent
22409        and.w           &0x8000, %d0            # save old sign
22410        or.w            %d0, %d1                # {sgn,new exp}
22411        mov.w           %d1, FTEMP_EX(%a0)      # insert new exponent
22412
22413        bsr.l           norm                    # normalize UNNORM
22414
22415        mov.b           &NORM, %d0              # return new optype tag
22416        rts
22417
22418#
22419# exponent would go < 0, so only shift left by the exponent value (d1)
22420#
22421unnorm_nrm_zero:
22422        cmp.b           %d1, &32                # is exp <= 32?
22423        bgt.b           unnorm_nrm_zero_lrg     # no; go handle large exponent
22424
22425        bfextu          FTEMP_HI(%a0){%d1:&32}, %d0 # 32 bits starting d1 bits into man
22426        mov.l           %d0, FTEMP_HI(%a0)      # save new hi(man)
22427
22428        mov.l           FTEMP_LO(%a0), %d0      # fetch old lo(man)
22429        lsl.l           %d1, %d0                # extract new lo(man)
22430        mov.l           %d0, FTEMP_LO(%a0)      # save new lo(man)
22431
22432        and.w           &0x8000, FTEMP_EX(%a0)  # set exp = 0 (keep sign)
22433
22434        mov.b           &DENORM, %d0            # return new optype tag
22435        rts
22436
22437#
22438# exp > 32: shift is more than 32, so only lo(man) bits survive
22439#
22440unnorm_nrm_zero_lrg:
22441        sub.w           &32, %d1                # adjust shft amt by 32
22442
22443        mov.l           FTEMP_LO(%a0), %d0      # fetch old lo(man)
22444        lsl.l           %d1, %d0                # left shift lo(man)
22445
22446        mov.l           %d0, FTEMP_HI(%a0)      # store new hi(man)
22447        clr.l           FTEMP_LO(%a0)           # lo(man) = 0
22448
22449        and.w           &0x8000, FTEMP_EX(%a0)  # set exp = 0 (keep sign)
22450
22451        mov.b           &DENORM, %d0            # return new optype tag
22452        rts
22453
22454#
22455# whole mantissa is zero so this UNNORM is actually a zero
22456#
22457unnorm_zero:
22458        and.w           &0x8000, FTEMP_EX(%a0)  # force exponent to zero (keep sign)
22459
22460        mov.b           &ZERO, %d0              # fix optype tag
22461        rts
22462
22463#########################################################################
22464# XDEF **************************************************************** #
22465#       set_tag_x(): return the optype of the input ext fp number       #
22466#                                                                       #
22467# XREF **************************************************************** #
22468#       None                                                            #
22469#                                                                       #
22470# INPUT *************************************************************** #
22471#       a0 = pointer to extended precision operand                      #
22472#                                                                       #
22473# OUTPUT ************************************************************** #
22474#       d0 = value of type tag                                          #
22475#               one of: NORM, INF, QNAN, SNAN, DENORM, UNNORM, ZERO     #
22476#                                                                       #
22477# ALGORITHM *********************************************************** #
22478#       Simply test the exponent, j-bit, and mantissa values to         #
22479# determine the type of operand.                                        #
22480#       If it's an unnormalized zero, alter the operand and force it    #
22481# to be a normal zero.                                                  #
22482#                                                                       #
22483#########################################################################
22484
22485        global          set_tag_x
22486set_tag_x:
22487        mov.w           FTEMP_EX(%a0), %d0      # extract exponent
22488        andi.w          &0x7fff, %d0            # strip off sign
22489        cmpi.w          %d0, &0x7fff            # is (EXP == MAX)?
22490        beq.b           inf_or_nan_x            # yes; it's an INF or a NAN
22491not_inf_or_nan_x:
22492        btst            &0x7,FTEMP_HI(%a0)      # is the j-bit (mantissa msb) set?
22493        beq.b           not_norm_x              # no
22494is_norm_x:
22495        mov.b           &NORM, %d0
22496        rts
22497not_norm_x:
22498        tst.w           %d0                     # is exponent = 0?
22499        bne.b           is_unnorm_x             # no; j-bit clear with exp != 0
22500not_unnorm_x:
22501        tst.l           FTEMP_HI(%a0)           # exp = 0: any mantissa bit set?
22502        bne.b           is_denorm_x
22503        tst.l           FTEMP_LO(%a0)
22504        bne.b           is_denorm_x
22505is_zero_x:
22506        mov.b           &ZERO, %d0
22507        rts
22508is_denorm_x:
22509        mov.b           &DENORM, %d0
22510        rts
22511# must distinguish now "Unnormalized zeroes" which we
22512# must convert to zero.
22513is_unnorm_x:
22514        tst.l           FTEMP_HI(%a0)           # any mantissa bit set?
22515        bne.b           is_unnorm_reg_x
22516        tst.l           FTEMP_LO(%a0)
22517        bne.b           is_unnorm_reg_x
22518# it's an "unnormalized zero". let's convert it to an actual zero...
22519        andi.w          &0x8000,FTEMP_EX(%a0)   # clear exponent (keep sign)
22520        mov.b           &ZERO, %d0
22521        rts
22522is_unnorm_reg_x:
22523        mov.b           &UNNORM, %d0
22524        rts
22525inf_or_nan_x:
22526        tst.l           FTEMP_LO(%a0)           # any lo(man) bit set means NAN
22527        bne.b           is_nan_x
22528        mov.l           FTEMP_HI(%a0), %d0
22529        and.l           &0x7fffffff, %d0        # msb is a don't care!
22530        bne.b           is_nan_x                # rest of hi(man) != 0 means NAN
22531is_inf_x:
22532        mov.b           &INF, %d0
22533        rts
22534is_nan_x:
22535        btst            &0x6, FTEMP_HI(%a0)     # test the bit just below the msb
22536        beq.b           is_snan_x               # clear; tag as SNAN
22537        mov.b           &QNAN, %d0
22538        rts
22539is_snan_x:
22540        mov.b           &SNAN, %d0
22541        rts
22542
22543#########################################################################
22544# XDEF **************************************************************** #
22545#       set_tag_d(): return the optype of the input dbl fp number       #
22546#                                                                       #
22547# XREF **************************************************************** #
22548#       None                                                            #
22549#                                                                       #
22550# INPUT *************************************************************** #
22551#       a0 = points to double precision operand                         #
22552#                                                                       #
22553# OUTPUT ************************************************************** #
22554#       d0 = value of type tag                                          #
22555#               one of: NORM, INF, QNAN, SNAN, DENORM, ZERO             #
22556#                                                                       #
22557# ALGORITHM *********************************************************** #
22558#       Simply test the exponent, j-bit, and mantissa values to         #
22559# determine the type of operand.                                        #
22560#                                                                       #
22561#########################################################################
22562
22563        global          set_tag_d
22564set_tag_d:
22565        mov.l           FTEMP(%a0), %d0         # fetch upper longword of the dbl
22566        mov.l           %d0, %d1                # keep a copy for fraction tests
22567
22568        andi.l          &0x7ff00000, %d0        # isolate the exponent field
22569        beq.b           zero_or_denorm_d        # exp = 0
22570
22571        cmpi.l          %d0, &0x7ff00000        # is (EXP == MAX)?
22572        beq.b           inf_or_nan_d            # yes
22573
22574is_norm_d:
22575        mov.b           &NORM, %d0
22576        rts
22577zero_or_denorm_d:
22578        and.l           &0x000fffff, %d1        # upper fraction bits
22579        bne             is_denorm_d             # nonzero fraction
22580        tst.l           4+FTEMP(%a0)            # lower longword of fraction
22581        bne             is_denorm_d             # nonzero fraction
22582is_zero_d:
22583        mov.b           &ZERO, %d0
22584        rts
22585is_denorm_d:
22586        mov.b           &DENORM, %d0
22587        rts
22588inf_or_nan_d:
22589        and.l           &0x000fffff, %d1        # upper fraction bits
22590        bne             is_nan_d                # nonzero fraction
22591        tst.l           4+FTEMP(%a0)            # lower longword of fraction
22592        bne             is_nan_d                # nonzero fraction
22593is_inf_d:
22594        mov.b           &INF, %d0
22595        rts
22596is_nan_d:
22597        btst            &19, %d1                # is the msb of the fraction set?
22598        bne             is_qnan_d               # yes; tag as QNAN
22599is_snan_d:
22600        mov.b           &SNAN, %d0
22601        rts
22602is_qnan_d:
22603        mov.b           &QNAN, %d0
22604        rts
22605
22606#########################################################################
22607# XDEF **************************************************************** #
22608#       set_tag_s(): return the optype of the input sgl fp number       #
22609#                                                                       #
22610# XREF **************************************************************** #
22611#       None                                                            #
22612#                                                                       #
22613# INPUT *************************************************************** #
22614#       a0 = pointer to single precision operand                        #
22615#                                                                       #
22616# OUTPUT ************************************************************** #
22617#       d0 = value of type tag                                          #
22618#               one of: NORM, INF, QNAN, SNAN, DENORM, ZERO             #
22619#                                                                       #
22620# ALGORITHM *********************************************************** #
22621#       Simply test the exponent, j-bit, and mantissa values to         #
22622# determine the type of operand.                                        #
22623#                                                                       #
22624#########################################################################
22625
22626        global          set_tag_s
22627set_tag_s:
22628        mov.l           FTEMP(%a0), %d0         # fetch the sgl operand
22629        mov.l           %d0, %d1                # keep a copy for fraction tests
22630
22631        andi.l          &0x7f800000, %d0        # isolate the exponent field
22632        beq.b           zero_or_denorm_s        # exp = 0
22633
22634        cmpi.l          %d0, &0x7f800000        # is (EXP == MAX)?
22635        beq.b           inf_or_nan_s            # yes
22636
22637is_norm_s:
22638        mov.b           &NORM, %d0
22639        rts
22640zero_or_denorm_s:
22641        and.l           &0x007fffff, %d1        # isolate the fraction
22642        bne             is_denorm_s             # nonzero fraction
22643is_zero_s:
22644        mov.b           &ZERO, %d0
22645        rts
22646is_denorm_s:
22647        mov.b           &DENORM, %d0
22648        rts
22649inf_or_nan_s:
22650        and.l           &0x007fffff, %d1        # isolate the fraction
22651        bne             is_nan_s                # nonzero fraction
22652is_inf_s:
22653        mov.b           &INF, %d0
22654        rts
22655is_nan_s:
22656        btst            &22, %d1                # is the msb of the fraction set?
22657        bne             is_qnan_s               # yes; tag as QNAN
22658is_snan_s:
22659        mov.b           &SNAN, %d0
22660        rts
22661is_qnan_s:
22662        mov.b           &QNAN, %d0
22663        rts
22664
22665#########################################################################
22666# XDEF **************************************************************** #
22667#       unf_res(): routine to produce default underflow result of a     #
22668#                  scaled extended precision number; this is used by    #
22669#                  fadd/fdiv/fmul/etc. emulation routines.              #
22670#       unf_res4(): same as above but for fsglmul/fsgldiv which use     #
22671#                   single round prec and extended prec mode.           #
22672#                                                                       #
22673# XREF **************************************************************** #
22674#       _denorm() - denormalize according to scale factor               #
22675#       _round() - round denormalized number according to rnd prec      #
22676#                                                                       #
22677# INPUT *************************************************************** #
22678#       a0 = pointer to extended precision operand                      #
22679#       d0 = scale factor                                               #
22680#       d1 = rounding precision/mode                                    #
22681#                                                                       #
22682# OUTPUT ************************************************************** #
22683#       a0 = pointer to default underflow result in extended precision  #
22684#       d0.b = result FPSR_cc which caller may or may not want to save  #
22685#                                                                       #
22686# ALGORITHM *********************************************************** #
22687#       Convert the input operand to "internal format" which means the  #
22688# exponent is extended to 16 bits and the sign is stored in the unused  #
22689# portion of the extended precision operand. Denormalize the number     #
22690# according to the scale factor passed in d0. Then, round the           #
22691# denormalized result.                                                  #
22692#       Set the FPSR_exc bits as appropriate but return the cc bits in  #
22693# d0 in case the caller doesn't want to save them (as is the case for   #
22694# fmove out).                                                           #
22695#       unf_res4() for fsglmul/fsgldiv forces the denorm to extended    #
22696# precision and the rounding precision to single.                       #
22697#                                                                       #
22698#########################################################################
22699        global          unf_res                 # exported entry point
22700unf_res:                                        # in: a0 = operand ptr, d0 = scale factor, d1 = rnd prec/mode
22701        mov.l           %d1, -(%sp)             # save rnd prec,mode on stack
22702
22703        btst            &0x7, FTEMP_EX(%a0)     # make "internal" format: test bit 7 of the first exponent byte (sign)
22704        sne             FTEMP_SGN(%a0)          # FTEMP_SGN = 0xff if that bit was set, 0x00 otherwise
22705
22706        mov.w           FTEMP_EX(%a0), %d1      # extract exponent
22707        and.w           &0x7fff, %d1            # strip the sign bit from the exponent word
22708        sub.w           %d0, %d1                # subtract the scale factor passed in d0
22709        mov.w           %d1, FTEMP_EX(%a0)      # insert 16 bit exponent
22710
22711        mov.l           %a0, -(%sp)             # save operand ptr during calls; stack: (sp)=a0, 4(sp)=saved d1
22712
22713        mov.l           0x4(%sp),%d0            # pass rnd prec: reload the saved prec/mode longword
22714        andi.w          &0x00c0,%d0             # keep only bits 7:6 of the low word (rnd prec field)
22715        lsr.w           &0x4,%d0                # move the field down to bits 3:2; NOTE(review): upper word of d0 is left unmasked - confirm _denorm only reads the low word
22716        bsr.l           _denorm                 # denorm result
22717
22718        mov.l           (%sp),%a0               # reload operand ptr (left on the stack for the next call)
22719        mov.w           0x6(%sp),%d1            # load prec:mode into %d1 (low word of the saved longword)
22720        andi.w          &0xc0,%d1               # extract rnd prec
22721        lsr.w           &0x4,%d1                # prec field now in bits 3:2
22722        swap            %d1                     # hi(d1) = rnd prec
22723        mov.w           0x6(%sp),%d1            # load prec:mode again for the mode field
22724        andi.w          &0x30,%d1               # extract rnd mode (bits 5:4)
22725        lsr.w           &0x4,%d1                # lo(d1) = rnd mode in bits 1:0
22726        bsr.l           _round                  # round the denorm
22727
22728        mov.l           (%sp)+, %a0             # pop the saved operand ptr
22729
22730# result is now rounded properly. convert back to normal format
22731        bclr            &0x7, FTEMP_EX(%a0)     # clear sgn first; may have residue
22732        tst.b           FTEMP_SGN(%a0)          # is "internal result" sign set?
22733        beq.b           unf_res_chkifzero       # no; result is positive
22734        bset            &0x7, FTEMP_EX(%a0)     # set result sgn
22735        clr.b           FTEMP_SGN(%a0)          # clear temp sign
22736
22737# the number may have become zero after rounding. set ccodes accordingly.
22738unf_res_chkifzero:
22739        clr.l           %d0                     # start with no ccode bits in the return value
22740        tst.l           FTEMP_HI(%a0)           # is value now a zero?
22741        bne.b           unf_res_cont            # no
22742        tst.l           FTEMP_LO(%a0)           # upper mantissa lword is zero; check the lower one
22743        bne.b           unf_res_cont            # no
22744#       bset            &z_bit, FPSR_CC(%a6)    # yes; set zero ccode bit
22745        bset            &z_bit, %d0             # yes; set zero ccode bit (returned in d0, FPSR_CC is not written here)
22746
22747unf_res_cont:
22748
22749#
22750# can inex1 also be set along with unfl and inex2???
22751#
22752# we know that underflow has occurred. aunfl should be set if INEX2 is also set.
22753#
22754        btst            &inex2_bit, FPSR_EXCEPT(%a6) # is INEX2 set?
22755        beq.b           unf_res_end             # no
22756        bset            &aunfl_bit, FPSR_AEXCEPT(%a6) # yes; set aunfl
22757
22758unf_res_end:
22759        add.l           &0x4, %sp               # clear stack (drop the saved prec/mode longword)
22760        rts                                     # out: a0 = operand ptr, d0 = 0 or z_bit only
22761
22762# unf_res() for fsglmul() and fsgldiv(): same flow, but _denorm gets d0 = 0 and _round gets s_mode as the precision.
22763        global          unf_res4                # exported entry point
22764unf_res4:                                       # in: a0 = operand ptr, d0 = scale factor, d1 = rnd prec/mode
22765        mov.l           %d1,-(%sp)              # save rnd prec,mode on stack
22766
22767        btst            &0x7,FTEMP_EX(%a0)      # make "internal" format: test bit 7 of the first exponent byte (sign)
22768        sne             FTEMP_SGN(%a0)          # FTEMP_SGN = 0xff if that bit was set, 0x00 otherwise
22769
22770        mov.w           FTEMP_EX(%a0),%d1       # extract exponent
22771        and.w           &0x7fff,%d1             # strip the sign bit from the exponent word
22772        sub.w           %d0,%d1                 # subtract the scale factor passed in d0
22773        mov.w           %d1,FTEMP_EX(%a0)       # insert 16 bit exponent
22774
22775        mov.l           %a0,-(%sp)              # save operand ptr during calls; stack: (sp)=a0, 4(sp)=saved d1
22776
22777        clr.l           %d0                     # force rnd prec = ext
22778        bsr.l           _denorm                 # denorm result
22779
22780        mov.l           (%sp),%a0               # reload operand ptr (left on the stack for the next call)
22781        mov.w           &s_mode,%d1             # force rnd prec = sgl
22782        swap            %d1                     # hi(d1) = rnd prec
22783        mov.w           0x6(%sp),%d1            # load prec:mode (low word of the saved longword)
22784        andi.w          &0x30,%d1               # extract rnd mode (bits 5:4)
22785        lsr.w           &0x4,%d1                # lo(d1) = rnd mode in bits 1:0
22786        bsr.l           _round                  # round the denorm
22787
22788        mov.l           (%sp)+,%a0              # pop the saved operand ptr
22789
22790# result is now rounded properly. convert back to normal format
22791        bclr            &0x7,FTEMP_EX(%a0)      # clear sgn first; may have residue
22792        tst.b           FTEMP_SGN(%a0)          # is "internal result" sign set?
22793        beq.b           unf_res4_chkifzero      # no; result is positive
22794        bset            &0x7,FTEMP_EX(%a0)      # set result sgn
22795        clr.b           FTEMP_SGN(%a0)          # clear temp sign
22796
22797# the number may have become zero after rounding. set ccodes accordingly.
22798unf_res4_chkifzero:
22799        clr.l           %d0                     # start with no ccode bits in the return value
22800        tst.l           FTEMP_HI(%a0)           # is value now a zero?
22801        bne.b           unf_res4_cont           # no
22802        tst.l           FTEMP_LO(%a0)           # upper mantissa lword is zero; check the lower one
22803        bne.b           unf_res4_cont           # no
22804#       bset            &z_bit,FPSR_CC(%a6)     # yes; set zero ccode bit
22805        bset            &z_bit,%d0              # yes; set zero ccode bit (returned in d0, FPSR_CC is not written here)
22806
22807unf_res4_cont:
22808
22809#
22810# can inex1 also be set along with unfl and inex2???
22811#
22812# we know that underflow has occurred. aunfl should be set if INEX2 is also set.
22813#
22814        btst            &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
22815        beq.b           unf_res4_end            # no
22816        bset            &aunfl_bit,FPSR_AEXCEPT(%a6) # yes; set aunfl
22817
22818unf_res4_end:
22819        add.l           &0x4,%sp                # clear stack (drop the saved prec/mode longword)
22820        rts                                     # out: a0 = operand ptr, d0 = 0 or z_bit only
22821
22822#########################################################################
22823# XDEF **************************************************************** #
22824#       ovf_res(): routine to produce the default overflow result of    #
22825#                  an overflowing number.                               #
22826#       ovf_res2(): same as above but the rnd mode/prec are passed      #
22827#                   differently.                                        #
22828#                                                                       #
22829# XREF **************************************************************** #
22830#       none                                                            #
22831#                                                                       #
22832# INPUT *************************************************************** #
22833#       d1.b    = '-1' => (-); '0' => (+)                               #
22834#   ovf_res():                                                          #
22835#       d0      = rnd mode/prec                                         #
22836#   ovf_res2():                                                         #
22837#       hi(d0)  = rnd prec                                              #
22838#       lo(d0)  = rnd mode                                              #
22839#                                                                       #
22840# OUTPUT ************************************************************** #
22841#       a0      = points to extended precision result                   #
22842#       d0.b    = condition code bits                                   #
22843#                                                                       #
22844# ALGORITHM *********************************************************** #
22845#       The default overflow result can be determined by the sign of    #
22846# the result and the rounding mode/prec in effect. These bits are       #
22847# concatenated together to create an index into the default result      #
22848# table. A pointer to the correct result is returned in a0. The         #
22849# resulting condition codes are returned in d0 in case the caller       #
22850# doesn't want FPSR_cc altered (as is the case for fmove out).          #
22851#                                                                       #
22852#########################################################################
22853
22854        global          ovf_res                 # exported entry point
22855ovf_res:                                        # in: d1.b = -1/0 result sign, d0.b = rnd prec/mode byte
22856        andi.w          &0x10,%d1               # keep result sign: -1 becomes 0x10, 0 stays 0
22857        lsr.b           &0x4,%d0                # shift prec/mode down into bits 3:0
22858        or.b            %d0,%d1                 # concat the two: index = sign(bit 4) | prec/mode(bits 3:0)
22859        mov.w           %d1,%d0                 # make a copy (d0 indexes the ccode table)
22860        lsl.b           &0x1,%d1                # multiply d1 by 2 (scaled by 8 below => 16 bytes per result entry)
22861        bra.b           ovf_res_load            # join the shared table lookup
22862
22863        global          ovf_res2                # exported entry point
22864ovf_res2:                                       # in: d1.b = -1/0 result sign, hi(d0) = rnd prec, lo(d0) = rnd mode
22865        and.w           &0x10, %d1              # keep result sign: -1 becomes 0x10, 0 stays 0
22866        or.b            %d0, %d1                # insert rnd mode (ORed in as passed; no shifting here)
22867        swap            %d0                     # bring rnd prec into the low word
22868        or.b            %d0, %d1                # insert rnd prec (ORed in as passed; no shifting here)
22869        mov.w           %d1, %d0                # make a copy (d0 indexes the ccode table)
22870        lsl.b           &0x1, %d1               # shift left by 1 (scaled by 8 below => 16 bytes per result entry)
22871
22872#
22873# use the rounding mode, precision, and result sign as an index into the
22874# two tables below to fetch the default result and the result ccodes.
22875#
22876ovf_res_load:
22877        mov.b           (tbl_ovfl_cc.b,%pc,%d0.w*1), %d0 # fetch result ccodes
22878        lea             (tbl_ovfl_result.b,%pc,%d1.w*8), %a0 # return result ptr
22879
22880        rts                                     # out: a0 = ptr to table entry, d0.b = ccode byte
22881
22882tbl_ovfl_cc:                                    # one ccode byte per index; each row is RN,RZ,RM,RP for one sign/precision
22883        byte            0x2, 0x0, 0x0, 0x2      # + ext: 0x2 sits where tbl_ovfl_result holds an INF
22884        byte            0x2, 0x0, 0x0, 0x2      # + sgl
22885        byte            0x2, 0x0, 0x0, 0x2      # + dbl
22886        byte            0x0, 0x0, 0x0, 0x0      # filler for the fourth precision slot (all zero, like the matching result entries)
22887        byte            0x2+0x8, 0x8, 0x2+0x8, 0x8 # - ext: 0x8 is present in every negative entry
22888        byte            0x2+0x8, 0x8, 0x2+0x8, 0x8 # - sgl
22889        byte            0x2+0x8, 0x8, 0x2+0x8, 0x8 # - dbl
22890
22891tbl_ovfl_result:                                # 16 bytes per entry (4 longs); same index order as tbl_ovfl_cc
22892        long            0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
22893        long            0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RZ
22894        long            0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RM
22895        long            0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
22896
22897        long            0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
22898        long            0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RZ
22899        long            0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RM
22900        long            0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
22901
22902        long            0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
22903        long            0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RZ
22904        long            0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RM
22905        long            0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
22906
22907        long            0x00000000,0x00000000,0x00000000,0x00000000 # filler: fourth precision slot, keeps the negative entries at index 16
22908        long            0x00000000,0x00000000,0x00000000,0x00000000 # filler
22909        long            0x00000000,0x00000000,0x00000000,0x00000000 # filler
22910        long            0x00000000,0x00000000,0x00000000,0x00000000 # filler
22911
22912        long            0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
22913        long            0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RZ
22914        long            0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
22915        long            0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RP
22916
22917        long            0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
22918        long            0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RZ
22919        long            0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
22920        long            0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RP
22921
22922        long            0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
22923        long            0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RZ
22924        long            0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
22925        long            0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RP
22926
22927#########################################################################
22928# XDEF **************************************************************** #
22929#       get_packed(): fetch a packed operand from memory and then       #
22930#                     convert it to a floating-point binary number.     #
22931#                                                                       #
22932# XREF **************************************************************** #
22933#       _dcalc_ea() - calculate the correct <ea>                        #
22934#       _dmem_read() - fetch the packed operand from memory             #
22935#       facc_in_x() - the fetch failed so jump to special exit code     #
22936#       decbin()    - convert packed to binary extended precision       #
22937#                                                                       #
22938# INPUT *************************************************************** #
22939#       None                                                            #
22940#                                                                       #
22941# OUTPUT ************************************************************** #
22942#       If no failure on _dmem_read():                                  #
22943#       FP_SRC(a6) = packed operand now as a binary FP number           #
22944#                                                                       #
22945# ALGORITHM *********************************************************** #
22946#       Get the correct <ea> which is the value on the exception stack  #
22947# frame w/ maybe a correction factor if the <ea> is -(an) or (an)+.     #
22948# Then, fetch the operand from memory. If the fetch fails, exit         #
22949# through facc_in_x().                                                  #
22950#       If the packed operand is a ZERO, NAN, or INF, return with       #
22951# FP_SRC left as fetched. Else, call decbin() which will                #
22952# convert the packed value to an extended precision binary value.       #
22953#                                                                       #
22954#########################################################################
22955
22956# the stacked <ea> for packed is correct except for -(An).
22957# the base reg must be updated for both -(An) and (An)+.
22958        global          get_packed              # exported entry point
22959get_packed:                                     # out: FP_SRC(a6) holds the operand; exits via facc_in_x if the read fails
22960        mov.l           &0xc,%d0                # packed is 12 bytes
22961        bsr.l           _dcalc_ea               # fetch <ea>; correct An (presumably returns the <ea> in a0 - confirm)
22962
22963        lea             FP_SRC(%a6),%a1         # pass: ptr to super dst
22964        mov.l           &0xc,%d0                # pass: 12 bytes
22965        bsr.l           _dmem_read              # read packed operand
22966
22967        tst.l           %d1                     # did dfetch fail? (non-zero d1 is treated as failure)
22968        bne.l           facc_in_x               # yes
22969
22970# The packed operand is an INF or a NAN if the exponent field is all ones.
22971        bfextu          FP_SRC(%a6){&1:&15},%d0 # get exp: the 15 bits that follow the top (sign) bit
22972        cmpi.w          %d0,&0x7fff             # INF or NAN?
22973        bne.b           gp_try_zero             # no
22974        rts                                     # operand is an INF or NAN; FP_SRC is returned as fetched
22975
22976# The packed operand is a zero if the mantissa is all zero, else it's
22977# a normal packed op.
22978gp_try_zero:
22979        mov.b           3+FP_SRC(%a6),%d0       # get byte 4 (offset 3 of the first lword)
22980        andi.b          &0x0f,%d0               # clear all but last nybble (the digit decbin reads as the integer digit)
22981        bne.b           gp_not_spec             # not a zero
22982        tst.l           FP_SRC_HI(%a6)          # is lw 2 zero?
22983        bne.b           gp_not_spec             # not a zero
22984        tst.l           FP_SRC_LO(%a6)          # is lw 3 zero?
22985        bne.b           gp_not_spec             # not a zero
22986        rts                                     # operand is a ZERO; FP_SRC is returned as fetched
22987gp_not_spec:
22988        lea             FP_SRC(%a6),%a0         # pass: ptr to packed op
22989        bsr.l           decbin                  # convert to extended (result comes back in fp0)
22990        fmovm.x         &0x80,FP_SRC(%a6)       # make this the srcop: overwrite the packed bytes with the converted value
22991        rts
22992
22993#########################################################################
22994# decbin(): Converts normalized packed bcd value pointed to by register #
22995#           a0 to extended-precision value in fp0.                      #
22996#                                                                       #
22997# INPUT *************************************************************** #
22998#       a0 = pointer to normalized packed bcd value                     #
22999#                                                                       #
23000# OUTPUT ************************************************************** #
23001#       fp0 = exact fp representation of the packed bcd value.          #
23002#                                                                       #
23003# ALGORITHM *********************************************************** #
23004#       Expected is a normal bcd (i.e. non-exceptional; all inf, zero,  #
23005#       and NaN operands are dispatched without entering this routine)  #
23006#       value in 68881/882 format at location (a0).                     #
23007#                                                                       #
23008#       A1. Convert the bcd exponent to binary by successive adds and   #
23009#       muls. Set the sign according to SE. Subtract 16 to compensate   #
23010#       for the mantissa which is to be interpreted as 17 integer       #
23011#       digits, rather than 1 integer and 16 fraction digits.           #
23012#       Note: this operation can never overflow.                        #
23013#                                                                       #
23014#       A2. Convert the bcd mantissa to binary by successive            #
23015#       adds and muls in FP0. Set the sign according to SM.             #
23016#       The mantissa digits will be converted with the decimal point    #
23017#       assumed following the least-significant digit.                  #
23018#       Note: this operation can never overflow.                        #
23019#                                                                       #
23020#       A3. Count the number of leading/trailing zeros in the           #
23021#       bcd string.  If SE is positive, count the leading zeros;        #
23022#       if negative, count the trailing zeros.  Set the adjusted        #
23023#       exponent equal to the exponent from A1 and the zero count       #
23024#       added if SM = 1 and subtracted if SM = 0.  Scale the            #
23025#       mantissa the equivalent of forcing in the bcd value:            #
23026#                                                                       #
23027#       SM = 0  a non-zero digit in the integer position                #
23028#       SM = 1  a non-zero digit in Mant0, lsd of the fraction          #
23029#                                                                       #
23030#       this will insure that any value, regardless of its              #
23031#       representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted     #
23032#       consistently.                                                   #
23033#                                                                       #
23034#       A4. Calculate the factor 10^exp in FP1 using a table of         #
23035#       10^(2^n) values.  To reduce the error in forming factors        #
23036#       greater than 10^27, a directed rounding scheme is used with     #
23037#       tables rounded to RN, RM, and RP, according to the table        #
23038#       in the comments of the pwrten section.                          #
23039#                                                                       #
23040#       A5. Form the final binary number by scaling the mantissa by     #
23041#       the exponent factor.  This is done by multiplying the           #
23042#       mantissa in FP0 by the factor in FP1 if the adjusted            #
23043#       exponent sign is positive, and dividing FP0 by FP1 if           #
23044#       it is negative.                                                 #
23045#                                                                       #
23046#       Clean up and return. Check if the final mul or div was inexact. #
23047#       If so, set INEX1 in USER_FPSR.                                  #
23048#                                                                       #
23049#########################################################################
23050
23051#
23052#       PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded
23053#       to nearest, minus, and plus, respectively.  The tables include
23054#       10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}.  No rounding
23055#       is required until the power is greater than 27, however, all
23056#       tables include the first 5 for ease of indexing.
23057#
23058RTABLE:                                         # 16-byte lookup table; NOTE(review): not referenced in the visible code - presumably the rounding selection used by the pwrten step, confirm there
23059        byte            0,0,0,0
23060        byte            2,3,2,3
23061        byte            2,3,3,2
23062        byte            3,2,2,3
23063
23064        set             FNIBS,7                 # dbf count loaded per mantissa lword in loadlw (8 digits)
23065        set             FSTRT,0                 # bit offset of the first digit within a mantissa lword
23066
23067        set             ESTRT,4                 # bit offset of the first exponent digit in the first lword (calc_e)
23068        set             EDIGITS,2               # dbf count in calc_e; the loop body runs EDIGITS+1 = 3 times
23069
23070        global          decbin
23071decbin:
23072        mov.l           0x0(%a0),FP_SCR0_EX(%a6) # make a copy of input
23073        mov.l           0x4(%a0),FP_SCR0_HI(%a6) # so we don't alter it
23074        mov.l           0x8(%a0),FP_SCR0_LO(%a6)
23075
23076        lea             FP_SCR0(%a6),%a0
23077
23078        movm.l          &0x3c00,-(%sp)          # save d2-d5
23079        fmovm.x         &0x1,-(%sp)             # save fp1
23080#
23081# Calculate exponent:
23082#  1. Copy bcd value in memory for use as a working copy.
23083#  2. Calculate absolute value of exponent in d1 by mul and add.
23084#  3. Correct for exponent sign.
23085#  4. Subtract 16 to compensate for interpreting the mant as all integer digits.
23086#     (i.e., all digits assumed left of the decimal point.)
23087#
23088# Register usage:
23089#
23090#  calc_e:
23091#       (*)  d0: temp digit storage
23092#       (*)  d1: accumulator for binary exponent
23093#       (*)  d2: digit count
23094#       (*)  d3: offset pointer
23095#       ( )  d4: first word of bcd
23096#       ( )  a0: pointer to working bcd value
23097#       ( )  a6: pointer to original bcd value
23098#       (*)  FP_SCR1: working copy of original bcd value
23099#       (*)  L_SCR1: copy of original exponent word
23100#
23101calc_e:
23102        mov.l           &EDIGITS,%d2            # # of nibbles (digits) in fraction part
23103        mov.l           &ESTRT,%d3              # counter to pick up digits
23104        mov.l           (%a0),%d4               # get first word of bcd
23105        clr.l           %d1                     # zero d1 for accumulator
23106e_gd:
23107        mulu.l          &0xa,%d1                # mul partial product by one digit place
23108        bfextu          %d4{%d3:&4},%d0         # get the digit and zero extend into d0
23109        add.l           %d0,%d1                 # d1 = d1 + d0
23110        addq.b          &4,%d3                  # advance d3 to the next digit
23111        dbf.w           %d2,e_gd                # if we have used all 3 digits, exit loop
23112        btst            &30,%d4                 # get SE
23113        beq.b           e_pos                   # don't negate if pos
23114        neg.l           %d1                     # negate before subtracting
23115e_pos:
23116        sub.l           &16,%d1                 # sub to compensate for shift of mant
23117        bge.b           e_save                  # if still pos, do not neg
23118        neg.l           %d1                     # now negative, make pos and set SE
23119        or.l            &0x40000000,%d4         # set SE in d4,
23120        or.l            &0x40000000,(%a0)       # and in working bcd
23121e_save:
23122        mov.l           %d1,-(%sp)              # save exp on stack
23123#
23124#
23125# Calculate mantissa:
23126#  1. Calculate absolute value of mantissa in fp0 by mul and add.
23127#  2. Correct for mantissa sign.
23128#     (i.e., all digits assumed left of the decimal point.)
23129#
23130# Register usage:
23131#
23132#  calc_m:
23133#       (*)  d0: temp digit storage
23134#       (*)  d1: lword counter
23135#       (*)  d2: digit count
23136#       (*)  d3: offset pointer
23137#       ( )  d4: words 2 and 3 of bcd
23138#       ( )  a0: pointer to working bcd value
23139#       ( )  a6: pointer to original bcd value
23140#       (*) fp0: mantissa accumulator
23141#       ( )  FP_SCR1: working copy of original bcd value
23142#       ( )  L_SCR1: copy of original exponent word
23143#
23144calc_m:
23145        mov.l           &1,%d1                  # word counter, init to 1
23146        fmov.s          &0x00000000,%fp0        # accumulator
23147#
23148#
23149#  Since the packed number has a long word between the first & second parts,
23150#  get the integer digit then skip down & get the rest of the
23151#  mantissa.  We will unroll the loop once.
23152#
23153        bfextu          (%a0){&28:&4},%d0       # integer part is ls digit in long word
23154        fadd.b          %d0,%fp0                # add digit to sum in fp0
23155#
23156#
23157#  Get the rest of the mantissa.
23158#
23159loadlw:
23160        mov.l           (%a0,%d1.L*4),%d4       # load mantissa lonqword into d4
23161        mov.l           &FSTRT,%d3              # counter to pick up digits
23162        mov.l           &FNIBS,%d2              # reset number of digits per a0 ptr
23163md2b:
23164        fmul.s          &0x41200000,%fp0        # fp0 = fp0 * 10
23165        bfextu          %d4{%d3:&4},%d0         # get the digit and zero extend
23166        fadd.b          %d0,%fp0                # fp0 = fp0 + digit
23167#
23168#
23169#  If all the digits (8) in that long word have been converted (d2=0),
23170#  then inc d1 (=2) to point to the next long word and reset d3 to 0
23171#  to initialize the digit offset, and set d2 to 7 for the digit count;
23172#  else continue with this long word.
23173#
23174        addq.b          &4,%d3                  # advance d3 to the next digit
23175        dbf.w           %d2,md2b                # check for last digit in this lw
23176nextlw:
23177        addq.l          &1,%d1                  # inc lw pointer in mantissa
23178        cmp.l           %d1,&2                  # test for last lw
23179        ble.b           loadlw                  # if not, get last one
23180#
23181#  Check the sign of the mant and make the value in fp0 the same sign.
23182#
23183m_sign:
23184        btst            &31,(%a0)               # test sign of the mantissa
23185        beq.b           ap_st_z                 # if clear, go to append/strip zeros
23186        fneg.x          %fp0                    # if set, negate fp0
23187#
23188# Append/strip zeros:
23189#
23190#  For adjusted exponents which have an absolute value greater than 27*,
23191#  this routine calculates the amount needed to normalize the mantissa
23192#  for the adjusted exponent.  That number is subtracted from the exp
23193#  if the exp was positive, and added if it was negative.  The purpose
23194#  of this is to reduce the value of the exponent and the possibility
23195#  of error in calculation of pwrten.
23196#
23197#  1. Branch on the sign of the adjusted exponent.
23198#  2p.(positive exp)
23199#   2. Check M16 and the digits in lwords 2 and 3 in descending order.
23200#   3. Add one for each zero encountered until a non-zero digit.
23201#   4. Subtract the count from the exp.
23202#   5. Check if the exp has crossed zero in #3 above; make the exp abs
23203#          and set SE.
23204#       6. Multiply the mantissa by 10**count.
23205#  2n.(negative exp)
23206#   2. Check the digits in lwords 3 and 2 in descending order.
23207#   3. Add one for each zero encountered until a non-zero digit.
23208#   4. Add the count to the exp.
23209#   5. Check if the exp has crossed zero in #3 above; clear SE.
23210#   6. Divide the mantissa by 10**count.
23211#
23212#  *Why 27?  If the adjusted exponent is within -28 < expA < 28, then
23213#   any adjustment due to append/strip zeros will drive the resultant
23214#   exponent towards zero.  Since all pwrten constants with a power
23215#   of 27 or less are exact, there is no need to use this routine to
23216#   attempt to lessen the resultant exponent.
23217#
23218# Register usage:
23219#
23220#  ap_st_z:
23221#       (*)  d0: temp digit storage
23222#       (*)  d1: zero count
23223#       (*)  d2: digit count
23224#       (*)  d3: offset pointer
23225#       ( )  d4: first word of bcd
23226#       (*)  d5: lword counter
23227#       ( )  a0: pointer to working bcd value
23228#       ( )  FP_SCR1: working copy of original bcd value
23229#       ( )  L_SCR1: copy of original exponent word
23230#
23231#
23232# First check the absolute value of the exponent to see if this
23233# routine is necessary.  If so, then check the sign of the exponent
23234# and do append (+) or strip (-) zeros accordingly.
23235# This section handles a positive adjusted exponent.
23236#
23237ap_st_z:
23238        mov.l           (%sp),%d1               # load expA for range test
23239        cmp.l           %d1,&27                 # compare expA against 27
23240        ble.w           pwrten                  # if abs(expA) <28, skip ap/st zeros
23241        btst            &30,(%a0)               # check sign of exp (SE is bit 30)
23242        bne.b           ap_st_n                 # if neg, go to neg side
23243        clr.l           %d1                     # d1 = zero count, starts at 0
23244        mov.l           (%a0),%d4               # load lword 1 to d4
23245        bfextu          %d4{&28:&4},%d0         # get M16 in d0
23246        bne.b           ap_p_fx                 # if M16 is non-zero, go fix exp
23247        addq.l          &1,%d1                  # M16 is zero: inc zero count
23248        mov.l           &1,%d5                  # init lword counter
23249        mov.l           (%a0,%d5.L*4),%d4       # get lword 2 to d4
23250        bne.b           ap_p_cl                 # lword 2 non-zero: go scan its digits
23251        addq.l          &8,%d1                  # lword 2 all zero: inc zero count by 8
23252        addq.l          &1,%d5                  # inc lword counter
23253        mov.l           (%a0,%d5.L*4),%d4       # get lword 3 to d4
23254ap_p_cl:
23255        clr.l           %d3                     # init bit offset to first digit of d4
23256        mov.l           &7,%d2                  # init digit counter (8 digits per lword)
23257ap_p_gd:
23258        bfextu          %d4{%d3:&4},%d0         # get digit
23259        bne.b           ap_p_fx                 # if non-zero, go to fix exp
23260        addq.l          &4,%d3                  # point to next digit
23261        addq.l          &1,%d1                  # inc zero count
23262        dbf.w           %d2,ap_p_gd             # get next digit
23263ap_p_fx:
23264        mov.l           %d1,%d0                 # copy zero count to d0
23265        mov.l           (%sp),%d1               # get adjusted exp from memory
23266        sub.l           %d0,%d1                 # subtract count from exp
23267        bge.b           ap_p_fm                 # if result >= 0, go fix mantissa
23268        neg.l           %d1                     # now it's neg; get abs
23269        mov.l           (%a0),%d4               # load lword 1 to d4
23270        or.l            &0x40000000,%d4         # and set SE in d4
23271        or.l            &0x40000000,(%a0)       # and in memory
23272#
23273# Calculate the mantissa multiplier to compensate for the stripping of
23274# zeros from the mantissa.
23275#
23276ap_p_fm:
23277        lea.l           PTENRN(%pc),%a1         # get address of power-of-ten table
23278        clr.l           %d3                     # init table index
23279        fmov.s          &0x3f800000,%fp1        # init fp1 to 1
23280        mov.l           &3,%d2                  # init d2 (not read by the loop below)
23281ap_p_el:
23282        asr.l           &1,%d0                  # shift lsb of zero count into carry
23283        bcc.b           ap_p_en                 # bit clear: skip the multiply
23284        fmul.x          (%a1,%d3),%fp1          # bit set: mul by 10**(d3_bit_no)
23285ap_p_en:
23286        add.l           &12,%d3                 # advance d3 to next 12-byte table entry
23287        tst.l           %d0                     # check if d0 is zero
23288        bne.b           ap_p_el                 # if not, get next bit
23289        fmul.x          %fp1,%fp0               # mul mantissa by 10**(no_bits_shifted)
23290        bra.b           pwrten                  # go calc pwrten
23291#
23292# This section handles a negative adjusted exponent.
23293#
23294ap_st_n:
23295        clr.l           %d1                     # d1 = zero count, starts at 0
23296        mov.l           &2,%d5                  # set up d5 to point to lword 3
23297        mov.l           (%a0,%d5.L*4),%d4       # get lword 3
23298        bne.b           ap_n_cl                 # if not zero, check digits
23299        sub.l           &1,%d5                  # dec d5 to point to lword 2
23300        addq.l          &8,%d1                  # lword 3 all zero: inc zero count by 8
23301        mov.l           (%a0,%d5.L*4),%d4       # get lword 2
23302ap_n_cl:
23303        mov.l           &28,%d3                 # point to last digit
23304        mov.l           &7,%d2                  # init digit counter (8 digits per lword)
23305ap_n_gd:
23306        bfextu          %d4{%d3:&4},%d0         # get digit
23307        bne.b           ap_n_fx                 # if non-zero, go to exp fix
23308        subq.l          &4,%d3                  # point to previous digit
23309        addq.l          &1,%d1                  # inc zero count
23310        dbf.w           %d2,ap_n_gd             # get next digit
23311ap_n_fx:
23312        mov.l           %d1,%d0                 # copy zero count to d0
23313        mov.l           (%sp),%d1               # get adjusted exp from memory
23314        sub.l           %d0,%d1                 # subtract count from exp
23315        bgt.b           ap_n_fm                 # if result > 0, go fix mantissa
23316        neg.l           %d1                     # take abs of exp and clr SE
23317        mov.l           (%a0),%d4               # load lword 1 to d4
23318        and.l           &0xbfffffff,%d4         # and clr SE in d4
23319        and.l           &0xbfffffff,(%a0)       # and in memory
23320#
23321# Calculate the mantissa multiplier to compensate for the appending of
23322# zeros to the mantissa.
23323#
23324ap_n_fm:
23325        lea.l           PTENRN(%pc),%a1         # get address of power-of-ten table
23326        clr.l           %d3                     # init table index
23327        fmov.s          &0x3f800000,%fp1        # init fp1 to 1
23328        mov.l           &3,%d2                  # init d2 (not read by the loop below)
23329ap_n_el:
23330        asr.l           &1,%d0                  # shift lsb of zero count into carry
23331        bcc.b           ap_n_en                 # bit clear: skip the multiply
23332        fmul.x          (%a1,%d3),%fp1          # bit set: mul by 10**(d3_bit_no)
23333ap_n_en:
23334        add.l           &12,%d3                 # advance d3 to next 12-byte table entry
23335        tst.l           %d0                     # check if d0 is zero
23336        bne.b           ap_n_el                 # if not, get next bit
23337        fdiv.x          %fp1,%fp0               # div mantissa by 10**(no_bits_shifted)
23338#
23339#
23340# Calculate power-of-ten factor from adjusted and shifted exponent.
23341#
23342# Register usage:
23343#
23344#  pwrten:
23345#       (*)  d0: temp
23346#       ( )  d1: exponent
23347#       (*)  d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp
23348#       (*)  d3: FPCR work copy
23349#       ( )  d4: first word of bcd
23350#       (*)  a1: RTABLE pointer
23351#  calc_p:
23352#       (*)  d0: temp
23353#       ( )  d1: exponent
23354#       (*)  d3: PWRTxx table index
23355#       ( )  a0: pointer to working copy of bcd
23356#       (*)  a1: PWRTxx pointer
23357#       (*) fp1: power-of-ten accumulator
23358#
23359# Pwrten calculates the exponent factor in the selected rounding mode
23360# according to the following table:
23361#
23362#       Sign of Mant  Sign of Exp  Rounding Mode  PWRTEN Rounding Mode
23363#
23364#       ANY       ANY   RN      RN
23365#
23366#        +         +    RP      RP
23367#        -         +    RP      RM
23368#        +         -    RP      RM
23369#        -         -    RP      RP
23370#
23371#        +         +    RM      RM
23372#        -         +    RM      RP
23373#        +         -    RM      RP
23374#        -         -    RM      RM
23375#
23376#        +         +    RZ      RM
23377#        -         +    RZ      RM
23378#        +         -    RZ      RP
23379#        -         -    RZ      RP
23380#
23381#
23382pwrten:
23383        mov.l           USER_FPCR(%a6),%d3      # get user's FPCR
23384        bfextu          %d3{&26:&2},%d2         # isolate rounding mode bits
23385        mov.l           (%a0),%d4               # reload 1st bcd word to d4
23386        asl.l           &2,%d2                  # format d2 to be
23387        bfextu          %d4{&0:&2},%d0          # {FPCR[6],FPCR[5],SM,SE}
23388        add.l           %d0,%d2                 # in d2 as index into RTABLE
23389        lea.l           RTABLE(%pc),%a1         # load rtable base
23390        mov.b           (%a1,%d2),%d0           # load new rounding bits from table
23391        clr.l           %d3                     # clear d3 to force no exc and extended
23392        bfins           %d0,%d3{&26:&2}         # stuff new rounding bits in FPCR
23393        fmov.l          %d3,%fpcr               # write new FPCR
23394        asr.l           &1,%d0                  # shift new rmode bit 0 into carry
23395        bcc.b           not_rp                  # bit 0 clear: not RP
23396        lea.l           PTENRP(%pc),%a1         # it is RP
23397        bra.b           calc_p                  # go to init section
23398not_rp:
23399        asr.l           &1,%d0                  # shift new rmode bit 1 into carry
23400        bcc.b           not_rm                  # bit 1 clear: not RM
23401        lea.l           PTENRM(%pc),%a1         # it is RM
23402        bra.b           calc_p                  # go to init section
23403not_rm:
23404        lea.l           PTENRN(%pc),%a1         # it is RN
23405calc_p:
23406        mov.l           %d1,%d0                 # copy exp to d0; work on d0
23407        bpl.b           no_neg                  # exp not negative: use it as is
23408        neg.l           %d0                     # exp negative: take its abs value
23409        or.l            &0x40000000,(%a0)       # and set SE bit
23410no_neg:
23411        clr.l           %d3                     # table index
23412        fmov.s          &0x3f800000,%fp1        # init fp1 to 1
23413e_loop:
23414        asr.l           &1,%d0                  # shift next bit into carry
23415        bcc.b           e_next                  # if zero, skip the mul
23416        fmul.x          (%a1,%d3),%fp1          # mul by 10**(d3_bit_no)
23417e_next:
23418        add.l           &12,%d3                 # advance d3 to next 12-byte table entry
23419        tst.l           %d0                     # check if d0 is zero
23420        bne.b           e_loop                  # not zero, continue shifting
23421#
23422#
23423#  Check the sign of the adjusted exp and make the value in fp0 the
23424#  same sign. If the exp was pos then multiply fp1*fp0;
23425#  else divide fp0/fp1.
23426#
23427# Register Usage:
23428#  pnorm:
23429#       ( )  a0: pointer to working bcd value
23430#       (*) fp0: mantissa accumulator
23431#       ( ) fp1: scaling factor - 10**(abs(exp))
23432#
23433pnorm:
23434        btst            &30,(%a0)               # test the sign of the exponent (SE)
23435        beq.b           mul                     # if clear, go to multiply
23436div:
23437        fdiv.x          %fp1,%fp0               # exp is negative, so divide mant by scale in fp1
23438        bra.b           end_dec                 # skip the multiply
23439mul:
23440        fmul.x          %fp1,%fp0               # exp is positive, so multiply mant by scale in fp1
23441#
23442#
23443# Clean up and return with result in fp0.
23444#
23445# If the final mul/div in decbin incurred an inex exception,
23446# it will be inex2, but will be reported as inex1 by get_op.
23447#
23448end_dec:
23449        fmov.l          %fpsr,%d0               # get status register
23450        bclr            &inex2_bit+8,%d0        # test for inex2 and clear it in d0
23451        beq.b           no_exc                  # skip this if inex2 was not set
23452        ori.w           &inx1a_mask,2+USER_FPSR(%a6) # set INEX1/AINEX
23453no_exc:
23454        add.l           &0x4,%sp                # clear 1 lw param
23455        fmovm.x         (%sp)+,&0x40            # restore fp1
23456        movm.l          (%sp)+,&0x3c            # restore d2-d5
23457        fmov.l          &0x0,%fpcr              # zero the FPCR
23458        fmov.l          &0x0,%fpsr              # zero the FPSR
23459        rts
23460
23461#########################################################################
23462# bindec(): Converts an input in extended precision format to bcd format#
23463#                                                                       #
23464# INPUT *************************************************************** #
23465#       a0 = pointer to the input extended precision value in memory.   #
23466#            the input may be either normalized, unnormalized, or       #
23467#            denormalized.                                              #
23468#       d0 = contains the k-factor sign-extended to 32-bits.            #
23469#                                                                       #
23470# OUTPUT ************************************************************** #
23471#       FP_SCR0(a6) = bcd format result on the stack.                   #
23472#                                                                       #
23473# ALGORITHM *********************************************************** #
23474#                                                                       #
23475#       A1.     Set RM and size ext;  Set SIGMA = sign of input.        #
23476#               The k-factor is saved for use in d7. Clear the          #
23477#               BINDEC_FLG for separating normalized/denormalized       #
23478#               input.  If input is unnormalized or denormalized,       #
23479#               normalize it.                                           #
23480#                                                                       #
23481#       A2.     Set X = abs(input).                                     #
23482#                                                                       #
23483#       A3.     Compute ILOG.                                           #
23484#               ILOG is the log base 10 of the input value.  It is      #
23485#               approximated by adding e + 0.f when the original        #
23486#               value is viewed as 2^^e * 1.f in extended precision.    #
23487#               This value is stored in d6.                             #
23488#                                                                       #
23489#       A4.     Clr INEX bit.                                           #
23490#               The operation in A3 above may have set INEX2.           #
23491#                                                                       #
23492#       A5.     Set ICTR = 0;                                           #
23493#               ICTR is a flag used in A13.  It must be set before the  #
23494#               loop entry A6.                                          #
23495#                                                                       #
23496#       A6.     Calculate LEN.                                          #
23497#               LEN is the number of digits to be displayed.  The       #
23498#               k-factor can dictate either the total number of digits, #
23499#               if it is a positive number, or the number of digits     #
23500#               after the decimal point which are to be included as     #
23501#               significant.  See the 68882 manual for examples.        #
23502#               If LEN is computed to be greater than 17, set OPERR in  #
23503#               USER_FPSR.  LEN is stored in d4.                        #
23504#                                                                       #
23505#       A7.     Calculate SCALE.                                        #
23506#               SCALE is equal to 10^ISCALE, where ISCALE is the number #
23507#               of decimal places needed to insure LEN integer digits   #
23508#               in the output before conversion to bcd. LAMBDA is the   #
23509#               sign of ISCALE, used in A9. Fp1 contains                #
23510#               10^^(abs(ISCALE)) using a rounding mode which is a      #
23511#               function of the original rounding mode and the signs    #
23512#               of ISCALE and X.  A table is given in the code.         #
23513#                                                                       #
23514#       A8.     Clr INEX; Force RZ.                                     #
23515#               The operation in A7 above may have set INEX2.           #
23516#               RZ mode is forced for the scaling operation to insure   #
23517#               only one rounding error.  The grs bits are collected in #
23518#               the INEX flag for use in A10.                           #
23519#                                                                       #
23520#       A9.     Scale X -> Y.                                           #
23521#               The mantissa is scaled to the desired number of         #
23522#               significant digits.  The excess digits are collected    #
23523#               in INEX2.                                               #
23524#                                                                       #
23525#       A10.    Or in INEX.                                             #
23526#               If INEX is set, round error occurred.  This is          #
23527#               compensated for by 'or-ing' in the INEX2 flag to        #
23528#               the lsb of Y.                                           #
23529#                                                                       #
23530#       A11.    Restore original FPCR; set size ext.                    #
23531#               Perform FINT operation in the user's rounding mode.     #
23532#               Keep the size to extended.                              #
23533#                                                                       #
23534#       A12.    Calculate YINT = FINT(Y) according to user's rounding   #
23535#               mode.  The FPSP routine sintd0 is used.  The output     #
23536#               is in fp0.                                              #
23537#                                                                       #
23538#       A13.    Check for LEN digits.                                   #
23539#               If the int operation results in more than LEN digits,   #
23540#               or less than LEN -1 digits, adjust ILOG and repeat from #
23541#               A6.  This test occurs only on the first pass.  If the   #
23542#               result is exactly 10^LEN, decrement ILOG and divide     #
23543#               the mantissa by 10.                                     #
23544#                                                                       #
23545#       A14.    Convert the mantissa to bcd.                            #
23546#               The binstr routine is used to convert the LEN digit     #
23547#               mantissa to bcd in memory.  The input to binstr is      #
23548#               to be a fraction; i.e. (mantissa)/10^LEN and adjusted   #
23549#               such that the decimal point is to the left of bit 63.   #
23550#               The bcd digits are stored in the correct position in    #
23551#               the final string area in memory.                        #
23552#                                                                       #
23553#       A15.    Convert the exponent to bcd.                            #
23554#               As in A14 above, the exp is converted to bcd and the    #
23555#               digits are stored in the final string.                  #
23556#               Test the length of the final exponent string.  If the   #
23557#               length is 4, set operr.                                 #
23558#                                                                       #
23559#       A16.    Write sign bits to final string.                        #
23560#                                                                       #
23561#########################################################################
23562
23563set     BINDEC_FLG,     EXC_TEMP        # DENORM flag
23564
23565# Constants in extended precision (first three lwords; 4th lword is zero)
23566PLOG2:
23567        long            0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000 # log10(2), approx 0.30103
23568PLOG2UP1:
23569        long            0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000 # PLOG2 with mantissa lsb + 1
23570
23571# Constants in single precision (first lword; remaining lwords are zero)
23572FONE:
23573        long            0x3F800000,0x00000000,0x00000000,0x00000000 # 1.0
23574FTWO:
23575        long            0x40000000,0x00000000,0x00000000,0x00000000 # 2.0
23576FTEN:
23577        long            0x41200000,0x00000000,0x00000000,0x00000000 # 10.0
23578F4933:
23579        long            0x459A2800,0x00000000,0x00000000,0x00000000 # 4933.0
23580
23581RBDTBL:
23582        byte            0,0,0,0 # initial RN: index = {FPCR[6:5],LAMBDA,SIGN(X)}; 0=RN
23583        byte            3,3,2,2 # initial RZ: 3=RP, 2=RM
23584        byte            3,2,2,3 # initial RM
23585        byte            2,3,3,2 # initial RP
23586
23587#       Implementation Notes:
23588#
23589#       The registers are used as follows:
23590#
23591#               d0: scratch; LEN input to binstr
23592#               d1: scratch
23593#               d2: upper 32-bits of mantissa for binstr
23594#               d3: scratch;lower 32-bits of mantissa for binstr
23595#               d4: LEN
23596#               d5: LAMBDA/ICTR
23597#               d6: ILOG
23598#               d7: k-factor
23599#               a0: ptr for original operand/final result
23600#               a1: scratch pointer
23601#               a2: pointer to FP_X; abs(original value) in ext
23602#               fp0: scratch
23603#               fp1: scratch
23604#               fp2: scratch
23605#               F_SCR1:
23606#               F_SCR2:
23607#               L_SCR1:
23608#               L_SCR2:
23609
23610        global          bindec
23611bindec:
23612        movm.l          &0x3f20,-(%sp)  #  {%d2-%d7/%a2}
23613        fmovm.x         &0x7,-(%sp)     #  {%fp0-%fp2}
23614
23615# A1. Set RM and size ext. Set SIGMA = sign input;
23616#     The k-factor is saved for use in d7.  Clear BINDEC_FLG for
23617#     separating  normalized/denormalized input.  If the input
23618#     is a denormalized number, set the BINDEC_FLG memory word
23619#     to signal denorm.  If the input is unnormalized, normalize
23620#     the input and test for denormalized result.
23621#
23622        fmov.l          &rm_mode*0x10,%fpcr     # set RM and ext
23623        mov.l           (%a0),L_SCR2(%a6)       # save exponent for sign check
23624        mov.l           %d0,%d7         # move k-factor to d7
23625
23626        clr.b           BINDEC_FLG(%a6) # clr norm/denorm flag
23627        cmpi.b          STAG(%a6),&DENORM # is input a DENORM?
23628        bne.w           A2_str          # no; input is a NORM
23629
23630#
23631# Normalize the denorm
23632#
23633un_de_norm:
23634        mov.w           (%a0),%d0
23635        and.w           &0x7fff,%d0     # strip sign of normalized exp
23636        mov.l           4(%a0),%d1
23637        mov.l           8(%a0),%d2
23638norm_loop:
23639        sub.w           &1,%d0
23640        lsl.l           &1,%d2
23641        roxl.l          &1,%d1
23642        tst.l           %d1
23643        bge.b           norm_loop
23644#
23645# Test if the normalized input is denormalized
23646#
23647        tst.w           %d0
23648        bgt.b           pos_exp         # if greater than zero, it is a norm
23649        st              BINDEC_FLG(%a6) # set flag for denorm
23650pos_exp:
23651        and.w           &0x7fff,%d0     # strip sign of normalized exp
23652        mov.w           %d0,(%a0)
23653        mov.l           %d1,4(%a0)
23654        mov.l           %d2,8(%a0)
23655
23656# A2. Set X = abs(input).
23657#
23658A2_str:
23659        mov.l           (%a0),FP_SCR1(%a6)      # move input to work space
23660        mov.l           4(%a0),FP_SCR1+4(%a6)   # move input to work space
23661        mov.l           8(%a0),FP_SCR1+8(%a6)   # move input to work space
23662        and.l           &0x7fffffff,FP_SCR1(%a6)        # create abs(X)
23663
23664# A3. Compute ILOG.
23665#     ILOG is the log base 10 of the input value.  It is approx-
23666#     imated by adding e + 0.f when the original value is viewed
23667#     as 2^^e * 1.f in extended precision.  This value is stored
23668#     in d6.
23669#
23670# Register usage:
23671#       Input/Output
23672#       d0: k-factor/exponent
23673#       d2: x/x
23674#       d3: x/x
23675#       d4: x/x
23676#       d5: x/x
23677#       d6: x/ILOG
23678#       d7: k-factor/Unchanged
23679#       a0: ptr for original operand/final result
23680#       a1: x/x
23681#       a2: x/x
23682#       fp0: x/float(ILOG)
23683#       fp1: x/x
23684#       fp2: x/x
23685#       F_SCR1:x/x
23686#       F_SCR2:Abs(X)/Abs(X) with $3fff exponent
23687#       L_SCR1:x/x
23688#       L_SCR2:first word of X packed/Unchanged
23689
23690        tst.b           BINDEC_FLG(%a6) # check for denorm
23691        beq.b           A3_cont         # if clr, continue with norm
23692        mov.l           &-4933,%d6      # force ILOG = -4933
23693        bra.b           A4_str
23694A3_cont:
23695        mov.w           FP_SCR1(%a6),%d0        # move exp to d0
23696        mov.w           &0x3fff,FP_SCR1(%a6)    # replace exponent with 0x3fff
23697        fmov.x          FP_SCR1(%a6),%fp0       # now fp0 has 1.f
23698        sub.w           &0x3fff,%d0     # strip off bias
23699        fadd.w          %d0,%fp0        # add in exp
23700        fsub.s          FONE(%pc),%fp0  # subtract off 1.0
23701        fbge.w          pos_res         # if pos, branch
23702        fmul.x          PLOG2UP1(%pc),%fp0      # if neg, mul by LOG2UP1
23703        fmov.l          %fp0,%d6        # put ILOG in d6 as a lword
23704        bra.b           A4_str          # go move out ILOG
23705pos_res:
23706        fmul.x          PLOG2(%pc),%fp0 # if pos, mul by LOG2
23707        fmov.l          %fp0,%d6        # put ILOG in d6 as a lword
23708
23709
23710# A4. Clr INEX bit.
23711#     The operation in A3 above may have set INEX2.
23712
23713A4_str:
23714        fmov.l          &0,%fpsr        # zero all of fpsr - nothing needed
23715
23716
23717# A5. Set ICTR = 0;
23718#     ICTR is a flag used in A13.  It must be set before the
23719#     loop entry A6. The lower word of d5 is used for ICTR.
23720
23721        clr.w           %d5             # clear ICTR
23722
23723# A6. Calculate LEN.
23724#     LEN is the number of digits to be displayed.  The k-factor
23725#     can dictate either the total number of digits, if it is
23726#     a positive number, or the number of digits after the
23727#     original decimal point which are to be included as
23728#     significant.  See the 68882 manual for examples.
23729#     If LEN is computed to be greater than 17, set OPERR in
23730#     USER_FPSR.  LEN is stored in d4.
23731#
23732# Register usage:
23733#       Input/Output
23734#       d0: exponent/Unchanged
23735#       d2: x/x/scratch
23736#       d3: x/x
23737#       d4: exc picture/LEN
23738#       d5: ICTR/Unchanged
23739#       d6: ILOG/Unchanged
23740#       d7: k-factor/Unchanged
23741#       a0: ptr for original operand/final result
23742#       a1: x/x
23743#       a2: x/x
23744#       fp0: float(ILOG)/Unchanged
23745#       fp1: x/x
23746#       fp2: x/x
23747#       F_SCR1:x/x
23748#       F_SCR2:Abs(X) with $3fff exponent/Unchanged
23749#       L_SCR1:x/x
23750#       L_SCR2:first word of X packed/Unchanged
23751
23752A6_str:
23753        tst.l           %d7             # branch on sign of k
23754        ble.b           k_neg           # if k <= 0, LEN = ILOG + 1 - k
23755        mov.l           %d7,%d4         # if k > 0, LEN = k
23756        bra.b           len_ck          # skip to LEN check
23757k_neg:
23758        mov.l           %d6,%d4         # first load ILOG to d4
23759        sub.l           %d7,%d4         # subtract off k
23760        addq.l          &1,%d4          # add in the 1
23761len_ck:
23762        tst.l           %d4             # LEN check: branch on sign of LEN
23763        ble.b           LEN_ng          # if neg, set LEN = 1
23764        cmp.l           %d4,&17         # test if LEN > 17
23765        ble.b           A7_str          # if not, forget it
23766        mov.l           &17,%d4         # set max LEN = 17
23767        tst.l           %d7             # if negative, never set OPERR
23768        ble.b           A7_str          # if positive, continue
23769        or.l            &opaop_mask,USER_FPSR(%a6)      # set OPERR & AIOP in USER_FPSR
23770        bra.b           A7_str          # finished here
23771LEN_ng:
23772        mov.l           &1,%d4          # min LEN is 1
23773
23774
23775# A7. Calculate SCALE.
23776#     SCALE is equal to 10^ISCALE, where ISCALE is the number
23777#     of decimal places needed to insure LEN integer digits
23778#     in the output before conversion to bcd. LAMBDA is the sign
23779#     of ISCALE, used in A9.  Fp1 contains 10^^(abs(ISCALE)) using
23780#     the rounding mode as given in the following table (see
23781#     Coonen, p. 7.23 as ref.; however, the SCALE variable is
23782#     of opposite sign in bindec.sa from Coonen).
23783#
23784#       Initial                                 USE
23785#       FPCR[6:5]       LAMBDA  SIGN(X)         FPCR[6:5]
23786#       ----------------------------------------------
23787#        RN     00         0       0            00/0    RN
23788#        RN     00         0       1            00/0    RN
23789#        RN     00         1       0            00/0    RN
23790#        RN     00         1       1            00/0    RN
23791#        RZ     01         0       0            11/3    RP
23792#        RZ     01         0       1            11/3    RP
23793#        RZ     01         1       0            10/2    RM
23794#        RZ     01         1       1            10/2    RM
23795#        RM     10         0       0            11/3    RP
23796#        RM     10         0       1            10/2    RM
23797#        RM     10         1       0            10/2    RM
23798#        RM     10         1       1            11/3    RP
23799#        RP     11         0       0            10/2    RM
23800#        RP     11         0       1            11/3    RP
23801#        RP     11         1       0            11/3    RP
23802#        RP     11         1       1            10/2    RM
23803#
23804# Register usage:
23805#       Input/Output
23806#       d0: exponent/scratch - final is 0
23807#       d2: x/0 or 24 for A9
23808#       d3: x/scratch - offset ptr into PTENRM array
23809#       d4: LEN/Unchanged
23810#       d5: 0/ICTR:LAMBDA
23811#       d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
23812#       d7: k-factor/Unchanged
23813#       a0: ptr for original operand/final result
23814#       a1: x/ptr to PTENRM array
23815#       a2: x/x
23816#       fp0: float(ILOG)/Unchanged
23817#       fp1: x/10^ISCALE
23818#       fp2: x/x
23819#       F_SCR1:x/x
23820#       F_SCR2:Abs(X) with $3fff exponent/Unchanged
23821#       L_SCR1:x/x
23822#       L_SCR2:first word of X packed/Unchanged
23823
23824A7_str:
23825        tst.l           %d7             # test sign of k
23826        bgt.b           k_pos           # if pos and > 0, skip this
23827        cmp.l           %d7,%d6         # test k - ILOG
23828        blt.b           k_pos           # if ILOG >= k, skip this
23829        mov.l           %d7,%d6         # if ((k<0) & (ILOG < k)) ILOG = k
23830k_pos:
23831        mov.l           %d6,%d0         # calc ILOG + 1 - LEN in d0
23832        addq.l          &1,%d0          # add the 1
23833        sub.l           %d4,%d0         # sub off LEN
23834        swap            %d5             # use upper word of d5 for LAMBDA
23835        clr.w           %d5             # set it zero initially
23836        clr.w           %d2             # set up d2 for very small case
23837        tst.l           %d0             # test sign of ISCALE
23838        bge.b           iscale          # if pos, skip next inst
23839        addq.w          &1,%d5          # if neg, set LAMBDA true
23840        cmp.l           %d0,&0xffffecd4 # test iscale <= -4908
23841        bgt.b           no_inf          # if false, skip rest
23842        add.l           &24,%d0         # add in 24 to iscale
23843        mov.l           &24,%d2         # put 24 in d2 for A9
23844no_inf:
23845        neg.l           %d0             # and take abs of ISCALE
23846iscale:
23847        fmov.s          FONE(%pc),%fp1  # init fp1 to 1
23848        bfextu          USER_FPCR(%a6){&26:&2},%d1      # get initial rmode bits
23849        lsl.w           &1,%d1          # put them in bits 2:1
23850        add.w           %d5,%d1         # add in LAMBDA
23851        lsl.w           &1,%d1          # put them in bits 3:1
23852        tst.l           L_SCR2(%a6)     # test sign of original x
23853        bge.b           x_pos           # if pos, don't set bit 0
23854        addq.l          &1,%d1          # if neg, set bit 0
23855x_pos:
23856        lea.l           RBDTBL(%pc),%a2 # load rbdtbl base
23857        mov.b           (%a2,%d1),%d3   # load d3 with new rmode
23858        lsl.l           &4,%d3          # put bits in proper position
23859        fmov.l          %d3,%fpcr       # load bits into fpu
23860        lsr.l           &4,%d3          # put bits in proper position
23861        tst.b           %d3             # decode new rmode for pten table
23862        bne.b           not_rn          # if zero, it is RN
23863        lea.l           PTENRN(%pc),%a1 # load a1 with RN table base
23864        bra.b           rmode           # exit decode
23865not_rn:
23866        lsr.b           &1,%d3          # get lsb in carry
23867        bcc.b           not_rp2         # if carry clear, it is RM
23868        lea.l           PTENRP(%pc),%a1 # load a1 with RP table base
23869        bra.b           rmode           # exit decode
23870not_rp2:
23871        lea.l           PTENRM(%pc),%a1 # load a1 with RM table base
23872rmode:
23873        clr.l           %d3             # clr table index
23874e_loop2:
23875        lsr.l           &1,%d0          # shift next bit into carry
23876        bcc.b           e_next2         # if zero, skip the mul
23877        fmul.x          (%a1,%d3),%fp1  # mul by 10**(d3_bit_no)
23878e_next2:
23879        add.l           &12,%d3         # inc d3 to next pwrten table entry
23880        tst.l           %d0             # test if ISCALE is zero
23881        bne.b           e_loop2         # if not, loop
23882
23883# A8. Clr INEX; Force RZ.
23884#     The operation in A3 above may have set INEX2.
23885#     RZ mode is forced for the scaling operation to ensure
23886#     only one rounding error.  The grs bits are collected in
23887#     the INEX flag for use in A10.
23888#
23889# Register usage:
23890#       Input/Output
23891
23892        fmov.l          &0,%fpsr        # clr INEX
23893        fmov.l          &rz_mode*0x10,%fpcr     # set RZ rounding mode
23894
23895# A9. Scale X -> Y.
23896#     The mantissa is scaled to the desired number of significant
23897#     digits.  The excess digits are collected in INEX2.  If scaling
23898#     by mul, check d2 for an excess power-of-ten value.  If not zero,
23899#     the iscale value would have caused the pwrten calculation
23900#     to overflow.  Only a negative iscale can cause this, so
23901#     multiply by 10^(d2), which is now only allowed to be 24,
23902#     with a multiply by 10^8 and 10^16, which is exact since
23903#     10^24 is exact.  If the input was denormalized, we must
23904#     create a busy stack frame with the mul command and the
23905#     two operands, and allow the fpu to complete the multiply.
23906#
23907# Register usage:
23908#       Input/Output
23909#       d0: FPCR with RZ mode/Unchanged
23910#       d2: 0 or 24/unchanged
23911#       d3: x/x
23912#       d4: LEN/Unchanged
23913#       d5: ICTR:LAMBDA
23914#       d6: ILOG/Unchanged
23915#       d7: k-factor/Unchanged
23916#       a0: ptr for original operand/final result
23917#       a1: ptr to PTENRM array/Unchanged
23918#       a2: x/x
23919#       fp0: float(ILOG)/X adjusted for SCALE (Y)
23920#       fp1: 10^ISCALE/Unchanged
23921#       fp2: x/x
23922#       F_SCR1:x/x
23923#       F_SCR2:Abs(X) with $3fff exponent/Unchanged
23924#       L_SCR1:x/x
23925#       L_SCR2:first word of X packed/Unchanged
23926
23927A9_str:
23928        fmov.x          (%a0),%fp0      # load X from memory
23929        fabs.x          %fp0            # use abs(X)
23930        tst.w           %d5             # LAMBDA is in lower word of d5
23931        bne.b           sc_mul          # if neg (LAMBDA = 1), scale by mul
23932        fdiv.x          %fp1,%fp0       # calculate X / SCALE -> Y to fp0
23933        bra.w           A10_st          # branch to A10
23934
23935sc_mul:
23936        tst.b           BINDEC_FLG(%a6) # check for denorm
23937        beq.w           A9_norm         # if norm, continue with mul
23938
23939# for DENORM, we must calculate:
23940#       fp0 = input_op * 10^ISCALE * 10^24
23941# since the input operand is a DENORM, we can't multiply it directly.
23942# so, we do the multiplication of the exponents and mantissas separately.
23943# in this way, we avoid underflow on intermediate stages of the
23944# multiplication and guarantee a result without exception.
23945        fmovm.x         &0x2,-(%sp)     # save 10^ISCALE to stack
23946
23947        mov.w           (%sp),%d3       # grab exponent
23948        andi.w          &0x7fff,%d3     # clear sign
23949        ori.w           &0x8000,(%a0)   # make DENORM exp negative
23950        add.w           (%a0),%d3       # add DENORM exp to 10^ISCALE exp
23951        subi.w          &0x3fff,%d3     # subtract BIAS
23952        add.w           36(%a1),%d3
23953        subi.w          &0x3fff,%d3     # subtract BIAS
23954        add.w           48(%a1),%d3
23955        subi.w          &0x3fff,%d3     # subtract BIAS
23956
23957        bmi.w           sc_mul_err      # is result is DENORM, punt!!!
23958
23959        andi.w          &0x8000,(%sp)   # keep sign
23960        or.w            %d3,(%sp)       # insert new exponent
23961        andi.w          &0x7fff,(%a0)   # clear sign bit on DENORM again
23962        mov.l           0x8(%a0),-(%sp) # put input op mantissa on stk
23963        mov.l           0x4(%a0),-(%sp)
23964        mov.l           &0x3fff0000,-(%sp) # force exp to zero
23965        fmovm.x         (%sp)+,&0x80    # load normalized DENORM into fp0
23966        fmul.x          (%sp)+,%fp0
23967
23968#       fmul.x  36(%a1),%fp0    # multiply fp0 by 10^8
23969#       fmul.x  48(%a1),%fp0    # multiply fp0 by 10^16
23970        mov.l           36+8(%a1),-(%sp) # get 10^8 mantissa
23971        mov.l           36+4(%a1),-(%sp)
23972        mov.l           &0x3fff0000,-(%sp) # force exp to zero
23973        mov.l           48+8(%a1),-(%sp) # get 10^16 mantissa
23974        mov.l           48+4(%a1),-(%sp)
23975        mov.l           &0x3fff0000,-(%sp)# force exp to zero
23976        fmul.x          (%sp)+,%fp0     # multiply fp0 by 10^8
23977        fmul.x          (%sp)+,%fp0     # multiply fp0 by 10^16
23978        bra.b           A10_st
23979
23980sc_mul_err:
23981        bra.b           sc_mul_err
23982
23983A9_norm:
23984        tst.w           %d2             # test for small exp case
23985        beq.b           A9_con          # if zero, continue as normal
23986        fmul.x          36(%a1),%fp0    # multiply fp0 by 10^8
23987        fmul.x          48(%a1),%fp0    # multiply fp0 by 10^16
23988A9_con:
23989        fmul.x          %fp1,%fp0       # calculate X * SCALE -> Y to fp0
23990
23991# A10. Or in INEX.
23992#      If INEX is set, round error occurred.  This is compensated
23993#      for by 'or-ing' in the INEX2 flag to the lsb of Y.
23994#
23995# Register usage:
23996#       Input/Output
23997#       d0: FPCR with RZ mode/FPSR with INEX2 isolated
23998#       d2: x/x
23999#       d3: x/x
24000#       d4: LEN/Unchanged
24001#       d5: ICTR:LAMBDA
24002#       d6: ILOG/Unchanged
24003#       d7: k-factor/Unchanged
24004#       a0: ptr for original operand/final result
24005#       a1: ptr to PTENxx array/Unchanged
24006#       a2: x/ptr to FP_SCR1(a6)
24007#       fp0: Y/Y with lsb adjusted
24008#       fp1: 10^ISCALE/Unchanged
24009#       fp2: x/x
24010
24011A10_st:
24012        fmov.l          %fpsr,%d0       # get FPSR
24013        fmov.x          %fp0,FP_SCR1(%a6)       # move Y to memory
24014        lea.l           FP_SCR1(%a6),%a2        # load a2 with ptr to FP_SCR1
24015        btst            &9,%d0          # check if INEX2 set
24016        beq.b           A11_st          # if clear, skip rest
24017        or.l            &1,8(%a2)       # or in 1 to lsb of mantissa
24018        fmov.x          FP_SCR1(%a6),%fp0       # write adjusted Y back to fpu
24019
24020
24021# A11. Restore original FPCR; set size ext.
24022#      Perform FINT operation in the user's rounding mode.  Keep
24023#      the size to extended.  The sintdo entry point in the sint
24024#      routine expects the FPCR value to be in USER_FPCR for
24025#      mode and precision.  The original FPCR is saved in L_SCR1.
24026
24027A11_st:
24028        mov.l           USER_FPCR(%a6),L_SCR1(%a6)      # save it for later
24029        and.l           &0x00000030,USER_FPCR(%a6)      # set size to ext,
24030#                                       ;block exceptions
24031
24032
24033# A12. Calculate YINT = FINT(Y) according to user's rounding mode.
24034#      The FPSP routine sintdo is used.  The output is in fp0.
24035#
24036# Register usage:
24037#       Input/Output
24038#       d0: FPSR with AINEX cleared/FPCR with size set to ext
24039#       d2: x/x/scratch
24040#       d3: x/x
24041#       d4: LEN/Unchanged
24042#       d5: ICTR:LAMBDA/Unchanged
24043#       d6: ILOG/Unchanged
24044#       d7: k-factor/Unchanged
24045#       a0: ptr for original operand/src ptr for sintdo
24046#       a1: ptr to PTENxx array/Unchanged
24047#       a2: ptr to FP_SCR1(a6)/Unchanged
24048#       a6: temp pointer to FP_SCR1(a6) - orig value saved and restored
24049#       fp0: Y/YINT
24050#       fp1: 10^ISCALE/Unchanged
24051#       fp2: x/x
24052#       F_SCR1:x/x
24053#       F_SCR2:Y adjusted for inex/Y with original exponent
24054#       L_SCR1:x/original USER_FPCR
24055#       L_SCR2:first word of X packed/Unchanged
24056
24057A12_st:
24058        movm.l  &0xc0c0,-(%sp)  # save regs used by sintd0       {%d0-%d1/%a0-%a1}
24059        mov.l   L_SCR1(%a6),-(%sp)
24060        mov.l   L_SCR2(%a6),-(%sp)
24061
24062        lea.l           FP_SCR1(%a6),%a0        # a0 is ptr to FP_SCR1(a6)
24063        fmov.x          %fp0,(%a0)      # move Y to memory at FP_SCR1(a6)
24064        tst.l           L_SCR2(%a6)     # test sign of original operand
24065        bge.b           do_fint12               # if pos, use Y
24066        or.l            &0x80000000,(%a0)       # if neg, use -Y
24067do_fint12:
24068        mov.l   USER_FPSR(%a6),-(%sp)
24069#       bsr     sintdo          # sint routine returns int in fp0
24070
24071        fmov.l  USER_FPCR(%a6),%fpcr
24072        fmov.l  &0x0,%fpsr                      # clear the AEXC bits!!!
24073##      mov.l           USER_FPCR(%a6),%d0      # ext prec/keep rnd mode
24074##      andi.l          &0x00000030,%d0
24075##      fmov.l          %d0,%fpcr
24076        fint.x          FP_SCR1(%a6),%fp0       # do fint()
24077        fmov.l  %fpsr,%d0
24078        or.w    %d0,FPSR_EXCEPT(%a6)
24079##      fmov.l          &0x0,%fpcr
24080##      fmov.l          %fpsr,%d0               # don't keep ccodes
24081##      or.w            %d0,FPSR_EXCEPT(%a6)
24082
24083        mov.b   (%sp),USER_FPSR(%a6)
24084        add.l   &4,%sp
24085
24086        mov.l   (%sp)+,L_SCR2(%a6)
24087        mov.l   (%sp)+,L_SCR1(%a6)
24088        movm.l  (%sp)+,&0x303   # restore regs used by sint      {%d0-%d1/%a0-%a1}
24089
24090        mov.l   L_SCR2(%a6),FP_SCR1(%a6)        # restore original exponent
24091        mov.l   L_SCR1(%a6),USER_FPCR(%a6)      # restore user's FPCR
24092
24093# A13. Check for LEN digits.
24094#      If the int operation results in more than LEN digits,
24095#      or less than LEN -1 digits, adjust ILOG and repeat from
24096#      A6.  This test occurs only on the first pass.  If the
24097#      result is exactly 10^LEN, decrement ILOG and divide
24098#      the mantissa by 10.  The calculation of 10^LEN cannot
24099#      be inexact, since all powers of ten up to 10^27 are exact
24100#      in extended precision, so the use of a previous power-of-ten
24101#      table will introduce no error.
24102#
24103#
24104# Register usage:
24105#       Input/Output
24106#       d0: FPCR with size set to ext/scratch final = 0
24107#       d2: x/x
24108#       d3: x/scratch final = x
24109#       d4: LEN/LEN adjusted
24110#       d5: ICTR:LAMBDA/LAMBDA:ICTR
24111#       d6: ILOG/ILOG adjusted
24112#       d7: k-factor/Unchanged
24113#       a0: pointer into memory for packed bcd string formation
24114#       a1: ptr to PTENxx array/Unchanged
24115#       a2: ptr to FP_SCR1(a6)/Unchanged
24116#       fp0: int portion of Y/abs(YINT) adjusted
24117#       fp1: 10^ISCALE/Unchanged
24118#       fp2: x/10^LEN
24119#       F_SCR1:x/x
24120#       F_SCR2:Y with original exponent/Unchanged
24121#       L_SCR1:original USER_FPCR/Unchanged
24122#       L_SCR2:first word of X packed/Unchanged
24123
24124A13_st:
24125        swap            %d5             # put ICTR in lower word of d5
24126        tst.w           %d5             # check if ICTR = 0
24127        bne             not_zr          # if non-zero, go to second test
24128#
24129# Compute 10^(LEN-1)
24130#
24131        fmov.s          FONE(%pc),%fp2  # init fp2 to 1.0
24132        mov.l           %d4,%d0         # put LEN in d0
24133        subq.l          &1,%d0          # d0 = LEN -1
24134        clr.l           %d3             # clr table index
24135l_loop:
24136        lsr.l           &1,%d0          # shift next bit into carry
24137        bcc.b           l_next          # if zero, skip the mul
24138        fmul.x          (%a1,%d3),%fp2  # mul by 10**(d3_bit_no)
24139l_next:
24140        add.l           &12,%d3         # inc d3 to next pwrten table entry
24141        tst.l           %d0             # test if LEN is zero
24142        bne.b           l_loop          # if not, loop
24143#
24144# 10^(LEN-1) is computed for this test and A14.  If the input was
24145# denormalized, check only the case in which YINT > 10^LEN.
24146#
24147        tst.b           BINDEC_FLG(%a6) # check if input was norm
24148        beq.b           A13_con         # if norm, continue with checking
24149        fabs.x          %fp0            # take abs of YINT
24150        bra             test_2
24151#
24152# Compare abs(YINT) to 10^(LEN-1) and 10^LEN
24153#
24154A13_con:
24155        fabs.x          %fp0            # take abs of YINT
24156        fcmp.x          %fp0,%fp2       # compare abs(YINT) with 10^(LEN-1)
24157        fbge.w          test_2          # if greater, do next test
24158        subq.l          &1,%d6          # subtract 1 from ILOG
24159        mov.w           &1,%d5          # set ICTR
24160        fmov.l          &rm_mode*0x10,%fpcr     # set rmode to RM
24161        fmul.s          FTEN(%pc),%fp2  # compute 10^LEN
24162        bra.w           A6_str          # return to A6 and recompute YINT
24163test_2:
24164        fmul.s          FTEN(%pc),%fp2  # compute 10^LEN
24165        fcmp.x          %fp0,%fp2       # compare abs(YINT) with 10^LEN
24166        fblt.w          A14_st          # if less, all is ok, go to A14
24167        fbgt.w          fix_ex          # if greater, fix and redo
24168        fdiv.s          FTEN(%pc),%fp0  # if equal, divide by 10
24169        addq.l          &1,%d6          # and inc ILOG
24170        bra.b           A14_st          # and continue elsewhere
24171fix_ex:
24172        addq.l          &1,%d6          # increment ILOG by 1
24173        mov.w           &1,%d5          # set ICTR
24174        fmov.l          &rm_mode*0x10,%fpcr     # set rmode to RM
24175        bra.w           A6_str          # return to A6 and recompute YINT
24176#
24177# Since ICTR <> 0, we have already been through one adjustment,
24178# and shouldn't have another; this is to check if abs(YINT) = 10^LEN.
24179# 10^LEN is again computed using whatever table is in a1 since the
24180# value calculated cannot be inexact.
24181#
24182not_zr:
24183        fmov.s          FONE(%pc),%fp2  # init fp2 to 1.0
24184        mov.l           %d4,%d0         # put LEN in d0
24185        clr.l           %d3             # clr table index
24186z_loop:
24187        lsr.l           &1,%d0          # shift next bit into carry
24188        bcc.b           z_next          # if zero, skip the mul
24189        fmul.x          (%a1,%d3),%fp2  # mul by 10**(d3_bit_no)
24190z_next:
24191        add.l           &12,%d3         # inc d3 to next pwrten table entry
24192        tst.l           %d0             # test if LEN is zero
24193        bne.b           z_loop          # if not, loop
24194        fabs.x          %fp0            # get abs(YINT)
24195        fcmp.x          %fp0,%fp2       # check if abs(YINT) = 10^LEN
24196        fbneq.w         A14_st          # if not, skip this
24197        fdiv.s          FTEN(%pc),%fp0  # divide abs(YINT) by 10
24198        addq.l          &1,%d6          # and inc ILOG by 1
24199        addq.l          &1,%d4          # and inc LEN
24200        fmul.s          FTEN(%pc),%fp2  # if LEN++, the get 10^^LEN
24201
24202# A14. Convert the mantissa to bcd.
24203#      The binstr routine is used to convert the LEN digit
24204#      mantissa to bcd in memory.  The input to binstr is
24205#      to be a fraction; i.e. (mantissa)/10^LEN and adjusted
24206#      such that the decimal point is to the left of bit 63.
24207#      The bcd digits are stored in the correct position in
24208#      the final string area in memory.
24209#
24210#
24211# Register usage:
24212#       Input/Output
24213#       d0: x/LEN call to binstr - final is 0
24214#       d1: x/0
24215#       d2: x/ms 32-bits of mant of abs(YINT)
24216#       d3: x/ls 32-bits of mant of abs(YINT)
24217#       d4: LEN/Unchanged
24218#       d5: ICTR:LAMBDA/LAMBDA:ICTR
24219#       d6: ILOG
24220#       d7: k-factor/Unchanged
24221#       a0: pointer into memory for packed bcd string formation
24222#           /ptr to first mantissa byte in result string
24223#       a1: ptr to PTENxx array/Unchanged
24224#       a2: ptr to FP_SCR1(a6)/Unchanged
24225#       fp0: int portion of Y/abs(YINT) adjusted
24226#       fp1: 10^ISCALE/Unchanged
24227#       fp2: 10^LEN/Unchanged
24228#       F_SCR1:x/Work area for final result
24229#       F_SCR2:Y with original exponent/Unchanged
24230#       L_SCR1:original USER_FPCR/Unchanged
24231#       L_SCR2:first word of X packed/Unchanged
24232
24233A14_st:
24234        fmov.l          &rz_mode*0x10,%fpcr     # force rz for conversion
24235        fdiv.x          %fp2,%fp0       # divide abs(YINT) by 10^LEN
24236        lea.l           FP_SCR0(%a6),%a0
24237        fmov.x          %fp0,(%a0)      # move abs(YINT)/10^LEN to memory
24238        mov.l           4(%a0),%d2      # move 2nd word of FP_RES to d2
24239        mov.l           8(%a0),%d3      # move 3rd word of FP_RES to d3
24240        clr.l           4(%a0)          # zero word 2 of FP_RES
24241        clr.l           8(%a0)          # zero word 3 of FP_RES
24242        mov.l           (%a0),%d0       # move exponent to d0
24243        swap            %d0             # put exponent in lower word
24244        beq.b           no_sft          # if zero, don't shift
24245        sub.l           &0x3ffd,%d0     # sub bias less 2 to make fract
24246        tst.l           %d0             # check if > 1
24247        bgt.b           no_sft          # if so, don't shift
24248        neg.l           %d0             # make exp positive
24249m_loop:
24250        lsr.l           &1,%d2          # shift d2:d3 right, add 0s
24251        roxr.l          &1,%d3          # the number of places
24252        dbf.w           %d0,m_loop      # given in d0
24253no_sft:
24254        tst.l           %d2             # check for mantissa of zero
24255        bne.b           no_zr           # if not, go on
24256        tst.l           %d3             # continue zero check
24257        beq.b           zer_m           # if zero, go directly to binstr
24258no_zr:
24259        clr.l           %d1             # put zero in d1 for addx
24260        add.l           &0x00000080,%d3 # inc at bit 7
24261        addx.l          %d1,%d2         # continue inc
24262        and.l           &0xffffff80,%d3 # strip off lsb not used by 882
24263zer_m:
24264        mov.l           %d4,%d0         # put LEN in d0 for binstr call
24265        addq.l          &3,%a0          # a0 points to M16 byte in result
24266        bsr             binstr          # call binstr to convert mant
24267
24268
24269# A15. Convert the exponent to bcd.
24270#      As in A14 above, the exp is converted to bcd and the
24271#      digits are stored in the final string.
24272#
24273#      Digits are stored in L_SCR1(a6) on return from binstr as:
24274#
24275#        32               16 15                0
24276#       -----------------------------------------
24277#       |  0 | e3 | e2 | e1 | e4 |  X |  X |  X |
24278#       -----------------------------------------
24279#
24280# And are moved into their proper places in FP_SCR0.  If digit e4
24281# is non-zero, OPERR is signaled.  In all cases, all 4 digits are
24282# written as specified in the 881/882 manual for packed decimal.
24283#
24284# Register usage:
24285#       Input/Output
24286#       d0: x/LEN call to binstr - final is 0
24287#       d1: x/scratch (0);shift count for final exponent packing
24288#       d2: x/ms 32-bits of exp fraction/scratch
24289#       d3: x/ls 32-bits of exp fraction
24290#       d4: LEN/Unchanged
24291#       d5: ICTR:LAMBDA/LAMBDA:ICTR
24292#       d6: ILOG
24293#       d7: k-factor/Unchanged
24294#       a0: ptr to result string/ptr to L_SCR1(a6)
24295#       a1: ptr to PTENxx array/Unchanged
24296#       a2: ptr to FP_SCR1(a6)/Unchanged
24297#       fp0: abs(YINT) adjusted/float(ILOG)
24298#       fp1: 10^ISCALE/Unchanged
24299#       fp2: 10^LEN/Unchanged
24300#       F_SCR1:Work area for final result/BCD result
24301#       F_SCR2:Y with original exponent/ILOG/10^4
24302#       L_SCR1:original USER_FPCR/Exponent digits on return from binstr
24303#       L_SCR2:first word of X packed/Unchanged
24304
24305A15_st:
24306        tst.b           BINDEC_FLG(%a6) # check for denorm
24307        beq.b           not_denorm
24308        ftest.x         %fp0            # test for zero
24309        fbeq.w          den_zero        # if zero, use k-factor or 4933
24310        fmov.l          %d6,%fp0        # float ILOG
24311        fabs.x          %fp0            # get abs of ILOG
24312        bra.b           convrt
24313den_zero:
24314        tst.l           %d7             # check sign of the k-factor
24315        blt.b           use_ilog        # if negative, use ILOG
24316        fmov.s          F4933(%pc),%fp0 # force exponent to 4933
24317        bra.b           convrt          # do it
24318use_ilog:
24319        fmov.l          %d6,%fp0        # float ILOG
24320        fabs.x          %fp0            # get abs of ILOG
24321        bra.b           convrt
24322not_denorm:
24323        ftest.x         %fp0            # test for zero
24324        fbneq.w         not_zero        # if zero, force exponent
24325        fmov.s          FONE(%pc),%fp0  # force exponent to 1
24326        bra.b           convrt          # do it
24327not_zero:
24328        fmov.l          %d6,%fp0        # float ILOG
24329        fabs.x          %fp0            # get abs of ILOG
24330convrt:
24331        fdiv.x          24(%a1),%fp0    # compute ILOG/10^4
24332        fmov.x          %fp0,FP_SCR1(%a6)       # store fp0 in memory
24333        mov.l           4(%a2),%d2      # move word 2 to d2
24334        mov.l           8(%a2),%d3      # move word 3 to d3
24335        mov.w           (%a2),%d0       # move exp to d0
24336        beq.b           x_loop_fin      # if zero, skip the shift
24337        sub.w           &0x3ffd,%d0     # subtract off bias
24338        neg.w           %d0             # make exp positive
24339x_loop:
24340        lsr.l           &1,%d2          # shift d2:d3 right
24341        roxr.l          &1,%d3          # the number of places
24342        dbf.w           %d0,x_loop      # given in d0
24343x_loop_fin:
24344        clr.l           %d1             # put zero in d1 for addx
24345        add.l           &0x00000080,%d3 # inc at bit 6
24346        addx.l          %d1,%d2         # continue inc
24347        and.l           &0xffffff80,%d3 # strip off lsb not used by 882
24348        mov.l           &4,%d0          # put 4 in d0 for binstr call
24349        lea.l           L_SCR1(%a6),%a0 # a0 is ptr to L_SCR1 for exp digits
24350        bsr             binstr          # call binstr to convert exp
24351        mov.l           L_SCR1(%a6),%d0 # load L_SCR1 lword to d0
24352        mov.l           &12,%d1         # use d1 for shift count
24353        lsr.l           %d1,%d0         # shift d0 right by 12
24354        bfins           %d0,FP_SCR0(%a6){&4:&12}        # put e3:e2:e1 in FP_SCR0
24355        lsr.l           %d1,%d0         # shift d0 right by 12
24356        bfins           %d0,FP_SCR0(%a6){&16:&4}        # put e4 in FP_SCR0
24357        tst.b           %d0             # check if e4 is zero
24358        beq.b           A16_st          # if zero, skip rest
24359        or.l            &opaop_mask,USER_FPSR(%a6)      # set OPERR & AIOP in USER_FPSR
24360
24361
24362# A16. Write sign bits to final string.
24363#          Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
24364#
24365# Register usage:
24366#       Input/Output
24367#       d0: x/scratch - final is x
24368#       d2: x/x
24369#       d3: x/x
24370#       d4: LEN/Unchanged
24371#       d5: ICTR:LAMBDA/LAMBDA:ICTR
24372#       d6: ILOG/ILOG adjusted
24373#       d7: k-factor/Unchanged
24374#       a0: ptr to L_SCR1(a6)/Unchanged
24375#       a1: ptr to PTENxx array/Unchanged
24376#       a2: ptr to FP_SCR1(a6)/Unchanged
24377#       fp0: float(ILOG)/Unchanged
24378#       fp1: 10^ISCALE/Unchanged
24379#       fp2: 10^LEN/Unchanged
24380#       F_SCR1:BCD result with correct signs
24381#       F_SCR2:ILOG/10^4
24382#       L_SCR1:Exponent digits on return from binstr
24383#       L_SCR2:first word of X packed/Unchanged
24384
24385A16_st:
24386        clr.l           %d0             # clr d0 for collection of signs
24387        and.b           &0x0f,FP_SCR0(%a6)      # clear first nibble of FP_SCR0
24388        tst.l           L_SCR2(%a6)     # check sign of original mantissa
24389        bge.b           mant_p          # if pos, don't set SM
24390        mov.l           &2,%d0          # move 2 in to d0 for SM
24391mant_p:
24392        tst.l           %d6             # check sign of ILOG
24393        bge.b           wr_sgn          # if pos, don't set SE
24394        addq.l          &1,%d0          # set bit 0 in d0 for SE
24395wr_sgn:
24396        bfins           %d0,FP_SCR0(%a6){&0:&2} # insert SM and SE into FP_SCR0
24397
24398# Clean up and restore all registers used.
24399
24400        fmov.l          &0,%fpsr        # clear possible inex2/ainex bits
24401        fmovm.x         (%sp)+,&0xe0    #  {%fp0-%fp2}
24402        movm.l          (%sp)+,&0x4fc   #  {%d2-%d7/%a2}
24403        rts
24404
# PTENRN: table of powers of ten 10^(2^n) for n = 0..12, one 12-byte
# extended-precision constant (three longwords) per entry.  The first
# longword carries the exponent in its upper word; the next two longwords
# are the 64-bit mantissa.
#
# The rounding-mode decode ahead of the `rmode` label loads this table's
# base into a1 when the decoded mode is RN.  The e_loop2/l_loop/z_loop
# loops then walk it in 12-byte steps, multiplying in one entry per set
# bit of the decimal exponent.  Other code reads fixed offsets from a1:
# 24 (10^4), 36 (10^8) and 48 (10^16), so the entry order and size must
# not change.
24405        global          PTENRN
24406PTENRN:
24407        long            0x40020000,0xA0000000,0x00000000        # 10 ^ 1
24408        long            0x40050000,0xC8000000,0x00000000        # 10 ^ 2
24409        long            0x400C0000,0x9C400000,0x00000000        # 10 ^ 4
24410        long            0x40190000,0xBEBC2000,0x00000000        # 10 ^ 8
24411        long            0x40340000,0x8E1BC9BF,0x04000000        # 10 ^ 16
24412        long            0x40690000,0x9DC5ADA8,0x2B70B59E        # 10 ^ 32
24413        long            0x40D30000,0xC2781F49,0xFFCFA6D5        # 10 ^ 64
24414        long            0x41A80000,0x93BA47C9,0x80E98CE0        # 10 ^ 128
24415        long            0x43510000,0xAA7EEBFB,0x9DF9DE8E        # 10 ^ 256
24416        long            0x46A30000,0xE319A0AE,0xA60E91C7        # 10 ^ 512
24417        long            0x4D480000,0xC9767586,0x81750C17        # 10 ^ 1024
24418        long            0x5A920000,0x9E8B3B5D,0xC53D5DE5        # 10 ^ 2048
24419        long            0x75250000,0xC4605202,0x8A20979B        # 10 ^ 4096
24420
# PTENRP: table of powers of ten 10^(2^n) for n = 0..12, one 12-byte
# extended-precision constant (three longwords) per entry, in the same
# order and layout as PTENRN.
#
# The rounding-mode decode at `not_rn` loads this table's base into a1
# when the decoded mode is RP.  Compared with PTENRN, only the entries
# for 10^64 and 10^1024 differ: their least-significant mantissa longword
# is one greater (...A6D6 vs ...A6D5, ...0C18 vs ...0C17).
# NOTE(review): this looks like those inexact constants rounded toward
# plus infinity rather than to nearest -- confirm against a reference.
24421        global          PTENRP
24422PTENRP:
24423        long            0x40020000,0xA0000000,0x00000000        # 10 ^ 1
24424        long            0x40050000,0xC8000000,0x00000000        # 10 ^ 2
24425        long            0x400C0000,0x9C400000,0x00000000        # 10 ^ 4
24426        long            0x40190000,0xBEBC2000,0x00000000        # 10 ^ 8
24427        long            0x40340000,0x8E1BC9BF,0x04000000        # 10 ^ 16
24428        long            0x40690000,0x9DC5ADA8,0x2B70B59E        # 10 ^ 32
24429        long            0x40D30000,0xC2781F49,0xFFCFA6D6        # 10 ^ 64
24430        long            0x41A80000,0x93BA47C9,0x80E98CE0        # 10 ^ 128
24431        long            0x43510000,0xAA7EEBFB,0x9DF9DE8E        # 10 ^ 256
24432        long            0x46A30000,0xE319A0AE,0xA60E91C7        # 10 ^ 512
24433        long            0x4D480000,0xC9767586,0x81750C18        # 10 ^ 1024
24434        long            0x5A920000,0x9E8B3B5D,0xC53D5DE5        # 10 ^ 2048
24435        long            0x75250000,0xC4605202,0x8A20979B        # 10 ^ 4096
24436
# PTENRM: table of powers of ten 10^(2^n) for n = 0..12, one 12-byte
# extended-precision constant (three longwords) per entry, in the same
# order and layout as PTENRN.
#
# The rounding-mode decode at `not_rp2` loads this table's base into a1
# when the decoded mode is neither RN nor RP (the RM case).  Compared
# with PTENRN, the entries for 10^32, 10^128, 10^256, 10^512, 10^2048
# and 10^4096 have a least-significant mantissa longword that is one
# smaller; the remaining entries are identical.
# NOTE(review): this looks like those inexact constants rounded toward
# minus infinity rather than to nearest -- confirm against a reference.
24437        global          PTENRM
24438PTENRM:
24439        long            0x40020000,0xA0000000,0x00000000        # 10 ^ 1
24440        long            0x40050000,0xC8000000,0x00000000        # 10 ^ 2
24441        long            0x400C0000,0x9C400000,0x00000000        # 10 ^ 4
24442        long            0x40190000,0xBEBC2000,0x00000000        # 10 ^ 8
24443        long            0x40340000,0x8E1BC9BF,0x04000000        # 10 ^ 16
24444        long            0x40690000,0x9DC5ADA8,0x2B70B59D        # 10 ^ 32
24445        long            0x40D30000,0xC2781F49,0xFFCFA6D5        # 10 ^ 64
24446        long            0x41A80000,0x93BA47C9,0x80E98CDF        # 10 ^ 128
24447        long            0x43510000,0xAA7EEBFB,0x9DF9DE8D        # 10 ^ 256
24448        long            0x46A30000,0xE319A0AE,0xA60E91C6        # 10 ^ 512
24449        long            0x4D480000,0xC9767586,0x81750C17        # 10 ^ 1024
24450        long            0x5A920000,0x9E8B3B5D,0xC53D5DE4        # 10 ^ 2048
24451        long            0x75250000,0xC4605202,0x8A20979A        # 10 ^ 4096
24452
24453#########################################################################
24454# binstr(): Converts a 64-bit binary integer to bcd.                    #
24455#                                                                       #
24456# INPUT *************************************************************** #
24457#       d2:d3 = 64-bit binary integer                                   #
24458#       d0    = desired length (LEN)                                    #
24459#       a0    = pointer to start in memory for bcd characters           #
24460#               (This pointer must point to byte 4 of the first         #
24461#                lword of the packed decimal memory string.)            #
24462#                                                                       #
24463# OUTPUT ************************************************************** #
24464#       a0 = pointer to LEN bcd digits representing the 64-bit integer. #
24465#                                                                       #
24466# ALGORITHM *********************************************************** #
24467#       The 64-bit binary is assumed to have a decimal point before     #
24468#       bit 63.  The fraction is multiplied by 10 using a mul by 2      #
24469#       shift and a mul by 8 shift.  The bits shifted out of the        #
24470#       msb form a decimal digit.  This process is iterated until       #
24471#       LEN digits are formed.                                          #
24472#                                                                       #
24473# A1. Init d7 to 1.  D7 is the byte digit counter, and if 1, the        #
24474#     digit formed will be assumed the least significant.  This is      #
24475#     to force the first byte formed to have a 0 in the upper 4 bits.   #
24476#                                                                       #
24477# A2. Beginning of the loop:                                            #
24478#     Copy the fraction in d2:d3 to d4:d5.                              #
24479#                                                                       #
24480# A3. Multiply the fraction in d2:d3 by 8 using bit-field               #
24481#     extracts and shifts.  The three msbs from d2 will go into d1.     #
24482#                                                                       #
24483# A4. Multiply the fraction in d4:d5 by 2 using shifts.  The msb        #
24484#     will be collected by the carry.                                   #
24485#                                                                       #
24486# A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5      #
24487#     into d2:d3.  D1 will contain the bcd digit formed.                #
24488#                                                                       #
24489# A6. Test d7.  If zero, the digit formed is the ms digit.  If non-     #
24490#     zero, it is the ls digit.  Put the digit in its place in the      #
24491#     upper word of d7.  If it is the ls digit, write the byte          #
24492#     from d7 to memory.                                                #
24493#                                                                       #
24494# A7. Decrement d0 (LEN counter) and repeat the loop until zero.        #
24495#                                                                       #
24496#########################################################################
24497
24498#       Implementation Notes:
24499#
24500#       The registers are used as follows:
24501#
24502#               d0: LEN counter
24503#               d1: temp used to form the digit
24504#               d2: upper 32-bits of fraction for mul by 8
24505#               d3: lower 32-bits of fraction for mul by 8
24506#               d4: upper 32-bits of fraction for mul by 2
24507#               d5: lower 32-bits of fraction for mul by 2
24508#               d6: temp for bit-field extracts
24509#               d7: byte digit formation word;digit count {0,1}
24510#               a0: pointer into memory for packed bcd string formation
24511#
24512
24513        global          binstr
24514binstr:
24515        movm.l          &0xff00,-(%sp)  #  {%d0-%d7}
24516
24517#
24518# A1: Init d7
24519#
24520        mov.l           &1,%d7          # init d7 for second digit
24521        subq.l          &1,%d0          # for dbf d0 would have LEN+1 passes
24522#
24523# A2. Copy d2:d3 to d4:d5.  Start loop.
24524#
24525loop:
24526        mov.l           %d2,%d4         # copy the fraction before muls
24527        mov.l           %d3,%d5         # to d4:d5
24528#
24529# A3. Multiply d2:d3 by 8; extract msbs into d1.
24530#
24531        bfextu          %d2{&0:&3},%d1  # copy 3 msbs of d2 into d1
24532        asl.l           &3,%d2          # shift d2 left by 3 places
24533        bfextu          %d3{&0:&3},%d6  # copy 3 msbs of d3 into d6
24534        asl.l           &3,%d3          # shift d3 left by 3 places
24535        or.l            %d6,%d2         # or in msbs from d3 into d2
24536#
24537# A4. Multiply d4:d5 by 2; add carry out to d1.
24538#
24539        asl.l           &1,%d5          # mul d5 by 2
24540        roxl.l          &1,%d4          # mul d4 by 2
24541        swap            %d6             # put 0 in d6 lower word
24542        addx.w          %d6,%d1         # add in extend from mul by 2
24543#
24544# A5. Add mul by 8 to mul by 2.  D1 contains the digit formed.
24545#
24546        add.l           %d5,%d3         # add lower 32 bits
24547        nop                             # ERRATA FIX #13 (Rev. 1.2 6/6/90)
24548        addx.l          %d4,%d2         # add with extend upper 32 bits
24549        nop                             # ERRATA FIX #13 (Rev. 1.2 6/6/90)
24550        addx.w          %d6,%d1         # add in extend from add to d1
24551        swap            %d6             # with d6 = 0; put 0 in upper word
24552#
24553# A6. Test d7 and branch.
24554#
24555        tst.w           %d7             # if zero, store digit & to loop
24556        beq.b           first_d         # if non-zero, form byte & write
24557sec_d:
24558        swap            %d7             # bring first digit to word d7b
24559        asl.w           &4,%d7          # first digit in upper 4 bits d7b
24560        add.w           %d1,%d7         # add in ls digit to d7b
24561        mov.b           %d7,(%a0)+      # store d7b byte in memory
24562        swap            %d7             # put LEN counter in word d7a
24563        clr.w           %d7             # set d7a to signal no digits done
24564        dbf.w           %d0,loop        # do loop some more!
24565        bra.b           end_bstr        # finished, so exit
24566first_d:
24567        swap            %d7             # put digit word in d7b
24568        mov.w           %d1,%d7         # put new digit in d7b
24569        swap            %d7             # put LEN counter in word d7a
24570        addq.w          &1,%d7          # set d7a to signal first digit done
24571        dbf.w           %d0,loop        # do loop some more!
24572        swap            %d7             # put last digit in string
24573        lsl.w           &4,%d7          # move it to upper 4 bits
24574        mov.b           %d7,(%a0)+      # store it in memory string
24575#
24576# Clean up and return with result in fp0.
24577#
24578end_bstr:
24579        movm.l          (%sp)+,&0xff    #  {%d0-%d7}
24580        rts
24581
24582#########################################################################
24583# XDEF **************************************************************** #
24584#       facc_in_b(): dmem_read_byte failed                              #
24585#       facc_in_w(): dmem_read_word failed                              #
24586#       facc_in_l(): dmem_read_long failed                              #
24587#       facc_in_d(): dmem_read of dbl prec failed                       #
24588#       facc_in_x(): dmem_read of ext prec failed                       #
24589#                                                                       #
24590#       facc_out_b(): dmem_write_byte failed                            #
24591#       facc_out_w(): dmem_write_word failed                            #
24592#       facc_out_l(): dmem_write_long failed                            #
24593#       facc_out_d(): dmem_write of dbl prec failed                     #
24594#       facc_out_x(): dmem_write of ext prec failed                     #
24595#                                                                       #
24596# XREF **************************************************************** #
24597#       _real_access() - exit through access error handler              #
24598#                                                                       #
24599# INPUT *************************************************************** #
24600#       None                                                            #
24601#                                                                       #
24602# OUTPUT ************************************************************** #
24603#       None                                                            #
24604#                                                                       #
24605# ALGORITHM *********************************************************** #
24606#       Flow jumps here when an FP data fetch call gets an error        #
24607# result. This means the operating system wants an access error frame   #
24608# made out of the current exception stack frame.                        #
24609#       So, we first call restore() which makes sure that any updated   #
24610# -(an)+ register gets returned to its pre-exception value and then     #
24611# we change the stack to an access error stack frame.                   #
24612#                                                                       #
24613#########################################################################
24614
24615facc_in_b:
24616        movq.l          &0x1,%d0                        # one byte
24617        bsr.w           restore                         # fix An
24618
24619        mov.w           &0x0121,EXC_VOFF(%a6)           # set FSLW
24620        bra.w           facc_finish
24621
24622facc_in_w:
24623        movq.l          &0x2,%d0                        # two bytes
24624        bsr.w           restore                         # fix An
24625
24626        mov.w           &0x0141,EXC_VOFF(%a6)           # set FSLW
24627        bra.b           facc_finish
24628
24629facc_in_l:
24630        movq.l          &0x4,%d0                        # four bytes
24631        bsr.w           restore                         # fix An
24632
24633        mov.w           &0x0101,EXC_VOFF(%a6)           # set FSLW
24634        bra.b           facc_finish
24635
24636facc_in_d:
24637        movq.l          &0x8,%d0                        # eight bytes
24638        bsr.w           restore                         # fix An
24639
24640        mov.w           &0x0161,EXC_VOFF(%a6)           # set FSLW
24641        bra.b           facc_finish
24642
24643facc_in_x:
24644        movq.l          &0xc,%d0                        # twelve bytes
24645        bsr.w           restore                         # fix An
24646
24647        mov.w           &0x0161,EXC_VOFF(%a6)           # set FSLW
24648        bra.b           facc_finish
24649
24650################################################################
24651
24652facc_out_b:
24653        movq.l          &0x1,%d0                        # one byte
24654        bsr.w           restore                         # restore An
24655
24656        mov.w           &0x00a1,EXC_VOFF(%a6)           # set FSLW
24657        bra.b           facc_finish
24658
24659facc_out_w:
24660        movq.l          &0x2,%d0                        # two bytes
24661        bsr.w           restore                         # restore An
24662
24663        mov.w           &0x00c1,EXC_VOFF(%a6)           # set FSLW
24664        bra.b           facc_finish
24665
24666facc_out_l:
24667        movq.l          &0x4,%d0                        # four bytes
24668        bsr.w           restore                         # restore An
24669
24670        mov.w           &0x0081,EXC_VOFF(%a6)           # set FSLW
24671        bra.b           facc_finish
24672
24673facc_out_d:
24674        movq.l          &0x8,%d0                        # eight bytes
24675        bsr.w           restore                         # restore An
24676
24677        mov.w           &0x00e1,EXC_VOFF(%a6)           # set FSLW
24678        bra.b           facc_finish
24679
24680facc_out_x:
24681        mov.l           &0xc,%d0                        # twelve bytes
24682        bsr.w           restore                         # restore An
24683
24684        mov.w           &0x00e1,EXC_VOFF(%a6)           # set FSLW
24685
24686# here's where we actually create the access error frame from the
24687# current exception stack frame.
24688facc_finish:
24689        mov.l           USER_FPIAR(%a6),EXC_PC(%a6) # store current PC
24690
24691        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
24692        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
24693        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
24694
24695        unlk            %a6
24696
24697        mov.l           (%sp),-(%sp)            # store SR, hi(PC)
24698        mov.l           0x8(%sp),0x4(%sp)       # store lo(PC)
24699        mov.l           0xc(%sp),0x8(%sp)       # store EA
24700        mov.l           &0x00000001,0xc(%sp)    # store FSLW
24701        mov.w           0x6(%sp),0xc(%sp)       # fix FSLW (size)
24702        mov.w           &0x4008,0x6(%sp)        # store voff
24703
24704        btst            &0x5,(%sp)              # supervisor or user mode?
24705        beq.b           facc_out2               # user
24706        bset            &0x2,0xd(%sp)           # set supervisor TM bit
24707
24708facc_out2:
24709        bra.l           _real_access
24710
24711##################################################################
24712
24713# if the effective addressing mode was predecrement or postincrement,
24714# the emulation has already changed its value to the correct post-
24715# instruction value. but since we're exiting to the access error
24716# handler, then AN must be returned to its pre-instruction value.
24717# we do that here.
24718restore:
24719        mov.b           EXC_OPWORD+0x1(%a6),%d1
24720        andi.b          &0x38,%d1               # extract opmode
24721        cmpi.b          %d1,&0x18               # postinc?
24722        beq.w           rest_inc
24723        cmpi.b          %d1,&0x20               # predec?
24724        beq.w           rest_dec
24725        rts
24726
24727rest_inc:
24728        mov.b           EXC_OPWORD+0x1(%a6),%d1
24729        andi.w          &0x0007,%d1             # fetch An
24730
24731        mov.w           (tbl_rest_inc.b,%pc,%d1.w*2),%d1
24732        jmp             (tbl_rest_inc.b,%pc,%d1.w*1)
24733
24734tbl_rest_inc:
24735        short           ri_a0 - tbl_rest_inc
24736        short           ri_a1 - tbl_rest_inc
24737        short           ri_a2 - tbl_rest_inc
24738        short           ri_a3 - tbl_rest_inc
24739        short           ri_a4 - tbl_rest_inc
24740        short           ri_a5 - tbl_rest_inc
24741        short           ri_a6 - tbl_rest_inc
24742        short           ri_a7 - tbl_rest_inc
24743
24744ri_a0:
24745        sub.l           %d0,EXC_DREGS+0x8(%a6)  # fix stacked a0
24746        rts
24747ri_a1:
24748        sub.l           %d0,EXC_DREGS+0xc(%a6)  # fix stacked a1
24749        rts
24750ri_a2:
24751        sub.l           %d0,%a2                 # fix a2
24752        rts
24753ri_a3:
24754        sub.l           %d0,%a3                 # fix a3
24755        rts
24756ri_a4:
24757        sub.l           %d0,%a4                 # fix a4
24758        rts
24759ri_a5:
24760        sub.l           %d0,%a5                 # fix a5
24761        rts
24762ri_a6:
24763        sub.l           %d0,(%a6)               # fix stacked a6
24764        rts
24765# if it's a fmove out instruction, we don't have to fix a7
24766# because we hadn't changed it yet. if it's an opclass two
24767# instruction (data moved in) and the exception was in supervisor
24768# mode, then also also wasn't updated. if it was user mode, then
24769# restore the correct a7 which is in the USP currently.
24770ri_a7:
24771        cmpi.b          EXC_VOFF(%a6),&0x30     # move in or out?
24772        bne.b           ri_a7_done              # out
24773
24774        btst            &0x5,EXC_SR(%a6)        # user or supervisor?
24775        bne.b           ri_a7_done              # supervisor
24776        movc            %usp,%a0                # restore USP
24777        sub.l           %d0,%a0
24778        movc            %a0,%usp
24779ri_a7_done:
24780        rts
24781
24782# need to invert adjustment value if the <ea> was predec
24783rest_dec:
24784        neg.l           %d0
24785        bra.b           rest_inc
24786