linux/arch/m68k/ifpsp060/src/pfpsp.S
<<
>>
Prefs
   1~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   2MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
   3M68000 Hi-Performance Microprocessor Division
   4M68060 Software Package
   5Production Release P1.00 -- October 10, 1994
   6
   7M68060 Software Package Copyright © 1993, 1994 Motorola Inc.  All rights reserved.
   8
   9THE SOFTWARE is provided on an "AS IS" basis and without warranty.
  10To the maximum extent permitted by applicable law,
  11MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
  12INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
  13and any warranty against infringement with regard to the SOFTWARE
  14(INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials.
  15
  16To the maximum extent permitted by applicable law,
  17IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
  18(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
  19BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
  20ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
  21Motorola assumes no responsibility for the maintenance and support of the SOFTWARE.
  22
  23You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE
  24so long as this entire notice is retained without alteration in any modified and/or
  25redistributed versions, and that such modified versions are clearly identified as such.
  26No licenses are granted by implication, estoppel or otherwise under any patents
  27or trademarks of Motorola, Inc.
  28~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  29# freal.s:
  30#       This file is appended to the top of the 060FPSP package
  31# and contains the entry points into the package. The user, in
  32# effect, branches to one of the branch table entries located
  33# after _060FPSP_TABLE.
  34#       Also, subroutine stubs exist in this file (_fpsp_done for
  35# example) that are referenced by the FPSP package itself in order
  36# to call a given routine. The stub routine actually performs the
  37# callout. The FPSP code does a "bsr" to the stub routine. This
  38# extra layer of hierarchy adds a slight performance penalty but
  39# it makes the FPSP code easier to read and more maintainable.
  40#
  41
# Byte offsets of the callout slots. Each stub below loads the longword at
# (_060FPSP_TABLE - 0x80 + _off_xxx) and adds it to (_060FPSP_TABLE - 0x80)
# to form the address it transfers control to.
  42set     _off_bsun,      0x00                    # slot read by _real_bsun
  43set     _off_snan,      0x04                    # slot read by _real_snan
  44set     _off_operr,     0x08                    # slot read by _real_operr
  45set     _off_ovfl,      0x0c                    # slot read by _real_ovfl
  46set     _off_unfl,      0x10                    # slot read by _real_unfl
  47set     _off_dz,        0x14                    # slot read by _real_dz
  48set     _off_inex,      0x18                    # slot read by _real_inex
  49set     _off_fline,     0x1c                    # slot read by _real_fline
  50set     _off_fpu_dis,   0x20                    # slot read by _real_fpu_disabled
  51set     _off_trap,      0x24                    # slot read by _real_trap
  52set     _off_trace,     0x28                    # slot read by _real_trace
  53set     _off_access,    0x2c                    # slot read by _real_access
  54set     _off_done,      0x30                    # slot read by _fpsp_done
  55
# Memory-access callout slots (second group, starting at 0x40).
  56set     _off_imr,       0x40                    # slot read by _imem_read
  57set     _off_dmr,       0x44                    # slot read by _dmem_read
  58set     _off_dmw,       0x48                    # slot read by _dmem_write
  59set     _off_irw,       0x4c                    # slot read by _imem_read_word
  60set     _off_irl,       0x50                    # slot read by _imem_read_long
  61set     _off_drb,       0x54                    # slot read by _dmem_read_byte
  62set     _off_drw,       0x58                    # slot read by _dmem_read_word
  63set     _off_drl,       0x5c                    # slot read by _dmem_read_long
  64set     _off_dwb,       0x60                    # slot read by _dmem_write_byte
  65set     _off_dww,       0x64                    # slot read by _dmem_write_word
  66set     _off_dwl,       0x68                    # slot read by _dmem_write_long
  67
  68_060FPSP_TABLE:                                 # base label; the callout stubs address (_060FPSP_TABLE-0x80)
  69
  70###############################################################
  71
  72# Here's the table of ENTRY POINTS for those linking the package.
# Each slot is a long branch followed by a zero word. With a 6-byte bra.l
# that makes every slot 8 bytes, so slot k sits at _060FPSP_TABLE + 8*k.
# Do not reorder or resize entries: users branch to fixed slot positions.
  73        bra.l           _fpsp_snan              # slot 0 (+0x00)
  74        short           0x0000                  # pad word
  75        bra.l           _fpsp_operr             # slot 1 (+0x08)
  76        short           0x0000                  # pad word
  77        bra.l           _fpsp_ovfl              # slot 2 (+0x10)
  78        short           0x0000                  # pad word
  79        bra.l           _fpsp_unfl              # slot 3 (+0x18)
  80        short           0x0000                  # pad word
  81        bra.l           _fpsp_dz                # slot 4 (+0x20)
  82        short           0x0000                  # pad word
  83        bra.l           _fpsp_inex              # slot 5 (+0x28)
  84        short           0x0000                  # pad word
  85        bra.l           _fpsp_fline             # slot 6 (+0x30)
  86        short           0x0000                  # pad word
  87        bra.l           _fpsp_unsupp            # slot 7 (+0x38)
  88        short           0x0000                  # pad word
  89        bra.l           _fpsp_effadd            # slot 8 (+0x40)
  90        short           0x0000                  # pad word
  91
  92        space           56                      # presumably reserves 56 bytes: 9*8 + 56 = 0x80 total -- TODO confirm
  93
  94###############################################################
  95        global          _fpsp_done              # export the callout stub
  96_fpsp_done:                                     # transfers control to the routine registered in the _off_done slot
  97        mov.l           %d0,-(%sp)              # push d0 so it can serve as scratch
  98        mov.l           (_060FPSP_TABLE-0x80+_off_done,%pc),%d0 # d0 = longword held in the _off_done slot
  99        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 100        mov.l           0x4(%sp),%d0            # reload the saved d0 (now one longword above the target)
 101        rtd             &0x4                    # pop the target into the PC and discard the 4-byte d0 slot
 102
 103        global          _real_ovfl              # export the callout stub
 104_real_ovfl:                                     # transfers control to the routine registered in the _off_ovfl slot
 105        mov.l           %d0,-(%sp)              # push d0 so it can serve as scratch
 106        mov.l           (_060FPSP_TABLE-0x80+_off_ovfl,%pc),%d0 # d0 = longword held in the _off_ovfl slot
 107        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 108        mov.l           0x4(%sp),%d0            # reload the saved d0 (now one longword above the target)
 109        rtd             &0x4                    # pop the target into the PC and discard the 4-byte d0 slot
 110
 111        global          _real_unfl              # export the callout stub
 112_real_unfl:                                     # transfers control to the routine registered in the _off_unfl slot
 113        mov.l           %d0,-(%sp)              # push d0 so it can serve as scratch
 114        mov.l           (_060FPSP_TABLE-0x80+_off_unfl,%pc),%d0 # d0 = longword held in the _off_unfl slot
 115        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 116        mov.l           0x4(%sp),%d0            # reload the saved d0 (now one longword above the target)
 117        rtd             &0x4                    # pop the target into the PC and discard the 4-byte d0 slot
 118
 119        global          _real_inex              # export the callout stub
 120_real_inex:                                     # transfers control to the routine registered in the _off_inex slot
 121        mov.l           %d0,-(%sp)              # push d0 so it can serve as scratch
 122        mov.l           (_060FPSP_TABLE-0x80+_off_inex,%pc),%d0 # d0 = longword held in the _off_inex slot
 123        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 124        mov.l           0x4(%sp),%d0            # reload the saved d0 (now one longword above the target)
 125        rtd             &0x4                    # pop the target into the PC and discard the 4-byte d0 slot
 126
 127        global          _real_bsun              # export the callout stub
 128_real_bsun:                                     # transfers control to the routine registered in the _off_bsun slot
 129        mov.l           %d0,-(%sp)              # push d0 so it can serve as scratch
 130        mov.l           (_060FPSP_TABLE-0x80+_off_bsun,%pc),%d0 # d0 = longword held in the _off_bsun slot
 131        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 132        mov.l           0x4(%sp),%d0            # reload the saved d0 (now one longword above the target)
 133        rtd             &0x4                    # pop the target into the PC and discard the 4-byte d0 slot
 134
 135        global          _real_operr             # export the callout stub
 136_real_operr:                                    # transfers control to the routine registered in the _off_operr slot
 137        mov.l           %d0,-(%sp)              # push d0 so it can serve as scratch
 138        mov.l           (_060FPSP_TABLE-0x80+_off_operr,%pc),%d0 # d0 = longword held in the _off_operr slot
 139        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 140        mov.l           0x4(%sp),%d0            # reload the saved d0 (now one longword above the target)
 141        rtd             &0x4                    # pop the target into the PC and discard the 4-byte d0 slot
 142
 143        global          _real_snan              # export the callout stub
 144_real_snan:                                     # transfers control to the routine registered in the _off_snan slot
 145        mov.l           %d0,-(%sp)              # push d0 so it can serve as scratch
 146        mov.l           (_060FPSP_TABLE-0x80+_off_snan,%pc),%d0 # d0 = longword held in the _off_snan slot
 147        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 148        mov.l           0x4(%sp),%d0            # reload the saved d0 (now one longword above the target)
 149        rtd             &0x4                    # pop the target into the PC and discard the 4-byte d0 slot
 150
 151        global          _real_dz                # export the callout stub
 152_real_dz:                                       # transfers control to the routine registered in the _off_dz slot
 153        mov.l           %d0,-(%sp)              # push d0 so it can serve as scratch
 154        mov.l           (_060FPSP_TABLE-0x80+_off_dz,%pc),%d0 # d0 = longword held in the _off_dz slot
 155        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 156        mov.l           0x4(%sp),%d0            # reload the saved d0 (now one longword above the target)
 157        rtd             &0x4                    # pop the target into the PC and discard the 4-byte d0 slot
 158
 159        global          _real_fline             # export the callout stub
 160_real_fline:                                    # transfers control to the routine registered in the _off_fline slot
 161        mov.l           %d0,-(%sp)              # push d0 so it can serve as scratch
 162        mov.l           (_060FPSP_TABLE-0x80+_off_fline,%pc),%d0 # d0 = longword held in the _off_fline slot
 163        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 164        mov.l           0x4(%sp),%d0            # reload the saved d0 (now one longword above the target)
 165        rtd             &0x4                    # pop the target into the PC and discard the 4-byte d0 slot
 166
 167        global          _real_fpu_disabled      # export the callout stub
 168_real_fpu_disabled:                             # transfers control to the routine registered in the _off_fpu_dis slot
 169        mov.l           %d0,-(%sp)              # push d0 so it can serve as scratch
 170        mov.l           (_060FPSP_TABLE-0x80+_off_fpu_dis,%pc),%d0 # d0 = longword held in the _off_fpu_dis slot
 171        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 172        mov.l           0x4(%sp),%d0            # reload the saved d0 (now one longword above the target)
 173        rtd             &0x4                    # pop the target into the PC and discard the 4-byte d0 slot
 174
 175        global          _real_trap              # export the callout stub
 176_real_trap:                                     # transfers control to the routine registered in the _off_trap slot
 177        mov.l           %d0,-(%sp)              # push d0 so it can serve as scratch
 178        mov.l           (_060FPSP_TABLE-0x80+_off_trap,%pc),%d0 # d0 = longword held in the _off_trap slot
 179        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 180        mov.l           0x4(%sp),%d0            # reload the saved d0 (now one longword above the target)
 181        rtd             &0x4                    # pop the target into the PC and discard the 4-byte d0 slot
 182
 183        global          _real_trace             # export the callout stub
 184_real_trace:                                    # transfers control to the routine registered in the _off_trace slot
 185        mov.l           %d0,-(%sp)              # push d0 so it can serve as scratch
 186        mov.l           (_060FPSP_TABLE-0x80+_off_trace,%pc),%d0 # d0 = longword held in the _off_trace slot
 187        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 188        mov.l           0x4(%sp),%d0            # reload the saved d0 (now one longword above the target)
 189        rtd             &0x4                    # pop the target into the PC and discard the 4-byte d0 slot
 190
 191        global          _real_access            # export the callout stub
 192_real_access:                                   # transfers control to the routine registered in the _off_access slot
 193        mov.l           %d0,-(%sp)              # push d0 so it can serve as scratch
 194        mov.l           (_060FPSP_TABLE-0x80+_off_access,%pc),%d0 # d0 = longword held in the _off_access slot
 195        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 196        mov.l           0x4(%sp),%d0            # reload the saved d0 (now one longword above the target)
 197        rtd             &0x4                    # pop the target into the PC and discard the 4-byte d0 slot
 198
 199#######################################
 200
 201        global          _imem_read              # export the callout stub
 202_imem_read:                                     # transfers control to the routine registered in the _off_imr slot
 203        mov.l           %d0,-(%sp)              # push d0 so it can serve as scratch
 204        mov.l           (_060FPSP_TABLE-0x80+_off_imr,%pc),%d0 # d0 = longword held in the _off_imr slot
 205        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 206        mov.l           0x4(%sp),%d0            # reload the caller's d0 so the target sees it unchanged
 207        rtd             &0x4                    # pop the target into the PC and discard the 4-byte d0 slot
 208
 209        global          _dmem_read              # export the callout stub
 210_dmem_read:                                     # transfers control to the routine registered in the _off_dmr slot
 211        mov.l           %d0,-(%sp)              # push d0 so it can serve as scratch
 212        mov.l           (_060FPSP_TABLE-0x80+_off_dmr,%pc),%d0 # d0 = longword held in the _off_dmr slot
 213        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 214        mov.l           0x4(%sp),%d0            # reload the caller's d0 so the target sees it unchanged
 215        rtd             &0x4                    # pop the target into the PC and discard the 4-byte d0 slot
 216
 217        global          _dmem_write             # export the callout stub
 218_dmem_write:                                    # transfers control to the routine registered in the _off_dmw slot
 219        mov.l           %d0,-(%sp)              # push d0 so it can serve as scratch
 220        mov.l           (_060FPSP_TABLE-0x80+_off_dmw,%pc),%d0 # d0 = longword held in the _off_dmw slot
 221        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 222        mov.l           0x4(%sp),%d0            # reload the caller's d0 so the target sees it unchanged
 223        rtd             &0x4                    # pop the target into the PC and discard the 4-byte d0 slot
 224
 225        global          _imem_read_word         # export the callout stub
 226_imem_read_word:                                # transfers control to the routine registered in the _off_irw slot
 227        mov.l           %d0,-(%sp)              # push d0 so it can serve as scratch
 228        mov.l           (_060FPSP_TABLE-0x80+_off_irw,%pc),%d0 # d0 = longword held in the _off_irw slot
 229        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 230        mov.l           0x4(%sp),%d0            # reload the caller's d0 so the target sees it unchanged
 231        rtd             &0x4                    # pop the target into the PC and discard the 4-byte d0 slot
 232
 233        global          _imem_read_long         # export the callout stub
 234_imem_read_long:                                # transfers control to the routine registered in the _off_irl slot
 235        mov.l           %d0,-(%sp)              # push d0 so it can serve as scratch
 236        mov.l           (_060FPSP_TABLE-0x80+_off_irl,%pc),%d0 # d0 = longword held in the _off_irl slot
 237        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 238        mov.l           0x4(%sp),%d0            # reload the caller's d0 so the target sees it unchanged
 239        rtd             &0x4                    # pop the target into the PC and discard the 4-byte d0 slot
 240
 241        global          _dmem_read_byte         # export the callout stub
 242_dmem_read_byte:                                # transfers control to the routine registered in the _off_drb slot
 243        mov.l           %d0,-(%sp)              # push d0 so it can serve as scratch
 244        mov.l           (_060FPSP_TABLE-0x80+_off_drb,%pc),%d0 # d0 = longword held in the _off_drb slot
 245        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 246        mov.l           0x4(%sp),%d0            # reload the caller's d0 so the target sees it unchanged
 247        rtd             &0x4                    # pop the target into the PC and discard the 4-byte d0 slot
 248
 249        global          _dmem_read_word         # export the callout stub
 250_dmem_read_word:                                # transfers control to the routine registered in the _off_drw slot
 251        mov.l           %d0,-(%sp)              # push d0 so it can serve as scratch
 252        mov.l           (_060FPSP_TABLE-0x80+_off_drw,%pc),%d0 # d0 = longword held in the _off_drw slot
 253        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 254        mov.l           0x4(%sp),%d0            # reload the caller's d0 so the target sees it unchanged
 255        rtd             &0x4                    # pop the target into the PC and discard the 4-byte d0 slot
 256
 257        global          _dmem_read_long         # export the callout stub
 258_dmem_read_long:                                # transfers control to the routine registered in the _off_drl slot
 259        mov.l           %d0,-(%sp)              # push d0 so it can serve as scratch
 260        mov.l           (_060FPSP_TABLE-0x80+_off_drl,%pc),%d0 # d0 = longword held in the _off_drl slot
 261        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 262        mov.l           0x4(%sp),%d0            # reload the caller's d0 so the target sees it unchanged
 263        rtd             &0x4                    # pop the target into the PC and discard the 4-byte d0 slot
 264
 265        global          _dmem_write_byte        # export the callout stub
 266_dmem_write_byte:                               # transfers control to the routine registered in the _off_dwb slot
 267        mov.l           %d0,-(%sp)              # push d0 so it can serve as scratch
 268        mov.l           (_060FPSP_TABLE-0x80+_off_dwb,%pc),%d0 # d0 = longword held in the _off_dwb slot
 269        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 270        mov.l           0x4(%sp),%d0            # reload the caller's d0 so the target sees it unchanged
 271        rtd             &0x4                    # pop the target into the PC and discard the 4-byte d0 slot
 272
 273        global          _dmem_write_word        # export the callout stub
 274_dmem_write_word:                               # transfers control to the routine registered in the _off_dww slot
 275        mov.l           %d0,-(%sp)              # push d0 so it can serve as scratch
 276        mov.l           (_060FPSP_TABLE-0x80+_off_dww,%pc),%d0 # d0 = longword held in the _off_dww slot
 277        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 278        mov.l           0x4(%sp),%d0            # reload the caller's d0 so the target sees it unchanged
 279        rtd             &0x4                    # pop the target into the PC and discard the 4-byte d0 slot
 280
 281        global          _dmem_write_long        # export the callout stub
 282_dmem_write_long:                               # transfers control to the routine registered in the _off_dwl slot
 283        mov.l           %d0,-(%sp)              # push d0 so it can serve as scratch
 284        mov.l           (_060FPSP_TABLE-0x80+_off_dwl,%pc),%d0 # d0 = longword held in the _off_dwl slot
 285        pea.l           (_060FPSP_TABLE-0x80,%pc,%d0) # push target address = (_060FPSP_TABLE-0x80) + d0
 286        mov.l           0x4(%sp),%d0            # reload the caller's d0 so the target sees it unchanged
 287        rtd             &0x4                    # pop the target into the PC and discard the 4-byte d0 slot
 288
 289#
 290# This file contains a set of define statements for constants
 291# in order to promote readability within the corecode itself.
 292#
 293
 294set LOCAL_SIZE,         192                     # stack frame size (bytes) reserved by "link.w %a6,&-LOCAL_SIZE"
 295set LV,                 -LOCAL_SIZE             # offset from a6 to the lowest byte of the local frame
 296
# Positive offsets from a6 reach the stacked exception frame.
 297set EXC_SR,             0x4                     # stacked status register
 298set EXC_PC,             0x6                     # stacked pc
 299set EXC_VOFF,           0xa                     # stacked vector offset
 300set EXC_EA,             0xc                     # stacked <ea>
 301
 302set EXC_FP,             0x0                     # frame pointer (longword at a6 itself)
 303
# Register save areas, addressed with negative offsets from a6.
 304set EXC_AREGS,          -68                     # offset of all address regs (base of 4-byte slots a0..a7)
 305set EXC_DREGS,          -100                    # offset of all data regs (base of 4-byte slots d0..d7)
 306set EXC_FPREGS,         -36                     # offset of all fp regs (base of 12-byte slots fp0..fp2)
 307
 308set EXC_A7,             EXC_AREGS+(7*4)         # offset of saved a7
 309set OLD_A7,             EXC_AREGS+(6*4)         # extra copy of saved a7 (note: same offset as EXC_A6)
 310set EXC_A6,             EXC_AREGS+(6*4)         # offset of saved a6
 311set EXC_A5,             EXC_AREGS+(5*4)         # offset of a5 slot
 312set EXC_A4,             EXC_AREGS+(4*4)         # offset of a4 slot
 313set EXC_A3,             EXC_AREGS+(3*4)         # offset of a3 slot
 314set EXC_A2,             EXC_AREGS+(2*4)         # offset of a2 slot
 315set EXC_A1,             EXC_AREGS+(1*4)         # offset of a1 slot
 316set EXC_A0,             EXC_AREGS+(0*4)         # offset of a0 slot
 317set EXC_D7,             EXC_DREGS+(7*4)         # offset of d7 slot
 318set EXC_D6,             EXC_DREGS+(6*4)         # offset of d6 slot
 319set EXC_D5,             EXC_DREGS+(5*4)         # offset of d5 slot
 320set EXC_D4,             EXC_DREGS+(4*4)         # offset of d4 slot
 321set EXC_D3,             EXC_DREGS+(3*4)         # offset of d3 slot
 322set EXC_D2,             EXC_DREGS+(2*4)         # offset of d2 slot
 323set EXC_D1,             EXC_DREGS+(1*4)         # offset of d1 slot
 324set EXC_D0,             EXC_DREGS+(0*4)         # offset of d0 slot
 325
 326set EXC_FP0,            EXC_FPREGS+(0*12)       # offset of saved fp0
 327set EXC_FP1,            EXC_FPREGS+(1*12)       # offset of saved fp1
 328set EXC_FP2,            EXC_FPREGS+(2*12)       # offset of saved fp2 (not used)
 329
 330set FP_SCR1,            LV+80                   # fp scratch 1 (12 bytes)
 331set FP_SCR1_EX,         FP_SCR1+0               # +0: exponent field (per name)
 332set FP_SCR1_SGN,        FP_SCR1+2               # +2: sign field (per name)
 333set FP_SCR1_HI,         FP_SCR1+4               # +4: high longword (per name)
 334set FP_SCR1_LO,         FP_SCR1+8               # +8: low longword (per name)
 335
 336set FP_SCR0,            LV+68                   # fp scratch 0 (12 bytes)
 337set FP_SCR0_EX,         FP_SCR0+0               # +0: exponent field (per name)
 338set FP_SCR0_SGN,        FP_SCR0+2               # +2: sign field (per name)
 339set FP_SCR0_HI,         FP_SCR0+4               # +4: high longword (per name)
 340set FP_SCR0_LO,         FP_SCR0+8               # +8: low longword (per name)
 341
 342set FP_DST,             LV+56                   # fp destination operand
 343set FP_DST_EX,          FP_DST+0                # +0: exponent field (per name)
 344set FP_DST_SGN,         FP_DST+2                # +2: sign field (per name)
 345set FP_DST_HI,          FP_DST+4                # +4: high longword (per name)
 346set FP_DST_LO,          FP_DST+8                # +8: low longword (per name)
 347
 348set FP_SRC,             LV+44                   # fp source operand; _fpsp_ovfl does its fsave to this address
 349set FP_SRC_EX,          FP_SRC+0                # +0: exponent field (per name)
 350set FP_SRC_SGN,         FP_SRC+2                # +2: sign field (per name)
 351set FP_SRC_HI,          FP_SRC+4                # +4: high longword (per name)
 352set FP_SRC_LO,          FP_SRC+8                # +8: low longword (per name)
 353
# USER_FPCR/USER_FPSR/USER_FPIAR are three consecutive longwords; _fpsp_ovfl
# fills them with a single "fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6)".
 354set USER_FPIAR,         LV+40                   # FP instr address register
 355
 356set USER_FPSR,          LV+36                   # FP status register
 357set FPSR_CC,            USER_FPSR+0             # FPSR condition codes
 358set FPSR_QBYTE,         USER_FPSR+1             # FPSR quotient byte
 359set FPSR_EXCEPT,        USER_FPSR+2             # FPSR exception status byte
 360set FPSR_AEXCEPT,       USER_FPSR+3             # FPSR accrued exception byte
 361
 362set USER_FPCR,          LV+32                   # FP control register
 363set FPCR_ENABLE,        USER_FPCR+2             # FPCR exception enable
 364set FPCR_MODE,          USER_FPCR+3             # FPCR rounding mode control
 365
 366set L_SCR3,             LV+28                   # integer scratch 3
 367set L_SCR2,             LV+24                   # integer scratch 2
 368set L_SCR1,             LV+20                   # integer scratch 1
 369
 370set STORE_FLG,          LV+19                   # flag: operand store (ie. not fcmp/ftst)
 371
 372set EXC_TEMP2,          LV+24                   # temporary space (note: same offset as L_SCR2)
 373set EXC_TEMP,           LV+16                   # temporary space
 374
 375set DTAG,               LV+15                   # destination operand type
 376set STAG,               LV+14                   # source operand type
 377
 378set SPCOND_FLG,         LV+10                   # flag: special case (see below)
 379
 380set EXC_CC,             LV+8                    # saved condition codes
 381set EXC_EXTWPTR,        LV+4                    # saved current PC (active)
 382set EXC_EXTWORD,        LV+2                    # saved extension word
 383set EXC_CMDREG,         LV+2                    # saved extension word (alias of EXC_EXTWORD)
 384set EXC_OPWORD,         LV+0                    # saved operation word; a longword store here also fills LV+2
 385
 386################################
 387
 388# Helpful macros
 389
 390set FTEMP,              0                       # offsets within an
 391set FTEMP_EX,           0                       # extended precision
 392set FTEMP_SGN,          2                       # value saved in memory.
 393set FTEMP_HI,           4                       # +4: high longword (per name)
 394set FTEMP_LO,           8                       # +8: low longword (per name)
 395set FTEMP_GRS,          12                      # +12: presumably guard/round/sticky bits -- TODO confirm
 396
 397set LOCAL,              0                       # offsets within an
 398set LOCAL_EX,           0                       # extended precision
 399set LOCAL_SGN,          2                       # value saved in memory.
 400set LOCAL_HI,           4                       # +4: high longword (per name)
 401set LOCAL_LO,           8                       # +8: low longword (per name)
 402set LOCAL_GRS,          12                      # +12: presumably guard/round/sticky bits -- TODO confirm
 403
 404set DST,                0                       # offsets within an
 405set DST_EX,             0                       # extended precision
 406set DST_HI,             4                       # value saved in memory.
 407set DST_LO,             8                       # +8: low longword (per name)
 408
 409set SRC,                0                       # offsets within an
 410set SRC_EX,             0                       # extended precision
 411set SRC_HI,             4                       # value saved in memory.
 412set SRC_LO,             8                       # +8: low longword (per name)
 413
 414set SGL_LO,             0x3f81                  # min sgl prec exponent
 415set SGL_HI,             0x407e                  # max sgl prec exponent
 416set DBL_LO,             0x3c01                  # min dbl prec exponent
 417set DBL_HI,             0x43fe                  # max dbl prec exponent
 418set EXT_LO,             0x0                     # min ext prec exponent
 419set EXT_HI,             0x7ffe                  # max ext prec exponent
 420
 421set EXT_BIAS,           0x3fff                  # extended precision bias
 422set SGL_BIAS,           0x007f                  # single precision bias
 423set DBL_BIAS,           0x03ff                  # double precision bias
 424
# Operand type tags; _fpsp_ovfl stores set_tag_x results in STAG/DTAG and
# compares them against these values.
 425set NORM,               0x00                    # operand type for STAG/DTAG
 426set ZERO,               0x01                    # operand type for STAG/DTAG
 427set INF,                0x02                    # operand type for STAG/DTAG
 428set QNAN,               0x03                    # operand type for STAG/DTAG
 429set DENORM,             0x04                    # operand type for STAG/DTAG
 430set SNAN,               0x05                    # operand type for STAG/DTAG
 431set UNNORM,             0x06                    # operand type for STAG/DTAG
 432
 433##################
 434# FPSR/FPCR bits #
 435##################
 436set neg_bit,            0x3                     # negative result (bit number; cf. neg_bmask 0x08)
 437set z_bit,              0x2                     # zero result (cf. z_bmask 0x04)
 438set inf_bit,            0x1                     # infinite result (cf. inf_bmask 0x02)
 439set nan_bit,            0x0                     # NAN result (cf. nan_bmask 0x01)
 440
 441set q_sn_bit,           0x7                     # sign bit of quotient byte
 442
# Bit numbers within the exception status byte (cf. the *_mask values
# 0x8000..0x0100, which are these bits shifted left by 8).
 443set bsun_bit,           7                       # branch on unordered
 444set snan_bit,           6                       # signalling NAN
 445set operr_bit,          5                       # operand error
 446set ovfl_bit,           4                       # overflow
 447set unfl_bit,           3                       # underflow
 448set dz_bit,             2                       # divide by zero
 449set inex2_bit,          1                       # inexact result 2
 450set inex1_bit,          0                       # inexact result 1
 451
# Bit numbers within the accrued exception byte (cf. a*_mask 0x80..0x08).
 452set aiop_bit,           7                       # accrued illegal operation bit
 453set aovfl_bit,          6                       # accrued overflow bit
 454set aunfl_bit,          5                       # accrued underflow bit
 455set adz_bit,            4                       # accrued dz bit
 456set ainex_bit,          3                       # accrued inexact bit
 457
 458#############################
 459# FPSR individual bit masks #
 460#############################
 461set neg_mask,           0x08000000              # negative bit mask (lw) = bit 27
 462set inf_mask,           0x02000000              # infinity bit mask (lw) = bit 25
 463set z_mask,             0x04000000              # zero bit mask (lw) = bit 26
 464set nan_mask,           0x01000000              # nan bit mask (lw) = bit 24
 465
 466set neg_bmask,          0x08                    # negative bit mask (byte)
 467set inf_bmask,          0x02                    # infinity bit mask (byte)
 468set z_bmask,            0x04                    # zero bit mask (byte)
 469set nan_bmask,          0x01                    # nan bit mask (byte)
 470
 471set bsun_mask,          0x00008000              # bsun exception mask
 472set snan_mask,          0x00004000              # snan exception mask
 473set operr_mask,         0x00002000              # operr exception mask
 474set ovfl_mask,          0x00001000              # overflow exception mask
 475set unfl_mask,          0x00000800              # underflow exception mask
 476set dz_mask,            0x00000400              # dz exception mask
 477set inex2_mask,         0x00000200              # inex2 exception mask
 478set inex1_mask,         0x00000100              # inex1 exception mask
 479
 480set aiop_mask,          0x00000080              # accrued illegal operation
 481set aovfl_mask,         0x00000040              # accrued overflow
 482set aunfl_mask,         0x00000020              # accrued underflow
 483set adz_mask,           0x00000010              # accrued divide by zero
 484set ainex_mask,         0x00000008              # accrued inexact
 485
 486######################################
 487# FPSR combinations used in the FPSP #
 488######################################
 489set dzinf_mask,         inf_mask+dz_mask+adz_mask # = 0x02000410
 490set opnan_mask,         nan_mask+operr_mask+aiop_mask # = 0x01002080
 491set nzi_mask,           0x01ffffff              #clears N, Z, and I (and bits 31-28) when ANDed
 492set unfinx_mask,        unfl_mask+inex2_mask+aunfl_mask+ainex_mask # = 0x00000a28
 493set unf2inx_mask,       unfl_mask+inex2_mask+ainex_mask # = 0x00000a08 (unfinx_mask without aunfl_mask)
 494set ovfinx_mask,        ovfl_mask+inex2_mask+aovfl_mask+ainex_mask # = 0x00001248
 495set inx1a_mask,         inex1_mask+ainex_mask   # = 0x00000108
 496set inx2a_mask,         inex2_mask+ainex_mask   # = 0x00000208
 497set snaniop_mask,       nan_mask+snan_mask+aiop_mask # = 0x01004080
 498set snaniop2_mask,      snan_mask+aiop_mask     # = 0x00004080 (snaniop_mask without nan_mask)
 499set naniop_mask,        nan_mask+aiop_mask      # = 0x01000080
 500set neginf_mask,        neg_mask+inf_mask       # = 0x0a000000
 501set infaiop_mask,       inf_mask+aiop_mask      # = 0x02000080
 502set negz_mask,          neg_mask+z_mask         # = 0x0c000000
 503set opaop_mask,         operr_mask+aiop_mask    # = 0x00002080
 504set unfl_inx_mask,      unfl_mask+aunfl_mask+ainex_mask # = 0x00000828 (no inex2_mask)
 505set ovfl_inx_mask,      ovfl_mask+aovfl_mask+ainex_mask # = 0x00001048 (no inex2_mask)
 506
 507#########
 508# misc. #
 509#########
 510set rnd_stky_bit,       29                      # stky bit pos in longword
 511
 512set sign_bit,           0x7                     # sign bit
 513set signan_bit,         0x6                     # signalling nan bit
 514
 515set sgl_thresh,         0x3f81                  # minimum sgl exponent (same value as SGL_LO)
 516set dbl_thresh,         0x3c01                  # minimum dbl exponent (same value as DBL_LO)
 517
 518set x_mode,             0x0                     # extended precision
 519set s_mode,             0x4                     # single precision
 520set d_mode,             0x8                     # double precision
 521
 522set rn_mode,            0x0                     # round-to-nearest
 523set rz_mode,            0x1                     # round-to-zero
 524set rm_mode,            0x2                     # round-to-minus-infinity
 525set rp_mode,            0x3                     # round-to-plus-infinity
 526
 527set mantissalen,        64                      # length of mantissa in bits
 528
 529set BYTE,               1                       # len(byte) == 1 byte
 530set WORD,               2                       # len(word) == 2 bytes
 531set LONG,               4                       # len(longword) == 4 bytes
 532
 533set BSUN_VEC,           0xc0                    # bsun    vector offset
 534set INEX_VEC,           0xc4                    # inexact vector offset
 535set DZ_VEC,             0xc8                    # dz      vector offset
 536set UNFL_VEC,           0xcc                    # unfl    vector offset
 537set OPERR_VEC,          0xd0                    # operr   vector offset
 538set OVFL_VEC,           0xd4                    # ovfl    vector offset
 539set SNAN_VEC,           0xd8                    # snan    vector offset
 540
 541###########################
 542# SPecial CONDition FLaGs #
 543###########################
 544set ftrapcc_flg,        0x01                    # flag bit: ftrapcc exception
 545set fbsun_flg,          0x02                    # flag bit: bsun exception
 546set mia7_flg,           0x04                    # flag bit: (a7)+ <ea>
 547set mda7_flg,           0x08                    # flag bit: -(a7) <ea>
 548set fmovm_flg,          0x40                    # flag bit: fmovm instruction (no matching *_bit defined below)
 549set immed_flg,          0x80                    # flag bit: &<data> <ea>
 550
# Bit numbers corresponding to the *_flg masks above (mask == 1 << bit).
 551set ftrapcc_bit,        0x0                     # bit number of ftrapcc_flg
 552set fbsun_bit,          0x1                     # bit number of fbsun_flg
 553set mia7_bit,           0x2                     # bit number of mia7_flg
 554set mda7_bit,           0x3                     # bit number of mda7_flg
 555set immed_bit,          0x7                     # bit number of immed_flg
 556
 557##################################
 558# TRANSCENDENTAL "LAST-OP" FLAGS #
 559##################################
 560set FMUL_OP,            0x0                     # fmul instr performed last
 561set FDIV_OP,            0x1                     # fdiv instr performed last
 562set FADD_OP,            0x2                     # fadd instr performed last
 563set FMOV_OP,            0x3                     # fmov instr performed last
 564
 565#############
 566# CONSTANTS #
 567#############
 568T1:     long            0x40C62D38,0xD3D64634   # 16381 LOG2 LEAD (two longwords; presumably one 64-bit double)
 569T2:     long            0x3D6F90AE,0xB1E75CC7   # 16381 LOG2 TRAIL (low-order correction paired with T1, per name)
 570
 571PI:     long            0x40000000,0xC90FDAA2,0x2168C235,0x00000000 # presumably pi: exponent 0x4000, 64-bit mantissa, zero pad
 572PIBY2:  long            0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000 # same mantissa as PI with exponent one lower (pi/2)
 573
 574TWOBYPI:                                        # presumably 2/pi as a 64-bit double -- TODO confirm
 575        long            0x3FE45F30,0x6DC9C883
 576
 577#########################################################################
 578# XDEF **************************************************************** #
 579#       _fpsp_ovfl(): 060FPSP entry point for FP Overflow exception.    #
 580#                                                                       #
 581#       This handler should be the first code executed upon taking the  #
 582#       FP Overflow exception in an operating system.                   #
 583#                                                                       #
 584# XREF **************************************************************** #
 585#       _imem_read_long() - read instruction longword                   #
 586#       fix_skewed_ops() - adjust src operand in fsave frame            #
 587#       set_tag_x() - determine optype of src/dst operands              #
 588#       store_fpreg() - store opclass 0 or 2 result to FP regfile       #
 589#       unnorm_fix() - change UNNORM operands to NORM or ZERO           #
 590#       load_fpn2() - load dst operand from FP regfile                  #
 591#       fout() - emulate an opclass 3 instruction                       #
 592#       tbl_unsupp - add of table of emulation routines for opclass 0,2 #
 593#       _fpsp_done() - "callout" for 060FPSP exit (all work done!)      #
 594#       _real_ovfl() - "callout" for Overflow exception enabled code    #
 595#       _real_inex() - "callout" for Inexact exception enabled code     #
 596#       _real_trace() - "callout" for Trace exception code              #
 597#                                                                       #
 598# INPUT *************************************************************** #
 599#       - The system stack contains the FP Ovfl exception stack frame   #
 600#       - The fsave frame contains the source operand                   #
 601#                                                                       #
 602# OUTPUT ************************************************************** #
 603#       Overflow Exception enabled:                                     #
 604#       - The system stack is unchanged                                 #
 605#       - The fsave frame contains the adjusted src op for opclass 0,2  #
 606#       Overflow Exception disabled:                                    #
 607#       - The system stack is unchanged                                 #
 608#       - The "exception present" flag in the fsave frame is cleared    #
 609#                                                                       #
 610# ALGORITHM *********************************************************** #
 611#       On the 060, if an FP overflow is present as the result of any   #
 612# instruction, the 060 will take an overflow exception whether the      #
 613# exception is enabled or disabled in the FPCR. For the disabled case,  #
 614# This handler emulates the instruction to determine what the correct   #
 615# default result should be for the operation. This default result is    #
 616# then stored in either the FP regfile, data regfile, or memory.        #
 617# Finally, the handler exits through the "callout" _fpsp_done()         #
 618# denoting that no exceptional conditions exist within the machine.     #
 619#       If the exception is enabled, then this handler must create the  #
 620# exceptional operand and place it in the fsave state frame, and store  #
 621# the default result (only if the instruction is opclass 3). For        #
 622# exceptions enabled, this handler must exit through the "callout"      #
 623# _real_ovfl() so that the operating system enabled overflow handler    #
 624# can handle this case.                                                 #
 625#       Two other conditions exist. First, if overflow was disabled     #
 626# but the inexact exception was enabled, this handler must exit         #
 627# through the "callout" _real_inex() regardless of whether the result   #
 628# was inexact.                                                          #
 629#       Also, in the case of an opclass three instruction where         #
 630# overflow was disabled and the trace exception was enabled, this       #
 631# handler must exit through the "callout" _real_trace().                #
 632#                                                                       #
 633#########################################################################
 634
 635        global          _fpsp_ovfl
 636_fpsp_ovfl:
 637
 638#$#     sub.l           &24,%sp                 # make room for src/dst
 639
 640        link.w          %a6,&-LOCAL_SIZE        # init stack frame
 641
 642        fsave           FP_SRC(%a6)             # grab the "busy" frame
 643
 644        movm.l          &0x0303,EXC_DREGS(%a6)  # save d0-d1/a0-a1
 645        fmovm.l         %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
 646        fmovm.x         &0xc0,EXC_FPREGS(%a6)   # save fp0-fp1 on stack
 647
 648# the FPIAR holds the "current PC" of the faulting instruction
 649        mov.l           USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
 650        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
 651        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
 652        bsr.l           _imem_read_long         # fetch the instruction words
 653        mov.l           %d0,EXC_OPWORD(%a6)     # store OPWORD and EXTWORD
 654
 655##############################################################################
 656
 657        btst            &0x5,EXC_CMDREG(%a6)    # is instr an fmove out?
 658        bne.w           fovfl_out               # yes; handled separately
 659
 660
 661        lea             FP_SRC(%a6),%a0         # pass: ptr to src op
 662        bsr.l           fix_skewed_ops          # fix src op
 663
 664# since, I believe, only NORMs and DENORMs can come through here,
 665# maybe we can avoid the subroutine call.
 666        lea             FP_SRC(%a6),%a0         # pass: ptr to src op
 667        bsr.l           set_tag_x               # tag the operand type
 668        mov.b           %d0,STAG(%a6)           # maybe NORM,DENORM
 669
 670# bit five of the fp extension word separates the monadic and dyadic operations
 671# that can pass through fpsp_ovfl(). remember that fcmp, ftst, and fsincos
 672# will never take this exception.
 673        btst            &0x5,1+EXC_CMDREG(%a6)  # is operation monadic or dyadic?
 674        beq.b           fovfl_extract           # monadic
 675
 676        bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
 677        bsr.l           load_fpn2               # load dst into FP_DST
 678
 679        lea             FP_DST(%a6),%a0         # pass: ptr to dst op
 680        bsr.l           set_tag_x               # tag the operand type
 681        cmpi.b          %d0,&UNNORM             # is operand an UNNORM?
 682        bne.b           fovfl_op2_done          # no
 683        bsr.l           unnorm_fix              # yes; convert to NORM,DENORM,or ZERO
 684fovfl_op2_done:
 685        mov.b           %d0,DTAG(%a6)           # save dst optype tag
 686
 687fovfl_extract:
 688
 689#$#     mov.l           FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
 690#$#     mov.l           FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
 691#$#     mov.l           FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
 692#$#     mov.l           FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
 693#$#     mov.l           FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
 694#$#     mov.l           FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
 695
 696        clr.l           %d0
 697        mov.b           FPCR_MODE(%a6),%d0      # pass rnd prec/mode
 698
 699        mov.b           1+EXC_CMDREG(%a6),%d1
 700        andi.w          &0x007f,%d1             # extract extension
 701
 702        andi.l          &0x00ff01ff,USER_FPSR(%a6) # zero bits 31-24,15-9; accrued byte (7-0) kept
 703
 704        fmov.l          &0x0,%fpcr              # zero current control regs
 705        fmov.l          &0x0,%fpsr
 706
 707        lea             FP_SRC(%a6),%a0         # pass: ptr to src op
 708        lea             FP_DST(%a6),%a1         # pass: ptr to dst op
 709
 710# maybe we can make these entry points ONLY the OVFL entry points of each routine.
 711        mov.l           (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
 712        jsr             (tbl_unsupp.l,%pc,%d1.l*1) # call the emulation routine
 713
 714# the operation has been emulated. the result is in fp0.
 715# the EXOP, if an exception occurred, is in fp1.
 716# we must save the default result regardless of whether
 717# traps are enabled or disabled.
 718        bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst reg field again
 719        bsr.l           store_fpreg             # store result to FP regfile
 720
 721# the exceptional possibilities we have left ourselves with are ONLY overflow
 722# and inexact. and, the inexact is such that overflow occurred and was disabled
 723# but inexact was enabled.
 724        btst            &ovfl_bit,FPCR_ENABLE(%a6)
 725        bne.b           fovfl_ovfl_on           # overflow is enabled
 726
 727        btst            &inex2_bit,FPCR_ENABLE(%a6)
 728        bne.b           fovfl_inex_on           # inexact is enabled
 729
 730        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
 731        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
 732        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
 733
 734        unlk            %a6
 735#$#     add.l           &24,%sp
 736        bra.l           _fpsp_done              # neither enabled; all work done
 737
 738# overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
 739# in fp1. now, simply jump to _real_ovfl()! (entered from both the main path and fovfl_out)
 740fovfl_ovfl_on:
 741        fmovm.x         &0x40,FP_SRC(%a6)       # save EXOP (fp1) into the fsave frame
 742
 743        mov.w           &0xe005,2+FP_SRC(%a6)   # save exc status
 744
 745        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
 746        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
 747        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
 748
 749        frestore        FP_SRC(%a6)             # do this after fmovm,other f<op>s!
 750
 751        unlk            %a6
 752
 753        bra.l           _real_ovfl              # exit through the overflow "callout"
 754
 755# overflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
 756# we must jump to _real_inex(). (entered from both the main path and fovfl_out)
 757fovfl_inex_on:
 758
 759        fmovm.x         &0x40,FP_SRC(%a6)       # save EXOP (fp1) into the fsave frame
 760
 761        mov.b           &0xc4,1+EXC_VOFF(%a6)   # vector offset = 0xc4
 762        mov.w           &0xe001,2+FP_SRC(%a6)   # save exc status
 763
 764        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
 765        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
 766        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
 767
 768        frestore        FP_SRC(%a6)             # do this after fmovm,other f<op>s!
 769
 770        unlk            %a6
 771
 772        bra.l           _real_inex              # exit through the inexact "callout"
 773
 774########################################################################
 775fovfl_out:
 776
 777
 778#$#     mov.l           FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
 779#$#     mov.l           FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
 780#$#     mov.l           FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
 781
 782# the src operand is definitely a NORM(!), so tag it as such
 783        mov.b           &NORM,STAG(%a6)         # set src optype tag
 784
 785        clr.l           %d0
 786        mov.b           FPCR_MODE(%a6),%d0      # pass rnd prec/mode
 787
 788        and.l           &0xffff00ff,USER_FPSR(%a6) # zero bits 15-8 only; all other fields kept
 789
 790        fmov.l          &0x0,%fpcr              # zero current control regs
 791        fmov.l          &0x0,%fpsr
 792
 793        lea             FP_SRC(%a6),%a0         # pass ptr to src operand
 794
 795        bsr.l           fout                    # emulate the fmove out
 796
 797        btst            &ovfl_bit,FPCR_ENABLE(%a6)
 798        bne.w           fovfl_ovfl_on           # overflow is enabled
 799
 800        btst            &inex2_bit,FPCR_ENABLE(%a6)
 801        bne.w           fovfl_inex_on           # inexact is enabled
 802
 803        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
 804        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
 805        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
 806
 807        unlk            %a6
 808#$#     add.l           &24,%sp
 809
 810        btst            &0x7,(%sp)              # is trace on?
 811        beq.l           _fpsp_done              # no
 812
 813        fmov.l          %fpiar,0x8(%sp)         # "Current PC" is in FPIAR
 814        mov.w           &0x2024,0x6(%sp)        # stk fmt = 0x2; voff = 0x024
 815        bra.l           _real_trace             # exit through the trace "callout"
 816
 817#########################################################################
 818# XDEF **************************************************************** #
 819#       _fpsp_unfl(): 060FPSP entry point for FP Underflow exception.   #
 820#                                                                       #
 821#       This handler should be the first code executed upon taking the  #
 822#       FP Underflow exception in an operating system.                  #
 823#                                                                       #
 824# XREF **************************************************************** #
 825#       _imem_read_long() - read instruction longword                   #
 826#       fix_skewed_ops() - adjust src operand in fsave frame            #
 827#       set_tag_x() - determine optype of src/dst operands              #
 828#       store_fpreg() - store opclass 0 or 2 result to FP regfile       #
 829#       unnorm_fix() - change UNNORM operands to NORM or ZERO           #
 830#       load_fpn2() - load dst operand from FP regfile                  #
 831#       fout() - emulate an opclass 3 instruction                       #
 832#       tbl_unsupp - addr of emulation routine table for opclass 0,2    #
 833#       _fpsp_done() - "callout" for 060FPSP exit (all work done!)      #
 834#       _real_unfl() - "callout" for Underflow exception enabled code   #
 835#       _real_inex() - "callout" for Inexact exception enabled code     #
 836#       _real_trace() - "callout" for Trace exception code              #
 837#                                                                       #
 838# INPUT *************************************************************** #
 839#       - The system stack contains the FP Unfl exception stack frame   #
 840#       - The fsave frame contains the source operand                   #
 841#                                                                       #
 842# OUTPUT ************************************************************** #
 843#       Underflow Exception enabled:                                    #
 844#       - The system stack is unchanged                                 #
 845#       - The fsave frame contains the adjusted src op for opclass 0,2  #
 846#       Underflow Exception disabled:                                   #
 847#       - The system stack is unchanged                                 #
 848#       - The "exception present" flag in the fsave frame is cleared    #
 849#                                                                       #
 850# ALGORITHM *********************************************************** #
 851#       On the 060, if an FP underflow is present as the result of any  #
 852# instruction, the 060 will take an underflow exception whether the     #
 853# exception is enabled or disabled in the FPCR. For the disabled case,  #
 854# This handler emulates the instruction to determine what the correct   #
 855# default result should be for the operation. This default result is    #
 856# then stored in either the FP regfile, data regfile, or memory.        #
 857# Finally, the handler exits through the "callout" _fpsp_done()         #
 858# denoting that no exceptional conditions exist within the machine.     #
 859#       If the exception is enabled, then this handler must create the  #
 860# exceptional operand and place it in the fsave state frame, and store  #
 861# the default result (only if the instruction is opclass 3). For        #
 862# exceptions enabled, this handler must exit through the "callout"      #
 863# _real_unfl() so that the operating system enabled underflow handler   #
 864# can handle this case.                                                 #
 865#       Two other conditions exist. First, if underflow was disabled    #
 866# but the inexact exception was enabled and the result was inexact,     #
 867# this handler must exit through the "callout" _real_inex().            #
 868#                                                                       #
 869#       Also, in the case of an opclass three instruction where         #
 870# underflow was disabled and the trace exception was enabled, this      #
 871# handler must exit through the "callout" _real_trace().                #
 872#                                                                       #
 873#########################################################################
 874
 875        global          _fpsp_unfl
 876_fpsp_unfl:
 877
 878#$#     sub.l           &24,%sp                 # make room for src/dst
 879
 880        link.w          %a6,&-LOCAL_SIZE        # init stack frame
 881
 882        fsave           FP_SRC(%a6)             # grab the "busy" frame
 883
 884        movm.l          &0x0303,EXC_DREGS(%a6)  # save d0-d1/a0-a1
 885        fmovm.l         %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
 886        fmovm.x         &0xc0,EXC_FPREGS(%a6)   # save fp0-fp1 on stack
 887
 888# the FPIAR holds the "current PC" of the faulting instruction
 889        mov.l           USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
 890        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
 891        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
 892        bsr.l           _imem_read_long         # fetch the instruction words
 893        mov.l           %d0,EXC_OPWORD(%a6)     # store OPWORD and EXTWORD
 894
 895##############################################################################
 896
 897        btst            &0x5,EXC_CMDREG(%a6)    # is instr an fmove out?
 898        bne.w           funfl_out               # yes; handled separately
 899
 900
 901        lea             FP_SRC(%a6),%a0         # pass: ptr to src op
 902        bsr.l           fix_skewed_ops          # fix src op
 903
 904        lea             FP_SRC(%a6),%a0         # pass: ptr to src op
 905        bsr.l           set_tag_x               # tag the operand type
 906        mov.b           %d0,STAG(%a6)           # maybe NORM,DENORM
 907
 908# bit five of the fp ext word separates the monadic and dyadic operations
 909# that can pass through fpsp_unfl(). remember that fcmp, and ftst
 910# will never take this exception.
 911        btst            &0x5,1+EXC_CMDREG(%a6)  # is op monadic or dyadic?
 912        beq.b           funfl_extract           # monadic
 913
 914# now, what's left that's not dyadic is fsincos. we can distinguish it
 915# from all dyadics by the '0110xxx pattern
 916        btst            &0x4,1+EXC_CMDREG(%a6)  # is op an fsincos?
 917        bne.b           funfl_extract           # yes
 918
 919        bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
 920        bsr.l           load_fpn2               # load dst into FP_DST
 921
 922        lea             FP_DST(%a6),%a0         # pass: ptr to dst op
 923        bsr.l           set_tag_x               # tag the operand type
 924        cmpi.b          %d0,&UNNORM             # is operand an UNNORM?
 925        bne.b           funfl_op2_done          # no
 926        bsr.l           unnorm_fix              # yes; convert to NORM,DENORM,or ZERO
 927funfl_op2_done:
 928        mov.b           %d0,DTAG(%a6)           # save dst optype tag
 929
 930funfl_extract:
 931
 932#$#     mov.l           FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
 933#$#     mov.l           FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
 934#$#     mov.l           FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
 935#$#     mov.l           FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
 936#$#     mov.l           FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
 937#$#     mov.l           FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
 938
 939        clr.l           %d0
 940        mov.b           FPCR_MODE(%a6),%d0      # pass rnd prec/mode
 941
 942        mov.b           1+EXC_CMDREG(%a6),%d1
 943        andi.w          &0x007f,%d1             # extract extension
 944
 945        andi.l          &0x00ff01ff,USER_FPSR(%a6) # zero bits 31-24,15-9; accrued byte (7-0) kept
 946
 947        fmov.l          &0x0,%fpcr              # zero current control regs
 948        fmov.l          &0x0,%fpsr
 949
 950        lea             FP_SRC(%a6),%a0         # pass: ptr to src op
 951        lea             FP_DST(%a6),%a1         # pass: ptr to dst op
 952
 953# maybe we can make these entry points ONLY the UNFL entry points of each routine.
 954        mov.l           (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
 955        jsr             (tbl_unsupp.l,%pc,%d1.l*1) # call the emulation routine
 956
 957        bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst reg field again
 958        bsr.l           store_fpreg             # store result to FP regfile
 959
 960# The `060 FPU multiplier hardware is such that if the result of a
 961# multiply operation is the smallest possible normalized number
 962# (0x00000000_80000000_00000000), then the machine will take an
 963# underflow exception. Since this is incorrect, we need to check
 964# if our emulation, after re-doing the operation, decided that
 965# no underflow was called for. We do these checks only in
 966# funfl_{unfl,inex}_on() because w/ both exceptions disabled, this
 967# special case will simply exit gracefully with the correct result.
 968
 969# the exceptional possibilities we have left ourselves with are ONLY underflow
 970# and inexact. and, the inexact is such that underflow occurred and was disabled
 971# but inexact was enabled.
 972        btst            &unfl_bit,FPCR_ENABLE(%a6)
 973        bne.b           funfl_unfl_on           # underflow is enabled
 974
 975funfl_chkinex:
 976        btst            &inex2_bit,FPCR_ENABLE(%a6)
 977        bne.b           funfl_inex_on           # inexact is enabled
 978
 979funfl_exit:
 980        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
 981        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
 982        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
 983
 984        unlk            %a6
 985#$#     add.l           &24,%sp
 986        bra.l           _fpsp_done              # nothing to report; all work done
 987
 988# underflow is enabled AND the hardware reported an underflow. so, we have the
 989# EXOP in fp1 (don't forget to save fp0). what to do now?
 990# well, we simply have to go to _real_unfl()!
 991funfl_unfl_on:
 992
 993# The `060 FPU multiplier hardware is such that if the result of a
 994# multiply operation is the smallest possible normalized number
 995# (0x00000000_80000000_00000000), then the machine will take an
 996# underflow exception. Since this is incorrect, we check here to see
 997# if our emulation, after re-doing the operation, decided that
 998# no underflow was called for.
 999        btst            &unfl_bit,FPSR_EXCEPT(%a6)
1000        beq.w           funfl_chkinex           # bogus underflow; go check inexact
1001
1002funfl_unfl_on2:
1003        fmovm.x         &0x40,FP_SRC(%a6)       # save EXOP (fp1) into the fsave frame
1004
1005        mov.w           &0xe003,2+FP_SRC(%a6)   # save exc status
1006
1007        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
1008        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1009        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1010
1011        frestore        FP_SRC(%a6)             # do this after fmovm,other f<op>s!
1012
1013        unlk            %a6
1014
1015        bra.l           _real_unfl              # exit through the underflow "callout"
1016
1017# underflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
1018# we must jump to _real_inex() (provided inexact actually occurred; see below).
1019funfl_inex_on:
1020
1021# The `060 FPU multiplier hardware is such that if the result of a
1022# multiply operation is the smallest possible normalized number
1023# (0x00000000_80000000_00000000), then the machine will take an
1024# underflow exception.
1025# But, whether bogus or not, if inexact is enabled AND it occurred,
1026# then we have to branch to real_inex.
1027
1028        btst            &inex2_bit,FPSR_EXCEPT(%a6)
1029        beq.w           funfl_exit              # inexact did not occur; plain exit
1030
1031funfl_inex_on2:
1032
1033        fmovm.x         &0x40,FP_SRC(%a6)       # save EXOP (fp1) into the fsave frame
1034
1035        mov.b           &0xc4,1+EXC_VOFF(%a6)   # vector offset = 0xc4
1036        mov.w           &0xe001,2+FP_SRC(%a6)   # save exc status
1037
1038        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
1039        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1040        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1041
1042        frestore        FP_SRC(%a6)             # do this after fmovm,other f<op>s!
1043
1044        unlk            %a6
1045
1046        bra.l           _real_inex              # exit through the inexact "callout"
1047
1048#######################################################################
1049funfl_out:
1050
1051
1052#$#     mov.l           FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
1053#$#     mov.l           FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
1054#$#     mov.l           FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
1055
1056# the src operand is definitely a NORM(!), so tag it as such
1057        mov.b           &NORM,STAG(%a6)         # set src optype tag
1058
1059        clr.l           %d0
1060        mov.b           FPCR_MODE(%a6),%d0      # pass rnd prec/mode
1061
1062        and.l           &0xffff00ff,USER_FPSR(%a6) # zero bits 15-8 only; all other fields kept
1063
1064        fmov.l          &0x0,%fpcr              # zero current control regs
1065        fmov.l          &0x0,%fpsr
1066
1067        lea             FP_SRC(%a6),%a0         # pass ptr to src operand
1068
1069        bsr.l           fout                    # emulate the fmove out
1070
1071        btst            &unfl_bit,FPCR_ENABLE(%a6)
1072        bne.w           funfl_unfl_on2          # unfl enabled; enter past the FPSR re-check
1073
1074        btst            &inex2_bit,FPCR_ENABLE(%a6)
1075        bne.w           funfl_inex_on2          # inex enabled; enter past the FPSR re-check
1076
1077        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
1078        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1079        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1080
1081        unlk            %a6
1082#$#     add.l           &24,%sp
1083
1084        btst            &0x7,(%sp)              # is trace on?
1085        beq.l           _fpsp_done              # no
1086
1087        fmov.l          %fpiar,0x8(%sp)         # "Current PC" is in FPIAR
1088        mov.w           &0x2024,0x6(%sp)        # stk fmt = 0x2; voff = 0x024
1089        bra.l           _real_trace             # exit through the trace "callout"
1090
1091#########################################################################
1092# XDEF **************************************************************** #
1093#       _fpsp_unsupp(): 060FPSP entry point for FP "Unimplemented       #
1094#                       Data Type" exception.                           #
1095#                                                                       #
1096#       This handler should be the first code executed upon taking the  #
1097#       FP Unimplemented Data Type exception in an operating system.    #
1098#                                                                       #
1099# XREF **************************************************************** #
1100#       _imem_read_{word,long}() - read instruction word/longword       #
1101#       fix_skewed_ops() - adjust src operand in fsave frame            #
1102#       set_tag_x() - determine optype of src/dst operands              #
1103#       store_fpreg() - store opclass 0 or 2 result to FP regfile       #
1104#       unnorm_fix() - change UNNORM operands to NORM or ZERO           #
1105#       load_fpn2() - load dst operand from FP regfile                  #
1106#       load_fpn1() - load src operand from FP regfile                  #
1107#       fout() - emulate an opclass 3 instruction                       #
1108#       tbl_unsupp - addr of emulation routine table for opclass 0,2    #
1109#       _real_inex() - "callout" to operating system inexact handler    #
1110#       _fpsp_done() - "callout" for exit; work all done                #
1111#       _real_trace() - "callout" for Trace enabled exception           #
1112#       funimp_skew() - adjust fsave src ops to "incorrect" value       #
1113#       _real_snan() - "callout" for SNAN exception                     #
1114#       _real_operr() - "callout" for OPERR exception                   #
1115#       _real_ovfl() - "callout" for OVFL exception                     #
1116#       _real_unfl() - "callout" for UNFL exception                     #
1117#       get_packed() - fetch packed operand from memory                 #
1118#                                                                       #
1119# INPUT *************************************************************** #
1120#       - The system stack contains the "Unimp Data Type" stk frame     #
1121#       - The fsave frame contains the src op (for UNNORM/DENORM)       #
1122#                                                                       #
1123# OUTPUT ************************************************************** #
1124#       If Inexact exception (opclass 3):                               #
1125#       - The system stack is changed to an Inexact exception stk frame #
1126#       If SNAN exception (opclass 3):                                  #
1127#       - The system stack is changed to an SNAN exception stk frame    #
1128#       If OPERR exception (opclass 3):                                 #
1129#       - The system stack is changed to an OPERR exception stk frame   #
1130#       If OVFL exception (opclass 3):                                  #
1131#       - The system stack is changed to an OVFL exception stk frame    #
1132#       If UNFL exception (opclass 3):                                  #
1133#       - The system stack is changed to an UNFL exception stack frame  #
1134#       If Trace exception enabled:                                     #
1135#       - The system stack is changed to a Trace exception stack frame  #
1136#       Else: (normal case)                                             #
1137#       - Correct result has been stored as appropriate                 #
1138#                                                                       #
1139# ALGORITHM *********************************************************** #
1140#       Two main instruction types can enter here: (1) DENORM or UNNORM #
1141# unimplemented data types. These can be either opclass 0,2 or 3        #
1142# instructions, and (2) PACKED unimplemented data format instructions   #
1143# also of opclasses 0,2, or 3.                                          #
1144#       For UNNORM/DENORM opclass 0 and 2, the handler fetches the src  #
1145# operand from the fsave state frame and the dst operand (if dyadic)    #
1146# from the FP register file. The instruction is then emulated by        #
1147# choosing an emulation routine from a table of routines indexed by     #
1148# instruction type. Once the instruction has been emulated and result   #
1149# saved, then we check to see if any enabled exceptions resulted from   #
1150# instruction emulation. If none, then we exit through the "callout"    #
1151# _fpsp_done(). If there is an enabled FP exception, then we insert     #
1152# this exception into the FPU in the fsave state frame and then exit    #
1153# through _fpsp_done().                                                 #
1154#       PACKED opclass 0 and 2 is similar in how the instruction is     #
1155# emulated and exceptions handled. The differences occur in how the     #
1156# handler loads the packed op (by calling get_packed() routine) and     #
1157# by the fact that a Trace exception could be pending for PACKED ops.   #
1158# If a Trace exception is pending, then the current exception stack     #
1159# frame is changed to a Trace exception stack frame and an exit is      #
1160# made through _real_trace().                                           #
1161#       For UNNORM/DENORM opclass 3, the actual move out to memory is   #
1162# performed by calling the routine fout(). If no exception should occur #
1163# as the result of emulation, then an exit either occurs through        #
1164# _fpsp_done() or through _real_trace() if a Trace exception is pending #
1165# (a Trace stack frame must be created here, too). If an FP exception   #
1166# should occur, then we must create an exception stack frame of that    #
1167# type and jump to either _real_snan(), _real_operr(), _real_inex(),    #
1168# _real_unfl(), or _real_ovfl() as appropriate. PACKED opclass 3        #
1169# emulation is performed in a similar manner.                           #
1170#                                                                       #
1171#########################################################################
1172
1173#
1174# (1) DENORM and UNNORM (unimplemented) data types:
1175#
1176#                               post-instruction
1177#                               *****************
1178#                               *      EA       *
1179#        pre-instruction        *               *
1180#       *****************       *****************
1181#       * 0x0 *  0x0dc  *       * 0x3 *  0x0dc  *
1182#       *****************       *****************
1183#       *     Next      *       *     Next      *
1184#       *      PC       *       *      PC       *
1185#       *****************       *****************
1186#       *      SR       *       *      SR       *
1187#       *****************       *****************
1188#
1189# (2) PACKED format (unsupported) opclasses two and three:
1190#       *****************
1191#       *      EA       *
1192#       *               *
1193#       *****************
1194#       * 0x2 *  0x0dc  *
1195#       *****************
1196#       *     Next      *
1197#       *      PC       *
1198#       *****************
1199#       *      SR       *
1200#       *****************
1201#
1202        global          _fpsp_unsupp
1203_fpsp_unsupp:
1204
1205        link.w          %a6,&-LOCAL_SIZE        # init stack frame
1206
1207        fsave           FP_SRC(%a6)             # save fp state
1208
1209        movm.l          &0x0303,EXC_DREGS(%a6)  # save d0-d1/a0-a1
1210        fmovm.l         %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
1211        fmovm.x         &0xc0,EXC_FPREGS(%a6)   # save fp0-fp1 on stack
1212
1213        btst            &0x5,EXC_SR(%a6)        # user or supervisor mode?
1214        bne.b           fu_s
1215fu_u:
1216        mov.l           %usp,%a0                # fetch user stack pointer
1217        mov.l           %a0,EXC_A7(%a6)         # save on stack
1218        bra.b           fu_cont
1219# if the exception is an opclass zero or two unimplemented data type
1220# exception, then the a7' calculated here is wrong since it doesn't
1221# stack an ea. however, we don't need an a7' for this case anyways.
1222fu_s:
1223        lea             0x4+EXC_EA(%a6),%a0     # load old a7'
1224        mov.l           %a0,EXC_A7(%a6)         # save on stack
1225
1226fu_cont:
1227
1228# the FPIAR holds the "current PC" of the faulting instruction
1229# the FPIAR should be set correctly for ALL exceptions passing through
1230# this point.
1231        mov.l           USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
1232        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
1233        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
1234        bsr.l           _imem_read_long         # fetch the instruction words
1235        mov.l           %d0,EXC_OPWORD(%a6)     # store OPWORD and EXTWORD
1236
1237############################
1238
1239        clr.b           SPCOND_FLG(%a6)         # clear special condition flag
1240
1241# Separate opclass three (fpn-to-mem) ops since they have a different
1242# stack frame and protocol.
1243        btst            &0x5,EXC_CMDREG(%a6)    # is it an fmove out?
1244        bne.w           fu_out                  # yes
1245
1246# Separate packed opclass two instructions.
1247        bfextu          EXC_CMDREG(%a6){&0:&6},%d0
1248        cmpi.b          %d0,&0x13
1249        beq.w           fu_in_pack
1250
1251
1252# I'm not sure at this point what FPSR bits are valid for this instruction.
1253# so, since the emulation routines re-create them anyways, zero exception field
1254        andi.l          &0x00ff00ff,USER_FPSR(%a6) # zero exception field
1255
1256        fmov.l          &0x0,%fpcr              # zero current control regs
1257        fmov.l          &0x0,%fpsr
1258
1259# Opclass two w/ memory-to-fpn operation will have an incorrect extended
1260# precision format if the src format was single or double and the
1261# source data type was an INF, NAN, DENORM, or UNNORM
1262        lea             FP_SRC(%a6),%a0         # pass ptr to input
1263        bsr.l           fix_skewed_ops
1264
1265# we don't know whether the src operand or the dst operand (or both) is the
1266# UNNORM or DENORM. call the function that tags the operand type. if the
1267# input is an UNNORM, then convert it to a NORM, DENORM, or ZERO.
1268        lea             FP_SRC(%a6),%a0         # pass: ptr to src op
1269        bsr.l           set_tag_x               # tag the operand type
1270        cmpi.b          %d0,&UNNORM             # is operand an UNNORM?
1271        bne.b           fu_op2                  # no
1272        bsr.l           unnorm_fix              # yes; convert to NORM,DENORM,or ZERO
1273
# fu_op2: d0 holds the src operand tag computed just above. Save it, then
# (for dyadic operations only) load and tag the dst operand, and finally
# dispatch to the emulation routine selected by the extension word.
1274fu_op2:
1275        mov.b           %d0,STAG(%a6)           # save src optype tag
1276
1277        bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1278
1279# bit five of the fp extension word separates the monadic and dyadic operations
1280# at this point
1281        btst            &0x5,1+EXC_CMDREG(%a6)  # is operation monadic or dyadic?
1282        beq.b           fu_extract              # monadic
1283        cmpi.b          1+EXC_CMDREG(%a6),&0x3a # is operation an ftst?
1284        beq.b           fu_extract              # yes, so it's monadic, too
1285
1286        bsr.l           load_fpn2               # load dst into FP_DST
1287
1288        lea             FP_DST(%a6),%a0         # pass: ptr to dst op
1289        bsr.l           set_tag_x               # tag the operand type
1290        cmpi.b          %d0,&UNNORM             # is operand an UNNORM?
1291        bne.b           fu_op2_done             # no
1292        bsr.l           unnorm_fix              # yes; convert to NORM,DENORM,or ZERO
1293fu_op2_done:
1294        mov.b           %d0,DTAG(%a6)           # save dst optype tag
1295
# fu_extract: set up the arguments for the emulation routine:
#       d0 = rnd mode/prec byte from FPCR_MODE, zero extended to a longword
#       d1 = 7-bit extension field, used as the index into tbl_unsupp
#       a0 = ptr to src operand, a1 = ptr to dst operand
1296fu_extract:
1297        clr.l           %d0
1298        mov.b           FPCR_MODE(%a6),%d0      # fetch rnd mode/prec
1299
1300        bfextu          1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
1301
1302        lea             FP_SRC(%a6),%a0
1303        lea             FP_DST(%a6),%a1
1304
# each tbl_unsupp entry is a longword that is added to the address of
# tbl_unsupp itself to form the address of the routine that is called.
1305        mov.l           (tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
1306        jsr             (tbl_unsupp.l,%pc,%d1.l*1)
1307
1308#
1309# Exceptions in order of precedence:
1310#       BSUN    : none
1311#       SNAN    : all dyadic ops
1312#       OPERR   : fsqrt(-NORM)
1313#       OVFL    : all except ftst,fcmp
1314#       UNFL    : all except ftst,fcmp
1315#       DZ      : fdiv
1316#       INEX2   : all except ftst,fcmp
1317#       INEX1   : none (packed doesn't go through here)
1318#
1319
1320# we determine the highest priority exception(if any) set by the
1321# emulation routine that has also been enabled by the user.
1322        mov.b           FPCR_ENABLE(%a6),%d0    # fetch exceptions set
1323        bne.b           fu_in_ena               # some are enabled
1324
# fu_in_cont: exit path used when no enabled exception has to be reported.
# The result is stored through store_fpreg unless bits 3-5 of the extension
# byte are all set, then the saved registers are restored and control
# leaves through _fpsp_done.
1325fu_in_cont:
1326# fcmp and ftst do not store any result.
1327        mov.b           1+EXC_CMDREG(%a6),%d0   # fetch extension
1328        andi.b          &0x38,%d0               # extract bits 3-5
1329        cmpi.b          %d0,&0x38               # is instr fcmp or ftst?
1330        beq.b           fu_in_exit              # yes
1331
1332        bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1333        bsr.l           store_fpreg             # store the result
1334
# fu_in_exit: restore fp0/fp1, the fp control registers and d0-d1/a0-a1
# from the save area, drop the a6 frame and exit.
1335fu_in_exit:
1336
1337        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1338        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1339        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1340
1341        unlk            %a6
1342
1343        bra.l           _fpsp_done
1344
# fu_in_ena: entered with d0 = FPCR_ENABLE byte (non-zero). Mask it with the
# exceptions that were actually set and locate the first set bit. bfffo
# searches the low byte of d0 (bit field offset 24, width 8), so a hit
# leaves a value in the range 24-31 in d0.
1345fu_in_ena:
1346        and.b           FPSR_EXCEPT(%a6),%d0    # keep only ones enabled
1347        bfffo           %d0{&24:&8},%d0         # find highest priority exception
1348        bne.b           fu_in_exc               # there is at least one set
1349
1350#
1351# No exceptions occurred that were also enabled. Now:
1352#
1353#       if (OVFL && ovfl_disabled && inexact_enabled) {
1354#           branch to _real_inex() (even if the result was exact!);
1355#       } else {
1356#           save the result in the proper fp reg (unless the op is fcmp or ftst);
1357#           return;
1358#       }
1359#
1360        btst            &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1361        beq.b           fu_in_cont              # no
1362
1363fu_in_ovflchk:
1364        btst            &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1365        beq.b           fu_in_cont              # no
1366        bra.w           fu_in_exc_ovfl          # go insert overflow frame
1367
1368#
1369# An exception occurred and that exception was enabled:
1370#
1371#       shift enabled exception field into lo byte of d0;
1372#       if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
1373#           ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
1374#               /*
1375#                * this is the case where we must call _real_inex() now or else
1376#                * there will be no other way to pass it the exceptional operand
1377#                */
1378#               call _real_inex();
1379#       } else {
1380#               restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
1381#       }
1382#
# fu_in_exc: entered with d0 = 24-31 (bfffo result for the highest priority
# exception that is both set and enabled). After the subtraction d0 is the
# index 0-7 used with tbl_except below.
1383fu_in_exc:
1384        subi.l          &24,%d0                 # fix offset to be 0-7
1385        cmpi.b          %d0,&0x6                # is exception INEX? (6)
1386        bne.b           fu_in_exc_exit          # no
1387
1388# the enabled exception was inexact
1389        btst            &unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
1390        bne.w           fu_in_exc_unfl          # yes
1391        btst            &ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
1392        bne.w           fu_in_exc_ovfl          # yes
1393
1394# here, we insert the correct fsave status value into the fsave frame for the
1395# corresponding exception. the operand in the fsave frame should be the original
1396# src operand.
1397fu_in_exc_exit:
# d0 (the table index) is preserved on the stack across the funimp_skew call.
1398        mov.l           %d0,-(%sp)              # save d0
1399        bsr.l           funimp_skew             # skew sgl or dbl inputs
1400        mov.l           (%sp)+,%d0              # restore d0
1401
1402        mov.w           (tbl_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # create exc status
1403
1404        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1405        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1406        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1407
1408        frestore        FP_SRC(%a6)             # restore src op
1409
1410        unlk            %a6
1411
1412        bra.l           _fpsp_done
1413
# tbl_except: one status word per exception index 0-7; the selected word is
# written at offset 2 of the frame at FP_SRC before the frestore above.
# Indexes 6 and 7 share the same value.
1414tbl_except:
1415        short           0xe000,0xe006,0xe004,0xe005
1416        short           0xe003,0xe002,0xe001,0xe001
1417
# inexact was the enabled exception but a disabled underflow/overflow also
# occurred: replace the table index with 4 (underflow entry) or 3 (overflow
# entry) and take the normal exit above.
1418fu_in_exc_unfl:
1419        mov.w           &0x4,%d0
1420        bra.b           fu_in_exc_exit
1421fu_in_exc_ovfl:
1422        mov.w           &0x03,%d0
1423        bra.b           fu_in_exc_exit
1424
1425# If the input operand to this operation was opclass two and a single
1426# or double precision denorm, inf, or nan, the operand needs to be
1427# "corrected" in order to have the proper equivalent extended precision
1428# number.
#
# In:    a0 = ptr to the operand (accessed via LOCAL_EX/LOCAL_HI/LOCAL_LO)
#        EXC_CMDREG(%a6) = command word; its top six bits select the case
# Out:   operand at (a0) rewritten in place when a correction applies
# Uses:  d0 is overwritten; norm is called for the denorm cases and is relied
#        upon to leave a0 intact and to return the shift amount in d0.
1429        global          fix_skewed_ops
1430fix_skewed_ops:
1431        bfextu          EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt
1432        cmpi.b          %d0,&0x11               # is class = 2 & fmt = sgl?
1433        beq.b           fso_sgl                 # yes
1434        cmpi.b          %d0,&0x15               # is class = 2 & fmt = dbl?
1435        beq.b           fso_dbl                 # yes
1436        rts                                     # no
1437
# single precision source: only exponents $3f80 (denorm or zero) and
# $407f (inf or nan) need fixing; anything else is returned untouched.
1438fso_sgl:
1439        mov.w           LOCAL_EX(%a0),%d0       # fetch src exponent
1440        andi.w          &0x7fff,%d0             # strip sign
1441        cmpi.w          %d0,&0x3f80             # is |exp| == $3f80?
1442        beq.b           fso_sgl_dnrm_zero       # yes
1443        cmpi.w          %d0,&0x407f             # no; is |exp| == $407f?
1444        beq.b           fso_infnan              # yes
1445        rts                                     # no
1446
# the andi.l result doubles as the zero test: for single precision only
# LOCAL_HI is examined.
1447fso_sgl_dnrm_zero:
1448        andi.l          &0x7fffffff,LOCAL_HI(%a0) # clear j-bit
1449        beq.b           fso_zero                # it's a skewed zero
1450fso_sgl_dnrm:
1451# here, we count on norm not to alter a0...
1452        bsr.l           norm                    # normalize mantissa
# new exponent = $3f81 - shift amount; the sign bit of LOCAL_EX is kept.
1453        neg.w           %d0                     # -shft amt
1454        addi.w          &0x3f81,%d0             # adjust new exponent
1455        andi.w          &0x8000,LOCAL_EX(%a0)   # clear old exponent
1456        or.w            %d0,LOCAL_EX(%a0)       # insert new exponent
1457        rts
1458
# shared by the sgl and dbl paths: keep only the sign bit of the exponent word.
1459fso_zero:
1460        andi.w          &0x8000,LOCAL_EX(%a0)   # clear bogus exponent
1461        rts
1462
# shared by the sgl and dbl paths: force the exponent field to all ones
# (sign bit untouched) and clear the top bit of the mantissa.
1463fso_infnan:
1464        andi.b          &0x7f,LOCAL_HI(%a0)     # clear j-bit
1465        ori.w           &0x7fff,LOCAL_EX(%a0)   # make exponent = $7fff
1466        rts
1467
# double precision source: same scheme with exponents $3c00 and $43ff.
1468fso_dbl:
1469        mov.w           LOCAL_EX(%a0),%d0       # fetch src exponent
1470        andi.w          &0x7fff,%d0             # strip sign
1471        cmpi.w          %d0,&0x3c00             # is |exp| == $3c00?
1472        beq.b           fso_dbl_dnrm_zero       # yes
1473        cmpi.w          %d0,&0x43ff             # no; is |exp| == $43ff?
1474        beq.b           fso_infnan              # yes
1475        rts                                     # no
1476
# for double precision both LOCAL_HI and LOCAL_LO must be zero for the
# operand to count as a zero.
1477fso_dbl_dnrm_zero:
1478        andi.l          &0x7fffffff,LOCAL_HI(%a0) # clear j-bit
1479        bne.b           fso_dbl_dnrm            # it's a skewed denorm
1480        tst.l           LOCAL_LO(%a0)           # is it a zero?
1481        beq.b           fso_zero                # yes
1482fso_dbl_dnrm:
1483# here, we count on norm not to alter a0...
1484        bsr.l           norm                    # normalize mantissa
# new exponent = $3c01 - shift amount; the sign bit of LOCAL_EX is kept.
1485        neg.w           %d0                     # -shft amt
1486        addi.w          &0x3c01,%d0             # adjust new exponent
1487        andi.w          &0x8000,LOCAL_EX(%a0)   # clear old exponent
1488        or.w            %d0,LOCAL_EX(%a0)       # insert new exponent
1489        rts
1490
1491#################################################################
1492
1493# fmove out took an unimplemented data type exception.
1494# the src operand is in FP_SRC. Call _fout() to write out the result and
1495# to determine which exceptions, if any, to take.
1496fu_out:
1497
1498# Separate packed move outs from the UNNORM and DENORM move outs.
# bits 3-5 of the command word equal to 0x3 or 0x7 select the packed path.
1499        bfextu          EXC_CMDREG(%a6){&3:&3},%d0
1500        cmpi.b          %d0,&0x3
1501        beq.w           fu_out_pack
1502        cmpi.b          %d0,&0x7
1503        beq.w           fu_out_pack
1504
1505
1506# I'm not sure at this point what FPSR bits are valid for this instruction.
1507# so, since the emulation routines re-create them anyways, zero exception field.
1508# fmove out doesn't affect ccodes.
1509        and.l           &0xffff00ff,USER_FPSR(%a6) # zero exception field
1510
1511        fmov.l          &0x0,%fpcr              # zero current control regs
1512        fmov.l          &0x0,%fpsr
1513
1514# the src can ONLY be a DENORM or an UNNORM! so, don't make any big subroutine
1515# call here. just figure out what it is...
# a zero exponent field is taken to mean DENORM; anything else is handed
# to unnorm_fix and the tag it leaves in d0 is stored.
1516        mov.w           FP_SRC_EX(%a6),%d0      # get exponent
1517        andi.w          &0x7fff,%d0             # strip sign
1518        beq.b           fu_out_denorm           # it's a DENORM
1519
1520        lea             FP_SRC(%a6),%a0
1521        bsr.l           unnorm_fix              # yes; fix it
1522
1523        mov.b           %d0,STAG(%a6)
1524
1525        bra.b           fu_out_cont
1526fu_out_denorm:
1527        mov.b           &DENORM,STAG(%a6)
1528fu_out_cont:
1529
# arguments for fout: d0 = rnd mode/prec byte (zero extended), a0 = ptr to src.
1530        clr.l           %d0
1531        mov.b           FPCR_MODE(%a6),%d0      # fetch rnd mode/prec
1532
1533        lea             FP_SRC(%a6),%a0         # pass ptr to src operand
1534
1535        mov.l           (%a6),EXC_A6(%a6)       # in case a6 changes
1536        bsr.l           fout                    # call fmove out routine
1537
1538# Exceptions in order of precedence:
1539#       BSUN    : none
1540#       SNAN    : none
1541#       OPERR   : fmove.{b,w,l} out of large UNNORM
1542#       OVFL    : fmove.{s,d}
1543#       UNFL    : fmove.{s,d,x}
1544#       DZ      : none
1545#       INEX2   : all
1546#       INEX1   : none (packed doesn't travel through here)
1547
1548# determine the highest priority exception(if any) set by the
1549# emulation routine that has also been enabled by the user.
1550        mov.b           FPCR_ENABLE(%a6),%d0    # fetch exceptions enabled
1551        bne.w           fu_out_ena              # some are enabled
1552
# fu_out_done: normal completion of an fmove out. Writes the possibly updated
# a6 value back into the frame link, updates the user stack pointer when the
# exception came from user mode, restores registers and exits through
# _fpsp_done, or through fu_out_trace when bit 7 of the byte at (sp) is set.
1553fu_out_done:
1554
1555        mov.l           EXC_A6(%a6),(%a6)       # in case a6 changed
1556
1557# on extended precision opclass three instructions using pre-decrement or
1558# post-increment addressing mode, the address register is not updated. if the
1559# address register was the stack pointer used from user mode, then let's update
1560# it here. if it was used from supervisor mode, then we have to handle this
1561# as a special case.
# bit 5 of the byte at EXC_SR set means the exception came from supervisor mode.
1562        btst            &0x5,EXC_SR(%a6)
1563        bne.b           fu_out_done_s
1564
1565        mov.l           EXC_A7(%a6),%a0         # restore a7
1566        mov.l           %a0,%usp
1567
1568fu_out_done_cont:
1569        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1570        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1571        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1572
1573        unlk            %a6
1574
1575        btst            &0x7,(%sp)              # is trace on?
1576        bne.b           fu_out_trace            # yes
1577
1578        bra.l           _fpsp_done
1579
1580# is the ea mode pre-decrement of the stack pointer from supervisor mode?
1581# ("fmov.x fpm,-(a7)") if so, the exception frame is moved 12 bytes lower
# on the stack and the 12-byte extended precision result is written where
# the frame used to begin. otherwise the normal exit path is taken.
1582fu_out_done_s:
1583        cmpi.b          SPCOND_FLG(%a6),&mda7_flg
1584        bne.b           fu_out_done_cont
1585
1586# the extended precision result is still in fp0. but, we need to save it
1587# somewhere on the stack until we can copy it to its final resting place.
1588# here, we're counting on the top of the stack to be the old place-holders
1589# for fp0/fp1 which have already been restored. that way, we can write
1590# over those destinations with the shifted stack frame.
1591        fmovm.x         &0x80,FP_SRC(%a6)       # put answer on stack
1592
1593        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1594        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1595        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1596
# a6 is reloaded directly instead of using unlk, so sp still points at the
# bottom of the local area and everything below is addressed sp-relative
# with LOCAL_SIZE added to the usual a6-relative offsets.
1597        mov.l           (%a6),%a6               # restore frame pointer
1598
# move the first two longwords of the exception frame down by 0xc bytes.
1599        mov.l           LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
1600        mov.l           LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
1601
1602# now, copy the result to the proper place on the stack
1603        mov.l           LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
1604        mov.l           LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
1605        mov.l           LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
1606
# release the local area; sp is then expected to point at the relocated
# frame, whose first byte is tested for the trace bit below.
1607        add.l           &LOCAL_SIZE-0x8,%sp
1608
1609        btst            &0x7,(%sp)
1610        bne.b           fu_out_trace
1611
1612        bra.l           _fpsp_done
1613
# fu_out_ena: entered with d0 = FPCR_ENABLE byte (non-zero). Keep only the
# enabled exceptions that were also set and find the first set bit; on a
# hit bfffo leaves 24-31 in d0 for fu_out_exc.
1614fu_out_ena:
1615        and.b           FPSR_EXCEPT(%a6),%d0    # keep only ones enabled
1616        bfffo           %d0{&24:&8},%d0         # find highest priority exception
1617        bne.b           fu_out_exc              # there is at least one set
1618
1619# no exceptions were set.
1620# if a disabled overflow occurred and inexact was enabled but the result
1621# was exact, then a branch to _real_inex() is made.
1622        btst            &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1623        beq.w           fu_out_done             # no
1624
1625fu_out_ovflchk:
1626        btst            &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1627        beq.w           fu_out_done             # no
1628        bra.w           fu_inex                 # yes
1629
1630#
1631# The fp move out that took the "Unimplemented Data Type" exception was
1632# being traced. Since the stack frames are similar, get the "current" PC
1633# from FPIAR and put it in the trace stack frame then jump to _real_trace().
1634#
1635#                 UNSUPP FRAME             TRACE FRAME
1636#               *****************       *****************
1637#               *      EA       *       *    Current    *
1638#               *               *       *      PC       *
1639#               *****************       *****************
1640#               * 0x3 *  0x0dc  *       * 0x2 *  0x024  *
1641#               *****************       *****************
1642#               *     Next      *       *     Next      *
1643#               *      PC       *       *      PC       *
1644#               *****************       *****************
1645#               *      SR       *       *      SR       *
1646#               *****************       *****************
1647#
# fu_out_trace: sp points at the exception frame here. Overwrite the word at
# offset 6 with 0x2024 and the longword at offset 8 with the FPIAR contents,
# as laid out in the TRACE FRAME diagram above, then go to _real_trace.
1648fu_out_trace:
1649        mov.w           &0x2024,0x6(%sp)
1650        fmov.l          %fpiar,0x8(%sp)
1651        bra.l           _real_trace
1652
1653# an exception occurred and that exception was enabled.
# entered with d0 = 24-31 from the bfffo in fu_out_ena.
1654fu_out_exc:
1655        subi.l          &24,%d0                 # fix offset to be 0-7
1656
1657# we don't mess with the existing fsave frame. just re-insert it and
1658# jump to the "_real_{}()" handler...
# each table entry is a 16-bit offset relative to tbl_fu_out.
1659        mov.w           (tbl_fu_out.b,%pc,%d0.w*2),%d0
1660        jmp             (tbl_fu_out.b,%pc,%d0.w*1)
1661
1662        swbeg           &0x8
# entries marked as impossible hold offset zero, i.e. they point back at
# tbl_fu_out itself.
1663tbl_fu_out:
1664        short           tbl_fu_out      - tbl_fu_out    # BSUN can't happen
1665        short           tbl_fu_out      - tbl_fu_out    # SNAN can't happen
1666        short           fu_operr        - tbl_fu_out    # OPERR
1667        short           fu_ovfl         - tbl_fu_out    # OVFL
1668        short           fu_unfl         - tbl_fu_out    # UNFL
1669        short           tbl_fu_out      - tbl_fu_out    # DZ can't happen
1670        short           fu_inex         - tbl_fu_out    # INEX2
1671        short           tbl_fu_out      - tbl_fu_out    # INEX1 won't make it here
1672
1673# for snan,operr,ovfl,unfl, src op is still in FP_SRC so just
1674# frestore it.
# NOTE(review): the SNAN slot of tbl_fu_out does not point here and no branch
# to fu_snan is visible in this part of the file; confirm whether it is used.
1675fu_snan:
1676        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1677        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1678        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1679
# patch the exception frame and the fsave frame status word for SNAN.
1680        mov.w           &0x30d8,EXC_VOFF(%a6)   # vector offset = 0xd8
1681        mov.w           &0xe006,2+FP_SRC(%a6)
1682
1683        frestore        FP_SRC(%a6)
1684
1685        unlk            %a6
1686
1687
1688        bra.l           _real_snan
1689
# fu_operr: enabled operand error on fmove out (reached through tbl_fu_out).
# Restores the saved registers, rewrites the frame vector offset word and the
# status word at offset 2 of the frame in FP_SRC, reloads that frame with
# frestore and hands over to _real_operr.
1690fu_operr:
1691        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1692        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1693        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1694
1695        mov.w           &0x30d0,EXC_VOFF(%a6)   # vector offset = 0xd0
1696        mov.w           &0xe004,2+FP_SRC(%a6)
1697
1698        frestore        FP_SRC(%a6)
1699
1700        unlk            %a6
1701
1702
1703        bra.l           _real_operr
1704
# fu_ovfl: enabled overflow on fmove out (reached through tbl_fu_out).
# fp1 (register mask 0x40) is written to FP_SRC as the EXOP before fp0/fp1
# are restored, so the frestore below reloads the EXOP rather than the
# original source operand.
1705fu_ovfl:
1706        fmovm.x         &0x40,FP_SRC(%a6)       # save EXOP to the stack
1707
1708        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1709        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1710        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1711
1712        mov.w           &0x30d4,EXC_VOFF(%a6)   # vector offset = 0xd4
1713        mov.w           &0xe005,2+FP_SRC(%a6)
1714
1715        frestore        FP_SRC(%a6)             # restore EXOP
1716
1717        unlk            %a6
1718
1719        bra.l           _real_ovfl
1720
1721# underflow can happen for extended precision. extended precision opclass
1722# three instruction exceptions don't update the stack pointer. so, if the
1723# exception occurred from user mode, then simply update a7 and exit normally.
1724# if the exception occurred from supervisor mode, check if the <ea> mode was
# -(sp); that case is handled separately at fu_unfl_s.
1725fu_unfl:
1726        mov.l           EXC_A6(%a6),(%a6)       # restore a6
1727
1728        btst            &0x5,EXC_SR(%a6)
1729        bne.w           fu_unfl_s
1730
1731        mov.l           EXC_A7(%a6),%a0         # restore a7 whether we need
1732        mov.l           %a0,%usp                # to or not...
1733
# fu_unfl_cont: fp1 (register mask 0x40) is written to FP_SRC as the EXOP,
# the frame is marked for underflow and control passes to _real_unfl.
1734fu_unfl_cont:
1735        fmovm.x         &0x40,FP_SRC(%a6)       # save EXOP to the stack
1736
1737        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1738        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1739        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1740
1741        mov.w           &0x30cc,EXC_VOFF(%a6)   # vector offset = 0xcc
1742        mov.w           &0xe003,2+FP_SRC(%a6)
1743
1744        frestore        FP_SRC(%a6)             # restore EXOP
1745
1746        unlk            %a6
1747
1748        bra.l           _real_unfl
1749
# fu_unfl_s: underflow taken from supervisor mode. Unless the special
# condition flag equals mda7_flg the common path fu_unfl_cont is used.
# Otherwise the exception frame (three longwords, including the EA field) is
# moved 12 bytes lower and the result is stored where the frame used to begin.
1750fu_unfl_s:
1751        cmpi.b          SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?
1752        bne.b           fu_unfl_cont
1753
1754# the extended precision result is still in fp0. but, we need to save it
1755# somewhere on the stack until we can copy it to its final resting place
1756# (where the exc frame is currently). make sure it's not at the top of the
1757# frame or it will get overwritten when the exc stack frame is shifted "down".
1758        fmovm.x         &0x80,FP_SRC(%a6)       # put answer on stack
1759        fmovm.x         &0x40,FP_DST(%a6)       # put EXOP on stack
1760
1761        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1762        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1763        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1764
# here the EXOP frame lives in FP_DST, so the status word is patched there.
1765        mov.w           &0x30cc,EXC_VOFF(%a6)   # vector offset = 0xcc
1766        mov.w           &0xe003,2+FP_DST(%a6)
1767
1768        frestore        FP_DST(%a6)             # restore EXOP
1769
# a6 is reloaded without unlk, so sp still points at the bottom of the
# local area; the moves below are sp-relative with LOCAL_SIZE added.
1770        mov.l           (%a6),%a6               # restore frame pointer
1771
1772        mov.l           LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
1773        mov.l           LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
1774        mov.l           LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
1775
1776# now, copy the result to the proper place on the stack
1777        mov.l           LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
1778        mov.l           LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
1779        mov.l           LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
1780
1781        add.l           &LOCAL_SIZE-0x8,%sp
1782
1783        bra.l           _real_unfl
1784
1785# fmove in and out enter here.
# fp1 (register mask 0x40) is written to FP_SRC as the EXOP, the frame is
# marked for inexact (vector offset 0xc4, status word 0xe001) and control
# passes to _real_inex.
1786fu_inex:
1787        fmovm.x         &0x40,FP_SRC(%a6)       # save EXOP to the stack
1788
1789        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1790        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1791        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1792
1793        mov.w           &0x30c4,EXC_VOFF(%a6)   # vector offset = 0xc4
1794        mov.w           &0xe001,2+FP_SRC(%a6)
1795
1796        frestore        FP_SRC(%a6)             # restore EXOP
1797
1798        unlk            %a6
1799
1800
1801        bra.l           _real_inex
1802
1803#########################################################################
1804#########################################################################
# fu_in_pack: opclass two instruction with a packed source operand. Mirrors
# the non-packed path above, except that the source is fetched with
# get_packed and its tag is stored without an UNNORM fix-up.
1805fu_in_pack:
1806
1807
1808# I'm not sure at this point what FPSR bits are valid for this instruction.
1809# so, since the emulation routines re-create them anyways, zero exception field
1810        andi.l          &0x0ff00ff,USER_FPSR(%a6) # zero exception field
1811
1812        fmov.l          &0x0,%fpcr              # zero current control regs
1813        fmov.l          &0x0,%fpsr
1814
1815        bsr.l           get_packed              # fetch packed src operand
1816
1817        lea             FP_SRC(%a6),%a0         # pass ptr to src
1818        bsr.l           set_tag_x               # set src optype tag
1819
1820        mov.b           %d0,STAG(%a6)           # save src optype tag
1821
1822        bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1823
1824# bit five of the fp extension word separates the monadic and dyadic operations
1825# at this point
1826        btst            &0x5,1+EXC_CMDREG(%a6)  # is operation monadic or dyadic?
1827        beq.b           fu_extract_p            # monadic
1828        cmpi.b          1+EXC_CMDREG(%a6),&0x3a # is operation an ftst?
1829        beq.b           fu_extract_p            # yes, so it's monadic, too
1830
1831        bsr.l           load_fpn2               # load dst into FP_DST
1832
1833        lea             FP_DST(%a6),%a0         # pass: ptr to dst op
1834        bsr.l           set_tag_x               # tag the operand type
1835        cmpi.b          %d0,&UNNORM             # is operand an UNNORM?
1836        bne.b           fu_op2_done_p           # no
1837        bsr.l           unnorm_fix              # yes; convert to NORM,DENORM,or ZERO
1838fu_op2_done_p:
1839        mov.b           %d0,DTAG(%a6)           # save dst optype tag
1840
# fu_extract_p: arguments for the emulation routine:
#       d0 = rnd mode/prec byte from FPCR_MODE, zero extended
#       d1 = 7-bit extension field, index into tbl_unsupp
#       a0 = ptr to src operand, a1 = ptr to dst operand
1841fu_extract_p:
1842        clr.l           %d0
1843        mov.b           FPCR_MODE(%a6),%d0      # fetch rnd mode/prec
1844
1845        bfextu          1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
1846
1847        lea             FP_SRC(%a6),%a0
1848        lea             FP_DST(%a6),%a1
1849
# each tbl_unsupp entry is a longword offset from tbl_unsupp to the routine.
1850        mov.l           (tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
1851        jsr             (tbl_unsupp.l,%pc,%d1.l*1)
1852
1853#
1854# Exceptions in order of precedence:
1855#       BSUN    : none
1856#       SNAN    : all dyadic ops
1857#       OPERR   : fsqrt(-NORM)
1858#       OVFL    : all except ftst,fcmp
1859#       UNFL    : all except ftst,fcmp
1860#       DZ      : fdiv
1861#       INEX2   : all except ftst,fcmp
1862#       INEX1   : all
1863#
1864
1865# we determine the highest priority exception(if any) set by the
1866# emulation routine that has also been enabled by the user.
1867        mov.b           FPCR_ENABLE(%a6),%d0    # fetch exceptions enabled
1868        bne.w           fu_in_ena_p             # some are enabled
1869
# fu_in_cont_p: exit path for the packed case when no enabled exception has
# to be reported. Stores the result (except for fcmp/ftst), updates the user
# stack pointer when the exception came from user mode, restores registers
# and exits through _fpsp_done or fu_trace_p.
1870fu_in_cont_p:
1871# fcmp and ftst do not store any result.
1872        mov.b           1+EXC_CMDREG(%a6),%d0   # fetch extension
1873        andi.b          &0x38,%d0               # extract bits 3-5
1874        cmpi.b          %d0,&0x38               # is instr fcmp or ftst?
1875        beq.b           fu_in_exit_p            # yes
1876
1877        bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1878        bsr.l           store_fpreg             # store the result
1879
1880fu_in_exit_p:
1881
1882        btst            &0x5,EXC_SR(%a6)        # user or supervisor?
1883        bne.w           fu_in_exit_s_p          # supervisor
1884
1885        mov.l           EXC_A7(%a6),%a0         # update user a7
1886        mov.l           %a0,%usp
1887
1888fu_in_exit_cont_p:
1889        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1890        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1891        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1892
1893        unlk            %a6                     # unravel stack frame
1894
1895        btst            &0x7,(%sp)              # is trace on?
1896        bne.w           fu_trace_p              # yes
1897
1898        bra.l           _fpsp_done              # exit to os
1899
1900# the exception occurred in supervisor mode. check to see if the
1901# addressing mode was (a7)+. if so, we'll need to shift the
1902# stack frame "up".
1903fu_in_exit_s_p:
1904        btst            &mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+
1905        beq.b           fu_in_exit_cont_p       # no
1906
1907        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1908        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1909        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1910
1911        unlk            %a6                     # unravel stack frame
1912
1913# shift the stack frame "up". we don't really care about the <ea> field.
# the first two longwords of the frame are moved 0xc bytes higher (the
# higher longword is copied first), then sp is advanced by the same amount.
1914        mov.l           0x4(%sp),0x10(%sp)
1915        mov.l           0x0(%sp),0xc(%sp)
1916        add.l           &0xc,%sp
1917
1918        btst            &0x7,(%sp)              # is trace on?
1919        bne.w           fu_trace_p              # yes
1920
1921        bra.l           _fpsp_done              # exit to os
1922
# fu_in_ena_p: entered with d0 = FPCR_ENABLE byte (non-zero). Keep only the
# enabled exceptions that were also set and find the first set bit; on a
# hit bfffo leaves 24-31 in d0 for fu_in_exc_p.
1923fu_in_ena_p:
1924        and.b           FPSR_EXCEPT(%a6),%d0    # keep only ones enabled & set
1925        bfffo           %d0{&24:&8},%d0         # find highest priority exception
1926        bne.b           fu_in_exc_p             # at least one was set
1927
1928#
1929# No exceptions occurred that were also enabled. Now:
1930#
1931#       if (OVFL && ovfl_disabled && inexact_enabled) {
1932#           branch to _real_inex() (even if the result was exact!);
1933#       } else {
1934#           save the result in the proper fp reg (unless the op is fcmp or ftst);
1935#           return;
1936#       }
1937#
1938        btst            &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1939        beq.w           fu_in_cont_p            # no
1940
1941fu_in_ovflchk_p:
1942        btst            &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1943        beq.w           fu_in_cont_p            # no
1944        bra.w           fu_in_exc_ovfl_p        # do _real_inex() now
1945
1946#
1947# An exception occurred and that exception was enabled:
1948#
1949#       shift enabled exception field into lo byte of d0;
1950#       if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
1951#           ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
1952#               /*
1953#                * this is the case where we must call _real_inex() now or else
1954#                * there will be no other way to pass it the exceptional operand
1955#                */
1956#               call _real_inex();
1957#       } else {
1958#               restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
1959#       }
1960#
1961fu_in_exc_p:
1962        subi.l          &24,%d0                 # fix offset to be 0-8
1963        cmpi.b          %d0,&0x6                # is exception INEX? (6 or 7)
1964        blt.b           fu_in_exc_exit_p        # no
1965
1966# the enabled exception was inexact
1967        btst            &unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
1968        bne.w           fu_in_exc_unfl_p        # yes
1969        btst            &ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
1970        bne.w           fu_in_exc_ovfl_p        # yes
1971
1972# here, we insert the correct fsave status value into the fsave frame for the
1973# corresponding exception. the operand in the fsave frame should be the original
1974# src operand.
1975# as a reminder for future predicted pain and agony, we are passing in fsave the
1976# "non-skewed" operand for cases of sgl and dbl src INFs,NANs, and DENORMs.
1977# this is INCORRECT for enabled SNAN which would give to the user the skewed SNAN!!!
# fu_in_exc_exit_p: exit path that re-arms the FPU with an exception frame.
# In:   d0 = index into tbl_except_p selecting the fsave status word.
# User mode: the saved user stack pointer (EXC_A7) is written back to %usp.
# Supervisor mode: handled at fu_in_exc_exit_s_p, which may rejoin at
# fu_in_exc_exit_cont_p.
1978fu_in_exc_exit_p:
1979        btst            &0x5,EXC_SR(%a6)        # user or supervisor?
1980        bne.w           fu_in_exc_exit_s_p      # supervisor
1981
1982        mov.l           EXC_A7(%a6),%a0         # update user a7
1983        mov.l           %a0,%usp
1984
# common tail: patch the status word of the fsave image held in FP_SRC,
# restore the saved registers, then frestore that image into the FPU.
1985fu_in_exc_exit_cont_p:
1986        mov.w           (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6)
1987
1988        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1989        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1990        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1991
1992        frestore        FP_SRC(%a6)             # restore src op
1993
1994        unlk            %a6
1995
# after unlk, (%sp) is the first byte of the exception frame; bit 7 of that
# byte is tested as the trace flag.
1996        btst            &0x7,(%sp)              # is trace enabled?
1997        bne.w           fu_trace_p              # yes
1998
1999        bra.l           _fpsp_done
2000
# tbl_except_p: fsave status words, one 16-bit entry per exception index
# (0-7) as computed in fu_in_exc_p. Entries confirmed by uses in this file:
# index 1 = 0xe006 (SNAN), 2 = 0xe004 (OPERR), 3 = 0xe005 (OVFL, see
# fu_in_exc_ovfl_p), 4 = 0xe003 (UNFL, see fu_in_exc_unfl_p), 6/7 = 0xe001
# (INEX). Index 0 and 5 presumably correspond to BSUN and DZ -- TODO confirm.
2001tbl_except_p:
2002        short           0xe000,0xe006,0xe004,0xe005
2003        short           0xe003,0xe002,0xe001,0xe001
2004
# inexact was the enabled exception but a disabled overflow also occurred:
# force table index 3 (status word 0xe005) and take the normal exit path.
2005fu_in_exc_ovfl_p:
2006        mov.w           &0x3,%d0
2007        bra.w           fu_in_exc_exit_p
2008
# same as above for a disabled underflow: force table index 4 (0xe003).
2009fu_in_exc_unfl_p:
2010        mov.w           &0x4,%d0
2011        bra.w           fu_in_exc_exit_p
2012
# fu_in_exc_exit_s_p: supervisor-mode variant of fu_in_exc_exit_p.
# In:   d0 = index into tbl_except_p.
# If mia7_bit is clear in SPCOND_FLG the common tail is used unchanged.
# Otherwise (presumably the operand was fetched with (a7)+ from the
# supervisor stack -- flag definition is outside this view, TODO confirm)
# the exception frame is relocated 12 bytes higher before exiting.
2013fu_in_exc_exit_s_p:
2014        btst            &mia7_bit,SPCOND_FLG(%a6)
2015        beq.b           fu_in_exc_exit_cont_p
2016
2017        mov.w           (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6)
2018
2019        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
2020        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2021        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2022
2023        frestore        FP_SRC(%a6)             # restore src op
2024
2025        unlk            %a6                     # unravel stack frame
2026
2027# shift stack frame "up". who cares about <ea> field.
# only the first two longwords of the frame are moved (by 0xc bytes); the
# third longword is not copied, so whatever was at the destination stays there.
2028        mov.l           0x4(%sp),0x10(%sp)
2029        mov.l           0x0(%sp),0xc(%sp)
2030        add.l           &0xc,%sp
2031
2032        btst            &0x7,(%sp)              # is trace on?
2033        bne.b           fu_trace_p              # yes
2034
2035        bra.l           _fpsp_done              # exit to os
2036
2037#
2038# The opclass two PACKED instruction that took an "Unimplemented Data Type"
2039# exception was being traced. Make the "current" PC the FPIAR and put it in the
2040# trace stack frame then jump to _real_trace().
2041#
2042#                 UNSUPP FRAME             TRACE FRAME
2043#               *****************       *****************
2044#               *      EA       *       *    Current    *
2045#               *               *       *      PC       *
2046#               *****************       *****************
2047#               * 0x2 * 0x0dc   *       * 0x2 *  0x024  *
2048#               *****************       *****************
2049#               *     Next      *       *     Next      *
2050#               *      PC       *       *      PC       *
2051#               *****************       *****************
2052#               *      SR       *       *      SR       *
2053#               *****************       *****************
# fu_trace_p: rewrite the frame at (%sp) into a trace frame, in place.
# The word at 0x6(%sp) becomes 0x2024 (format 0x2, vector offset 0x024) and
# the longword at 0x8(%sp) receives the FPIAR as the "Current PC"; SR and
# "Next PC" are left as they are. Exits to _real_trace.
2054fu_trace_p:
2055        mov.w           &0x2024,0x6(%sp)
2056        fmov.l          %fpiar,0x8(%sp)
2057
2058        bra.l           _real_trace
2059
2060#########################################################
2061#########################################################
# fu_out_pack: emulate a packed-decimal "fmove out" (see the fu_snan_s_p
# comments: "fmove.p fpn,-(a7)").
# Steps: clear the FPSR exception byte, zero the live FPCR/FPSR, load the
# source FP register, tag (and if necessary normalize) it, then call fout().
# Afterwards, branch to fu_out_ena_p if any exception is enabled in the
# user's FPCR; otherwise fall through into fu_out_exit_p.
2062fu_out_pack:
2063
2064
2065# I'm not sure at this point what FPSR bits are valid for this instruction.
2066# so, since the emulation routines re-create them anyways, zero exception field.
2067# fmove out doesn't affect ccodes.
2068        and.l           &0xffff00ff,USER_FPSR(%a6) # zero exception field
2069
2070        fmov.l          &0x0,%fpcr              # zero current control regs
2071        fmov.l          &0x0,%fpsr
2072
# 3-bit field at bit offset 6 of the command word -> register number in d0.
# load_fpn1 presumably copies that register into FP_SRC (helper is outside
# this view -- TODO confirm).
2073        bfextu          EXC_CMDREG(%a6){&6:&3},%d0
2074        bsr.l           load_fpn1
2075
2076# unlike other opclass 3, unimplemented data type exceptions, packed must be
2077# able to detect all operand types.
2078        lea             FP_SRC(%a6),%a0
2079        bsr.l           set_tag_x               # tag the operand type
2080        cmpi.b          %d0,&UNNORM             # is operand an UNNORM?
2081        bne.b           fu_op2_p                # no
2082        bsr.l           unnorm_fix              # yes; convert to NORM,DENORM,or ZERO
2083
# d0 = operand tag (from set_tag_x, or from unnorm_fix if it was an UNNORM)
2084fu_op2_p:
2085        mov.b           %d0,STAG(%a6)           # save src optype tag
2086
2087        clr.l           %d0
2088        mov.b           FPCR_MODE(%a6),%d0      # fetch rnd mode/prec
2089
2090        lea             FP_SRC(%a6),%a0         # pass ptr to src operand
2091
# the caller's a6 (saved by link at (%a6)) is copied to EXC_A6 before the
# call and copied back on every exit path after fout returns.
2092        mov.l           (%a6),EXC_A6(%a6)       # in case a6 changes
2093        bsr.l           fout                    # call fmove out routine
2094
2095# Exceptions in order of precedence:
2096#       BSUN    : no
2097#       SNAN    : yes
2098#       OPERR   : if ((k_factor > +17) || (dec. exp exceeds 3 digits))
2099#       OVFL    : no
2100#       UNFL    : no
2101#       DZ      : no
2102#       INEX2   : yes
2103#       INEX1   : no
2104
2105# determine the highest priority exception(if any) set by the
2106# emulation routine that has also been enabled by the user.
2107        mov.b           FPCR_ENABLE(%a6),%d0    # fetch exceptions enabled
2108        bne.w           fu_out_ena_p            # some are enabled
2109
# fu_out_exit_p: normal (no enabled exception) exit for packed fmove out.
# Writes EXC_A6 back to the saved-a6 slot, updates %usp for user mode,
# restores the saved registers and leaves via _fpsp_done, or via fu_trace_p
# when the trace bit is set. No frestore is performed on this path.
2110fu_out_exit_p:
2111        mov.l           EXC_A6(%a6),(%a6)       # restore a6
2112
2113        btst            &0x5,EXC_SR(%a6)        # user or supervisor?
2114        bne.b           fu_out_exit_s_p         # supervisor
2115
2116        mov.l           EXC_A7(%a6),%a0         # update user a7
2117        mov.l           %a0,%usp
2118
# common tail, also reached from fu_out_exit_s_p when no frame shift is needed
2119fu_out_exit_cont_p:
2120        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
2121        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2122        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2123
2124        unlk            %a6                     # unravel stack frame
2125
2126        btst            &0x7,(%sp)              # is trace on?
2127        bne.w           fu_trace_p              # yes
2128
2129        bra.l           _fpsp_done              # exit to os
2130
2131# the exception occurred in supervisor mode. check to see if the
2132# addressing mode was -(a7). if so, we'll need to shift the
2133# stack frame "down".
# fu_out_exit_s_p: supervisor-mode exit for packed fmove out.
# If the destination was not -(a7) the common tail is used. Otherwise the
# 12-byte result held in FP_DST must land where the exception frame currently
# sits, so the SR/PC part of the frame is first copied 0xc bytes lower.
2134fu_out_exit_s_p:
2135        btst            &mda7_bit,SPCOND_FLG(%a6) # was ea mode -(a7)
2136        beq.b           fu_out_exit_cont_p      # no
2137
2138        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
2139        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2140        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2141
# a6 is reloaded without unlk, so %sp still points at the bottom of the local
# area; from here on, frame fields are addressed as LOCAL_SIZE+<field>(%sp).
2142        mov.l           (%a6),%a6               # restore frame pointer
2143
# move two longwords (starting at EXC_SR and at 2+EXC_PC) down by 0xc bytes
2144        mov.l           LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2145        mov.l           LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2146
2147# now, copy the result to the proper place on the stack
2148        mov.l           LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
2149        mov.l           LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
2150        mov.l           LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
2151
# drop the local area; %sp is left at the relocated frame (assumes EXC_SR is
# at offset 4 from the frame pointer -- TODO confirm against the equates).
2152        add.l           &LOCAL_SIZE-0x8,%sp
2153
2154        btst            &0x7,(%sp)
2155        bne.w           fu_trace_p
2156
2157        bra.l           _fpsp_done
2158
# fu_out_ena_p: at least one exception is enabled in the user's FPCR.
# In:   d0.b = FPCR enable byte.
# Masks it with the exceptions actually set, then finds the first set bit
# with bfffo over bits 24-31. If none is both enabled and set, return to the
# normal exit (fu_out_exit_p); otherwise dispatch on the bit offset in d0.
2159fu_out_ena_p:
2160        and.b           FPSR_EXCEPT(%a6),%d0    # keep only ones enabled
2161        bfffo           %d0{&24:&8},%d0         # find highest priority exception
2162        beq.w           fu_out_exit_p
2163
2164        mov.l           EXC_A6(%a6),(%a6)       # restore a6
2165
2166# an exception occurred and that exception was enabled.
2167# the only exception possible on packed move out are INEX, OPERR, and SNAN.
# d0 is compared against 0x1a: greater -> INEX, equal -> OPERR,
# lower -> falls through to the SNAN handling that follows.
2168fu_out_exc_p:
2169        cmpi.b          %d0,&0x1a
2170        bgt.w           fu_inex_p2
2171        beq.w           fu_operr_p
2172
# fu_snan_p: enabled SNAN on packed fmove out.
# User mode: update %usp from EXC_A7 and continue at fu_snan.
# Supervisor mode: continue at fu_snan unless SPCOND_FLG equals mda7_flg, in
# which case the frame is relocated here and control goes to _real_snan.
2173fu_snan_p:
2174        btst            &0x5,EXC_SR(%a6)
2175        bne.b           fu_snan_s_p
2176
2177        mov.l           EXC_A7(%a6),%a0
2178        mov.l           %a0,%usp
2179        bra.w           fu_snan
2180
2181fu_snan_s_p:
2182        cmpi.b          SPCOND_FLG(%a6),&mda7_flg
2183        bne.w           fu_snan
2184
2185# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2186# the strategy is to move the exception frame "down" 12 bytes. then, we
2187# can store the default result where the exception frame was.
2188        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
2189        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2190        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2191
2192        mov.w           &0x30d8,EXC_VOFF(%a6)   # frame format 0x3, vector offset = 0xd8
2193        mov.w           &0xe006,2+FP_SRC(%a6)   # set fsave status
2194
2195        frestore        FP_SRC(%a6)             # restore src operand
2196
2197        mov.l           (%a6),%a6               # restore frame pointer
2198
# copy the frame (three longwords: SR/PC, PC/VOFF, EA) 0xc bytes lower
2199        mov.l           LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2200        mov.l           LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2201        mov.l           LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2202
2203# now, we copy the default result to its proper location
2204        mov.l           LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2205        mov.l           LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2206        mov.l           LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2207
# discard the local area, leaving %sp 8 bytes below the old frame pointer
2208        add.l           &LOCAL_SIZE-0x8,%sp
2209
2210
2211        bra.l           _real_snan
2212
# fu_operr_p: enabled OPERR on packed fmove out.
# User mode: update %usp from EXC_A7 and continue at fu_operr.
# Supervisor mode: continue at fu_operr unless SPCOND_FLG equals mda7_flg, in
# which case the frame is relocated here and control goes to _real_operr.
2213fu_operr_p:
2214        btst            &0x5,EXC_SR(%a6)
2215        bne.w           fu_operr_p_s
2216
2217        mov.l           EXC_A7(%a6),%a0
2218        mov.l           %a0,%usp
2219        bra.w           fu_operr
2220
2221fu_operr_p_s:
2222        cmpi.b          SPCOND_FLG(%a6),&mda7_flg
2223        bne.w           fu_operr
2224
2225# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2226# the strategy is to move the exception frame "down" 12 bytes. then, we
2227# can store the default result where the exception frame was.
2228        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
2229        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2230        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2231
2232        mov.w           &0x30d0,EXC_VOFF(%a6)   # vector offset = 0xd0
2233        mov.w           &0xe004,2+FP_SRC(%a6)   # set fsave status
2234
2235        frestore        FP_SRC(%a6)             # restore src operand
2236
2237        mov.l           (%a6),%a6               # restore frame pointer
2238
# copy the frame (three longwords: SR/PC, PC/VOFF, EA) 0xc bytes lower
2239        mov.l           LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2240        mov.l           LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2241        mov.l           LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2242
2243# now, we copy the default result to its proper location
2244        mov.l           LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2245        mov.l           LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2246        mov.l           LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2247
# discard the local area, leaving %sp 8 bytes below the old frame pointer
2248        add.l           &LOCAL_SIZE-0x8,%sp
2249
2250
2251        bra.l           _real_operr
2252
# fu_inex_p2: enabled INEX on packed fmove out.
# User mode: update %usp from EXC_A7 and continue at fu_inex.
# Supervisor mode: continue at fu_inex unless SPCOND_FLG equals mda7_flg, in
# which case the frame is relocated here and control goes to _real_inex.
2253fu_inex_p2:
2254        btst            &0x5,EXC_SR(%a6)
2255        bne.w           fu_inex_s_p2
2256
2257        mov.l           EXC_A7(%a6),%a0
2258        mov.l           %a0,%usp
2259        bra.w           fu_inex
2260
2261fu_inex_s_p2:
2262        cmpi.b          SPCOND_FLG(%a6),&mda7_flg
2263        bne.w           fu_inex
2264
2265# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2266# the strategy is to move the exception frame "down" 12 bytes. then, we
2267# can store the default result where the exception frame was.
2268        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
2269        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2270        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2271
2272        mov.w           &0x30c4,EXC_VOFF(%a6)   # vector offset = 0xc4
2273        mov.w           &0xe001,2+FP_SRC(%a6)   # set fsave status
2274
2275        frestore        FP_SRC(%a6)             # restore src operand
2276
2277        mov.l           (%a6),%a6               # restore frame pointer
2278
# copy the frame (three longwords: SR/PC, PC/VOFF, EA) 0xc bytes lower
2279        mov.l           LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2280        mov.l           LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2281        mov.l           LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2282
2283# now, we copy the default result to its proper location
2284        mov.l           LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2285        mov.l           LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2286        mov.l           LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2287
# discard the local area, leaving %sp 8 bytes below the old frame pointer
2288        add.l           &LOCAL_SIZE-0x8,%sp
2289
2290
2291        bra.l           _real_inex
2292
2293#########################################################################
2294
2295#
2296# if we're stuffing a source operand back into an fsave frame then we
2297# have to make sure that for single or double source operands that the
2298# format stuffed is as weird as the hardware usually makes it.
2299#
# funimp_skew: dispatch on the source-format field of the extension word.
# In:   a6 = frame pointer (EXC_EXTWORD and FP_SRC are addressed off it).
# Format 1 (single) and 5 (double) get the "skewed" treatment; any other
# format returns with FP_SRC untouched. Clobbers d0.
2300        global          funimp_skew
2301funimp_skew:
2302        bfextu          EXC_EXTWORD(%a6){&3:&3},%d0 # extract src specifier
2303        cmpi.b          %d0,&0x1                # was src sgl?
2304        beq.b           funimp_skew_sgl         # yes
2305        cmpi.b          %d0,&0x5                # was src dbl?
2306        beq.b           funimp_skew_dbl         # yes
2307        rts
2308
# funimp_skew_sgl: re-skew a single-precision denormal held in FP_SRC.
# Nothing is done when the sign-stripped exponent is zero or above 0x3f80.
# Otherwise the high mantissa longword is shifted right by (0x3f81 - exp),
# bit 31 is set, and the exponent is replaced by 0x3f80 with the original
# sign bit kept. FP_SRC_LO is not modified. Clobbers d0, d1.
2309funimp_skew_sgl:
2310        mov.w           FP_SRC_EX(%a6),%d0      # fetch DENORM exponent
2311        andi.w          &0x7fff,%d0             # strip sign
2312        beq.b           funimp_skew_sgl_not
2313        cmpi.w          %d0,&0x3f80
2314        bgt.b           funimp_skew_sgl_not
2315        neg.w           %d0                     # make exponent negative
2316        addi.w          &0x3f81,%d0             # find amt to shift
2317        mov.l           FP_SRC_HI(%a6),%d1      # fetch DENORM hi(man)
2318        lsr.l           %d0,%d1                 # shift it
2319        bset            &31,%d1                 # set j-bit
2320        mov.l           %d1,FP_SRC_HI(%a6)      # insert new hi(man)
2321        andi.w          &0x8000,FP_SRC_EX(%a6)  # clear old exponent
2322        ori.w           &0x3f80,FP_SRC_EX(%a6)  # insert new "skewed" exponent
2323funimp_skew_sgl_not:
2324        rts
2325
# funimp_skew_dbl: re-skew a double-precision denormal held in FP_SRC.
# Nothing is done when the sign-stripped exponent is zero or above 0x3c00.
# Otherwise the operand is temporarily rewritten with the sign kept in the
# byte at 0x2+FP_SRC and a sign-free exponent, handed to dnrm_lp with
# threshold 0x3c01, and finally given exponent 0x3c00 (plus sign) with the
# top mantissa bit set. Clobbers d0, d1, a0, plus whatever dnrm_lp clobbers.
2326funimp_skew_dbl:
2327        mov.w           FP_SRC_EX(%a6),%d0      # fetch DENORM exponent
2328        andi.w          &0x7fff,%d0             # strip sign
2329        beq.b           funimp_skew_dbl_not
2330        cmpi.w          %d0,&0x3c00
2331        bgt.b           funimp_skew_dbl_not
2332
2333        tst.b           FP_SRC_EX(%a6)          # make "internal format"
2334        smi.b           0x2+FP_SRC(%a6)         # sign byte = 0xff if negative, else 0x00
2335        mov.w           %d0,FP_SRC_EX(%a6)      # insert exponent with cleared sign
2336        clr.l           %d0                     # clear g,r,s
2337        lea             FP_SRC(%a6),%a0         # pass ptr to src op
2338        mov.w           &0x3c01,%d1             # pass denorm threshold
2339        bsr.l           dnrm_lp                 # denorm it
2340        mov.w           &0x3c00,%d0             # new exponent
2341        tst.b           0x2+FP_SRC(%a6)         # is sign set?
2342        beq.b           fss_dbl_denorm_done     # no
2343        bset            &15,%d0                 # set sign
2344fss_dbl_denorm_done:
# byte-sized bset on memory: bit 7 of the first byte of FP_SRC_HI, i.e. the
# most significant mantissa bit
2345        bset            &0x7,FP_SRC_HI(%a6)     # set j-bit
2346        mov.w           %d0,FP_SRC_EX(%a6)      # insert new exponent
2347funimp_skew_dbl_not:
2348        rts
2349
2350#########################################################################
# _mem_write2: store a 12-byte result.
# In:   a0 = pointer to the 12 bytes to store; a6 = frame pointer.
# User mode (bit 5 of EXC_SR clear): control is handed to _dmem_write with
# the registers as received (branch, not call, so _dmem_write returns to our
# caller).
# Supervisor mode: the 12 bytes are copied into FP_DST_EX/HI/LO instead and
# d1 is cleared before returning (presumably the "no error" status expected
# by callers -- TODO confirm against the _dmem_write contract).
2351        global          _mem_write2
2352_mem_write2:
2353        btst            &0x5,EXC_SR(%a6)
2354        beq.l           _dmem_write
2355        mov.l           0x0(%a0),FP_DST_EX(%a6)
2356        mov.l           0x4(%a0),FP_DST_HI(%a6)
2357        mov.l           0x8(%a0),FP_DST_LO(%a6)
2358        clr.l           %d1
2359        rts
2360
2361#########################################################################
2362# XDEF **************************************************************** #
2363#       _fpsp_effadd(): 060FPSP entry point for FP "Unimplemented       #
2364#                       effective address" exception.                   #
2365#                                                                       #
2366#       This handler should be the first code executed upon taking the  #
2367#       FP Unimplemented Effective Address exception in an operating    #
2368#       system.                                                         #
2369#                                                                       #
2370# XREF **************************************************************** #
2371#       _imem_read_long() - read instruction longword                   #
2372#       fix_skewed_ops() - adjust src operand in fsave frame            #
2373#       set_tag_x() - determine optype of src/dst operands              #
2374#       store_fpreg() - store opclass 0 or 2 result to FP regfile       #
2375#       unnorm_fix() - change UNNORM operands to NORM or ZERO           #
2376#       load_fpn2() - load dst operand from FP regfile                  #
2377#       tbl_unsupp - add of table of emulation routines for opclass 0,2 #
2378#       decbin() - convert packed data to FP binary data                #
2379#       _real_fpu_disabled() - "callout" for "FPU disabled" exception   #
2380#       _real_access() - "callout" for access error exception           #
2381#       _mem_read() - read extended immediate operand from memory       #
2382#       _fpsp_done() - "callout" for exit; work all done                #
2383#       _real_trace() - "callout" for Trace enabled exception           #
2384#       fmovm_dynamic() - emulate dynamic fmovm instruction             #
2385#       fmovm_ctrl() - emulate fmovm control instruction                #
2386#                                                                       #
2387# INPUT *************************************************************** #
2388#       - The system stack contains the "Unimplemented <ea>" stk frame  #
2389#                                                                       #
2390# OUTPUT ************************************************************** #
2391#       If access error:                                                #
2392#       - The system stack is changed to an access error stack frame    #
2393#       If FPU disabled:                                                #
2394#       - The system stack is changed to an FPU disabled stack frame    #
2395#       If Trace exception enabled:                                     #
2396#       - The system stack is changed to a Trace exception stack frame  #
2397#       Else: (normal case)                                             #
2398#       - None (correct result has been stored as appropriate)          #
2399#                                                                       #
2400# ALGORITHM *********************************************************** #
2401#       This exception handles 3 types of operations:                   #
2402# (1) FP Instructions using extended precision or packed immediate      #
2403#     addressing mode.                                                  #
2404# (2) The "fmovm.x" instruction w/ dynamic register specification.      #
2405# (3) The "fmovm.l" instruction w/ 2 or 3 control registers.            #
2406#                                                                       #
2407#       For immediate data operations, the data is read in w/ a         #
2408# _mem_read() "callout", converted to FP binary (if packed), and used   #
2409# as the source operand to the instruction specified by the instruction #
2410# word. If no FP exception should be reported as a result of the        #
2411# emulation, then the result is stored to the destination register and  #
2412# the handler exits through _fpsp_done(). If an enabled exc has been    #
2413# signalled as a result of emulation, then an fsave state frame         #
2414# corresponding to the FP exception type must be entered into the 060   #
2415# FPU before exiting. In either the enabled or disabled cases, we       #
2416# must also check if a Trace exception is pending, in which case, we    #
2417# must create a Trace exception stack frame from the current exception  #
2418# stack frame. If no Trace is pending, we simply exit through           #
2419# _fpsp_done().                                                         #
2420#       For "fmovm.x", call the routine fmovm_dynamic() which will      #
2421# decode and emulate the instruction. No FP exceptions can be pending   #
2422# as a result of this operation emulation. A Trace exception can be     #
2423# pending, though, which means the current stack frame must be changed  #
2424# to a Trace stack frame and an exit made through _real_trace().        #
2425# For the case of "fmovm.x Dn,-(a7)", where the offending instruction   #
2426# was executed from supervisor mode, this handler must store the FP     #
2427# register file values to the system stack by itself since              #
2428# fmovm_dynamic() can't handle this. A normal exit is made through      #
2429# _fpsp_done().                                                         #
2430#       For "fmovm.l", fmovm_ctrl() is used to emulate the instruction. #
2431# Again, a Trace exception may be pending and an exit made through      #
2432# _real_trace(). Else, a normal exit is made through _fpsp_done().      #
2433#                                                                       #
2434#       Before any of the above is attempted, it must be checked to     #
2435# see if the FPU is disabled. Since the "Unimp <ea>" exception is taken #
2436# before the "FPU disabled" exception, but the "FPU disabled" exception #
2437# has higher priority, we check the disabled bit in the PCR. If set,    #
2438# then we must create an 8 word "FPU disabled" exception stack frame    #
2439# from the current 4 word exception stack frame. This includes          #
2440# reproducing the effective address of the instruction to put on the    #
2441# new stack frame.                                                      #
2442#                                                                       #
2443#       In the process of all emulation work, if a _mem_read()          #
2444# "callout" returns a failing result indicating an access error, then   #
2445# we must create an access error stack frame from the current stack     #
2446# frame. This information includes a faulting address and a fault-      #
2447# status-longword. These are created within this handler.               #
2448#                                                                       #
2449#########################################################################
2450
# _fpsp_effadd: entry point for the FP "Unimplemented effective address"
# exception. Checks for a disabled FPU first, then builds the local frame,
# saves d0-d1/a0-a1, the FP control registers and fp0-fp1, fetches the
# faulting instruction's opword/extension word, and dispatches: negative
# extension word -> iea_fmovm, otherwise falls through toward iea_op.
2451        global          _fpsp_effadd
2452_fpsp_effadd:
2453
2454# This exception type takes priority over the "Line F Emulator"
2455# exception. Therefore, the FPU could be disabled when entering here.
2456# So, we must check to see if it's disabled and handle that case separately.
# note: on the branch to iea_disabled the saved d0 is still on the stack.
2457        mov.l           %d0,-(%sp)              # save d0
2458        movc            %pcr,%d0                # load proc cr
2459        btst            &0x1,%d0                # is FPU disabled?
2460        bne.w           iea_disabled            # yes
2461        mov.l           (%sp)+,%d0              # restore d0
2462
2463        link            %a6,&-LOCAL_SIZE        # init stack frame
2464
2465        movm.l          &0x0303,EXC_DREGS(%a6)  # save d0-d1/a0-a1
2466        fmovm.l         %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
2467        fmovm.x         &0xc0,EXC_FPREGS(%a6)   # save fp0-fp1 on stack
2468
2469# PC of instruction that took the exception is the PC in the frame
2470        mov.l           EXC_PC(%a6),EXC_EXTWPTR(%a6)
2471
# EXC_EXTWPTR is advanced past the 4 bytes being fetched, so it then points
# at whatever follows the opword/extension word pair.
2472        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
2473        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
2474        bsr.l           _imem_read_long         # fetch the instruction words
2475        mov.l           %d0,EXC_OPWORD(%a6)     # store OPWORD and EXTWORD
2476
2477#########################################################################
2478
# the low word of d0 is the extension word; its sign bit selects fmovem
2479        tst.w           %d0                     # is operation fmovem?
2480        bmi.w           iea_fmovm               # yes
2481
2482#
2483# here, we will have:
2484#       fabs    fdabs   fsabs           facos           fmod
2485#       fadd    fdadd   fsadd           fasin           frem
2486#       fcmp                            fatan           fscale
2487#       fdiv    fddiv   fsdiv           fatanh          fsin
2488#       fint                            fcos            fsincos
2489#       fintrz                          fcosh           fsinh
2490#       fmove   fdmove  fsmove          fetox           ftan
2491#       fmul    fdmul   fsmul           fetoxm1         ftanh
2492#       fneg    fdneg   fsneg           fgetexp         ftentox
2493#       fsgldiv                         fgetman         ftwotox
2494#       fsglmul                         flog10
2495#       fsqrt                           flog2
2496#       fsub    fdsub   fssub           flogn
2497#       ftst                            flognp1
2498# which can all use f<op>.{x,p}
2499# so, now it's immediate data extended precision AND PACKED FORMAT!
2500#
# iea_op: FP operation with an immediate extended or packed source operand.
# In:   d0 = opword:extension word as fetched in _fpsp_effadd.
# Clears the two FPSR bytes masked off by 0x00ff00ff (bits 31-24 and 15-8),
# then reads the 12-byte immediate into FP_SRC. Bit 10 of d0 selects the
# packed path. A nonzero d1 after _imem_read goes to iea_iacc.
2501iea_op:
2502        andi.l          &0x00ff00ff,USER_FPSR(%a6)
2503
2504        btst            &0xa,%d0                # is src fmt x or p?
2505        bne.b           iea_op_pack             # packed
2506
2507
2508        mov.l           EXC_EXTWPTR(%a6),%a0    # pass: ptr to #<data>
2509        lea             FP_SRC(%a6),%a1         # pass: ptr to super addr
2510        mov.l           &0xc,%d0                # pass: 12 bytes
2511        bsr.l           _imem_read              # read extended immediate
2512
2513        tst.l           %d1                     # did ifetch fail?
2514        bne.w           iea_iacc                # yes
2515
2516        bra.b           iea_op_setsrc
2517
# iea_op_pack: read a 12-byte packed-decimal immediate into FP_SRC and,
# unless it encodes an INF/NAN or a zero, convert it with decbin().
# INF/NAN and ZERO operands are passed on to iea_op_setsrc as read.
2518iea_op_pack:
2519
2520        mov.l           EXC_EXTWPTR(%a6),%a0    # pass: ptr to #<data>
2521        lea             FP_SRC(%a6),%a1         # pass: ptr to super dst
2522        mov.l           &0xc,%d0                # pass: 12 bytes
2523        bsr.l           _imem_read              # read packed operand
2524
2525        tst.l           %d1                     # did ifetch fail?
2526        bne.w           iea_iacc                # yes
2527
2528# The packed operand is an INF or a NAN if the exponent field is all ones.
2529        bfextu          FP_SRC(%a6){&1:&15},%d0 # get exp
2530        cmpi.w          %d0,&0x7fff             # INF or NAN?
2531        beq.b           iea_op_setsrc           # operand is an INF or NAN
2532
2533# The packed operand is a zero if the mantissa is all zero, else it's
2534# a normal packed op.
# the mantissa check covers the low nybble of the fourth byte plus both
# following longwords.
2535        mov.b           3+FP_SRC(%a6),%d0       # get byte 4
2536        andi.b          &0x0f,%d0               # clear all but last nybble
2537        bne.b           iea_op_gp_not_spec      # not a zero
2538        tst.l           FP_SRC_HI(%a6)          # is lw 2 zero?
2539        bne.b           iea_op_gp_not_spec      # not a zero
2540        tst.l           FP_SRC_LO(%a6)          # is lw 3 zero?
2541        beq.b           iea_op_setsrc           # operand is a ZERO
2542iea_op_gp_not_spec:
2543        lea             FP_SRC(%a6),%a0         # pass: ptr to packed op
2544        bsr.l           decbin                  # convert to extended
# register mask 0x80 stores one FP register (presumably fp0, holding the
# decbin result -- TODO confirm) over the packed image in FP_SRC.
2545        fmovm.x         &0x80,FP_SRC(%a6)       # make this the srcop
2546
# iea_op_setsrc: the source operand is now in FP_SRC.
# Advance the extension-word pointer past the 12-byte immediate, tag the
# operand type into STAG, and convert an UNNORM via unnorm_fix (re-tagging
# with the value it returns in d0).
2547iea_op_setsrc:
2548        addi.l          &0xc,EXC_EXTWPTR(%a6)   # update extension word pointer
2549
2550# FP_SRC now holds the src operand.
2551        lea             FP_SRC(%a6),%a0         # pass: ptr to src op
2552        bsr.l           set_tag_x               # tag the operand type
2553        mov.b           %d0,STAG(%a6)           # could be ANYTHING!!!
2554        cmpi.b          %d0,&UNNORM             # is operand an UNNORM?
2555        bne.b           iea_op_getdst           # no
2556        bsr.l           unnorm_fix              # yes; convert to NORM/DENORM/ZERO
2557        mov.b           %d0,STAG(%a6)           # set new optype tag
# iea_op_getdst: decide whether a destination operand must be loaded.
# STORE_FLG is cleared here (zero means "store the result", see iea_op_save).
# Bits 5 and 4 of the low command-register byte classify the operation:
# bit 5 clear -> monadic, no dst load; bit 5 set and bit 4 set -> special
# cases at iea_op_spec; otherwise fall into iea_op_loaddst.
2558iea_op_getdst:
2559        clr.b           STORE_FLG(%a6)          # clear "store result" boolean
2560
2561        btst            &0x5,1+EXC_CMDREG(%a6)  # is operation monadic or dyadic?
2562        beq.b           iea_op_extract          # monadic
2563        btst            &0x4,1+EXC_CMDREG(%a6)  # is operation fsincos,ftst,fcmp?
2564        bne.b           iea_op_spec             # yes
2565
# load the destination FP register into FP_DST and tag it into DTAG, fixing
# up an UNNORM the same way as for the source operand.
2566iea_op_loaddst:
2567        bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
2568        bsr.l           load_fpn2               # load dst operand
2569
2570        lea             FP_DST(%a6),%a0         # pass: ptr to dst op
2571        bsr.l           set_tag_x               # tag the operand type
2572        mov.b           %d0,DTAG(%a6)           # could be ANYTHING!!!
2573        cmpi.b          %d0,&UNNORM             # is operand an UNNORM?
2574        bne.b           iea_op_extract          # no
2575        bsr.l           unnorm_fix              # yes; convert to NORM/DENORM/ZERO
2576        mov.b           %d0,DTAG(%a6)           # set new optype tag
2577        bra.b           iea_op_extract
2578
2579# the operation is fsincos, ftst, or fcmp. only fcmp is dyadic
# iea_op_spec: bit 3 clear means fsincos, which goes straight to emulation.
# ftst and fcmp set STORE_FLG so no result is written back; fcmp (bit 1
# clear) additionally goes back to load its destination operand, while ftst
# falls through into iea_op_extract.
2580iea_op_spec:
2581        btst            &0x3,1+EXC_CMDREG(%a6)  # is operation fsincos?
2582        beq.b           iea_op_extract          # yes
2583# now, we're left with ftst and fcmp. so, first let's tag them so that they don't
2584# store a result. then, only fcmp will branch back and pick up a dst operand.
2585        st              STORE_FLG(%a6)          # don't store a final result
2586        btst            &0x1,1+EXC_CMDREG(%a6)  # is operation fcmp?
2587        beq.b           iea_op_loaddst          # yes
2588
# iea_op_extract: call the emulation routine for the operation.
# Passes: d0 = rounding mode/precision byte, a0 -> FP_SRC, a1 -> FP_DST.
# The low 7 bits of the command register index tbl_unsupp, whose longword
# entries are offsets relative to tbl_unsupp itself. The live FPCR/FPSR are
# zeroed before the call. On return, branch to iea_op_ena if any exception
# is enabled in the user's FPCR; otherwise fall through into iea_op_save.
2589iea_op_extract:
2590        clr.l           %d0
2591        mov.b           FPCR_MODE(%a6),%d0      # pass: rnd mode,prec
2592
2593        mov.b           1+EXC_CMDREG(%a6),%d1
2594        andi.w          &0x007f,%d1             # extract extension
2595
2596        fmov.l          &0x0,%fpcr
2597        fmov.l          &0x0,%fpsr
2598
2599        lea             FP_SRC(%a6),%a0
2600        lea             FP_DST(%a6),%a1
2601
2602        mov.l           (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
2603        jsr             (tbl_unsupp.l,%pc,%d1.l*1)
2604
2605#
2606# Exceptions in order of precedence:
2607#       BSUN    : none
2608#       SNAN    : all operations
2609#       OPERR   : all reg-reg or mem-reg operations that can normally operr
2610#       OVFL    : same as OPERR
2611#       UNFL    : same as OPERR
2612#       DZ      : same as OPERR
2613#       INEX2   : same as OPERR
2614#       INEX1   : all packed immediate operations
2615#
2616
2617# we determine the highest priority exception(if any) set by the
2618# emulation routine that has also been enabled by the user.
2619        mov.b           FPCR_ENABLE(%a6),%d0    # fetch exceptions enabled
2620        bne.b           iea_op_ena              # some are enabled
2621
2622# now, we save the result, unless, of course, the operation was ftst or fcmp.
2623# these don't save results.
# iea_op_save: no enabled exception needs reporting.
# Store the result to the destination FP register unless STORE_FLG is set,
# then exit: FPIAR becomes the faulting instruction's PC, the frame's PC is
# replaced by EXC_EXTWPTR (the address after the instruction and its
# immediate), saved registers are restored and the handler leaves via
# _fpsp_done, or via iea_op_trace when the trace bit is set.
2624iea_op_save:
2625        tst.b           STORE_FLG(%a6)          # does this op store a result?
2626        bne.b           iea_op_exit1            # exit with no frestore
2627
2628iea_op_store:
2629        bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
2630        bsr.l           store_fpreg             # store the result
2631
2632iea_op_exit1:
2633        mov.l           EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2634        mov.l           EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2635
2636        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
2637        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2638        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2639
2640        unlk            %a6                     # unravel the frame
2641
2642        btst            &0x7,(%sp)              # is trace on?
2643        bne.w           iea_op_trace            # yes
2644
2645        bra.l           _fpsp_done              # exit to os
2646
2647iea_op_ena:                                     # entered with d0 = FPCR_ENABLE byte (non-zero)
2648        and.b           FPSR_EXCEPT(%a6),%d0    # keep only ones enabled and set
2649        bfffo           %d0{&24:&8},%d0         # find highest priority exception
2650        bne.b           iea_op_exc              # at least one was set
2651
2652# no enabled exception occurred. now, did a disabled, exact overflow occur with
2653# inexact enabled? if so, then we have to stuff an overflow frame into the FPU.
2654        btst            &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
2655        beq.b           iea_op_save             # no; take the normal save/exit path
2656
2657iea_op_ovfl:
2658        btst            &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
2659        beq.b           iea_op_store            # no
2660        bra.b           iea_op_exc_ovfl         # yes
2661
2662# an enabled exception occurred. we have to insert the exception type back into
2663# the machine.
2664iea_op_exc:
2665        subi.l          &24,%d0                 # fix offset to be 0-7
2666        cmpi.b          %d0,&0x6                # is exception INEX?
2667        bne.b           iea_op_exc_force        # no
2668
2669# the enabled exception was inexact. so, if it occurs with an overflow
2670# or underflow that was disabled, then we have to force an overflow or
2671# underflow frame.
2672        btst            &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
2673        bne.b           iea_op_exc_ovfl         # yes
2674        btst            &unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
2675        bne.b           iea_op_exc_unfl         # yes
2676# write the word for exception number d0 into the frame that iea_op_exit2 frestores
2677iea_op_exc_force:
2678        mov.w           (tbl_iea_except.b,%pc,%d0.w*2),2+FP_SRC(%a6)
2679        bra.b           iea_op_exit2            # exit with frestore
2680# one word per exception number 0-7; entries 3 and 4 equal the OVFL/UNFL words forced below
2681tbl_iea_except:
2682        short           0xe002, 0xe006, 0xe004, 0xe005
2683        short           0xe003, 0xe002, 0xe001, 0xe001
2684
2685iea_op_exc_ovfl:
2686        mov.w           &0xe005,2+FP_SRC(%a6)   # force an overflow frame
2687        bra.b           iea_op_exit2
2688
2689iea_op_exc_unfl:
2690        mov.w           &0xe003,2+FP_SRC(%a6)   # force an underflow frame
2691# exceptional exit: same as iea_op_exit1 plus an frestore of the FP_SRC frame
2692iea_op_exit2:
2693        mov.l           EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2694        mov.l           EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2695
2696        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
2697        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2698        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2699
2700        frestore        FP_SRC(%a6)             # restore exceptional state
2701
2702        unlk            %a6                     # unravel the frame
2703
2704        btst            &0x7,(%sp)              # is trace on?
2705        bne.b           iea_op_trace            # yes
2706
2707        bra.l           _fpsp_done              # exit to os
2708
2709#
2710# The opclass two instruction that took an "Unimplemented Effective Address"
2711# exception was being traced. The "Current PC" is read from the FPIAR and
2712# placed in the trace stack frame; then we jump to _real_trace().
2713#
2714#                UNIMP EA FRAME            TRACE FRAME
2715#               *****************       *****************
2716#               * 0x0 *  0x0f0  *       *    Current    *
2717#               *****************       *      PC       *
2718#               *    Current    *       *****************
2719#               *      PC       *       * 0x2 *  0x024  *
2720#               *****************       *****************
2721#               *      SR       *       *     Next      *
2722#               *****************       *      PC       *
2723#                                       *****************
2724#                                       *      SR       *
2725#                                       *****************
2726iea_op_trace:
2727        mov.l           (%sp),-(%sp)            # shift stack frame "down"
2728        mov.w           0x8(%sp),0x4(%sp)       # bring the remaining PC word down too
2729        mov.w           &0x2024,0x6(%sp)        # stk fmt = 0x2; voff = 0x024
2730        fmov.l          %fpiar,0x8(%sp)         # "Current PC" is in FPIAR
2731
2732        bra.l           _real_trace
2733
2734#########################################################################
2735iea_fmovm:                                      # fmovm: pick the ctrl-reg or data-reg path
2736        btst            &14,%d0                 # ctrl or data reg
2737        beq.w           iea_fmovm_ctrl          # bit 14 clear: ctrl regs
2738
2739iea_fmovm_data:
2740
2741        btst            &0x5,EXC_SR(%a6)        # user or supervisor mode
2742        bne.b           iea_fmovm_data_s        # bit set: supervisor
2743# user mode: a7 is the usp; hand it to fmovm_dynamic through EXC_A7 and write it back
2744iea_fmovm_data_u:
2745        mov.l           %usp,%a0
2746        mov.l           %a0,EXC_A7(%a6)         # store current a7
2747        bsr.l           fmovm_dynamic           # do dynamic fmovm
2748        mov.l           EXC_A7(%a6),%a0         # load possibly new a7
2749        mov.l           %a0,%usp                # update usp
2750        bra.w           iea_fmovm_exit
2751# supervisor mode: EXC_A7 = address just past the exception frame's voff word
2752iea_fmovm_data_s:
2753        clr.b           SPCOND_FLG(%a6)         # start with no special condition flagged
2754        lea             0x2+EXC_VOFF(%a6),%a0
2755        mov.l           %a0,EXC_A7(%a6)
2756        bsr.l           fmovm_dynamic           # do dynamic fmovm
2757# SPCOND_FLG now says whether the exception frame has to be relocated
2758        cmpi.b          SPCOND_FLG(%a6),&mda7_flg
2759        beq.w           iea_fmovm_data_predec   # flag == mda7_flg
2760        cmpi.b          SPCOND_FLG(%a6),&mia7_flg
2761        bne.w           iea_fmovm_exit          # neither flag: normal exit
2762
2763# right now, d0 = the size.
2764# the data has been fetched from the supervisor stack, but we have not
2765# incremented the stack pointer by the appropriate number of bytes.
2766# do it here.
2767iea_fmovm_data_postinc:
2768        btst            &0x7,EXC_SR(%a6)        # is trace on?
2769        bne.b           iea_fmovm_data_pi_trace # yes
2770# not traced: rebuild the 4-word frame d0 bytes higher in memory
2771        mov.w           EXC_SR(%a6),(EXC_SR,%a6,%d0)
2772        mov.l           EXC_EXTWPTR(%a6),(EXC_PC,%a6,%d0) # stacked PC = Next PC
2773        mov.w           &0x00f0,(EXC_VOFF,%a6,%d0) # stk fmt = 0x0; voff = 0x0f0
2774# NOTE(review): relies on (%sp) after unlk being the EXC_SR(%a6) slot - confirm
2775        lea             (EXC_SR,%a6,%d0),%a0    # a0 = address of the relocated frame
2776        mov.l           %a0,EXC_SR(%a6)         # park it for the final sp load
2777
2778        fmovm.x         EXC_FP0(%a6),&0xc0      # restore fp0-fp1
2779        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2780        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2781
2782        unlk            %a6
2783        mov.l           (%sp)+,%sp              # sp = relocated frame
2784        bra.l           _fpsp_done
2785# traced: build a 6-word trace frame ending at the same place (starts 4 bytes lower)
2786iea_fmovm_data_pi_trace:
2787        mov.w           EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
2788        mov.l           EXC_EXTWPTR(%a6),(EXC_PC-0x4,%a6,%d0) # stacked PC = Next PC
2789        mov.w           &0x2024,(EXC_VOFF-0x4,%a6,%d0) # stk fmt = 0x2; voff = 0x024
2790        mov.l           EXC_PC(%a6),(EXC_VOFF+0x2-0x4,%a6,%d0) # last longword = Current PC
2791
2792        lea             (EXC_SR-0x4,%a6,%d0),%a0 # a0 = address of the trace frame
2793        mov.l           %a0,EXC_SR(%a6)         # park it for the final sp load
2794
2795        fmovm.x         EXC_FP0(%a6),&0xc0      # restore fp0-fp1
2796        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2797        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2798
2799        unlk            %a6
2800        mov.l           (%sp)+,%sp              # sp = trace frame
2801        bra.l           _real_trace
2802
2803# right now, d0 = size and d1 = the strg.
2804iea_fmovm_data_predec:
2805        mov.b           %d1,EXC_VOFF(%a6)       # store strg
2806        mov.b           %d0,0x1+EXC_VOFF(%a6)   # store size
2807
2808        fmovm.x         EXC_FP0(%a6),&0xc0      # restore fp0-fp1
2809        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2810        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2811# a6, d0 and d1 stay on the stack until fm_end; Next PC is popped while building the frame
2812        mov.l           (%a6),-(%sp)            # make a copy of a6
2813        mov.l           %d0,-(%sp)              # save d0
2814        mov.l           %d1,-(%sp)              # save d1
2815        mov.l           EXC_EXTWPTR(%a6),-(%sp) # make a copy of Next PC
2816
2817        clr.l           %d0
2818        mov.b           0x1+EXC_VOFF(%a6),%d0   # fetch size
2819        neg.l           %d0                     # get negative of size
2820
2821        btst            &0x7,EXC_SR(%a6)        # is trace enabled?
2822        beq.b           iea_fmovm_data_p2       # no
2823# traced: build a 6-word trace frame "size" bytes lower, starting 4 bytes below the 4-word case
2824        mov.w           EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
2825        mov.l           EXC_PC(%a6),(EXC_VOFF-0x2,%a6,%d0) # last longword = Current PC
2826        mov.l           (%sp)+,(EXC_PC-0x4,%a6,%d0) # stacked PC = Next PC (popped)
2827        mov.w           &0x2024,(EXC_VOFF-0x4,%a6,%d0) # stk fmt = 0x2; voff = 0x024
2828
2829        pea             (%a6,%d0)               # create final sp
2830        bra.b           iea_fmovm_data_p3
2831# not traced: build the 4-word frame "size" bytes lower
2832iea_fmovm_data_p2:
2833        mov.w           EXC_SR(%a6),(EXC_SR,%a6,%d0)
2834        mov.l           (%sp)+,(EXC_PC,%a6,%d0) # stacked PC = Next PC (popped)
2835        mov.w           &0x00f0,(EXC_VOFF,%a6,%d0) # stk fmt = 0x0; voff = 0x0f0
2836
2837        pea             (0x4,%a6,%d0)           # create final sp
2838# walk strg from its top bit down: each set bit stores one 12-byte register at 0xc(%a6,%d0)
2839iea_fmovm_data_p3:
2840        clr.l           %d1
2841        mov.b           EXC_VOFF(%a6),%d1       # fetch strg
2842
2843        tst.b           %d1                     # top bit of strg set?
2844        bpl.b           fm_1                    # no; skip this register
2845        fmovm.x         &0x80,(0x4+0x8,%a6,%d0) # store the register selected by mask 0x80
2846        addi.l          &0xc,%d0                # advance past the 12 bytes just written
2847fm_1:
2848        lsl.b           &0x1,%d1                # next strg bit into the sign position
2849        bpl.b           fm_2
2850        fmovm.x         &0x40,(0x4+0x8,%a6,%d0)
2851        addi.l          &0xc,%d0
2852fm_2:
2853        lsl.b           &0x1,%d1
2854        bpl.b           fm_3
2855        fmovm.x         &0x20,(0x4+0x8,%a6,%d0)
2856        addi.l          &0xc,%d0
2857fm_3:
2858        lsl.b           &0x1,%d1
2859        bpl.b           fm_4
2860        fmovm.x         &0x10,(0x4+0x8,%a6,%d0)
2861        addi.l          &0xc,%d0
2862fm_4:
2863        lsl.b           &0x1,%d1
2864        bpl.b           fm_5
2865        fmovm.x         &0x08,(0x4+0x8,%a6,%d0)
2866        addi.l          &0xc,%d0
2867fm_5:
2868        lsl.b           &0x1,%d1
2869        bpl.b           fm_6
2870        fmovm.x         &0x04,(0x4+0x8,%a6,%d0)
2871        addi.l          &0xc,%d0
2872fm_6:
2873        lsl.b           &0x1,%d1
2874        bpl.b           fm_7
2875        fmovm.x         &0x02,(0x4+0x8,%a6,%d0)
2876        addi.l          &0xc,%d0
2877fm_7:
2878        lsl.b           &0x1,%d1
2879        bpl.b           fm_end
2880        fmovm.x         &0x01,(0x4+0x8,%a6,%d0) # last register; d0 is not needed afterwards
2881fm_end:
2882        mov.l           0x4(%sp),%d1            # restore d1 saved above
2883        mov.l           0x8(%sp),%d0            # restore d0 saved above
2884        mov.l           0xc(%sp),%a6            # restore the copied a6
2885        mov.l           (%sp)+,%sp              # switch to the final sp pushed by pea
2886
2887        btst            &0x7,(%sp)              # is trace enabled?
2888        beq.l           _fpsp_done              # no; exit to os
2889        bra.l           _real_trace             # yes
2890
2891#########################################################################
2892iea_fmovm_ctrl:                                 # fmovm of control registers
2893
2894        bsr.l           fmovm_ctrl              # load ctrl regs
2895# common fmovm exit, used when the exception frame does not have to move
2896iea_fmovm_exit:
2897        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
2898        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2899        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2900
2901        btst            &0x7,EXC_SR(%a6)        # is trace on?
2902        bne.b           iea_fmovm_trace         # yes
2903
2904        mov.l           EXC_EXTWPTR(%a6),EXC_PC(%a6) # set Next PC
2905
2906        unlk            %a6                     # unravel the frame
2907
2908        bra.l           _fpsp_done              # exit to os
2909
2910#
2911# The control reg instruction that took an "Unimplemented Effective Address"
2912# exception was being traced. The "Current PC" for the trace frame is the
2913# PC stacked for Unimp EA. The "Next PC" is in EXC_EXTWPTR.
2914# After fixing the stack frame, jump to _real_trace().
2915#
2916#                UNIMP EA FRAME            TRACE FRAME
2917#               *****************       *****************
2918#               * 0x0 *  0x0f0  *       *    Current    *
2919#               *****************       *      PC       *
2920#               *    Current    *       *****************
2921#               *      PC       *       * 0x2 *  0x024  *
2922#               *****************       *****************
2923#               *      SR       *       *     Next      *
2924#               *****************       *      PC       *
2925#                                       *****************
2926#                                       *      SR       *
2927#                                       *****************
2928# this ain't a pretty solution, but it works:
2929# -restore a6 (not with unlk)
2930# -shift stack frame down over where old a6 used to be
2931# -add LOCAL_SIZE to stack pointer
2932iea_fmovm_trace:
2933        mov.l           (%a6),%a6               # restore frame pointer
2934        mov.w           EXC_SR+LOCAL_SIZE(%sp),0x0+LOCAL_SIZE(%sp) # SR into the old a6 slot
2935        mov.l           EXC_PC+LOCAL_SIZE(%sp),0x8+LOCAL_SIZE(%sp) # Current PC to end of frame
2936        mov.l           EXC_EXTWPTR+LOCAL_SIZE(%sp),0x2+LOCAL_SIZE(%sp) # Next PC follows SR
2937        mov.w           &0x2024,0x6+LOCAL_SIZE(%sp) # stk fmt = 0x2; voff = 0x024
2938        add.l           &LOCAL_SIZE,%sp         # clear stack frame
2939
2940        bra.l           _real_trace
2941
2942#########################################################################
2943# The FPU is disabled and so we should really have taken the "Line
2944# F Emulator" exception. So, here we create an 8-word stack frame
2945# from our 4-word stack frame. This means we must calculate the length
2946# of the faulting instruction to get the "next PC". This is trivial for
2947# immediate operands but requires some extra work for fmovm dynamic
2948# which can use most addressing modes.
2949iea_disabled:
2950        mov.l           (%sp)+,%d0              # restore d0
2951
2952        link            %a6,&-LOCAL_SIZE        # init stack frame
2953
2954        movm.l          &0x0303,EXC_DREGS(%a6)  # save d0-d1/a0-a1
2955
2956# PC of instruction that took the exception is the PC in the frame
2957        mov.l           EXC_PC(%a6),EXC_EXTWPTR(%a6)
2958        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
2959        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
2960        bsr.l           _imem_read_long         # fetch the instruction words
2961        mov.l           %d0,EXC_OPWORD(%a6)     # store OPWORD and EXTWORD
2962
2963        tst.w           %d0                     # is instr fmovm?
2964        bmi.b           iea_dis_fmovm           # yes
2965# instruction is using an extended precision immediate operand. Therefore,
2966# the total instruction length is 16 bytes.
2967iea_dis_immed:
2968        mov.l           &0x10,%d0               # 16 bytes of instruction
2969        bra.b           iea_dis_cont
2970iea_dis_fmovm:
2971        btst            &0xe,%d0                # is instr fmovm ctrl
2972        bne.b           iea_dis_fmovm_data      # no
2973# the instruction is a fmovm.l with 2 or 3 registers.
2974        bfextu          %d0{&19:&3},%d1         # d1 = 3-bit register select field
2975        mov.l           &0xc,%d0                # assume 12 bytes of instruction
2976        cmpi.b          %d1,&0x7                # move all regs?
2977        bne.b           iea_dis_cont            # no; length stays 12
2978        addq.l          &0x4,%d0                # yes; length is 16
2979        bra.b           iea_dis_cont
2980# the instruction is an fmovm.x dynamic which can use many addressing
2981# modes and thus can have several different total instruction lengths.
2982# call fmovm_calc_ea which will go through the ea calc process and,
2983# as a by-product, will tell us how long the instruction is.
2984iea_dis_fmovm_data:
2985        clr.l           %d0
2986        bsr.l           fmovm_calc_ea
2987        mov.l           EXC_EXTWPTR(%a6),%d0    # d0 = updated instruction ptr
2988        sub.l           EXC_PC(%a6),%d0         # d0 = length = ptr - Current PC
2989iea_dis_cont:
2990        mov.w           %d0,EXC_VOFF(%a6)       # store stack shift value
2991
2992        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2993
2994        unlk            %a6
2995
2996# here, we actually create the 8-word frame from the 4-word frame,
2997# with the "next PC" as additional info.
2998# the <ea> field is left as undefined.
2999        subq.l          &0x8,%sp                # make room for new stack
3000        mov.l           %d0,-(%sp)              # save d0
3001        mov.w           0xc(%sp),0x4(%sp)       # move SR
3002        mov.l           0xe(%sp),0x6(%sp)       # move Current PC
3003        clr.l           %d0
3004        mov.w           0x12(%sp),%d0           # d0 = length parked in the old voff word
3005        mov.l           0x6(%sp),0x10(%sp)      # move Current PC
3006        add.l           %d0,0x6(%sp)            # make Next PC
3007        mov.w           &0x402c,0xa(%sp)        # insert offset,frame format
3008        mov.l           (%sp)+,%d0              # restore d0
3009
3010        bra.l           _real_fpu_disabled
3011
3012##########
3013
3014iea_iacc:                                       # exit through _real_access with an 8-word frame
3015        movc            %pcr,%d0
3016        btst            &0x1,%d0                # NOTE(review): presumably the FPU-disable bit - confirm
3017        bne.b           iea_iacc_cont           # bit set: skip the FP register restore
3018        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3019        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1 from stack
3020iea_iacc_cont:
3021        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
3022
3023        unlk            %a6
3024# grow the 4-word frame into an 8-word frame by sliding SR and PC down 8 bytes
3025        subq.w          &0x8,%sp                # make stack frame bigger
3026        mov.l           0x8(%sp),(%sp)          # store SR,hi(PC)
3027        mov.w           0xc(%sp),0x4(%sp)       # store lo(PC)
3028        mov.w           &0x4008,0x6(%sp)        # store voff
3029        mov.l           0x2(%sp),0x8(%sp)       # store ea
3030        mov.l           &0x09428001,0xc(%sp)    # store fslw
3031# shared tail for iea_iacc and iea_dacc: sp points at the finished 8-word frame
3032iea_acc_done:
3033        btst            &0x5,(%sp)              # user or supervisor mode?
3034        beq.b           iea_acc_done2           # user
3035        bset            &0x2,0xd(%sp)           # set supervisor TM bit
3036
3037iea_acc_done2:
3038        bra.l           _real_access
3039
3040iea_dacc:                                       # on entry a0 goes to the ea field, d0.w to the fslw
3041        lea             -LOCAL_SIZE(%a6),%sp    # reset sp to the bottom of the local frame
3042
3043        movc            %pcr,%d1
3044        btst            &0x1,%d1                # NOTE(review): presumably the FPU-disable bit - confirm
3045        bne.b           iea_dacc_cont           # bit set: skip the FP register restore
3046        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1 from stack
3047        fmovm.l         LOCAL_SIZE+USER_FPCR(%sp),%fpcr,%fpsr,%fpiar # restore ctrl regs
3048iea_dacc_cont:
3049        mov.l           (%a6),%a6               # restore a6; its stack slot is reused below
3050# build the 8-word frame 8 bytes below the original 4-word frame
3051        mov.l           0x4+LOCAL_SIZE(%sp),-0x8+0x4+LOCAL_SIZE(%sp)
3052        mov.w           0x8+LOCAL_SIZE(%sp),-0x8+0x8+LOCAL_SIZE(%sp)
3053        mov.w           &0x4008,-0x8+0xa+LOCAL_SIZE(%sp) # same voff word as iea_iacc stores
3054        mov.l           %a0,-0x8+0xc+LOCAL_SIZE(%sp) # ea field = a0
3055        mov.w           %d0,-0x8+0x10+LOCAL_SIZE(%sp) # upper word of fslw = d0.w
3056        mov.w           &0x0001,-0x8+0x12+LOCAL_SIZE(%sp) # lower word of fslw = 0x0001
3057
3058        movm.l          LOCAL_SIZE+EXC_DREGS(%sp),&0x0303 # restore d0-d1/a0-a1
3059        add.w           &LOCAL_SIZE-0x4,%sp     # sp = start of the new frame
3060
3061        bra.b           iea_acc_done
3062
3063#########################################################################
3064# XDEF **************************************************************** #
3065#       _fpsp_operr(): 060FPSP entry point for FP Operr exception.      #
3066#                                                                       #
3067#       This handler should be the first code executed upon taking the  #
3068#       FP Operand Error exception in an operating system.              #
3069#                                                                       #
3070# XREF **************************************************************** #
3071#       _imem_read_long() - read instruction longword                   #
3072#       fix_skewed_ops() - adjust src operand in fsave frame            #
3073#       _real_operr() - "callout" to operating system operr handler     #
3074#       _dmem_write_{byte,word,long}() - store data to mem (opclass 3)  #
3075#       store_dreg_{b,w,l}() - store data to data regfile (opclass 3)   #
3076#       facc_out_{b,w,l}() - store to memory took access error (opcl 3) #
3077#                                                                       #
3078# INPUT *************************************************************** #
3079#       - The system stack contains the FP Operr exception frame        #
3080#       - The fsave frame contains the source operand                   #
3081#                                                                       #
3082# OUTPUT ************************************************************** #
3083#       No access error:                                                #
3084#       - The system stack is unchanged                                 #
3085#       - The fsave frame contains the adjusted src op for opclass 0,2  #
3086#                                                                       #
3087# ALGORITHM *********************************************************** #
3088#       In a system where the FP Operr exception is enabled, the goal   #
3089# is to get to the handler specified at _real_operr(). But, on the 060, #
3090# for opclass zero and two instructions taking this exception, the      #
3091# input operand in the fsave frame may be incorrect for some cases      #
3092# and needs to be corrected. This handler calls fix_skewed_ops() to     #
3093# do just this and then exits through _real_operr().                    #
3094#       For opclass 3 instructions, the 060 doesn't store the default   #
3095# operr result out to memory or data register file as it should.        #
3096# This code must emulate the move out before finally exiting through    #
3097# _real_operr(). The move out, if to memory, is performed using         #
3098# _mem_write() "callout" routines that may return a failing result.     #
3099# In this special case, the handler must exit through facc_out()        #
3100# which creates an access error stack frame from the current operr      #
3101# stack frame.                                                          #
3102#                                                                       #
3103#########################################################################
3104
3105        global          _fpsp_operr
3106_fpsp_operr:
3107
3108        link.w          %a6,&-LOCAL_SIZE        # init stack frame
3109
3110        fsave           FP_SRC(%a6)             # grab the "busy" frame
3111
3112        movm.l          &0x0303,EXC_DREGS(%a6)  # save d0-d1/a0-a1
3113        fmovm.l         %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3114        fmovm.x         &0xc0,EXC_FPREGS(%a6)   # save fp0-fp1 on stack
3115
3116# the FPIAR holds the "current PC" of the faulting instruction
3117        mov.l           USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3118
3119        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
3120        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
3121        bsr.l           _imem_read_long         # fetch the instruction words
3122        mov.l           %d0,EXC_OPWORD(%a6)     # store the fetched longword
3123
3124##############################################################################
3125
3126        btst            &13,%d0                 # is instr an fmove out?
3127        bne.b           foperr_out              # fmove out
3128
3129
3130# here, we simply see if the operand in the fsave frame needs to be "unskewed".
3131# this would be the case for opclass two operations with a source infinity or
3132# denorm operand in the sgl or dbl format. NANs also become skewed, but can't
3133# cause an operr so we don't need to check for them here.
3134        lea             FP_SRC(%a6),%a0         # pass: ptr to src op
3135        bsr.l           fix_skewed_ops          # fix src op
3136# common exit, also reached from the foperr_out_* paths: restore state, frestore the frame
3137foperr_exit:
3138        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
3139        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3140        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
3141
3142        frestore        FP_SRC(%a6)             # put the fsave frame back
3143
3144        unlk            %a6
3145        bra.l           _real_operr             # hand off to the operr handler callout
3146
3147########################################################################
3148
3149#
3150# the hardware does not save the default result to memory on enabled
3151# operand error exceptions. we do this here before passing control to
3152# the user operand error handler.
3153#
3154# byte, word, and long destination format operations can pass
3155# through here. we simply need to test the sign of the src
3156# operand and save the appropriate minimum or maximum integer value
3157# to the effective address as pointed to by the stacked effective address.
3158#
3159# although packed opclass three operations can take operand error
3160# exceptions, they won't pass through here since they are caught
3161# first by the unsupported data format exception handler. that handler
3162# sends them directly to _real_operr() if necessary.
3163#
3164foperr_out:                                     # compute the default result into L_SCR1, then dispatch
3165
3166        mov.w           FP_SRC_EX(%a6),%d1      # fetch exponent
3167        andi.w          &0x7fff,%d1             # drop the sign bit
3168        cmpi.w          %d1,&0x7fff             # maximum exponent?
3169        bne.b           foperr_out_not_qnan     # no; operand is neither infinity nor NAN
3170# the operand is either an infinity or a QNAN.
3171        tst.l           FP_SRC_LO(%a6)          # any low mantissa bits set?
3172        bne.b           foperr_out_qnan         # yes; it is a NAN
3173        mov.l           FP_SRC_HI(%a6),%d1
3174        andi.l          &0x7fffffff,%d1         # ignore the top mantissa bit
3175        beq.b           foperr_out_not_qnan     # rest of mantissa zero: infinity
3176foperr_out_qnan:
3177        mov.l           FP_SRC_HI(%a6),L_SCR1(%a6) # result = upper mantissa longword
3178        bra.b           foperr_out_jmp
3179# not a NAN: result is 0x7fffffff for a positive source, 0x80000000 for a negative one
3180foperr_out_not_qnan:
3181        mov.l           &0x7fffffff,%d1
3182        tst.b           FP_SRC_EX(%a6)          # test the sign of the src operand
3183        bpl.b           foperr_out_not_qnan2    # positive; keep 0x7fffffff
3184        addq.l          &0x1,%d1                # negative; 0x7fffffff + 1 = 0x80000000
3185foperr_out_not_qnan2:
3186        mov.l           %d1,L_SCR1(%a6)
3187
3188foperr_out_jmp:
3189        bfextu          %d0{&19:&3},%d0         # extract dst format field
3190        mov.b           1+EXC_OPWORD(%a6),%d1   # extract <ea> mode,reg
3191        mov.w           (tbl_operr.b,%pc,%d0.w*2),%a0 # a0 = handler offset from tbl_operr
3192        jmp             (tbl_operr.b,%pc,%a0)
3193# word offsets from tbl_operr, indexed by dst format; an offset of 0 targets tbl_operr itself
3194tbl_operr:
3195        short           foperr_out_l - tbl_operr # long word integer
3196        short           tbl_operr    - tbl_operr # sgl prec shouldn't happen
3197        short           tbl_operr    - tbl_operr # ext prec shouldn't happen
3198        short           foperr_exit  - tbl_operr # packed won't enter here
3199        short           foperr_out_w - tbl_operr # word integer
3200        short           tbl_operr    - tbl_operr # dbl prec shouldn't happen
3201        short           foperr_out_b - tbl_operr # byte integer
3202        short           tbl_operr    - tbl_operr # packed won't enter here
3203
3204foperr_out_b:                                   # byte dst; d1 = <ea> mode,reg byte
3205        mov.b           L_SCR1(%a6),%d0         # load default result (first byte of L_SCR1)
3206        cmpi.b          %d1,&0x7                # is <ea> mode a data reg?
3207        ble.b           foperr_out_b_save_dn    # yes
3208        mov.l           EXC_EA(%a6),%a0         # pass: <ea> of default result
3209        bsr.l           _dmem_write_byte        # write the default result
3210
3211        tst.l           %d1                     # did dstore fail?
3212        bne.l           facc_out_b              # yes
3213
3214        bra.w           foperr_exit
3215foperr_out_b_save_dn:
3216        andi.w          &0x0007,%d1             # keep only the register number
3217        bsr.l           store_dreg_b            # store result to regfile
3218        bra.w           foperr_exit
3219
3220foperr_out_w:                                   # word dst; d1 = <ea> mode,reg byte
3221        mov.w           L_SCR1(%a6),%d0         # load default result (first word of L_SCR1)
3222        cmpi.b          %d1,&0x7                # is <ea> mode a data reg?
3223        ble.b           foperr_out_w_save_dn    # yes
3224        mov.l           EXC_EA(%a6),%a0         # pass: <ea> of default result
3225        bsr.l           _dmem_write_word        # write the default result
3226
3227        tst.l           %d1                     # did dstore fail?
3228        bne.l           facc_out_w              # yes
3229
3230        bra.w           foperr_exit
3231foperr_out_w_save_dn:
3232        andi.w          &0x0007,%d1             # keep only the register number
3233        bsr.l           store_dreg_w            # store result to regfile
3234        bra.w           foperr_exit
3235
3236foperr_out_l:                                   # long dst; d1 = <ea> mode,reg byte
3237        mov.l           L_SCR1(%a6),%d0         # load default result (all of L_SCR1)
3238        cmpi.b          %d1,&0x7                # is <ea> mode a data reg?
3239        ble.b           foperr_out_l_save_dn    # yes
3240        mov.l           EXC_EA(%a6),%a0         # pass: <ea> of default result
3241        bsr.l           _dmem_write_long        # write the default result
3242
3243        tst.l           %d1                     # did dstore fail?
3244        bne.l           facc_out_l              # yes
3245
3246        bra.w           foperr_exit
3247foperr_out_l_save_dn:
3248        andi.w          &0x0007,%d1             # keep only the register number
3249        bsr.l           store_dreg_l            # store result to regfile
3250        bra.w           foperr_exit
3251
3252#########################################################################
3253# XDEF **************************************************************** #
3254#       _fpsp_snan(): 060FPSP entry point for FP SNAN exception.        #
3255#                                                                       #
3256#       This handler should be the first code executed upon taking the  #
3257#       FP Signalling NAN exception in an operating system.             #
3258#                                                                       #
3259# XREF **************************************************************** #
3260#       _imem_read_long() - read instruction longword                   #
3261#       fix_skewed_ops() - adjust src operand in fsave frame            #
3262#       _real_snan() - "callout" to operating system SNAN handler       #
3263#       _dmem_write_{byte,word,long}() - store data to mem (opclass 3)  #
3264#       store_dreg_{b,w,l}() - store data to data regfile (opclass 3)   #
3265#       facc_out_{b,w,l,d,x}() - store to mem took acc error (opcl 3)   #
3266#       _calc_ea_fout() - fix An if <ea> is -() or ()+; also get <ea>   #
3267#                                                                       #
3268# INPUT *************************************************************** #
3269#       - The system stack contains the FP SNAN exception frame         #
3270#       - The fsave frame contains the source operand                   #
3271#                                                                       #
3272# OUTPUT ************************************************************** #
3273#       No access error:                                                #
3274#       - The system stack is unchanged                                 #
3275#       - The fsave frame contains the adjusted src op for opclass 0,2  #
3276#                                                                       #
3277# ALGORITHM *********************************************************** #
3278#       In a system where the FP SNAN exception is enabled, the goal    #
3279# is to get to the handler specified at _real_snan(). But, on the 060,  #
3280# for opclass zero and two instructions taking this exception, the      #
3281# input operand in the fsave frame may be incorrect for some cases      #
3282# and needs to be corrected. This handler calls fix_skewed_ops() to     #
3283# do just this and then exits through _real_snan().                     #
3284#       For opclass 3 instructions, the 060 doesn't store the default   #
3285# SNAN result out to memory or data register file as it should.         #
3286# This code must emulate the move out before finally exiting through    #
3287# _real_snan(). The move out, if to memory, is performed using          #
3288# _mem_write() "callout" routines that may return a failing result.     #
3289# In this special case, the handler must exit through facc_out()        #
3290# which creates an access error stack frame from the current SNAN       #
3291# stack frame.                                                          #
3292#       For the case of an extended precision opclass 3 instruction,    #
3293# if the effective addressing mode was -() or ()+, then the address     #
3294# register must get updated by calling _calc_ea_fout(). If the <ea>     #
3295# was -(a7) from supervisor mode, then the exception frame currently    #
3296# on the system stack must be carefully moved "down" to make room       #
3297# for the operand being moved.                                          #
3298#                                                                       #
3299#########################################################################
3300
3301        global          _fpsp_snan
3302_fpsp_snan:
3303
3304        link.w          %a6,&-LOCAL_SIZE        # init stack frame
3305
3306        fsave           FP_SRC(%a6)             # grab the "busy" frame
3307
3308        movm.l          &0x0303,EXC_DREGS(%a6)  # save d0-d1/a0-a1
3309        fmovm.l         %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3310        fmovm.x         &0xc0,EXC_FPREGS(%a6)   # save fp0-fp1 on stack
3311
3312# the FPIAR holds the "current PC" of the faulting instruction
3313        mov.l           USER_FPIAR(%a6),EXC_EXTWPTR(%a6) # start instr ptr at saved FPIAR
3314
3315        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
3316        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
3317        bsr.l           _imem_read_long         # fetch the instruction words
3318        mov.l           %d0,EXC_OPWORD(%a6)     # save the fetched longword
3319
3320##############################################################################
3321
3322        btst            &13,%d0                 # is instr an fmove out?
3323        bne.w           fsnan_out               # fmove out
3324
3325
3326# here, we simply see if the operand in the fsave frame needs to be "unskewed".
3327# this would be the case for opclass two operations with a source infinity or
3328# denorm operand in the sgl or dbl format. NANs also become skewed and must be
3329# fixed here.
3330        lea             FP_SRC(%a6),%a0         # pass: ptr to src op
3331        bsr.l           fix_skewed_ops          # fix src op
3332
3333fsnan_exit:
3334        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
3335        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3336        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
3337
3338        frestore        FP_SRC(%a6)             # reload the fsave frame from the stack
3339
3340        unlk            %a6                     # release the local stack frame
3341        bra.l           _real_snan              # exit through the OS SNAN handler
3342
3343########################################################################
3344
3345#
3346# the hardware does not save the default result to memory on enabled
3347# snan exceptions. we do this here before passing control to
3348# the user snan handler.
3349#
3350# the destination format field of the opword selects a routine through
3351# tbl_snan. the two packed entries have a zero offset; the original note
3352# says packed operations were already handled by fpsp_unsupp().
3353# for byte, word, and long, the upper byte/word/longword of the SNAN
3354# mantissa is stored with the SNAN bit set. for sgl, dbl, and ext, a
3355# value of that format is built from the source op with the SNAN bit set.
3356#
3357fsnan_out:
3358
3359        bfextu          %d0{&19:&3},%d0         # extract dst format field
3360        mov.b           1+EXC_OPWORD(%a6),%d1   # extract <ea> mode,reg
3361        mov.w           (tbl_snan.b,%pc,%d0.w*2),%a0 # fetch table entry for this format
3362        jmp             (tbl_snan.b,%pc,%a0)    # jump to tbl_snan + entry
3363
3364tbl_snan:
3365        short           fsnan_out_l - tbl_snan # long word integer
3366        short           fsnan_out_s - tbl_snan # sgl prec shouldn't happen
3367        short           fsnan_out_x - tbl_snan # ext prec shouldn't happen
3368        short           tbl_snan    - tbl_snan # packed needs no help
3369        short           fsnan_out_w - tbl_snan # word integer
3370        short           fsnan_out_d - tbl_snan # dbl prec shouldn't happen
3371        short           fsnan_out_b - tbl_snan # byte integer
3372        short           tbl_snan    - tbl_snan # packed needs no help
3373
3374fsnan_out_b:
3375        mov.b           FP_SRC_HI(%a6),%d0      # load upper byte of SNAN
3376        bset            &6,%d0                  # set SNAN bit
3377        cmpi.b          %d1,&0x7                # is <ea> mode a data reg?
3378        ble.b           fsnan_out_b_dn          # yes
3379        mov.l           EXC_EA(%a6),%a0         # pass: <ea> of default result
3380        bsr.l           _dmem_write_byte        # write the default result
3381
3382        tst.l           %d1                     # did dstore fail?
3383        bne.l           facc_out_b              # yes
3384
3385        bra.w           fsnan_exit              # stored to memory; restore and exit
3386fsnan_out_b_dn:
3387        andi.w          &0x0007,%d1             # keep only the <ea> reg field
3388        bsr.l           store_dreg_b            # store result to regfile
3389        bra.w           fsnan_exit              # restore and exit
3390
3391fsnan_out_w:
3392        mov.w           FP_SRC_HI(%a6),%d0      # load upper word of SNAN
3393        bset            &14,%d0                 # set SNAN bit
3394        cmpi.b          %d1,&0x7                # is <ea> mode a data reg?
3395        ble.b           fsnan_out_w_dn          # yes
3396        mov.l           EXC_EA(%a6),%a0         # pass: <ea> of default result
3397        bsr.l           _dmem_write_word        # write the default result
3398
3399        tst.l           %d1                     # did dstore fail?
3400        bne.l           facc_out_w              # yes
3401
3402        bra.w           fsnan_exit              # stored to memory; restore and exit
3403fsnan_out_w_dn:
3404        andi.w          &0x0007,%d1             # keep only the <ea> reg field
3405        bsr.l           store_dreg_w            # store result to regfile
3406        bra.w           fsnan_exit              # restore and exit
3407
3408fsnan_out_l:
3409        mov.l           FP_SRC_HI(%a6),%d0      # load upper longword of SNAN
3410        bset            &30,%d0                 # set SNAN bit
3411        cmpi.b          %d1,&0x7                # is <ea> mode a data reg?
3412        ble.b           fsnan_out_l_dn          # yes
3413        mov.l           EXC_EA(%a6),%a0         # pass: <ea> of default result
3414        bsr.l           _dmem_write_long        # write the default result
3415
3416        tst.l           %d1                     # did dstore fail?
3417        bne.l           facc_out_l              # yes
3418
3419        bra.w           fsnan_exit              # stored to memory; restore and exit
3420fsnan_out_l_dn:
3421        andi.w          &0x0007,%d1             # keep only the <ea> reg field
3422        bsr.l           store_dreg_l            # store result to regfile
3423        bra.w           fsnan_exit              # restore and exit
3424
3425fsnan_out_s:
3426        cmpi.b          %d1,&0x7                # is <ea> mode a data reg?
3427        ble.b           fsnan_out_d_dn          # yes (label is the sgl data reg path)
3428        mov.l           FP_SRC_EX(%a6),%d0      # fetch SNAN sign
3429        andi.l          &0x80000000,%d0         # keep sign
3430        ori.l           &0x7fc00000,%d0         # insert new exponent,SNAN bit
3431        mov.l           FP_SRC_HI(%a6),%d1      # load mantissa
3432        lsr.l           &0x8,%d1                # shift mantissa for sgl
3433        or.l            %d1,%d0                 # create sgl SNAN
3434        mov.l           EXC_EA(%a6),%a0         # pass: <ea> of default result
3435        bsr.l           _dmem_write_long        # write the default result
3436
3437        tst.l           %d1                     # did dstore fail?
3438        bne.l           facc_out_l              # yes
3439
3440        bra.w           fsnan_exit              # stored to memory; restore and exit
3441fsnan_out_d_dn:
3442        mov.l           FP_SRC_EX(%a6),%d0      # fetch SNAN sign
3443        andi.l          &0x80000000,%d0         # keep sign
3444        ori.l           &0x7fc00000,%d0         # insert new exponent,SNAN bit
3445        mov.l           %d1,-(%sp)              # save <ea> mode,reg; d1 is reused below
3446        mov.l           FP_SRC_HI(%a6),%d1      # load mantissa
3447        lsr.l           &0x8,%d1                # shift mantissa for sgl
3448        or.l            %d1,%d0                 # create sgl SNAN
3449        mov.l           (%sp)+,%d1              # restore <ea> mode,reg
3450        andi.w          &0x0007,%d1             # keep only the <ea> reg field
3451        bsr.l           store_dreg_l            # store result to regfile
3452        bra.w           fsnan_exit              # restore and exit
3453
3454fsnan_out_d:
3455        mov.l           FP_SRC_EX(%a6),%d0      # fetch SNAN sign
3456        andi.l          &0x80000000,%d0         # keep sign
3457        ori.l           &0x7ff80000,%d0         # insert new exponent,SNAN bit
3458        mov.l           FP_SRC_HI(%a6),%d1      # load hi mantissa
3459        mov.l           %d0,FP_SCR0_EX(%a6)     # store to temp space
3460        mov.l           &11,%d0                 # load shift amt
3461        lsr.l           %d0,%d1                 # hi mantissa >> 11
3462        or.l            %d1,FP_SCR0_EX(%a6)     # create dbl hi
3463        mov.l           FP_SRC_HI(%a6),%d1      # load hi mantissa
3464        andi.l          &0x000007ff,%d1         # keep the 11 bits shifted out above
3465        ror.l           %d0,%d1                 # rotate them into the top 11 bits
3466        mov.l           %d1,FP_SCR0_HI(%a6)     # store to temp space
3467        mov.l           FP_SRC_LO(%a6),%d1      # load lo mantissa
3468        lsr.l           %d0,%d1                 # lo mantissa >> 11
3469        or.l            %d1,FP_SCR0_HI(%a6)     # create dbl lo
3470        lea             FP_SCR0(%a6),%a0        # pass: ptr to operand
3471        mov.l           EXC_EA(%a6),%a1         # pass: dst addr
3472        movq.l          &0x8,%d0                # pass: size of 8 bytes
3473        bsr.l           _dmem_write             # write the default result
3474
3475        tst.l           %d1                     # did dstore fail?
3476        bne.l           facc_out_d              # yes
3477
3478        bra.w           fsnan_exit              # stored to memory; restore and exit
3479
3480# for extended precision, if the addressing mode is pre-decrement or
3481# post-increment, then the address register did not get updated.
3482# in addition, for pre-decrement, the stacked <ea> is incorrect.
3483fsnan_out_x:
3484        clr.b           SPCOND_FLG(%a6)         # clear special case flag
3485
3486        mov.w           FP_SRC_EX(%a6),FP_SCR0_EX(%a6) # copy sign,exponent word
3487        clr.w           2+FP_SCR0(%a6)          # zero the word after the exponent
3488        mov.l           FP_SRC_HI(%a6),%d0      # load hi mantissa
3489        bset            &30,%d0                 # set SNAN bit
3490        mov.l           %d0,FP_SCR0_HI(%a6)     # store hi mantissa to temp space
3491        mov.l           FP_SRC_LO(%a6),FP_SCR0_LO(%a6) # copy lo mantissa
3492
3493        btst            &0x5,EXC_SR(%a6)        # supervisor mode exception?
3494        bne.b           fsnan_out_x_s           # yes
3495
3496        mov.l           %usp,%a0                # fetch user stack pointer
3497        mov.l           %a0,EXC_A7(%a6)         # save on stack for calc_ea()
3498        mov.l           (%a6),EXC_A6(%a6)       # copy a6 saved by link for calc_ea()
3499
3500        bsr.l           _calc_ea_fout           # find the correct ea,update An
3501        mov.l           %a0,%a1                 # pass: dst addr
3502        mov.l           %a0,EXC_EA(%a6)         # stack correct <ea>
3503
3504        mov.l           EXC_A7(%a6),%a0         # fetch (possibly updated) user a7
3505        mov.l           %a0,%usp                # restore user stack pointer
3506        mov.l           EXC_A6(%a6),(%a6)       # write back (possibly updated) a6
3507
3508fsnan_out_x_save:
3509        lea             FP_SCR0(%a6),%a0        # pass: ptr to operand
3510        movq.l          &0xc,%d0                # pass: size of extended
3511        bsr.l           _dmem_write             # write the default result
3512
3513        tst.l           %d1                     # did dstore fail?
3514        bne.l           facc_out_x              # yes
3515
3516        bra.w           fsnan_exit              # stored to memory; restore and exit
3517
3518fsnan_out_x_s:
3519        mov.l           (%a6),EXC_A6(%a6)       # copy a6 saved by link for calc_ea()
3520
3521        bsr.l           _calc_ea_fout           # find the correct ea,update An
3522        mov.l           %a0,%a1                 # pass: dst addr
3523        mov.l           %a0,EXC_EA(%a6)         # stack correct <ea>
3524
3525        mov.l           EXC_A6(%a6),(%a6)       # write back (possibly updated) a6
3526
3527        cmpi.b          SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
3528        bne.b           fsnan_out_x_save        # no
3529
3530# the operation was "fmove.x SNAN,-(a7)" from supervisor mode.
3531        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
3532        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3533        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
3534
3535        frestore        FP_SRC(%a6)             # reload the fsave frame from the stack
3536
3537        mov.l           EXC_A6(%a6),%a6         # restore frame pointer
3538
3539        mov.l           LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) # move frame down 0xc
3540        mov.l           LOCAL_SIZE+EXC_PC+0x2(%sp),LOCAL_SIZE+EXC_PC+0x2-0xc(%sp) # move frame down 0xc
3541        mov.l           LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp) # move frame down 0xc
3542
3543        mov.l           LOCAL_SIZE+FP_SCR0_EX(%sp),LOCAL_SIZE+EXC_SR(%sp) # operand into vacated space
3544        mov.l           LOCAL_SIZE+FP_SCR0_HI(%sp),LOCAL_SIZE+EXC_PC+0x2(%sp) # operand into vacated space
3545        mov.l           LOCAL_SIZE+FP_SCR0_LO(%sp),LOCAL_SIZE+EXC_EA(%sp) # operand into vacated space
3546
3547        add.l           &LOCAL_SIZE-0x8,%sp     # release locals; presumably sp -> relocated frame
3548
3549        bra.l           _real_snan              # exit through the OS SNAN handler
3550
3551#########################################################################
3552# XDEF **************************************************************** #
3553#       _fpsp_inex(): 060FPSP entry point for FP Inexact exception.     #
3554#                                                                       #
3555#       This handler should be the first code executed upon taking the  #
3556#       FP Inexact exception in an operating system.                    #
3557#                                                                       #
3558# XREF **************************************************************** #
3559#       _imem_read_long() - read instruction longword                   #
3560#       fix_skewed_ops() - adjust src operand in fsave frame            #
3561#       set_tag_x() - determine optype of src/dst operands              #
3562#       store_fpreg() - store opclass 0 or 2 result to FP regfile       #
3563#       unnorm_fix() - change UNNORM operands to NORM or ZERO           #
3564#       load_fpn2() - load dst operand from FP regfile                  #
3565#       smovcr() - emulate an "fmovcr" instruction                      #
3566#       fout() - emulate an opclass 3 instruction                       #
3567#       tbl_unsupp - addr of emulation routine table for opclass 0,2    #
3568#       _real_inex() - "callout" to operating system inexact handler    #
3569#                                                                       #
3570# INPUT *************************************************************** #
3571#       - The system stack contains the FP Inexact exception frame      #
3572#       - The fsave frame contains the source operand                   #
3573#                                                                       #
3574# OUTPUT ************************************************************** #
3575#       - The system stack is unchanged                                 #
3576#       - The fsave frame contains the adjusted src op for opclass 0,2  #
3577#                                                                       #
3578# ALGORITHM *********************************************************** #
3579#       In a system where the FP Inexact exception is enabled, the goal #
3580# is to get to the handler specified at _real_inex(). But, on the 060,  #
3581# for opclass zero and two instructions taking this exception, the      #
3582# hardware doesn't store the correct result to the destination FP       #
3583# register as did the '040 and '881/2. This handler must emulate the    #
3584# instruction in order to get this value and then store it to the       #
3585# correct register before calling _real_inex().                         #
3586#       For opclass 3 instructions, the 060 doesn't store the default   #
3587# inexact result out to memory or data register file as it should.      #
3588# This code must emulate the move out by calling fout() before finally  #
3589# exiting through _real_inex().                                         #
3590#                                                                       #
3591#########################################################################
3592
3593        global          _fpsp_inex
3594_fpsp_inex:
3595
3596        link.w          %a6,&-LOCAL_SIZE        # init stack frame
3597
3598        fsave           FP_SRC(%a6)             # grab the "busy" frame
3599
3600        movm.l          &0x0303,EXC_DREGS(%a6)  # save d0-d1/a0-a1
3601        fmovm.l         %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3602        fmovm.x         &0xc0,EXC_FPREGS(%a6)   # save fp0-fp1 on stack
3603
3604# the FPIAR holds the "current PC" of the faulting instruction
3605        mov.l           USER_FPIAR(%a6),EXC_EXTWPTR(%a6) # start instr ptr at saved FPIAR
3606
3607        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
3608        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
3609        bsr.l           _imem_read_long         # fetch the instruction words
3610        mov.l           %d0,EXC_OPWORD(%a6)     # save the fetched longword
3611
3612##############################################################################
3613
3614        btst            &13,%d0                 # is instr an fmove out?
3615        bne.w           finex_out               # fmove out
3616
3617
3618# the hardware, for "fabs" and "fneg" w/ a long source format, puts the
3619# longword integer directly into the upper longword of the mantissa along
3620# w/ an exponent value of 0x401e. we convert this to extended precision here.
3621        bfextu          %d0{&19:&3},%d0         # fetch instr size
3622        bne.b           finex_cont              # instr size is not long
3623        cmpi.w          FP_SRC_EX(%a6),&0x401e  # is exponent 0x401e?
3624        bne.b           finex_cont              # no
3625        fmov.l          &0x0,%fpcr              # zero fpcr before converting
3626        fmov.l          FP_SRC_HI(%a6),%fp0     # load integer src
3627        fmov.x          %fp0,FP_SRC(%a6)        # store integer as extended precision
3628        mov.w           &0xe001,0x2+FP_SRC(%a6) # NOTE(review): meaning of 0xe001 not shown here
3629
3630finex_cont:
3631        lea             FP_SRC(%a6),%a0         # pass: ptr to src op
3632        bsr.l           fix_skewed_ops          # fix src op
3633
3634# Here, we zero the ccode and exception byte field since we're going to
3635# emulate the whole instruction. Notice, though, that we don't kill the
3636# INEX1 bit. This is because a packed op has long since been converted
3637# to extended before arriving here. Therefore, we need to retain the
3638# INEX1 bit from when the operand was first converted.
3639        andi.l          &0x00ff01ff,USER_FPSR(%a6) # zero all but accrued field
3640
3641        fmov.l          &0x0,%fpcr              # zero current control regs
3642        fmov.l          &0x0,%fpsr              # (fpsr zeroed along with fpcr)
3643
3644        bfextu          EXC_EXTWORD(%a6){&0:&6},%d1 # extract upper 6 of cmdreg
3645        cmpi.b          %d1,&0x17               # is op an fmovecr?
3646        beq.w           finex_fmovcr            # yes
3647
3648        lea             FP_SRC(%a6),%a0         # pass: ptr to src op
3649        bsr.l           set_tag_x               # tag the operand type
3650        mov.b           %d0,STAG(%a6)           # maybe NORM,DENORM
3651
3652# bits four and five of the fp extension word separate the monadic and dyadic
3653# operations that can pass through fpsp_inex(). remember that fcmp and ftst
3654# will never take this exception, but fsincos will.
3655        btst            &0x5,1+EXC_CMDREG(%a6)  # is operation monadic or dyadic?
3656        beq.b           finex_extract           # monadic
3657
3658        btst            &0x4,1+EXC_CMDREG(%a6)  # is operation an fsincos?
3659        bne.b           finex_extract           # yes
3660
3661        bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
3662        bsr.l           load_fpn2               # load dst into FP_DST
3663
3664        lea             FP_DST(%a6),%a0         # pass: ptr to dst op
3665        bsr.l           set_tag_x               # tag the operand type
3666        cmpi.b          %d0,&UNNORM             # is operand an UNNORM?
3667        bne.b           finex_op2_done          # no
3668        bsr.l           unnorm_fix              # yes; convert to NORM,DENORM,or ZERO
3669finex_op2_done:
3670        mov.b           %d0,DTAG(%a6)           # save dst optype tag
3671
3672finex_extract:
3673        clr.l           %d0                     # clear upper bytes before the byte load
3674        mov.b           FPCR_MODE(%a6),%d0      # pass rnd prec/mode
3675
3676        mov.b           1+EXC_CMDREG(%a6),%d1   # fetch low byte of cmdreg
3677        andi.w          &0x007f,%d1             # extract extension
3678
3679        lea             FP_SRC(%a6),%a0         # pass: ptr to src op
3680        lea             FP_DST(%a6),%a1         # pass: ptr to dst op
3681
3682        mov.l           (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
3683        jsr             (tbl_unsupp.l,%pc,%d1.l*1) # call routine at tbl_unsupp + entry
3684
3685# the operation has been emulated. the result is in fp0.
3686finex_save:
3687        bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst reg field
3688        bsr.l           store_fpreg             # store result to FP regfile
3689
3690finex_exit:
3691        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
3692        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3693        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
3694
3695        frestore        FP_SRC(%a6)             # reload the fsave frame from the stack
3696
3697        unlk            %a6                     # release the local stack frame
3698        bra.l           _real_inex              # exit through the OS inexact handler
3699
3700finex_fmovcr:
3701        clr.l           %d0                     # clear upper bytes before the byte load
3702        mov.b           FPCR_MODE(%a6),%d0      # pass rnd prec,mode
3703        mov.b           1+EXC_CMDREG(%a6),%d1   # fetch low byte of cmdreg
3704        andi.l          &0x0000007f,%d1         # pass rom offset
3705        bsr.l           smovcr                  # emulate the fmovcr
3706        bra.b           finex_save              # store the result to the dst FP reg
3707
3708########################################################################
3709
3710#
3711# the hardware does not save the default result to memory on enabled
3712# inexact exceptions. we do this here before passing control to
3713# the user inexact handler.
3714#
3715# byte, word, and long destination format operations can pass
3716# through here. so can double and single precision.
3717# although packed opclass three operations can take inexact
3718# exceptions, they won't pass through here since they are caught
3719# first by the unsupported data format exception handler. that handler
3720# sends them directly to _real_inex() if necessary.
3721#
3722finex_out:
3723
3724        mov.b           &NORM,STAG(%a6)         # src is a NORM
3725
3726        clr.l           %d0                     # clear upper bytes before the byte load
3727        mov.b           FPCR_MODE(%a6),%d0      # pass rnd prec,mode
3728
3729        andi.l          &0xffff00ff,USER_FPSR(%a6) # zero exception field
3730
3731        lea             FP_SRC(%a6),%a0         # pass ptr to src operand
3732
3733        bsr.l           fout                    # store the default result
3734
3735        bra.b           finex_exit              # restore state and exit via _real_inex
3736
3737#########################################################################
3738# XDEF **************************************************************** #
3739#       _fpsp_dz(): 060FPSP entry point for FP DZ exception.            #
3740#                                                                       #
3741#       This handler should be the first code executed upon taking      #
3742#       the FP DZ exception in an operating system.                     #
3743#                                                                       #
3744# XREF **************************************************************** #
3745#       _imem_read_long() - read instruction longword from memory       #
3746#       fix_skewed_ops() - adjust fsave operand                         #
3747#       _real_dz() - "callout" exit point from FP DZ handler            #
3748#                                                                       #
3749# INPUT *************************************************************** #
3750#       - The system stack contains the FP DZ exception stack.          #
3751#       - The fsave frame contains the source operand.                  #
3752#                                                                       #
3753# OUTPUT ************************************************************** #
3754#       - The system stack contains the FP DZ exception stack.          #
3755#       - The fsave frame contains the adjusted source operand.         #
3756#                                                                       #
3757# ALGORITHM *********************************************************** #
3758#       In a system where the DZ exception is enabled, the goal is to   #
3759# get to the handler specified at _real_dz(). But, on the 060, when the #
3760# exception is taken, the input operand in the fsave state frame may    #
3761# be incorrect for some cases and need to be adjusted. So, this package #
3762# adjusts the operand using fix_skewed_ops() and then branches to       #
3763# _real_dz().                                                           #
3764#                                                                       #
3765#########################################################################
3766
3767        global          _fpsp_dz
3768_fpsp_dz:
3769
3770        link.w          %a6,&-LOCAL_SIZE        # init stack frame
3771
3772        fsave           FP_SRC(%a6)             # grab the "busy" frame
3773
3774        movm.l          &0x0303,EXC_DREGS(%a6)  # save d0-d1/a0-a1
3775        fmovm.l         %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3776        fmovm.x         &0xc0,EXC_FPREGS(%a6)   # save fp0-fp1 on stack
3777
3778# the FPIAR holds the "current PC" of the faulting instruction
3779        mov.l           USER_FPIAR(%a6),EXC_EXTWPTR(%a6) # start instr ptr at saved FPIAR
3780
3781        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
3782        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
3783        bsr.l           _imem_read_long         # fetch the instruction words
3784        mov.l           %d0,EXC_OPWORD(%a6)     # save the fetched longword
3785
3786##############################################################################
3787
3788
3789# here, we simply see if the operand in the fsave frame needs to be "unskewed".
3790# this would be the case for opclass two operations with a source zero
3791# in the sgl or dbl format.
3792        lea             FP_SRC(%a6),%a0         # pass: ptr to src op
3793        bsr.l           fix_skewed_ops          # fix src op
3794
3795fdz_exit:
3796        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
3797        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3798        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
3799
3800        frestore        FP_SRC(%a6)             # reload the fsave frame from the stack
3801
3802        unlk            %a6                     # release the local stack frame
3803        bra.l           _real_dz                # exit through the OS DZ handler
3804
3805#########################################################################
3806# XDEF **************************************************************** #
3807#       _fpsp_fline(): 060FPSP entry point for "Line F emulator"        #
3808#                      exception when the "reduced" version of the      #
3809#                      FPSP is implemented that does not emulate        #
3810#                      FP unimplemented instructions.                   #
3811#                                                                       #
3812#       This handler should be the first code executed upon taking a    #
3813#       "Line F Emulator" exception in an operating system integrating  #
3814#       the reduced version of 060FPSP.                                 #
3815#                                                                       #
3816# XREF **************************************************************** #
3817#       _real_fpu_disabled() - Handle "FPU disabled" exceptions         #
3818#       _real_fline() - Handle all other cases (treated equally)        #
3819#                                                                       #
3820# INPUT *************************************************************** #
3821#       - The system stack contains a "Line F Emulator" exception       #
3822#         stack frame.                                                  #
3823#                                                                       #
3824# OUTPUT ************************************************************** #
3825#       - The system stack is unchanged.                                #
3826#                                                                       #
3827# ALGORITHM *********************************************************** #
3828#       When a "Line F Emulator" exception occurs in a system where     #
3829# "FPU Unimplemented" instructions will not be emulated, the exception  #
3830# can occur because the FPU is disabled or the instruction is to be     #
3831# classified as "Line F". This module determines which case exists and  #
3832# calls the appropriate "callout".                                      #
3833#                                                                       #
3834#########################################################################
3835
3836        global          _fpsp_fline
3837_fpsp_fline:
3838
3839# check to see if the FPU is disabled. if so, jump to the OS entry
3840# point for that condition.
3841        cmpi.w          0x6(%sp),&0x402c        # presumably the frame format/vector word; 0x402c marks FPU disabled
3842        beq.l           _real_fpu_disabled      # equal: take the "FPU disabled" callout
3843
3844        bra.l           _real_fline             # all other cases: generic Line F callout
3845
3846#########################################################################
3847# XDEF **************************************************************** #
3848#       _dcalc_ea(): calc correct <ea> from <ea> stacked on exception   #
3849#                                                                       #
3850# XREF **************************************************************** #
3851#       inc_areg() - increment an address register                      #
3852#       dec_areg() - decrement an address register                      #
3853#                                                                       #
3854# INPUT *************************************************************** #
3855#       d0 = number of bytes to adjust <ea> by                          #
3856#                                                                       #
3857# OUTPUT ************************************************************** #
3858#       None                                                            #
3859#                                                                       #
3860# ALGORITHM *********************************************************** #
3861# "Dummy" CALCulate Effective Address:                                  #
3862#       The stacked <ea> for FP unimplemented instructions and opclass  #
3863#       two packed instructions is correct with the exception of...     #
3864#                                                                       #
3865#       1) -(An)   : The register is not updated regardless of size.    #
3866#                    Also, for extended precision and packed, the       #
3867#                    stacked <ea> value is 8 bytes too big              #
3868#       2) (An)+   : The register is not updated.                       #
3869#       3) #<data> : The upper longword of the immediate operand is     #
3870#                    stacked. b,w,l and s sizes are completely stacked. #
3871#                    d,x, and p are not.                                #
3872#                                                                       #
3873#########################################################################
3874
3875        global          _dcalc_ea
3876_dcalc_ea:                              # in: d0 = operand size in bytes; out: a0 = <ea>
3877        mov.l           %d0, %a0                # park byte count in %a0; d0 is reused as scratch
3878
3879        mov.b           1+EXC_OPWORD(%a6), %d0  # fetch low byte of opword (<ea> mode/reg bits)
3880        mov.l           %d0, %d1                # make a copy
3881
3882        andi.w          &0x38, %d0              # d0 = mode field (bits 5:3), left in place
3883        andi.l          &0x7, %d1               # d1 = reg field (bits 2:0)
3884
3885        cmpi.b          %d0,&0x18               # is mode (An)+ ?
3886        beq.b           dcea_pi                 # yes
3887
3888        cmpi.b          %d0,&0x20               # is mode -(An) ?
3889        beq.b           dcea_pd                 # yes
3890
3891        or.w            %d1,%d0                 # concat mode,reg
3892        cmpi.b          %d0,&0x3c               # is mode #<data>? (mode 7, reg 4)
3893
3894        beq.b           dcea_imm                # yes
3895
3896        mov.l           EXC_EA(%a6),%a0         # any other mode: return the stacked <ea> as is
3897        rts
3898
3899# need to set immediate data flag here since we'll need to do
3900# an imem_read to fetch this later.
3901dcea_imm:
3902        mov.b           &immed_flg,SPCOND_FLG(%a6) # record "operand is immediate" in SPCOND_FLG
3903        lea             ([USER_FPIAR,%a6],0x4),%a0 # a0 = (longword stored at USER_FPIAR(%a6)) + 4
3904        rts
3905
3906# here, the <ea> is stacked correctly. however, we must update the
3907# address register...
3908dcea_pi:
3909        mov.l           %a0,%d0                 # pass amt to inc by (byte count parked on entry)
3910        bsr.l           inc_areg                # inc addr register (d1 still holds the reg field)
3911
3912        mov.l           EXC_EA(%a6),%a0         # stacked <ea> is correct
3913        rts
3914
3915# the <ea> is stacked correctly for all sizes except extended and packed,
3916# for which the stacked <ea> is 8 bytes too large.
3917# it would make no sense to have a pre-decrement to a7 in supervisor
3918# mode so we don't even worry about this tricky case here : )
3919dcea_pd:
3920        mov.l           %a0,%d0                 # pass amt to dec by (byte count parked on entry)
3921        bsr.l           dec_areg                # dec addr register
3922
3923        mov.l           EXC_EA(%a6),%a0         # stacked <ea> is correct unless size is 12
3924
3925        cmpi.b          %d0,&0xc                # is opsize ext or packed? (assumes dec_areg left d0 intact - TODO confirm)
3926        beq.b           dcea_pd2                # yes
3927        rts
3928dcea_pd2:
3929        sub.l           &0x8,%a0                # correct <ea>: stacked value is 8 too big
3930        mov.l           %a0,EXC_EA(%a6)         # put correct <ea> on stack
3931        rts
3932
3933#########################################################################
3934# XDEF **************************************************************** #
3935#       _calc_ea_fout(): calculate correct stacked <ea> for extended    #
3936#                        and packed data opclass 3 operations.          #
3937#                                                                       #
3938# XREF **************************************************************** #
3939#       None                                                            #
3940#                                                                       #
3941# INPUT *************************************************************** #
3942#       None                                                            #
3943#                                                                       #
3944# OUTPUT ************************************************************** #
3945#       a0 = return correct effective address                           #
3946#                                                                       #
3947# ALGORITHM *********************************************************** #
3948#       For opclass 3 extended and packed data operations, the <ea>     #
3949# stacked for the exception is incorrect for the -(an) addressing mode, #
3950# and for both -(an) and (an)+ the address register itself must also be #
3951# updated.                                                              #
3952#       So, for -(an), we must subtract 8 off of the stacked <ea> value #
3953# and return that value as the correct <ea> and store that value in An. #
3954# For (an)+, the stacked <ea> is correct but we must adjust An by +12.  #
3955#                                                                       #
3956#########################################################################
3957
3958# This calc_ea is currently used to retrieve the correct <ea>
3959# for fmove outs of type extended and packed. Returns the <ea> in a0.
3960        global          _calc_ea_fout
3961_calc_ea_fout:
3962        mov.b           1+EXC_OPWORD(%a6),%d0   # fetch low byte of opword (<ea> mode/reg bits)
3963        mov.l           %d0,%d1                 # make a copy
3964
3965        andi.w          &0x38,%d0               # d0 = mode field (bits 5:3), left in place
3966        andi.l          &0x7,%d1                # d1 = reg field (bits 2:0) = An number
3967
3968        cmpi.b          %d0,&0x18               # is mode (An)+ ?
3969        beq.b           ceaf_pi                 # yes
3970
3971        cmpi.b          %d0,&0x20               # is mode -(An) ?
3972        beq.w           ceaf_pd                 # yes
3973
3974        mov.l           EXC_EA(%a6),%a0         # any other mode: stacked <ea> is correct
3975        rts
3976
3977# (An)+ : extended and packed fmove out
3978#       : stacked <ea> is correct
3979#       : "An" not updated, so add 12 to it here (one handler per register)
3980ceaf_pi:
3981        mov.w           (tbl_ceaf_pi.b,%pc,%d1.w*2),%d1 # d1 = 16-bit offset of handler for An
3982        mov.l           EXC_EA(%a6),%a0         # return value: the stacked <ea>
3983        jmp             (tbl_ceaf_pi.b,%pc,%d1.w*1) # jump to tbl_ceaf_pi + offset
3984
3985        swbeg           &0x8                    # 8-entry table follows (presumably a switch-table marker - confirm)
3986tbl_ceaf_pi:
3987        short           ceaf_pi0 - tbl_ceaf_pi
3988        short           ceaf_pi1 - tbl_ceaf_pi
3989        short           ceaf_pi2 - tbl_ceaf_pi
3990        short           ceaf_pi3 - tbl_ceaf_pi
3991        short           ceaf_pi4 - tbl_ceaf_pi
3992        short           ceaf_pi5 - tbl_ceaf_pi
3993        short           ceaf_pi6 - tbl_ceaf_pi
3994        short           ceaf_pi7 - tbl_ceaf_pi
3995
3996ceaf_pi0:
3997        addi.l          &0xc,EXC_DREGS+0x8(%a6) # reg 0: bump the copy kept at EXC_DREGS+0x8
3998        rts
3999ceaf_pi1:
4000        addi.l          &0xc,EXC_DREGS+0xc(%a6) # reg 1: bump the copy kept at EXC_DREGS+0xc
4001        rts
4002ceaf_pi2:
4003        add.l           &0xc,%a2                # a2-a5 are adjusted directly in the live registers
4004        rts
4005ceaf_pi3:
4006        add.l           &0xc,%a3
4007        rts
4008ceaf_pi4:
4009        add.l           &0xc,%a4
4010        rts
4011ceaf_pi5:
4012        add.l           &0xc,%a5
4013        rts
4014ceaf_pi6:
4015        addi.l          &0xc,EXC_A6(%a6)        # reg 6: bump the copy kept at EXC_A6
4016        rts
4017ceaf_pi7:
4018        mov.b           &mia7_flg,SPCOND_FLG(%a6) # flag the (a7)+ case in SPCOND_FLG
4019        addi.l          &0xc,EXC_A7(%a6)        # reg 7: bump the copy kept at EXC_A7
4020        rts
4021
4022# -(An) : extended and packed fmove out
4023#       : stacked <ea> = actual <ea> + 8
4024#       : "An" not updated, so the corrected <ea> is stored into it here
4025ceaf_pd:
4026        mov.w           (tbl_ceaf_pd.b,%pc,%d1.w*2),%d1 # d1 = 16-bit offset of handler for An
4027        mov.l           EXC_EA(%a6),%a0
4028        sub.l           &0x8,%a0                # a0 = actual <ea> (stacked value - 8)
4029        sub.l           &0x8,EXC_EA(%a6)        # fix the stacked copy as well
4030        jmp             (tbl_ceaf_pd.b,%pc,%d1.w*1) # jump to tbl_ceaf_pd + offset
4031
4032        swbeg           &0x8                    # 8-entry table follows (presumably a switch-table marker - confirm)
4033tbl_ceaf_pd:
4034        short           ceaf_pd0 - tbl_ceaf_pd
4035        short           ceaf_pd1 - tbl_ceaf_pd
4036        short           ceaf_pd2 - tbl_ceaf_pd
4037        short           ceaf_pd3 - tbl_ceaf_pd
4038        short           ceaf_pd4 - tbl_ceaf_pd
4039        short           ceaf_pd5 - tbl_ceaf_pd
4040        short           ceaf_pd6 - tbl_ceaf_pd
4041        short           ceaf_pd7 - tbl_ceaf_pd
4042
4043ceaf_pd0:
4044        mov.l           %a0,EXC_DREGS+0x8(%a6)  # reg 0: new value goes to the copy at EXC_DREGS+0x8
4045        rts
4046ceaf_pd1:
4047        mov.l           %a0,EXC_DREGS+0xc(%a6)  # reg 1: new value goes to the copy at EXC_DREGS+0xc
4048        rts
4049ceaf_pd2:
4050        mov.l           %a0,%a2                 # a2-a5 are updated directly in the live registers
4051        rts
4052ceaf_pd3:
4053        mov.l           %a0,%a3
4054        rts
4055ceaf_pd4:
4056        mov.l           %a0,%a4
4057        rts
4058ceaf_pd5:
4059        mov.l           %a0,%a5
4060        rts
4061ceaf_pd6:
4062        mov.l           %a0,EXC_A6(%a6)         # reg 6: new value goes to the copy at EXC_A6
4063        rts
4064ceaf_pd7:
4065        mov.l           %a0,EXC_A7(%a6)         # reg 7: new value goes to the copy at EXC_A7
4066        mov.b           &mda7_flg,SPCOND_FLG(%a6) # flag the -(a7) case in SPCOND_FLG
4067        rts
4068
4069#
4070# This table holds the offsets of the emulation routines for each individual
4071# math operation relative to the address of this table. Included are
4072# routines like fadd/fmul/fabs. The transcendentals ARE NOT. This is because
4073# this table is for the version of the 060FPSP without transcendentals.
4074# The location within the table is determined by the extension bits of the
4075# operation longword. An entry of (tbl_unsupp - tbl_unsupp), i.e. zero, is a
4076# slot with no routine here; "--" marks slots that have no mnemonic listed.
4077
4078        swbeg           &109
4079tbl_unsupp:
4080        long            fin             - tbl_unsupp    # 00: fmove
4081        long            fint            - tbl_unsupp    # 01: fint
4082        long            tbl_unsupp      - tbl_unsupp    # 02: fsinh
4083        long            fintrz          - tbl_unsupp    # 03: fintrz
4084        long            fsqrt           - tbl_unsupp    # 04: fsqrt
4085        long            tbl_unsupp      - tbl_unsupp    # 05: --
4086        long            tbl_unsupp      - tbl_unsupp    # 06: flognp1
4087        long            tbl_unsupp      - tbl_unsupp    # 07: --
4088        long            tbl_unsupp      - tbl_unsupp    # 08: fetoxm1
4089        long            tbl_unsupp      - tbl_unsupp    # 09: ftanh
4090        long            tbl_unsupp      - tbl_unsupp    # 0a: fatan
4091        long            tbl_unsupp      - tbl_unsupp    # 0b: --
4092        long            tbl_unsupp      - tbl_unsupp    # 0c: fasin
4093        long            tbl_unsupp      - tbl_unsupp    # 0d: fatanh
4094        long            tbl_unsupp      - tbl_unsupp    # 0e: fsin
4095        long            tbl_unsupp      - tbl_unsupp    # 0f: ftan
4096        long            tbl_unsupp      - tbl_unsupp    # 10: fetox
4097        long            tbl_unsupp      - tbl_unsupp    # 11: ftwotox
4098        long            tbl_unsupp      - tbl_unsupp    # 12: ftentox
4099        long            tbl_unsupp      - tbl_unsupp    # 13: --
4100        long            tbl_unsupp      - tbl_unsupp    # 14: flogn
4101        long            tbl_unsupp      - tbl_unsupp    # 15: flog10
4102        long            tbl_unsupp      - tbl_unsupp    # 16: flog2
4103        long            tbl_unsupp      - tbl_unsupp    # 17: --
4104        long            fabs            - tbl_unsupp    # 18: fabs
4105        long            tbl_unsupp      - tbl_unsupp    # 19: fcosh
4106        long            fneg            - tbl_unsupp    # 1a: fneg
4107        long            tbl_unsupp      - tbl_unsupp    # 1b: --
4108        long            tbl_unsupp      - tbl_unsupp    # 1c: facos
4109        long            tbl_unsupp      - tbl_unsupp    # 1d: fcos
4110        long            tbl_unsupp      - tbl_unsupp    # 1e: fgetexp
4111        long            tbl_unsupp      - tbl_unsupp    # 1f: fgetman
4112        long            fdiv            - tbl_unsupp    # 20: fdiv
4113        long            tbl_unsupp      - tbl_unsupp    # 21: fmod
4114        long            fadd            - tbl_unsupp    # 22: fadd
4115        long            fmul            - tbl_unsupp    # 23: fmul
4116        long            fsgldiv         - tbl_unsupp    # 24: fsgldiv
4117        long            tbl_unsupp      - tbl_unsupp    # 25: frem
4118        long            tbl_unsupp      - tbl_unsupp    # 26: fscale
4119        long            fsglmul         - tbl_unsupp    # 27: fsglmul
4120        long            fsub            - tbl_unsupp    # 28: fsub
4121        long            tbl_unsupp      - tbl_unsupp    # 29: --
4122        long            tbl_unsupp      - tbl_unsupp    # 2a: --
4123        long            tbl_unsupp      - tbl_unsupp    # 2b: --
4124        long            tbl_unsupp      - tbl_unsupp    # 2c: --
4125        long            tbl_unsupp      - tbl_unsupp    # 2d: --
4126        long            tbl_unsupp      - tbl_unsupp    # 2e: --
4127        long            tbl_unsupp      - tbl_unsupp    # 2f: --
4128        long            tbl_unsupp      - tbl_unsupp    # 30: fsincos
4129        long            tbl_unsupp      - tbl_unsupp    # 31: fsincos
4130        long            tbl_unsupp      - tbl_unsupp    # 32: fsincos
4131        long            tbl_unsupp      - tbl_unsupp    # 33: fsincos
4132        long            tbl_unsupp      - tbl_unsupp    # 34: fsincos
4133        long            tbl_unsupp      - tbl_unsupp    # 35: fsincos
4134        long            tbl_unsupp      - tbl_unsupp    # 36: fsincos
4135        long            tbl_unsupp      - tbl_unsupp    # 37: fsincos
4136        long            fcmp            - tbl_unsupp    # 38: fcmp
4137        long            tbl_unsupp      - tbl_unsupp    # 39: --
4138        long            ftst            - tbl_unsupp    # 3a: ftst
4139        long            tbl_unsupp      - tbl_unsupp    # 3b: --
4140        long            tbl_unsupp      - tbl_unsupp    # 3c: --
4141        long            tbl_unsupp      - tbl_unsupp    # 3d: --
4142        long            tbl_unsupp      - tbl_unsupp    # 3e: --
4143        long            tbl_unsupp      - tbl_unsupp    # 3f: --
4144        long            fsin            - tbl_unsupp    # 40: fsmove
4145        long            fssqrt          - tbl_unsupp    # 41: fssqrt
4146        long            tbl_unsupp      - tbl_unsupp    # 42: --
4147        long            tbl_unsupp      - tbl_unsupp    # 43: --
4148        long            fdin            - tbl_unsupp    # 44: fdmove
4149        long            fdsqrt          - tbl_unsupp    # 45: fdsqrt
4150        long            tbl_unsupp      - tbl_unsupp    # 46: --
4151        long            tbl_unsupp      - tbl_unsupp    # 47: --
4152        long            tbl_unsupp      - tbl_unsupp    # 48: --
4153        long            tbl_unsupp      - tbl_unsupp    # 49: --
4154        long            tbl_unsupp      - tbl_unsupp    # 4a: --
4155        long            tbl_unsupp      - tbl_unsupp    # 4b: --
4156        long            tbl_unsupp      - tbl_unsupp    # 4c: --
4157        long            tbl_unsupp      - tbl_unsupp    # 4d: --
4158        long            tbl_unsupp      - tbl_unsupp    # 4e: --
4159        long            tbl_unsupp      - tbl_unsupp    # 4f: --
4160        long            tbl_unsupp      - tbl_unsupp    # 50: --
4161        long            tbl_unsupp      - tbl_unsupp    # 51: --
4162        long            tbl_unsupp      - tbl_unsupp    # 52: --
4163        long            tbl_unsupp      - tbl_unsupp    # 53: --
4164        long            tbl_unsupp      - tbl_unsupp    # 54: --
4165        long            tbl_unsupp      - tbl_unsupp    # 55: --
4166        long            tbl_unsupp      - tbl_unsupp    # 56: --
4167        long            tbl_unsupp      - tbl_unsupp    # 57: --
4168        long            fsabs           - tbl_unsupp    # 58: fsabs
4169        long            tbl_unsupp      - tbl_unsupp    # 59: --
4170        long            fsneg           - tbl_unsupp    # 5a: fsneg
4171        long            tbl_unsupp      - tbl_unsupp    # 5b: --
4172        long            fdabs           - tbl_unsupp    # 5c: fdabs
4173        long            tbl_unsupp      - tbl_unsupp    # 5d: --
4174        long            fdneg           - tbl_unsupp    # 5e: fdneg
4175        long            tbl_unsupp      - tbl_unsupp    # 5f: --
4176        long            fsdiv           - tbl_unsupp    # 60: fsdiv
4177        long            tbl_unsupp      - tbl_unsupp    # 61: --
4178        long            fsadd           - tbl_unsupp    # 62: fsadd
4179        long            fsmul           - tbl_unsupp    # 63: fsmul
4180        long            fddiv           - tbl_unsupp    # 64: fddiv
4181        long            tbl_unsupp      - tbl_unsupp    # 65: --
4182        long            fdadd           - tbl_unsupp    # 66: fdadd
4183        long            fdmul           - tbl_unsupp    # 67: fdmul
4184        long            fssub           - tbl_unsupp    # 68: fssub
4185        long            tbl_unsupp      - tbl_unsupp    # 69: --
4186        long            tbl_unsupp      - tbl_unsupp    # 6a: --
4187        long            tbl_unsupp      - tbl_unsupp    # 6b: --
4188        long            fdsub           - tbl_unsupp    # 6c: fdsub
4189
4190#################################################
4191# Add this here so non-fp modules can compile.
4192# (smovcr is called from fpsp_inex.) This is only a stand-in definition.
4193        global          smovcr
4194smovcr:
4195        bra.b           smovcr                  # placeholder: branches to itself, never returns
4196
4197#########################################################################
4198# XDEF **************************************************************** #
4199#       fmovm_dynamic(): emulate "fmovm" dynamic instruction            #
4200#                                                                       #
4201# XREF **************************************************************** #
4202#       fetch_dreg() - fetch data register                              #
4203#       {i,d,}mem_read() - fetch data from memory                       #
4204#       _mem_write() - write data to memory                             #
4205#       iea_iacc() - instruction memory access error occurred           #
4206#       iea_dacc() - data memory access error occurred                  #
4207#       restore() - restore An index regs if access error occurred      #
4208#                                                                       #
4209# INPUT *************************************************************** #
4210#       None                                                            #
4211#                                                                       #
4212# OUTPUT ************************************************************** #
4213#       If instr is "fmovm Dn,-(A7)" from supervisor mode,              #
4214#               d0 = size of dump                                       #
4215#               d1 = Dn                                                 #
4216#       Else if instruction access error,                               #
4217#               d0 = FSLW                                               #
4218#       Else if data access error,                                      #
4219#               d0 = FSLW                                               #
4220#               a0 = address of fault                                   #
4221#       Else                                                            #
4222#               none.                                                   #
4223#                                                                       #
4224# ALGORITHM *********************************************************** #
4225#       The effective address must be calculated since this is entered  #
4226# from an "Unimplemented Effective Address" exception handler. So, we   #
4227# have our own fcalc_ea() routine here. If an access error is flagged   #
4228# by a _{i,d,}mem_read() call, we must exit through the special         #
4229# handler.                                                              #
4230#       The data register is determined and its value loaded to get the #
4231# string of FP registers affected. This value is used as an index into  #
4232# a lookup table such that we can determine the number of bytes         #
4233# involved.                                                             #
4234#       If the instruction is "fmovm.x <ea>,Dn", a _mem_read() is used  #
4235# to read in all FP values. Again, _mem_read() may fail and require a   #
4236# special exit.                                                         #
4237#       If the instruction is "fmovm.x DN,<ea>", a _mem_write() is used #
4238# to write all FP values. _mem_write() may also fail.                   #
4239#       If the instruction is "fmovm.x DN,-(a7)" from supervisor mode,  #
4240# then we return the size of the dump and the string to the caller      #
4241# so that the move can occur outside of this routine. This special      #
4242# case is required so that moves to the system stack are handled        #
4243# correctly.                                                            #
4244#                                                                       #
4245# DYNAMIC:                                                              #
4246#       fmovm.x dn, <ea>                                                #
4247#       fmovm.x <ea>, dn                                                #
4248#                                                                       #
4249#             <WORD 1>                <WORD2>                           #
4250#       1111 0010 00 |<ea>|     11@& 1000 0$$$ 0000                     #
4251#                                                                       #
4252#       & = (0): predecrement addressing mode                           #
4253#           (1): postincrement or control addressing mode               #
4254#       @ = (0): move listed regs from memory to the FPU                #
4255#           (1): move listed regs from the FPU to memory                #
4256#       $$$    : index of data register holding reg select mask         #
4257#                                                                       #
4258# NOTES:                                                                #
4259#       If the data register holds a zero, then the                     #
4260#       instruction is a nop.                                           #
4261#                                                                       #
4262#########################################################################
4263
4264        global          fmovm_dynamic
4265fmovm_dynamic:                          # emulate fmovm.x whose register list is held in Dn
4266
4267# extract the data register in which the bit string resides...
4268        mov.b           1+EXC_EXTWORD(%a6),%d1  # fetch low byte of extword
4269        andi.w          &0x70,%d1               # extract reg bits (the $$$ field)
4270        lsr.b           &0x4,%d1                # shift into lo bits: d1 = Dn number
4271
4272# fetch the bit string into d0...
4273        bsr.l           fetch_dreg              # fetch reg string
4274
4275        andi.l          &0x000000ff,%d0         # keep only lo byte (one bit per FP register)
4276
4277        mov.l           %d0,-(%sp)              # save strg
4278        mov.b           (tbl_fmovm_size.w,%pc,%d0),%d0 # d0 = byte count for this strg
4279        mov.l           %d0,-(%sp)              # save size
4280        bsr.l           fmovm_calc_ea           # calculate <ea> (used from a0 below)
4281        mov.l           (%sp)+,%d0              # restore size
4282        mov.l           (%sp)+,%d1              # restore strg; this mov leaves Z set if strg is zero
4283
4284# if the bit string is a zero, then the operation is a no-op
4285# but, make sure that we've calculated ea and advanced the opword pointer
4286        beq.w           fmovm_data_done         # strg == 0: nothing to move
4287
4288# separate move ins from move outs...
4289        btst            &0x5,EXC_EXTWORD(%a6)   # is it a move in or out? (the @ bit)
4290        beq.w           fmovm_data_in           # bit clear: it's a move in
4291
4292#############
4293# MOVE OUT: #
4294#############
4295fmovm_data_out:
4296        btst            &0x4,EXC_EXTWORD(%a6)   # control or predecrement? (the & bit)
4297        bne.w           fmovm_out_ctrl          # control
4298
4299############################
4300fmovm_out_predec:
4301# for predecrement mode, the bit string is the opposite of both control
4302# operations and postincrement mode. (bit7 = FP7 ... bit0 = FP0)
4303# here, we convert it to be just like the others...
4304        mov.b           (tbl_fmovm_convert.w,%pc,%d1.w*1),%d1 # d1 = strg with its bits reversed
4305
4306        btst            &0x5,EXC_SR(%a6)        # user or supervisor mode?
4307        beq.b           fmovm_out_ctrl          # user
4308
4309fmovm_out_predec_s:
4310        cmpi.b          SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
4311        bne.b           fmovm_out_ctrl          # no: handle like any other move out
4312
4313# the operation was unfortunately an: fmovm.x dn,-(sp)
4314# called from supervisor mode.
4315# we're also passing "size" and "strg" back to the calling routine
4316        rts                                     # d0 = size, d1 = converted strg
4317
4318############################
4319fmovm_out_ctrl:
4320        mov.l           %a0,%a1                 # move <ea> to a1
4321
4322        sub.l           %d0,%sp                 # subtract size of dump
4323        lea             (%sp),%a0               # a0 = write pointer into the stack buffer
4324
4325        tst.b           %d1                     # should FP0 be moved? (bit 7 of strg)
4326        bpl.b           fmovm_out_ctrl_fp1      # no
4327
4328        mov.l           0x0+EXC_FP0(%a6),(%a0)+ # yes: copy 12 bytes from the EXC_FP0 save area
4329        mov.l           0x4+EXC_FP0(%a6),(%a0)+
4330        mov.l           0x8+EXC_FP0(%a6),(%a0)+
4331
4332fmovm_out_ctrl_fp1:
4333        lsl.b           &0x1,%d1                # should FP1 be moved? (next bit moves into bit 7)
4334        bpl.b           fmovm_out_ctrl_fp2      # no
4335
4336        mov.l           0x0+EXC_FP1(%a6),(%a0)+ # yes: copy 12 bytes from the EXC_FP1 save area
4337        mov.l           0x4+EXC_FP1(%a6),(%a0)+
4338        mov.l           0x8+EXC_FP1(%a6),(%a0)+
4339
4340fmovm_out_ctrl_fp2:
4341        lsl.b           &0x1,%d1                # should FP2 be moved?
4342        bpl.b           fmovm_out_ctrl_fp3      # no
4343
4344        fmovm.x         &0x20,(%a0)             # yes: FP2..FP7 are stored straight from the FPU
4345        add.l           &0xc,%a0                # advance past the 12 bytes just written
4346
4347fmovm_out_ctrl_fp3:
4348        lsl.b           &0x1,%d1                # should FP3 be moved?
4349        bpl.b           fmovm_out_ctrl_fp4      # no
4350
4351        fmovm.x         &0x10,(%a0)             # yes
4352        add.l           &0xc,%a0
4353
4354fmovm_out_ctrl_fp4:
4355        lsl.b           &0x1,%d1                # should FP4 be moved?
4356        bpl.b           fmovm_out_ctrl_fp5      # no
4357
4358        fmovm.x         &0x08,(%a0)             # yes
4359        add.l           &0xc,%a0
4360
4361fmovm_out_ctrl_fp5:
4362        lsl.b           &0x1,%d1                # should FP5 be moved?
4363        bpl.b           fmovm_out_ctrl_fp6      # no
4364
4365        fmovm.x         &0x04,(%a0)             # yes
4366        add.l           &0xc,%a0
4367
4368fmovm_out_ctrl_fp6:
4369        lsl.b           &0x1,%d1                # should FP6 be moved?
4370        bpl.b           fmovm_out_ctrl_fp7      # no
4371
4372        fmovm.x         &0x02,(%a0)             # yes
4373        add.l           &0xc,%a0
4374
4375fmovm_out_ctrl_fp7:
4376        lsl.b           &0x1,%d1                # should FP7 be moved?
4377        bpl.b           fmovm_out_ctrl_done     # no
4378
4379        fmovm.x         &0x01,(%a0)             # yes
4380        add.l           &0xc,%a0
4381
4382fmovm_out_ctrl_done:
4383        mov.l           %a1,L_SCR1(%a6)         # keep destination <ea> in L_SCR1 (presumably for the error path - confirm)
4384
4385        lea             (%sp),%a0               # pass: supervisor src
4386        mov.l           %d0,-(%sp)              # save size
4387        bsr.l           _dmem_write             # copy data to user mem
4388
4389        mov.l           (%sp)+,%d0              # retrieve size
4390        add.l           %d0,%sp                 # clear fpreg data from stack
4391
4392        tst.l           %d1                     # did dstore err?
4393        bne.w           fmovm_out_err           # yes
4394
4395        rts
4396
4397############
4398# MOVE IN: #
4399############
4400fmovm_data_in:
4401        mov.l           %a0,L_SCR1(%a6)         # keep source <ea> in L_SCR1 (presumably for the error path - confirm)
4402
4403        sub.l           %d0,%sp                 # make room for fpregs
4404        lea             (%sp),%a1               # a1 = start of the stack buffer (read destination)
4405
4406        mov.l           %d1,-(%sp)              # save bit string for later
4407        mov.l           %d0,-(%sp)              # save # of bytes
4408
4409        bsr.l           _dmem_read              # copy data from user mem
4410
4411        mov.l           (%sp)+,%d0              # retrieve # of bytes
4412
4413        tst.l           %d1                     # did dfetch fail?
4414        bne.w           fmovm_in_err            # yes (NOTE(review): bit string and buffer are still on the stack here)
4415
4416        mov.l           (%sp)+,%d1              # load bit string
4417
4418        lea             (%sp),%a0               # addr of stack
4419
4420        tst.b           %d1                     # should FP0 be moved? (bit 7 of strg)
4421        bpl.b           fmovm_data_in_fp1       # no
4422
4423        mov.l           (%a0)+,0x0+EXC_FP0(%a6) # yes: load 12 bytes into the EXC_FP0 save area
4424        mov.l           (%a0)+,0x4+EXC_FP0(%a6)
4425        mov.l           (%a0)+,0x8+EXC_FP0(%a6)
4426
4427fmovm_data_in_fp1:
4428        lsl.b           &0x1,%d1                # should FP1 be moved? (next bit moves into bit 7)
4429        bpl.b           fmovm_data_in_fp2       # no
4430
4431        mov.l           (%a0)+,0x0+EXC_FP1(%a6) # yes: load 12 bytes into the EXC_FP1 save area
4432        mov.l           (%a0)+,0x4+EXC_FP1(%a6)
4433        mov.l           (%a0)+,0x8+EXC_FP1(%a6)
4434
4435fmovm_data_in_fp2:
4436        lsl.b           &0x1,%d1                # should FP2 be moved?
4437        bpl.b           fmovm_data_in_fp3       # no
4438
4439        fmovm.x         (%a0)+,&0x20            # yes: FP2..FP7 are loaded straight into the FPU
4440
4441fmovm_data_in_fp3:
4442        lsl.b           &0x1,%d1                # should FP3 be moved?
4443        bpl.b           fmovm_data_in_fp4       # no
4444
4445        fmovm.x         (%a0)+,&0x10            # yes
4446
4447fmovm_data_in_fp4:
4448        lsl.b           &0x1,%d1                # should FP4 be moved?
4449        bpl.b           fmovm_data_in_fp5       # no
4450
4451        fmovm.x         (%a0)+,&0x08            # yes
4452
4453fmovm_data_in_fp5:
4454        lsl.b           &0x1,%d1                # should FP5 be moved?
4455        bpl.b           fmovm_data_in_fp6       # no
4456
4457        fmovm.x         (%a0)+,&0x04            # yes
4458
4459fmovm_data_in_fp6:
4460        lsl.b           &0x1,%d1                # should FP6 be moved?
4461        bpl.b           fmovm_data_in_fp7       # no
4462
4463        fmovm.x         (%a0)+,&0x02            # yes
4464
4465fmovm_data_in_fp7:
4466        lsl.b           &0x1,%d1                # should FP7 be moved?
4467        bpl.b           fmovm_data_in_done      # no
4468
4469        fmovm.x         (%a0)+,&0x01            # yes
4470
4471fmovm_data_in_done:
4472        add.l           %d0,%sp                 # remove fpregs from stack
4473        rts
4474
4475#####################################
4476
4477fmovm_data_done:
4478        rts                                     # empty register list: nothing was moved
4479
4480##############################################################################
4481
4482#
4483# table indexed by the operation's bit string (0x00-0xff) that gives the
4484# number of bytes that will be moved; one byte per entry.
4485#
4486# number of bytes = (# of 1's in bit string) * 12(bytes/fpreg)
4487# e.g. strg 0x00 ==> 0x00, strg 0x03 ==> 0x18, strg 0xff ==> 0x60
4488tbl_fmovm_size:
4489        byte    0x00,0x0c,0x0c,0x18,0x0c,0x18,0x18,0x24         # strg 0x00-0x07
4490        byte    0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30         # strg 0x08-0x0f
4491        byte    0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30         # strg 0x10-0x17
4492        byte    0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c         # strg 0x18-0x1f
4493        byte    0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30         # strg 0x20-0x27
4494        byte    0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c         # strg 0x28-0x2f
4495        byte    0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c         # strg 0x30-0x37
4496        byte    0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48         # strg 0x38-0x3f
4497        byte    0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30         # strg 0x40-0x47
4498        byte    0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c         # strg 0x48-0x4f
4499        byte    0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c         # strg 0x50-0x57
4500        byte    0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48         # strg 0x58-0x5f
4501        byte    0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c         # strg 0x60-0x67
4502        byte    0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48         # strg 0x68-0x6f
4503        byte    0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48         # strg 0x70-0x77
4504        byte    0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54         # strg 0x78-0x7f
4505        byte    0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30         # strg 0x80-0x87
4506        byte    0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c         # strg 0x88-0x8f
4507        byte    0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c         # strg 0x90-0x97
4508        byte    0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48         # strg 0x98-0x9f
4509        byte    0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c         # strg 0xa0-0xa7
4510        byte    0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48         # strg 0xa8-0xaf
4511        byte    0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48         # strg 0xb0-0xb7
4512        byte    0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54         # strg 0xb8-0xbf
4513        byte    0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c         # strg 0xc0-0xc7
4514        byte    0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48         # strg 0xc8-0xcf
4515        byte    0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48         # strg 0xd0-0xd7
4516        byte    0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54         # strg 0xd8-0xdf
4517        byte    0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48         # strg 0xe0-0xe7
4518        byte    0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54         # strg 0xe8-0xef
4519        byte    0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54         # strg 0xf0-0xf7
4520        byte    0x3c,0x48,0x48,0x54,0x48,0x54,0x54,0x60         # strg 0xf8-0xff
4521
4522#
4523# table to convert a pre-decrement bit string into a post-increment
4524# or control bit string: entry i is the byte i with its 8 bits reversed.
4525# ex:   0x00    ==>     0x00
4526#       0x01    ==>     0x80
4527#       0x02    ==>     0x40
4528#               .
4529#               .
4530#       0xfd    ==>     0xbf
4531#       0xfe    ==>     0x7f
4532#       0xff    ==>     0xff
4533# indexed by the pre-decrement bit string (0x00-0xff); one byte per entry.
4534tbl_fmovm_convert:
4535        byte    0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0         # index 0x00-0x07
4536        byte    0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0         # index 0x08-0x0f
4537        byte    0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8         # index 0x10-0x17
4538        byte    0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8         # index 0x18-0x1f
4539        byte    0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4         # index 0x20-0x27
4540        byte    0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4         # index 0x28-0x2f
4541        byte    0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec         # index 0x30-0x37
4542        byte    0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc         # index 0x38-0x3f
4543        byte    0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2         # index 0x40-0x47
4544        byte    0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2         # index 0x48-0x4f
4545        byte    0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea         # index 0x50-0x57
4546        byte    0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa         # index 0x58-0x5f
4547        byte    0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6         # index 0x60-0x67
4548        byte    0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6         # index 0x68-0x6f
4549        byte    0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee         # index 0x70-0x77
4550        byte    0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe         # index 0x78-0x7f
4551        byte    0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1         # index 0x80-0x87
4552        byte    0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1         # index 0x88-0x8f
4553        byte    0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9         # index 0x90-0x97
4554        byte    0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9         # index 0x98-0x9f
4555        byte    0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5         # index 0xa0-0xa7
4556        byte    0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5         # index 0xa8-0xaf
4557        byte    0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed         # index 0xb0-0xb7
4558        byte    0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd         # index 0xb8-0xbf
4559        byte    0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3         # index 0xc0-0xc7
4560        byte    0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3         # index 0xc8-0xcf
4561        byte    0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb         # index 0xd0-0xd7
4562        byte    0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb         # index 0xd8-0xdf
4563        byte    0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7         # index 0xe0-0xe7
4564        byte    0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7         # index 0xe8-0xef
4565        byte    0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef         # index 0xf0-0xf7
4566        byte    0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff         # index 0xf8-0xff
4567
4568        global          fmovm_calc_ea
4569###############################################
4570# _fmovm_calc_ea: calculate effective address #
4571###############################################
# Dispatcher for the per-addressing-mode <ea> routines that follow.
#
# On entry:
#       d0 = number of bytes; it is copied to a0, where the (An)+ and
#            -(An) handlers use it as the amount to adjust An by
#       a6 = frame pointer used for the EXC_* offsets
#
# The selected handler is entered with a jmp (so its rts returns to the
# caller of fmovm_calc_ea) and sees:
#       a0 = byte count
#       d1 = low three bits of the opword (register number, 0-7)
#
# Every handler visible below leaves the computed <ea> in a0.
4572fmovm_calc_ea:
4573        mov.l           %d0,%a0                 # move # bytes to a0
4574
4575# currently, MODE and REG are taken from the EXC_OPWORD. this could be
4576# easily changed if they were inputs passed in registers.
4577        mov.w           EXC_OPWORD(%a6),%d0     # fetch opcode word
4578        mov.w           %d0,%d1                 # make a copy
4579
4580        andi.w          &0x3f,%d0               # keep 6-bit {MODE,REG} table index
4581        andi.l          &0x7,%d1                # extract reg  field (d1 = 0..7)
4582
4583# jump to the corresponding function for each {MODE,REG} pair.
# tbl_fea_mode holds word offsets relative to its own start: the first
# instruction loads the offset for this index, the second jumps to
# tbl_fea_mode + offset.
4584        mov.w           (tbl_fea_mode.b,%pc,%d0.w*2),%d0 # fetch jmp distance
4585        jmp             (tbl_fea_mode.b,%pc,%d0.w*1) # jmp to correct ea mode
4586
4587        swbeg           &64
# Jump table for fmovm_calc_ea: 64 word-sized offsets, each relative to
# tbl_fea_mode, indexed by the low six bits of the opword ({MODE,REG}).
# An offset of zero (tbl_fea_mode - tbl_fea_mode) marks a {MODE,REG}
# pair that has no handler in this table.
# NOTE(review): dispatching on a zero entry would jump to the table
# itself; presumably callers never reach here with such a mode -- confirm.
4588tbl_fea_mode:
# index 0x00-0x07 (MODE 000): no handler
4589        short           tbl_fea_mode    -       tbl_fea_mode
4590        short           tbl_fea_mode    -       tbl_fea_mode
4591        short           tbl_fea_mode    -       tbl_fea_mode
4592        short           tbl_fea_mode    -       tbl_fea_mode
4593        short           tbl_fea_mode    -       tbl_fea_mode
4594        short           tbl_fea_mode    -       tbl_fea_mode
4595        short           tbl_fea_mode    -       tbl_fea_mode
4596        short           tbl_fea_mode    -       tbl_fea_mode
4597
# index 0x08-0x0f (MODE 001): no handler
4598        short           tbl_fea_mode    -       tbl_fea_mode
4599        short           tbl_fea_mode    -       tbl_fea_mode
4600        short           tbl_fea_mode    -       tbl_fea_mode
4601        short           tbl_fea_mode    -       tbl_fea_mode
4602        short           tbl_fea_mode    -       tbl_fea_mode
4603        short           tbl_fea_mode    -       tbl_fea_mode
4604        short           tbl_fea_mode    -       tbl_fea_mode
4605        short           tbl_fea_mode    -       tbl_fea_mode
4606
# index 0x10-0x17 (MODE 010): (An)
4607        short           faddr_ind_a0    -       tbl_fea_mode
4608        short           faddr_ind_a1    -       tbl_fea_mode
4609        short           faddr_ind_a2    -       tbl_fea_mode
4610        short           faddr_ind_a3    -       tbl_fea_mode
4611        short           faddr_ind_a4    -       tbl_fea_mode
4612        short           faddr_ind_a5    -       tbl_fea_mode
4613        short           faddr_ind_a6    -       tbl_fea_mode
4614        short           faddr_ind_a7    -       tbl_fea_mode
4615
# index 0x18-0x1f (MODE 011): (An)+
4616        short           faddr_ind_p_a0  -       tbl_fea_mode
4617        short           faddr_ind_p_a1  -       tbl_fea_mode
4618        short           faddr_ind_p_a2  -       tbl_fea_mode
4619        short           faddr_ind_p_a3  -       tbl_fea_mode
4620        short           faddr_ind_p_a4  -       tbl_fea_mode
4621        short           faddr_ind_p_a5  -       tbl_fea_mode
4622        short           faddr_ind_p_a6  -       tbl_fea_mode
4623        short           faddr_ind_p_a7  -       tbl_fea_mode
4624
# index 0x20-0x27 (MODE 100): -(An)
4625        short           faddr_ind_m_a0  -       tbl_fea_mode
4626        short           faddr_ind_m_a1  -       tbl_fea_mode
4627        short           faddr_ind_m_a2  -       tbl_fea_mode
4628        short           faddr_ind_m_a3  -       tbl_fea_mode
4629        short           faddr_ind_m_a4  -       tbl_fea_mode
4630        short           faddr_ind_m_a5  -       tbl_fea_mode
4631        short           faddr_ind_m_a6  -       tbl_fea_mode
4632        short           faddr_ind_m_a7  -       tbl_fea_mode
4633
# index 0x28-0x2f (MODE 101): (d16,An)
4634        short           faddr_ind_disp_a0       -       tbl_fea_mode
4635        short           faddr_ind_disp_a1       -       tbl_fea_mode
4636        short           faddr_ind_disp_a2       -       tbl_fea_mode
4637        short           faddr_ind_disp_a3       -       tbl_fea_mode
4638        short           faddr_ind_disp_a4       -       tbl_fea_mode
4639        short           faddr_ind_disp_a5       -       tbl_fea_mode
4640        short           faddr_ind_disp_a6       -       tbl_fea_mode
4641        short           faddr_ind_disp_a7       -       tbl_fea_mode
4642
# index 0x30-0x37 (MODE 110): indexed / memory indirect; one shared
# handler that picks the base register from the register number in d1
4643        short           faddr_ind_ext   -       tbl_fea_mode
4644        short           faddr_ind_ext   -       tbl_fea_mode
4645        short           faddr_ind_ext   -       tbl_fea_mode
4646        short           faddr_ind_ext   -       tbl_fea_mode
4647        short           faddr_ind_ext   -       tbl_fea_mode
4648        short           faddr_ind_ext   -       tbl_fea_mode
4649        short           faddr_ind_ext   -       tbl_fea_mode
4650        short           faddr_ind_ext   -       tbl_fea_mode
4651
# index 0x38-0x3f (MODE 111): (xxx).W, (xxx).L, (d16,PC), PC indexed;
# the remaining four REG values have no handler
4652        short           fabs_short      -       tbl_fea_mode
4653        short           fabs_long       -       tbl_fea_mode
4654        short           fpc_ind         -       tbl_fea_mode
4655        short           fpc_ind_ext     -       tbl_fea_mode
4656        short           tbl_fea_mode    -       tbl_fea_mode
4657        short           tbl_fea_mode    -       tbl_fea_mode
4658        short           tbl_fea_mode    -       tbl_fea_mode
4659        short           tbl_fea_mode    -       tbl_fea_mode
4660
4661###################################
4662# Address register indirect: (An) #
4663###################################
# Each entry returns the user's An in a0 and touches nothing else.
# a0 and a1 are read from the save area at EXC_DREGS(%a6) (offsets 0x8
# and 0xc), a2-a5 straight from the live registers, a6 from the longword
# at (%a6), and a7 from EXC_A7(%a6).
# NOTE(review): presumably a0/a1/a6/a7 come from the frame because the
# handler itself uses those registers -- confirm against the entry code.
4664faddr_ind_a0:
4665        mov.l           EXC_DREGS+0x8(%a6),%a0  # Get current a0
4666        rts
4667
4668faddr_ind_a1:
4669        mov.l           EXC_DREGS+0xc(%a6),%a0  # Get current a1
4670        rts
4671
4672faddr_ind_a2:
4673        mov.l           %a2,%a0                 # Get current a2
4674        rts
4675
4676faddr_ind_a3:
4677        mov.l           %a3,%a0                 # Get current a3
4678        rts
4679
4680faddr_ind_a4:
4681        mov.l           %a4,%a0                 # Get current a4
4682        rts
4683
4684faddr_ind_a5:
4685        mov.l           %a5,%a0                 # Get current a5
4686        rts
4687
4688faddr_ind_a6:
4689        mov.l           (%a6),%a0               # Get current a6
4690        rts
4691
4692faddr_ind_a7:
4693        mov.l           EXC_A7(%a6),%a0         # Get current a7
4694        rts
4695
4696#####################################################
4697# Address register indirect w/ postincrement: (An)+ #
4698#####################################################
# On entry a0 holds the byte count set up by fmovm_calc_ea. Each entry
# returns the pre-increment An in a0 and writes An + count back to where
# that register lives (frame slot or live register). d0 and d1 are used
# as scratch: on return d0 = old An, d1 = incremented An.
# The a7 variant also stores mia7_flg in SPCOND_FLG(%a6).
4699faddr_ind_p_a0:
4700        mov.l           EXC_DREGS+0x8(%a6),%d0  # Get current a0
4701        mov.l           %d0,%d1                 # keep d0 as the <ea> to return
4702        add.l           %a0,%d1                 # Increment
4703        mov.l           %d1,EXC_DREGS+0x8(%a6)  # Save incr value
4704        mov.l           %d0,%a0                 # return old a0 as <ea>
4705        rts
4706
4707faddr_ind_p_a1:
4708        mov.l           EXC_DREGS+0xc(%a6),%d0  # Get current a1
4709        mov.l           %d0,%d1                 # keep d0 as the <ea> to return
4710        add.l           %a0,%d1                 # Increment
4711        mov.l           %d1,EXC_DREGS+0xc(%a6)  # Save incr value
4712        mov.l           %d0,%a0                 # return old a1 as <ea>
4713        rts
4714
4715faddr_ind_p_a2:
4716        mov.l           %a2,%d0                 # Get current a2
4717        mov.l           %d0,%d1                 # keep d0 as the <ea> to return
4718        add.l           %a0,%d1                 # Increment
4719        mov.l           %d1,%a2                 # Save incr value
4720        mov.l           %d0,%a0                 # return old a2 as <ea>
4721        rts
4722
4723faddr_ind_p_a3:
4724        mov.l           %a3,%d0                 # Get current a3
4725        mov.l           %d0,%d1                 # keep d0 as the <ea> to return
4726        add.l           %a0,%d1                 # Increment
4727        mov.l           %d1,%a3                 # Save incr value
4728        mov.l           %d0,%a0                 # return old a3 as <ea>
4729        rts
4730
4731faddr_ind_p_a4:
4732        mov.l           %a4,%d0                 # Get current a4
4733        mov.l           %d0,%d1                 # keep d0 as the <ea> to return
4734        add.l           %a0,%d1                 # Increment
4735        mov.l           %d1,%a4                 # Save incr value
4736        mov.l           %d0,%a0                 # return old a4 as <ea>
4737        rts
4738
4739faddr_ind_p_a5:
4740        mov.l           %a5,%d0                 # Get current a5
4741        mov.l           %d0,%d1                 # keep d0 as the <ea> to return
4742        add.l           %a0,%d1                 # Increment
4743        mov.l           %d1,%a5                 # Save incr value
4744        mov.l           %d0,%a0                 # return old a5 as <ea>
4745        rts
4746
4747faddr_ind_p_a6:
4748        mov.l           (%a6),%d0               # Get current a6
4749        mov.l           %d0,%d1                 # keep d0 as the <ea> to return
4750        add.l           %a0,%d1                 # Increment
4751        mov.l           %d1,(%a6)               # Save incr value
4752        mov.l           %d0,%a0                 # return old a6 as <ea>
4753        rts
4754
4755faddr_ind_p_a7:
4756        mov.b           &mia7_flg,SPCOND_FLG(%a6) # set "special case" flag
4757
4758        mov.l           EXC_A7(%a6),%d0         # Get current a7
4759        mov.l           %d0,%d1                 # keep d0 as the <ea> to return
4760        add.l           %a0,%d1                 # Increment
4761        mov.l           %d1,EXC_A7(%a6)         # Save incr value
4762        mov.l           %d0,%a0                 # return old a7 as <ea>
4763        rts
4764
4765####################################################
4766# Address register indirect w/ predecrement: -(An) #
4767####################################################
# On entry a0 holds the byte count set up by fmovm_calc_ea. Each entry
# subtracts the count from An, writes the result back to where that
# register lives (frame slot or live register), and returns the
# decremented value in a0 (d0 holds the same value on return).
# The a7 variant also stores mda7_flg in SPCOND_FLG(%a6).
4768faddr_ind_m_a0:
4769        mov.l           EXC_DREGS+0x8(%a6),%d0  # Get current a0
4770        sub.l           %a0,%d0                 # Decrement
4771        mov.l           %d0,EXC_DREGS+0x8(%a6)  # Save decr value
4772        mov.l           %d0,%a0                 # return new a0 as <ea>
4773        rts
4774
4775faddr_ind_m_a1:
4776        mov.l           EXC_DREGS+0xc(%a6),%d0  # Get current a1
4777        sub.l           %a0,%d0                 # Decrement
4778        mov.l           %d0,EXC_DREGS+0xc(%a6)  # Save decr value
4779        mov.l           %d0,%a0                 # return new a1 as <ea>
4780        rts
4781
4782faddr_ind_m_a2:
4783        mov.l           %a2,%d0                 # Get current a2
4784        sub.l           %a0,%d0                 # Decrement
4785        mov.l           %d0,%a2                 # Save decr value
4786        mov.l           %d0,%a0                 # return new a2 as <ea>
4787        rts
4788
4789faddr_ind_m_a3:
4790        mov.l           %a3,%d0                 # Get current a3
4791        sub.l           %a0,%d0                 # Decrement
4792        mov.l           %d0,%a3                 # Save decr value
4793        mov.l           %d0,%a0                 # return new a3 as <ea>
4794        rts
4795
4796faddr_ind_m_a4:
4797        mov.l           %a4,%d0                 # Get current a4
4798        sub.l           %a0,%d0                 # Decrement
4799        mov.l           %d0,%a4                 # Save decr value
4800        mov.l           %d0,%a0                 # return new a4 as <ea>
4801        rts
4802
4803faddr_ind_m_a5:
4804        mov.l           %a5,%d0                 # Get current a5
4805        sub.l           %a0,%d0                 # Decrement
4806        mov.l           %d0,%a5                 # Save decr value
4807        mov.l           %d0,%a0                 # return new a5 as <ea>
4808        rts
4809
4810faddr_ind_m_a6:
4811        mov.l           (%a6),%d0               # Get current a6
4812        sub.l           %a0,%d0                 # Decrement
4813        mov.l           %d0,(%a6)               # Save decr value
4814        mov.l           %d0,%a0                 # return new a6 as <ea>
4815        rts
4816
4817faddr_ind_m_a7:
4818        mov.b           &mda7_flg,SPCOND_FLG(%a6) # set "special case" flag
4819
4820        mov.l           EXC_A7(%a6),%d0         # Get current a7
4821        sub.l           %a0,%d0                 # Decrement
4822        mov.l           %d0,EXC_A7(%a6)         # Save decr value
4823        mov.l           %d0,%a0                 # return new a7 as <ea>
4824        rts
4825
4826########################################################
4827# Address register indirect w/ displacement: (d16, An) #
4828########################################################
# Each entry reads the 16-bit displacement word at EXC_EXTWPTR(%a6)
# through _imem_read_word (address passed in a0, word returned in d0),
# advances EXC_EXTWPTR by 2, and returns An + sign-extended d16 in a0.
# A non-zero d1 after the read is treated as a failed instruction fetch
# and leaves through iea_iacc instead of returning.
# An is read from the same places as in the (An) handlers: the frame
# for a0/a1/a6/a7, the live registers for a2-a5.
4829faddr_ind_disp_a0:
4830        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
4831        addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
4832        bsr.l           _imem_read_word
4833
4834        tst.l           %d1                     # did ifetch fail?
4835        bne.l           iea_iacc                # yes
4836
4837        mov.w           %d0,%a0                 # sign extend displacement
4838
4839        add.l           EXC_DREGS+0x8(%a6),%a0  # a0 + d16
4840        rts
4841
4842faddr_ind_disp_a1:
4843        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
4844        addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
4845        bsr.l           _imem_read_word
4846
4847        tst.l           %d1                     # did ifetch fail?
4848        bne.l           iea_iacc                # yes
4849
4850        mov.w           %d0,%a0                 # sign extend displacement
4851
4852        add.l           EXC_DREGS+0xc(%a6),%a0  # a1 + d16
4853        rts
4854
4855faddr_ind_disp_a2:
4856        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
4857        addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
4858        bsr.l           _imem_read_word
4859
4860        tst.l           %d1                     # did ifetch fail?
4861        bne.l           iea_iacc                # yes
4862
4863        mov.w           %d0,%a0                 # sign extend displacement
4864
4865        add.l           %a2,%a0                 # a2 + d16
4866        rts
4867
4868faddr_ind_disp_a3:
4869        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
4870        addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
4871        bsr.l           _imem_read_word
4872
4873        tst.l           %d1                     # did ifetch fail?
4874        bne.l           iea_iacc                # yes
4875
4876        mov.w           %d0,%a0                 # sign extend displacement
4877
4878        add.l           %a3,%a0                 # a3 + d16
4879        rts
4880
4881faddr_ind_disp_a4:
4882        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
4883        addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
4884        bsr.l           _imem_read_word
4885
4886        tst.l           %d1                     # did ifetch fail?
4887        bne.l           iea_iacc                # yes
4888
4889        mov.w           %d0,%a0                 # sign extend displacement
4890
4891        add.l           %a4,%a0                 # a4 + d16
4892        rts
4893
4894faddr_ind_disp_a5:
4895        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
4896        addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
4897        bsr.l           _imem_read_word
4898
4899        tst.l           %d1                     # did ifetch fail?
4900        bne.l           iea_iacc                # yes
4901
4902        mov.w           %d0,%a0                 # sign extend displacement
4903
4904        add.l           %a5,%a0                 # a5 + d16
4905        rts
4906
4907faddr_ind_disp_a6:
4908        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
4909        addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
4910        bsr.l           _imem_read_word
4911
4912        tst.l           %d1                     # did ifetch fail?
4913        bne.l           iea_iacc                # yes
4914
4915        mov.w           %d0,%a0                 # sign extend displacement
4916
4917        add.l           (%a6),%a0               # a6 + d16
4918        rts
4919
4920faddr_ind_disp_a7:
4921        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
4922        addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
4923        bsr.l           _imem_read_word
4924
4925        tst.l           %d1                     # did ifetch fail?
4926        bne.l           iea_iacc                # yes
4927
4928        mov.w           %d0,%a0                 # sign extend displacement
4929
4930        add.l           EXC_A7(%a6),%a0         # a7 + d16
4931        rts
4932
4933########################################################################
4934# Address register indirect w/ index(8-bit displacement): (d8, An, Xn) #
4935#    "       "         "    w/   "  (base displacement): (bd, An, Xn)  #
4936# Memory indirect postindexed: ([bd, An], Xn, od)                      #
4937# Memory indirect preindexed: ([bd, An, Xn], od)                       #
4938########################################################################
# On entry d1 holds the base register number (0-7) from fmovm_calc_ea.
# The routine fetches the base An through fetch_dreg, reads the extension
# word, and then either
#   - hands off to fcalc_mem_ind when bit 8 of the extension word is set
#     (a0 = base, d0 = extension word), or
#   - computes a0 = An + (Xn * scale) + sign-extended d8 itself, using
#     bits 15-12 as the index register number, bit 11 as the word/long
#     index flag, bits 10-9 as the scale shift and bits 7-0 as d8.
# d2 is saved and restored around its use as a temporary; L_SCR1(%a6)
# is overwritten with the extension word.
# NOTE(review): if the extension-word fetch fails, the base pushed on the
# stack is still there when control goes to iea_iacc; presumably that exit
# rebuilds the stack from a6 -- confirm.
4939faddr_ind_ext:
4940        addq.l          &0x8,%d1                # regno + 8 for the fetch below
4941        bsr.l           fetch_dreg              # fetch base areg
4942        mov.l           %d0,-(%sp)              # park base across the ifetch
4943
4944        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
4945        addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
4946        bsr.l           _imem_read_word         # fetch extword in d0
4947
4948        tst.l           %d1                     # did ifetch fail?
4949        bne.l           iea_iacc                # yes
4950
4951        mov.l           (%sp)+,%a0              # a0 = base An
4952
4953        btst            &0x8,%d0                # is disp only 8 bits?
4954        bne.w           fcalc_mem_ind           # no; decode bd/index/od there
4955
4956        mov.l           %d0,L_SCR1(%a6)         # hold extension word
4957
4958        mov.l           %d0,%d1
4959        rol.w           &0x4,%d1                # bits 15-12 -> bits 3-0
4960        andi.w          &0xf,%d1                # extract index regno
4961
4962# count on fetch_dreg() not to alter a0...
4963        bsr.l           fetch_dreg              # fetch index
4964
4965        mov.l           %d2,-(%sp)              # save d2
4966        mov.l           L_SCR1(%a6),%d2         # fetch extension word
4967
4968        btst            &0xb,%d2                # is it word or long?
4969        bne.b           faii8_long
4970        ext.l           %d0                     # sign extend word index
4971faii8_long:
4972        mov.l           %d2,%d1
4973        rol.w           &0x7,%d1                # bits 10-9 -> bits 1-0
4974        andi.l          &0x3,%d1                # extract scale value
4975
4976        lsl.l           %d1,%d0                 # shift index by scale
4977
4978        extb.l          %d2                     # sign extend displacement
4979        add.l           %d2,%d0                 # index + disp
4980        add.l           %d0,%a0                 # An + (index + disp)
4981
4982        mov.l           (%sp)+,%d2              # restore old d2
4983        rts
4984
4985###########################
4986# Absolute short: (XXX).W #
4987###########################
# Reads the 16-bit address word at EXC_EXTWPTR(%a6) through
# _imem_read_word, advances EXC_EXTWPTR by 2, and returns the word in a0
# (the word-sized move into an address register sign-extends it to 32
# bits). A non-zero d1 after the read exits through iea_iacc.
4988fabs_short:
4989        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
4990        addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
4991        bsr.l           _imem_read_word         # fetch short address
4992
4993        tst.l           %d1                     # did ifetch fail?
4994        bne.l           iea_iacc                # yes
4995
4996        mov.w           %d0,%a0                 # return <ea> in a0
4997        rts
4998
4999##########################
5000# Absolute long: (XXX).L #
5001##########################
# Reads the 32-bit address at EXC_EXTWPTR(%a6) through _imem_read_long,
# advances EXC_EXTWPTR by 4, and returns the longword in a0.
# A non-zero d1 after the read exits through iea_iacc.
5002fabs_long:
5003        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
5004        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
5005        bsr.l           _imem_read_long         # fetch long address
5006
5007        tst.l           %d1                     # did ifetch fail?
5008        bne.l           iea_iacc                # yes
5009
5010        mov.l           %d0,%a0                 # return <ea> in a0
5011        rts
5012
5013#######################################################
5014# Program counter indirect w/ displacement: (d16, PC) #
5015#######################################################
# Reads the 16-bit displacement at EXC_EXTWPTR(%a6), advances
# EXC_EXTWPTR by 2, and returns in a0 the address of that displacement
# word plus the sign-extended displacement.
# A non-zero d1 after the read exits through iea_iacc.
5016fpc_ind:
5017        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
5018        addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
5019        bsr.l           _imem_read_word         # fetch word displacement
5020
5021        tst.l           %d1                     # did ifetch fail?
5022        bne.l           iea_iacc                # yes
5023
5024        mov.w           %d0,%a0                 # sign extend displacement
5025
5026        add.l           EXC_EXTWPTR(%a6),%a0    # pc + d16
5027
5028# EXC_EXTWPTR was already advanced past the displacement word above, so
# back the result up by 2 to make the base the address of that word.
5029        subq.l          &0x2,%a0                # adjust <ea>
5030        rts
5031
5032##########################################################
5033# PC indirect w/ index(8-bit displacement): (d8, PC, Xn) #
5034# "     "     w/   "  (base displacement): (bd, PC, Xn)  #
5035# PC memory indirect postindexed: ([bd, PC], Xn, od)     #
5036# PC memory indirect preindexed: ([bd, PC, Xn], od)      #
5037##########################################################
# Reads the extension word at EXC_EXTWPTR(%a6) and advances EXC_EXTWPTR
# by 2. The base placed in a0 is the address of that extension word.
# Then either
#   - hands off to fcalc_mem_ind when bit 8 of the extension word is set
#     (a0 = base, d0 = extension word), or
#   - computes a0 = base + (Xn * scale) + sign-extended d8 itself, using
#     bits 15-12 as the index register number, bit 11 as the word/long
#     index flag, bits 10-9 as the scale shift and bits 7-0 as d8.
# d2 is saved and restored around its use as a temporary; L_SCR1(%a6)
# is overwritten with the extension word.
5038fpc_ind_ext:
5039        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
5040        addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
5041        bsr.l           _imem_read_word         # fetch ext word
5042
5043        tst.l           %d1                     # did ifetch fail?
5044        bne.l           iea_iacc                # yes
5045
5046        mov.l           EXC_EXTWPTR(%a6),%a0    # put base in a0
5047        subq.l          &0x2,%a0                # adjust base
5048
5049        btst            &0x8,%d0                # is disp only 8 bits?
5050        bne.w           fcalc_mem_ind           # calc memory indirect
5051
5052        mov.l           %d0,L_SCR1(%a6)         # store extension word
5053
5054        mov.l           %d0,%d1                 # make extword copy
5055        rol.w           &0x4,%d1                # rotate reg num into place
5056        andi.w          &0xf,%d1                # extract register number
5057
5058# count on fetch_dreg() not to alter a0...
5059        bsr.l           fetch_dreg              # fetch index
5060
5061        mov.l           %d2,-(%sp)              # save d2
5062        mov.l           L_SCR1(%a6),%d2         # fetch extension word
5063
5064        btst            &0xb,%d2                # is index word or long?
5065        bne.b           fpii8_long              # long
5066        ext.l           %d0                     # sign extend word index
5067fpii8_long:
5068        mov.l           %d2,%d1
5069        rol.w           &0x7,%d1                # rotate scale value into place
5070        andi.l          &0x3,%d1                # extract scale value
5071
5072        lsl.l           %d1,%d0                 # shift index by scale
5073
5074        extb.l          %d2                     # sign extend displacement
5075        add.l           %d2,%d0                 # disp + index
5076        add.l           %d0,%a0                 # PC base + (index + disp)
5077
5078        mov.l           (%sp)+,%d2              # restore temp register
5079        rts
5080
# fcalc_mem_ind: finish an <ea> whose extension word has bit 8 set.
# Reached by branch from faddr_ind_ext and fpc_ind_ext with
#       a0 = base address, d0 = extension word
# and returns (rts to the original caller) with the <ea> in a0.
# Extension word fields as decoded below:
#       bits 15-12  index register number (passed to fetch_dreg)
#       bit  11     index size: 0 = sign-extend word, 1 = long
#       bits 10-9   scale (shift count applied to the index)
#       bit  7      base suppress: base is cleared when set
#       bit  6      index suppress: index is zero when set
#       bits 5-4    bd size: <2 none, 2 = word, 3 = long
#       bit  2      0 = preindexed, 1 = postindexed
#       bits 1-0    od size: 0 = no memory indirection, 1 = null od,
#                   2 = word, 3 = long
# d2-d5 are saved on the stack and restored on every exit path.
# Register use while decoding:
5081# d2 = index
5082# d3 = base
5083# d4 = od
5084# d5 = extword
5085fcalc_mem_ind:
5086        btst            &0x6,%d0                # is the index suppressed?
5087        beq.b           fcalc_index
5088
5089        movm.l          &0x3c00,-(%sp)          # save d2-d5
5090
5091        mov.l           %d0,%d5                 # put extword in d5
5092        mov.l           %a0,%d3                 # put base in d3
5093
5094        clr.l           %d2                     # yes, so index = 0
5095        bra.b           fbase_supp_ck
5096
5097# index:
5098fcalc_index:
5099        mov.l           %d0,L_SCR1(%a6)         # save d0 (extension word)
5100        bfextu          %d0{&16:&4},%d1         # fetch dreg index
5101        bsr.l           fetch_dreg              # relies on a0 being preserved
5102
5103        movm.l          &0x3c00,-(%sp)          # save d2-d5
5104        mov.l           %d0,%d2                 # put index in d2
5105        mov.l           L_SCR1(%a6),%d5         # put extword in d5
5106        mov.l           %a0,%d3                 # put base in d3
5107
5108        btst            &0xb,%d5                # is index word or long?
5109        bne.b           fno_ext
5110        ext.l           %d2                     # sign extend word index
5111
5112fno_ext:
5113        bfextu          %d5{&21:&2},%d0         # get scale (bits 10-9)
5114        lsl.l           %d0,%d2                 # shift index by scale
5115
5116# base address (passed as parameter in d3):
5117# we clear the value here if it should actually be suppressed.
5118fbase_supp_ck:
5119        btst            &0x7,%d5                # is the base suppressed?
5120        beq.b           fno_base_sup
5121        clr.l           %d3
5122
5123# base displacement:
5124fno_base_sup:
5125        bfextu          %d5{&26:&2},%d0         # get bd size
5126#       beq.l           fmovm_error             # if (size == 0) it's reserved
5127
5128        cmpi.b          %d0,&0x2
5129        blt.b           fno_bd                  # size < 2: nothing to add
5130        beq.b           fget_word_bd            # size == 2: word bd
5131
# size == 3: long bd
5132        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
5133        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
5134        bsr.l           _imem_read_long
5135
5136        tst.l           %d1                     # did ifetch fail?
5137        bne.l           fcea_iacc               # yes
5138
5139        bra.b           fchk_ind
5140
5141fget_word_bd:
5142        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
5143        addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
5144        bsr.l           _imem_read_word
5145
5146        tst.l           %d1                     # did ifetch fail?
5147        bne.l           fcea_iacc               # yes
5148
5149        ext.l           %d0                     # sign extend bd
5150
5151fchk_ind:
5152        add.l           %d0,%d3                 # base += bd
5153
5154# outer displacement:
5155fno_bd:
5156        bfextu          %d5{&30:&2},%d0         # is od suppressed?
5157        beq.w           faii_bd                 # field == 0: no memory indirect
5158
5159        cmpi.b          %d0,&0x2
5160        blt.b           fnull_od                # field == 1: od = 0
5161        beq.b           fword_od                # field == 2: word od
5162
# field == 3: long od
5163        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
5164        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
5165        bsr.l           _imem_read_long
5166
5167        tst.l           %d1                     # did ifetch fail?
5168        bne.l           fcea_iacc               # yes
5169
5170        bra.b           fadd_them
5171
5172fword_od:
5173        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
5174        addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
5175        bsr.l           _imem_read_word
5176
5177        tst.l           %d1                     # did ifetch fail?
5178        bne.l           fcea_iacc               # yes
5179
5180        ext.l           %d0                     # sign extend od
5181        bra.b           fadd_them
5182
5183fnull_od:
5184        clr.l           %d0
5185
5186fadd_them:
5187        mov.l           %d0,%d4                 # put od in d4
5188
5189        btst            &0x2,%d5                # pre or post indexing?
5190        beq.b           fpre_indexed
5191
# postindexed: read the pointer at (base + bd), then add index and od
5192        mov.l           %d3,%a0
5193        bsr.l           _dmem_read_long
5194
5195        tst.l           %d1                     # did dfetch fail?
5196        bne.w           fcea_err                # yes
5197
5198        add.l           %d2,%d0                 # <ea> += index
5199        add.l           %d4,%d0                 # <ea> += od
5200        bra.b           fdone_ea
5201
# preindexed: read the pointer at (base + bd + index), then add od
5202fpre_indexed:
5203        add.l           %d2,%d3                 # preindexing
5204        mov.l           %d3,%a0
5205        bsr.l           _dmem_read_long
5206
5207        tst.l           %d1                     # did dfetch fail?
5208        bne.w           fcea_err                # yes
5209
5210        add.l           %d4,%d0                 # ea += od
5211        bra.b           fdone_ea
5212
5213faii_bd:
5214        add.l           %d2,%d3                 # ea = (base + bd) + index
5215        mov.l           %d3,%d0
5216fdone_ea:
5217        mov.l           %d0,%a0                 # return <ea> in a0
5218
5219        movm.l          (%sp)+,&0x003c          # restore d2-d5
5220        rts
5221
5222#########################################################
# Error exits for fcalc_mem_ind. Both restore the d2-d5 that
# fcalc_mem_ind pushed and then leave through a shared handler
# instead of returning to the caller.
#
# fcea_err:  the _dmem_read_long of the indirect pointer failed.
#            a0 = d3, the address that was being read; d0 = 0x0101.
#            NOTE(review): 0x0101 is presumably fault status for
#            iea_dacc -- confirm the encoding there.
# fcea_iacc: an instruction-stream read (bd or od) failed.
5223fcea_err:
5224        mov.l           %d3,%a0                 # a0 = address of failed read
5225
5226        movm.l          (%sp)+,&0x003c          # restore d2-d5
5227        mov.w           &0x0101,%d0
5228        bra.l           iea_dacc
5229
5230fcea_iacc:
5231        movm.l          (%sp)+,&0x003c          # restore d2-d5
5232        bra.l           iea_iacc
5233
# Data access error exits. Both entry points call restore, load a
# different word constant into d0 (0x00e1 for fmovm_out_err, 0x0161 for
# fmovm_in_err), then share fmovm_err, which loads a0 from L_SCR1(%a6)
# and leaves through iea_dacc.
# NOTE(review): the d0 constants are presumably fault status codes and
# L_SCR1 presumably holds the faulting address saved by the caller --
# confirm against iea_dacc and the fmovm code that branches here.
5234fmovm_out_err:
5235        bsr.l           restore
5236        mov.w           &0x00e1,%d0
5237        bra.b           fmovm_err
5238
5239fmovm_in_err:
5240        bsr.l           restore
5241        mov.w           &0x0161,%d0
5242
5243fmovm_err:
5244        mov.l           L_SCR1(%a6),%a0
5245        bra.l           iea_dacc
5246
5247#########################################################################
5248# XDEF **************************************************************** #
5249#       fmovm_ctrl(): emulate fmovm.l of control registers instr        #
5250#                                                                       #
5251# XREF **************************************************************** #
5252#       _imem_read_long() - read longword from memory                   #
5253#       iea_iacc() - _imem_read_long() failed; error recovery           #
5254#                                                                       #
5255# INPUT *************************************************************** #
5256#       None                                                            #
5257#                                                                       #
5258# OUTPUT ************************************************************** #
5259#       If _imem_read_long() doesn't fail:                              #
5260#               USER_FPCR(a6)  = new FPCR value                         #
5261#               USER_FPSR(a6)  = new FPSR value                         #
5262#               USER_FPIAR(a6) = new FPIAR value                        #
5263#                                                                       #
5264# ALGORITHM *********************************************************** #
5265#       Decode the instruction type by looking at the extension word    #
5266# in order to see how many control registers to fetch from memory.      #
5267# Fetch them using _imem_read_long(). If this fetch fails, exit through #
5268# the special access error exit handler iea_iacc().                     #
5269#                                                                       #
5270# Instruction word decoding:                                            #
5271#                                                                       #
5272#       fmovem.l #<data>, {FPIAR&|FPCR&|FPSR}                           #
5273#                                                                       #
5274#               WORD1                   WORD2                           #
5275#       1111 0010 00 111100     100$ $$00 0000 0000                     #
5276#                                                                       #
5277#       $$$ (100): FPCR                                                 #
5278#           (010): FPSR                                                 #
5279#           (001): FPIAR                                                #
5280#           (000): FPIAR                                                #
5281#                                                                       #
5282#########################################################################
5283
5284        global          fmovm_ctrl
5285fmovm_ctrl:
5286        mov.b           EXC_EXTWORD(%a6),%d0    # fetch reg select bits (first byte of ext word)
5287        cmpi.b          %d0,&0x9c               # fpcr & fpsr & fpiar ?
5288        beq.w           fctrl_in_7              # yes
5289        cmpi.b          %d0,&0x98               # fpcr & fpsr ?
5290        beq.w           fctrl_in_6              # yes
5291        cmpi.b          %d0,&0x94               # fpcr & fpiar ?
5292        beq.b           fctrl_in_5              # yes
5293
5294# fmovem.l #<data>, fpsr/fpiar (also taken by fall-through when no compare above matched)
5295fctrl_in_3:
5296        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
5297        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr past the longword
5298        bsr.l           _imem_read_long         # fetch FPSR from mem
5299
5300        tst.l           %d1                     # did ifetch fail? (d1 != 0)
5301        bne.l           iea_iacc                # yes
5302
5303        mov.l           %d0,USER_FPSR(%a6)      # store new FPSR to stack
5304        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
5305        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr past the longword
5306        bsr.l           _imem_read_long         # fetch FPIAR from mem
5307
5308        tst.l           %d1                     # did ifetch fail? (d1 != 0)
5309        bne.l           iea_iacc                # yes
5310
5311        mov.l           %d0,USER_FPIAR(%a6)     # store new FPIAR to stack
5312        rts
5313
5314# fmovem.l #<data>, fpcr/fpiar
5315fctrl_in_5:
5316        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
5317        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr past the longword
5318        bsr.l           _imem_read_long         # fetch FPCR from mem
5319
5320        tst.l           %d1                     # did ifetch fail? (d1 != 0)
5321        bne.l           iea_iacc                # yes
5322
5323        mov.l           %d0,USER_FPCR(%a6)      # store new FPCR to stack
5324        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
5325        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr past the longword
5326        bsr.l           _imem_read_long         # fetch FPIAR from mem
5327
5328        tst.l           %d1                     # did ifetch fail? (d1 != 0)
5329        bne.l           iea_iacc                # yes
5330
5331        mov.l           %d0,USER_FPIAR(%a6)     # store new FPIAR to stack
5332        rts
5333
5334# fmovem.l #<data>, fpcr/fpsr
5335fctrl_in_6:
5336        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
5337        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr past the longword
5338        bsr.l           _imem_read_long         # fetch FPCR from mem
5339
5340        tst.l           %d1                     # did ifetch fail? (d1 != 0)
5341        bne.l           iea_iacc                # yes
5342
5343        mov.l           %d0,USER_FPCR(%a6)      # store new FPCR to stack
5344        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
5345        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr past the longword
5346        bsr.l           _imem_read_long         # fetch FPSR from mem
5347
5348        tst.l           %d1                     # did ifetch fail? (d1 != 0)
5349        bne.l           iea_iacc                # yes
5350
5351        mov.l           %d0,USER_FPSR(%a6)      # store new FPSR to stack
5352        rts
5353
5354# fmovem.l #<data>, fpcr/fpsr/fpiar
5355fctrl_in_7:
5356        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
5357        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr past the longword
5358        bsr.l           _imem_read_long         # fetch FPCR from mem
5359
5360        tst.l           %d1                     # did ifetch fail? (d1 != 0)
5361        bne.l           iea_iacc                # yes
5362
5363        mov.l           %d0,USER_FPCR(%a6)      # store new FPCR to stack
5364        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
5365        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr past the longword
5366        bsr.l           _imem_read_long         # fetch FPSR from mem
5367
5368        tst.l           %d1                     # did ifetch fail? (d1 != 0)
5369        bne.l           iea_iacc                # yes
5370
5371        mov.l           %d0,USER_FPSR(%a6)      # store new FPSR to stack
5372        mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
5373        addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr past the longword
5374        bsr.l           _imem_read_long         # fetch FPIAR from mem
5375
5376        tst.l           %d1                     # did ifetch fail? (d1 != 0)
5377        bne.l           iea_iacc                # yes
5378
5379        mov.l           %d0,USER_FPIAR(%a6)     # store new FPIAR to stack
5380        rts
5381
5382##########################################################################
5383
5384#########################################################################
5385# XDEF **************************************************************** #
5386#       addsub_scaler2(): scale inputs to fadd/fsub such that no        #
5387#                         OVFL/UNFL exceptions will result              #
5388#                                                                       #
5389# XREF **************************************************************** #
5390#       norm() - normalize mantissa after adjusting exponent            #
5391#                                                                       #
5392# INPUT *************************************************************** #
5393#       FP_SRC(a6) = fp op1(src)                                        #
5394#       FP_DST(a6) = fp op2(dst)                                        #
5395#                                                                       #
5396# OUTPUT ************************************************************** #
5397#       FP_SCR0(a6) = fp op1 scaled(src)                                #
5398#       FP_SCR1(a6) = fp op2 scaled(dst)                                #
5399#       d0         = scale amount                                       #
5400#                                                                       #
5401# ALGORITHM *********************************************************** #
5402#       If the DST exponent is > the SRC exponent, set the DST exponent #
5403# equal to 0x3fff and scale the SRC exponent by the value that the      #
5404# DST exponent was scaled by. If the SRC exponent is greater or equal,  #
5405# do the opposite. Return this scale factor in d0.                      #
5406#       If the two exponents differ by > the number of mantissa bits    #
5407# plus two, then set the smallest exponent to a very small value as a   #
5408# quick shortcut.                                                       #
5409#                                                                       #
5410#########################################################################
5411
5412        global          addsub_scaler2
5413addsub_scaler2:
5414        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)     # copy src hi longword to FP_SCR0
5415        mov.l           DST_HI(%a1),FP_SCR1_HI(%a6)     # copy dst hi longword to FP_SCR1
5416        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)     # copy src lo longword to FP_SCR0
5417        mov.l           DST_LO(%a1),FP_SCR1_LO(%a6)     # copy dst lo longword to FP_SCR1
5418        mov.w           SRC_EX(%a0),%d0         # d0 = src {sgn,exp}
5419        mov.w           DST_EX(%a1),%d1         # d1 = dst {sgn,exp}
5420        mov.w           %d0,FP_SCR0_EX(%a6)     # copy src {sgn,exp} to FP_SCR0
5421        mov.w           %d1,FP_SCR1_EX(%a6)     # copy dst {sgn,exp} to FP_SCR1
5422
5423        andi.w          &0x7fff,%d0             # strip sgn; d0 = src exponent
5424        andi.w          &0x7fff,%d1             # strip sgn; d1 = dst exponent
5425        mov.w           %d0,L_SCR1(%a6)         # store src exponent
5426        mov.w           %d1,2+L_SCR1(%a6)       # store dst exponent
5427
5428        cmp.w           %d0, %d1                # is src exp >= dst exp?
5429        bge.l           src_exp_ge2
5430
5431# dst exp is >  src exp; scale dst to exp = 0x3fff
5432dst_exp_gt2:
5433        bsr.l           scale_to_zero_dst
5434        mov.l           %d0,-(%sp)              # save scale factor
5435
5436        cmpi.b          STAG(%a6),&DENORM       # is src denormalized?
5437        bne.b           cmpexp12
5438
5439        lea             FP_SCR0(%a6),%a0        # pass ptr to src copy
5440        bsr.l           norm                    # normalize the denorm; result is new exp
5441        neg.w           %d0                     # new exp = -(shft val)
5442        mov.w           %d0,L_SCR1(%a6)         # insert new src exp
5443
5444cmpexp12:
5445        mov.w           2+L_SCR1(%a6),%d0       # d0 = dst (larger) exponent
5446        subi.w          &mantissalen+2,%d0      # subtract mantissalen+2 from larger exp
5447
5448        cmp.w           %d0,L_SCR1(%a6)         # is difference >= len(mantissa)+2?
5449        bge.b           quick_scale12           # yes; take the shortcut
5450
5451        mov.w           L_SCR1(%a6),%d0         # d0 = src exponent
5452        add.w           0x2(%sp),%d0            # scale src exponent by scale factor (lo word of saved value)
5453        mov.w           FP_SCR0_EX(%a6),%d1
5454        and.w           &0x8000,%d1             # keep only src sgn
5455        or.w            %d1,%d0                 # concat {sgn,new exp}
5456        mov.w           %d0,FP_SCR0_EX(%a6)     # insert new src exponent
5457
5458        mov.l           (%sp)+,%d0              # return SCALE factor
5459        rts
5460
5461quick_scale12:
5462        andi.w          &0x8000,FP_SCR0_EX(%a6) # zero src exponent
5463        bset            &0x0,1+FP_SCR0_EX(%a6)  # set exp = 1
5464
5465        mov.l           (%sp)+,%d0              # return SCALE factor
5466        rts
5467
5468# src exp is >= dst exp; scale src to exp = 0x3fff
5469src_exp_ge2:
5470        bsr.l           scale_to_zero_src
5471        mov.l           %d0,-(%sp)              # save scale factor
5472
5473        cmpi.b          DTAG(%a6),&DENORM       # is dst denormalized?
5474        bne.b           cmpexp22
5475        lea             FP_SCR1(%a6),%a0        # pass ptr to dst copy
5476        bsr.l           norm                    # normalize the denorm; result is new exp
5477        neg.w           %d0                     # new exp = -(shft val)
5478        mov.w           %d0,2+L_SCR1(%a6)       # insert new dst exp
5479
5480cmpexp22:
5481        mov.w           L_SCR1(%a6),%d0         # d0 = src (larger or equal) exponent
5482        subi.w          &mantissalen+2,%d0      # subtract mantissalen+2 from larger exp
5483
5484        cmp.w           %d0,2+L_SCR1(%a6)       # is difference >= len(mantissa)+2?
5485        bge.b           quick_scale22           # yes; take the shortcut
5486
5487        mov.w           2+L_SCR1(%a6),%d0       # d0 = dst exponent
5488        add.w           0x2(%sp),%d0            # scale dst exponent by scale factor (lo word of saved value)
5489        mov.w           FP_SCR1_EX(%a6),%d1
5490        andi.w          &0x8000,%d1             # keep only dst sgn
5491        or.w            %d1,%d0                 # concat {sgn,new exp}
5492        mov.w           %d0,FP_SCR1_EX(%a6)     # insert new dst exponent
5493
5494        mov.l           (%sp)+,%d0              # return SCALE factor
5495        rts
5496
5497quick_scale22:
5498        andi.w          &0x8000,FP_SCR1_EX(%a6) # zero dst exponent
5499        bset            &0x0,1+FP_SCR1_EX(%a6)  # set exp = 1
5500
5501        mov.l           (%sp)+,%d0              # return SCALE factor
5502        rts
5503
5504##########################################################################
5505
5506#########################################################################
5507# XDEF **************************************************************** #
5508#       scale_to_zero_src(): scale the exponent of extended precision   #
5509#                            value at FP_SCR0(a6).                      #
5510#                                                                       #
5511# XREF **************************************************************** #
5512#       norm() - normalize the mantissa if the operand was a DENORM     #
5513#                                                                       #
5514# INPUT *************************************************************** #
5515#       FP_SCR0(a6) = extended precision operand to be scaled           #
5516#                                                                       #
5517# OUTPUT ************************************************************** #
5518#       FP_SCR0(a6) = scaled extended precision operand                 #
5519#       d0          = scale value                                       #
5520#                                                                       #
5521# ALGORITHM *********************************************************** #
5522#       Set the exponent of the input operand to 0x3fff. Save the value #
5523# of the difference between the original and new exponent. Then,        #
5524# normalize the operand if it was a DENORM. Add this normalization      #
5525# value to the previous value. Return the result.                       #
5526#                                                                       #
5527#########################################################################
5528
5529        global          scale_to_zero_src
5530scale_to_zero_src:
5531        mov.w           FP_SCR0_EX(%a6),%d1     # extract operand's {sgn,exp}
5532        mov.w           %d1,%d0                 # make a copy
5533
5534        andi.l          &0x7fff,%d1             # extract operand's exponent (upper word cleared too)
5535
5536        andi.w          &0x8000,%d0             # extract operand's sgn
5537        or.w            &0x3fff,%d0             # insert new operand's exponent(=0)
5538
5539        mov.w           %d0,FP_SCR0_EX(%a6)     # insert biased exponent
5540
5541        cmpi.b          STAG(%a6),&DENORM       # is operand a DENORM?
5542        beq.b           stzs_denorm             # yes; normalize the DENORM
5543
5544stzs_norm:
5545        mov.l           &0x3fff,%d0             # d0 = BIAS
5546        sub.l           %d1,%d0                 # scale = BIAS + (-exp)
5547
5548        rts
5549
5550stzs_denorm:
5551        lea             FP_SCR0(%a6),%a0        # pass ptr to src op
5552        bsr.l           norm                    # normalize denorm
5553        neg.l           %d0                     # new exponent = -(shft val)
5554        mov.l           %d0,%d1                 # d1 = exponent consumed by stzs_norm
5555        bra.b           stzs_norm               # finish scaling
5556
5557###
5558
5559#########################################################################
5560# XDEF **************************************************************** #
5561#       scale_sqrt(): scale the input operand exponent so a subsequent  #
5562#                     fsqrt operation won't take an exception.          #
5563#                                                                       #
5564# XREF **************************************************************** #
5565#       norm() - normalize the mantissa if the operand was a DENORM     #
5566#                                                                       #
5567# INPUT *************************************************************** #
5568#       FP_SCR0(a6) = extended precision operand to be scaled           #
5569#                                                                       #
5570# OUTPUT ************************************************************** #
5571#       FP_SCR0(a6) = scaled extended precision operand                 #
5572#       d0          = scale value                                       #
5573#                                                                       #
5574# ALGORITHM *********************************************************** #
5575#       If the input operand is a DENORM, normalize it.                 #
5576#       If the exponent of the input operand is even, set the exponent  #
5577# to 0x3ffe and return a scale factor of "(0x3ffe-exp)/2". If the       #
5578# exponent of the input operand is odd, set the exponent to 0x3fff and  #
5579# return a scale factor of "(0x3fff-exp)/2".                            #
5580#                                                                       #
5581#########################################################################
5582
5583        global          scale_sqrt
5584scale_sqrt:
5585        cmpi.b          STAG(%a6),&DENORM       # is operand a DENORM?
5586        beq.b           ss_denorm               # yes; normalize the DENORM
5587
5588        mov.w           FP_SCR0_EX(%a6),%d1     # extract operand's {sgn,exp}
5589        andi.l          &0x7fff,%d1             # extract operand's exponent
5590
5591        andi.w          &0x8000,FP_SCR0_EX(%a6) # keep only operand's sgn in place
5592
5593        btst            &0x0,%d1                # is exp even or odd?
5594        beq.b           ss_norm_even            # even
5595
5596        ori.w           &0x3fff,FP_SCR0_EX(%a6) # insert new operand's exponent(=0)
5597
5598        mov.l           &0x3fff,%d0
5599        sub.l           %d1,%d0                 # scale = BIAS + (-exp)
5600        asr.l           &0x1,%d0                # divide scale factor by 2
5601        rts
5602
5603ss_norm_even:
5604        ori.w           &0x3ffe,FP_SCR0_EX(%a6) # insert new operand's exponent(=-1)
5605
5606        mov.l           &0x3ffe,%d0
5607        sub.l           %d1,%d0                 # scale = (BIAS-1) + (-exp)
5608        asr.l           &0x1,%d0                # divide scale factor by 2
5609        rts
5610
5611ss_denorm:
5612        lea             FP_SCR0(%a6),%a0        # pass ptr to src op
5613        bsr.l           norm                    # normalize denorm; d0 = shft val
5614
5615        btst            &0x0,%d0                # is exp (= -shft val) even or odd?
5616        beq.b           ss_denorm_even          # even
5617
5618        ori.w           &0x3fff,FP_SCR0_EX(%a6) # insert new operand's exponent(=0); no mask first (presumably exp field is 0 for a DENORM)
5619
5620        add.l           &0x3fff,%d0             # scale = BIAS + shft val
5621        asr.l           &0x1,%d0                # divide scale factor by 2
5622        rts
5623
5624ss_denorm_even:
5625        ori.w           &0x3ffe,FP_SCR0_EX(%a6) # insert new operand's exponent(=-1); no mask first (presumably exp field is 0 for a DENORM)
5626
5627        add.l           &0x3ffe,%d0             # scale = (BIAS-1) + shft val
5628        asr.l           &0x1,%d0                # divide scale factor by 2
5629        rts
5630
5631###
5632
5633#########################################################################
5634# XDEF **************************************************************** #
5635#       scale_to_zero_dst(): scale the exponent of extended precision   #
5636#                            value at FP_SCR1(a6).                      #
5637#                                                                       #
5638# XREF **************************************************************** #
5639#       norm() - normalize the mantissa if the operand was a DENORM     #
5640#                                                                       #
5641# INPUT *************************************************************** #
5642#       FP_SCR1(a6) = extended precision operand to be scaled           #
5643#                                                                       #
5644# OUTPUT ************************************************************** #
5645#       FP_SCR1(a6) = scaled extended precision operand                 #
5646#       d0          = scale value                                       #
5647#                                                                       #
5648# ALGORITHM *********************************************************** #
5649#       Set the exponent of the input operand to 0x3fff. Save the value #
5650# of the difference between the original and new exponent. Then,        #
5651# normalize the operand if it was a DENORM. Add this normalization      #
5652# value to the previous value. Return the result.                       #
5653#                                                                       #
5654#########################################################################
5655
5656        global          scale_to_zero_dst
5657scale_to_zero_dst:
5658        mov.w           FP_SCR1_EX(%a6),%d1     # extract operand's {sgn,exp}
5659        mov.w           %d1,%d0                 # make a copy
5660
5661        andi.l          &0x7fff,%d1             # extract operand's exponent (upper word cleared too)
5662
5663        andi.w          &0x8000,%d0             # extract operand's sgn
5664        or.w            &0x3fff,%d0             # insert new operand's exponent(=0)
5665
5666        mov.w           %d0,FP_SCR1_EX(%a6)     # insert biased exponent
5667
5668        cmpi.b          DTAG(%a6),&DENORM       # is operand a DENORM?
5669        beq.b           stzd_denorm             # yes; normalize the DENORM
5670
5671stzd_norm:
5672        mov.l           &0x3fff,%d0             # d0 = BIAS
5673        sub.l           %d1,%d0                 # scale = BIAS + (-exp)
5674        rts
5675
5676stzd_denorm:
5677        lea             FP_SCR1(%a6),%a0        # pass ptr to dst op
5678        bsr.l           norm                    # normalize denorm
5679        neg.l           %d0                     # new exponent = -(shft val)
5680        mov.l           %d0,%d1                 # d1 = exponent consumed by stzd_norm
5681        bra.b           stzd_norm               # finish scaling
5682
5683##########################################################################
5684
5685#########################################################################
5686# XDEF **************************************************************** #
5687#       res_qnan(): return default result w/ QNAN operand for dyadic    #
5688#       res_snan(): return default result w/ SNAN operand for dyadic    #
5689#       res_qnan_1op(): return dflt result w/ QNAN operand for monadic  #
5690#       res_snan_1op(): return dflt result w/ SNAN operand for monadic  #
5691#                                                                       #
5692# XREF **************************************************************** #
5693#       None                                                            #
5694#                                                                       #
5695# INPUT *************************************************************** #
5696#       FP_SRC(a6) = pointer to extended precision src operand          #
5697#       FP_DST(a6) = pointer to extended precision dst operand          #
5698#                                                                       #
5699# OUTPUT ************************************************************** #
5700#       fp0 = default result                                            #
5701#                                                                       #
5702# ALGORITHM *********************************************************** #
5703#       If either operand (but not both operands) of an operation is a  #
5704# nonsignalling NAN, then that NAN is returned as the result. If both   #
5705# operands are nonsignalling NANs, then the destination operand         #
5706# nonsignalling NAN is returned as the result.                          #
5707#       If either operand to an operation is a signalling NAN (SNAN),   #
5708# then, the SNAN bit is set in the FPSR EXC byte. If the SNAN trap      #
5709# enable bit is set in the FPCR, then the trap is taken and the         #
5710# destination is not modified. If the SNAN trap enable bit is not set,  #
5711# then the SNAN is converted to a nonsignalling NAN (by setting the     #
5712# SNAN bit in the operand to one), and the operation continues as       #
5713# described in the preceding paragraph, for nonsignalling NANs.         #
5714#       Make sure the appropriate FPSR bits are set before exiting.     #
5715#                                                                       #
5716#########################################################################
5717
5718        global          res_qnan
5719        global          res_snan
5720res_qnan:
5721res_snan:
5722        cmp.b           DTAG(%a6), &SNAN        # is the dst an SNAN?
5723        beq.b           dst_snan2               # yes; dst SNAN is the result
5724        cmp.b           DTAG(%a6), &QNAN        # is the dst a  QNAN?
5725        beq.b           dst_qnan2               # yes; dst QNAN is the result
5726src_nan:
5727        cmp.b           STAG(%a6), &QNAN        # dst is not a NAN; is the src a QNAN?
5728        beq.b           src_qnan2               # yes; otherwise fall into the src SNAN case
5729        global          res_snan_1op
5730res_snan_1op:
5731src_snan2:
5732        bset            &0x6, FP_SRC_HI(%a6)    # set SNAN bit
5733        or.l            &nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6) # flag NAN, AIOP, SNAN
5734        lea             FP_SRC(%a6), %a0        # a0 = ptr to src operand (the result)
5735        bra.b           nan_comp
5736        global          res_qnan_1op
5737res_qnan_1op:
5738src_qnan2:
5739        or.l            &nan_mask, USER_FPSR(%a6) # flag NAN only
5740        lea             FP_SRC(%a6), %a0        # a0 = ptr to src operand (the result)
5741        bra.b           nan_comp
5742dst_snan2:
5743        or.l            &nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6) # flag NAN, AIOP, SNAN
5744        bset            &0x6, FP_DST_HI(%a6)    # set SNAN bit
5745        lea             FP_DST(%a6), %a0        # a0 = ptr to dst operand (the result)
5746        bra.b           nan_comp
5747dst_qnan2:
5748        lea             FP_DST(%a6), %a0        # a0 = ptr to dst operand (the result)
5749        cmp.b           STAG(%a6), &SNAN        # is the src an SNAN as well?
5750        bne             nan_done                # no; only the NAN flag is needed
5751        or.l            &aiop_mask+snan_mask, USER_FPSR(%a6) # yes; also flag AIOP, SNAN
5752nan_done:
5753        or.l            &nan_mask, USER_FPSR(%a6) # flag NAN
5754nan_comp:
5755        btst            &0x7, FTEMP_EX(%a0)     # is NAN neg?
5756        beq.b           nan_not_neg             # no
5757        or.l            &neg_mask, USER_FPSR(%a6) # yes; flag N
5758nan_not_neg:
5759        fmovm.x         (%a0), &0x80            # return the selected NAN in fp0
5760        rts
5761
5762#########################################################################
5763# XDEF **************************************************************** #
5764#       res_operr(): return default result during operand error         #
5765#                                                                       #
5766# XREF **************************************************************** #
5767#       None                                                            #
5768#                                                                       #
5769# INPUT *************************************************************** #
5770#       None                                                            #
5771#                                                                       #
5772# OUTPUT ************************************************************** #
5773#       fp0 = default operand error result                              #
5774#                                                                       #
5775# ALGORITHM *********************************************************** #
5776#       A nonsignalling NAN is returned as the default result when      #
5777# an operand error occurs for the following cases:                      #
5778#                                                                       #
5779#       Multiply: (Infinity x Zero)                                     #
5780#       Divide  : (Zero / Zero) || (Infinity / Infinity)                #
5781#                                                                       #
5782#########################################################################
5783
5784        global          res_operr
5785res_operr:
5786        or.l            &nan_mask+operr_mask+aiop_mask, USER_FPSR(%a6) # flag NAN, OPERR, AIOP
5787        fmovm.x         nan_return(%pc), &0x80  # fp0 = default NAN from the table below
5788        rts
5789
5790nan_return:
5791        long            0x7fff0000, 0xffffffff, 0xffffffff # default NAN: {sgn,exp} = 0x7fff, both mantissa longwords all ones
5792
5793#########################################################################
5794# XDEF **************************************************************** #
5795#       _denorm(): denormalize an intermediate result                   #
5796#                                                                       #
5797# XREF **************************************************************** #
5798#       None                                                            #
5799#                                                                       #
5800# INPUT *************************************************************** #
5801#       a0 = points to the operand to be denormalized                   #
5802#               (in the internal extended format)                       #
5803#                                                                       #
5804#       d0 = rounding precision                                         #
5805#                                                                       #
5806# OUTPUT ************************************************************** #
5807#       a0 = pointer to the denormalized result                         #
5808#               (in the internal extended format)                       #
5809#                                                                       #
5810#       d0 = guard,round,sticky                                         #
5811#                                                                       #
5812# ALGORITHM *********************************************************** #
5813#       According to the exponent underflow threshold for the given     #
5814# precision, shift the mantissa bits to the right in order to raise the #
5815# exponent of the operand to the threshold value. While shifting the    #
5816# mantissa bits right, maintain the value of the guard, round, and      #
5817# sticky bits.                                                          #
5818# other notes:                                                          #
5819#       (1) _denorm() is called by the underflow routines               #
5820#       (2) _denorm() does NOT affect the status register               #
5821#                                                                       #
5822#########################################################################
5823
5824#
5825# table of exponent threshold values for each precision
5826#
5827tbl_thresh:
5828        short           0x0                     # index 0 (presumably extended precision; confirm)
5829        short           sgl_thresh              # index 1: single-precision threshold
5830        short           dbl_thresh              # index 2: double-precision threshold
5831
5832        global          _denorm
5833_denorm:
5834#
5835# Load the exponent threshold for the precision selected and check
5836# to see if (threshold - exponent) is > 65, in which case we can
5837# simply calculate the sticky bit and zero the mantissa. Otherwise
5838# we have to call the denormalization routine.
5839#
5840        lsr.b           &0x2, %d0               # shift prec to lo bits
5841        mov.w           (tbl_thresh.b,%pc,%d0.w*2), %d1 # load prec threshold
5842        mov.w           %d1, %d0                # copy d1 into d0
5843        sub.w           FTEMP_EX(%a0), %d0      # diff = threshold - exp
5844        cmpi.w          %d0, &66                # is diff > 65? (mant + g,r bits)
5845        bpl.b           denorm_set_stky         # yes; just calc sticky
5846
5847        clr.l           %d0                     # clear g,r,s
5848        btst            &inex2_bit, FPSR_EXCEPT(%a6) # was INEX2 already set?
5849        beq.b           denorm_call             # no; don't change anything
5850        bset            &29, %d0                # yes; set sticky bit
5851
5852denorm_call:
5853        bsr.l           dnrm_lp                 # denormalize the number
5854        rts
5855
5856#
5857# all bits would have been shifted off during the denorm, so simply
5858# set the sticky bit in the return value and clear the entire mantissa.
5859#
5860denorm_set_stky:
5861        mov.l           &0x20000000, %d0        # set sticky bit in return value
5862        mov.w           %d1, FTEMP_EX(%a0)      # load exp with threshold
5863        clr.l           FTEMP_HI(%a0)           # clear ms mantissa longword
5864        clr.l           FTEMP_LO(%a0)           # clear ls mantissa longword
5865        rts
5866
5867#                                                                       #
5868# dnrm_lp(): denormalize exponent/mantissa to specified threshold       #
5869#                                                                       #
5870# INPUT:                                                                #
5871#       %a0        : points to the operand to be denormalized           #
5872#       %d0{31:29} : initial guard,round,sticky                         #
5873#       %d1{15:0}  : denormalization threshold                          #
5874# OUTPUT:                                                               #
5875#       %a0        : points to the denormalized operand                 #
5876#       %d0{31:29} : final guard,round,sticky                           #
5877#                                                                       #
# Also visible in the code: %d1 is clobbered, and the L_SCR1/L_SCR2 frame
# slots off %a6 are used as scratch.
5878
5879# *** Local Equates *** #
5880set     GRS,            L_SCR2                  # g,r,s temp storage
5881set     FTEMP_LO2,      L_SCR1                  # FTEMP_LO copy
5882
5883        global          dnrm_lp
5884dnrm_lp:
5885
5886#
5887# make a copy of FTEMP_LO and place the g,r,s bits directly after it
5888# in memory so as to make the bitfield extraction for denormalization easier.
# (this relies on L_SCR2 sitting immediately after L_SCR1 in the frame;
# the frame layout is defined outside this section -- confirm there.)
5889#
5890        mov.l           FTEMP_LO(%a0), FTEMP_LO2(%a6) # make FTEMP_LO copy
5891        mov.l           %d0, GRS(%a6)           # place g,r,s after it
5892
5893#
5894# check to see how much less than the underflow threshold the operand
5895# exponent is.
5896#
5897        mov.l           %d1, %d0                # copy the denorm threshold
5898        sub.w           FTEMP_EX(%a0), %d1      # d1 = threshold - uns exponent
5899        ble.b           dnrm_no_lp              # d1 <= 0: nothing to shift
5900        cmpi.w          %d1, &0x20              # is ( 0 < d1 < 32) ?
5901        blt.b           case_1                  # yes
5902        cmpi.w          %d1, &0x40              # is (32 <= d1 < 64) ?
5903        blt.b           case_2                  # yes
5904        bra.w           case_3                  # (d1 >= 64)
5905
5906#
5907# No denormalization necessary: operand is left untouched
5908#
5909dnrm_no_lp:
5910        mov.l           GRS(%a6), %d0           # restore original g,r,s
5911        rts
5912
5913#
5914# case (0<d1<32)
5915#
5916# %d0 = denorm threshold
5917# %d1 = "n" = amt to shift
5918#
5919#       ---------------------------------------------------------
5920#       |     FTEMP_HI    |     FTEMP_LO     |grs000.........000|
5921#       ---------------------------------------------------------
5922#       <-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
5923#       \          \                  \                  \
5924#        \          \                  \                  \
5925#         \          \                  \                  \
5926#          \          \                  \                  \
5927#           \          \                  \                  \
5928#            \          \                  \                  \
5929#             \          \                  \                  \
5930#              \          \                  \                  \
5931#       <-(n)-><-(32 - n)-><------(32)-------><------(32)------->
5932#       ---------------------------------------------------------
5933#       |0.....0| NEW_HI  |  NEW_FTEMP_LO     |grs              |
5934#       ---------------------------------------------------------
5935#
5936case_1:
# Shift the 64-bit mantissa right by n = %d1 (dnrm_lp only branches here for
# 0 < n < 32), store the threshold as the new exponent and return the bits
# that fell off the low end as g,r,s in %d0{31:29}.
# %d2 is a temp and is saved/restored on the stack; %d1 is clobbered.
5937        mov.l           %d2, -(%sp)             # create temp storage
5938
5939        mov.w           %d0, FTEMP_EX(%a0)      # exponent = denorm threshold
5940        mov.l           &32, %d0
5941        sub.w           %d1, %d0                # %d0 = 32 - %d1
5942
# For n >= 29 the incoming g,r,s byte is OR'ed into the low byte of the
# FTEMP_LO copy. Presumably this keeps incoming g,r,s bits that fall outside
# the 32-bit extraction window visible to the sticky test below.
5943        cmpi.w          %d1, &29                # is shft amt >= 29
5944        blt.b           case1_extract           # no; no fix needed
5945        mov.b           GRS(%a6), %d2
5946        or.b            %d2, 3+FTEMP_LO2(%a6)
5947
5948case1_extract:
# The bit fields below start (32 - n) bits into their base longword; the
# 32-bit wide ones run on into the following longword in memory.
5949        bfextu          FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_HI
5950        bfextu          FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new FTEMP_LO
5951        bfextu          FTEMP_LO2(%a6){%d0:&32}, %d0 # %d0 = new G,R,S
5952
5953        mov.l           %d2, FTEMP_HI(%a0)      # store new FTEMP_HI
5954        mov.l           %d1, FTEMP_LO(%a0)      # store new FTEMP_LO
5955
5956        bftst           %d0{&2:&30}             # anything set below g,r?
5957        beq.b           case1_sticky_clear      # no; go finish
5958        bset            &rnd_stky_bit, %d0      # yes; set sticky bit
5959
5960case1_sticky_clear:
5961        and.l           &0xe0000000, %d0        # clear all but G,R,S
5962        mov.l           (%sp)+, %d2             # restore temp register
5963        rts
5964
5965#
5966# case (32<=d1<64)
5967#
5968# %d0 = denorm threshold
5969# %d1 = "n" = amt to shift
5970#
5971#       ---------------------------------------------------------
5972#       |     FTEMP_HI    |     FTEMP_LO     |grs000.........000|
5973#       ---------------------------------------------------------
5974#       <-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
5975#       \          \                  \
5976#        \          \                  \
5977#         \          \                  -------------------
5978#          \          --------------------                 \
5979#           -------------------           \                 \
5980#                              \           \                 \
5981#                               \           \                 \
5982#                                \           \                 \
5983#       <-------(32)------><-(n)-><-(32 - n)-><------(32)------->
5984#       ---------------------------------------------------------
5985#       |0...............0|0....0| NEW_LO     |grs              |
5986#       ---------------------------------------------------------
5987#
5988case_2:
# Shift the 64-bit mantissa right by n = %d1 (dnrm_lp only branches here for
# 32 <= n < 64): FTEMP_HI becomes zero, FTEMP_LO receives the surviving top
# bits of the old FTEMP_HI, and g,r,s are returned in %d0{31:29}.
# %d2 is a temp and is saved/restored on the stack; %d1 is clobbered.
5989        mov.l           %d2, -(%sp)             # create temp storage
5990
5991        mov.w           %d0, FTEMP_EX(%a0)      # exponent = denorm threshold
5992        subi.w          &0x20, %d1              # %d1 now between 0 and 31
5993        mov.l           &0x20, %d0
5994        sub.w           %d1, %d0                # %d0 = 32 - %d1 (1..32)
5995
5996# subtle step here; or in the g,r,s at the bottom of FTEMP_LO to minimize
5997# the number of bits to check for the sticky detect.
5998# it only plays a role in shift amounts of 61-63.
5999        mov.b           GRS(%a6), %d2
6000        or.b            %d2, 3+FTEMP_LO2(%a6)
6001
6002        bfextu          FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_LO
6003        bfextu          FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new G,R,S
6004
6005        bftst           %d1{&2:&30}             # were any bits shifted off?
6006        bne.b           case2_set_sticky        # yes; set sticky bit
6007        bftst           FTEMP_LO2(%a6){%d0:&31} # any bits left in the lo copy (+ or'ed g,r,s)?
6008        bne.b           case2_set_sticky        # yes; set sticky bit
6009
6010        mov.l           %d1, %d0                # move new G,R,S to %d0
6011        bra.b           case2_end
6012
6013case2_set_sticky:
6014        mov.l           %d1, %d0                # move new G,R,S to %d0
6015        bset            &rnd_stky_bit, %d0      # set sticky bit
6016
6017case2_end:
6018        clr.l           FTEMP_HI(%a0)           # store FTEMP_HI = 0
6019        mov.l           %d2, FTEMP_LO(%a0)      # store FTEMP_LO
6020        and.l           &0xe0000000, %d0        # clear all but G,R,S
6021
6022        mov.l           (%sp)+,%d2              # restore temp register
6023        rts
6024
6025#
6026# case (d1>=64)
6027#
6028# %d0 = denorm threshold
6029# %d1 = amt to shift
6030#
6031case_3:
# Shift amount n = %d1 is >= 64, so the whole mantissa leaves the operand:
# FTEMP_HI/FTEMP_LO end up zero and only g,r,s (returned in %d0{31:29})
# can still carry information. n == 64 and n == 65 are handled exactly;
# anything larger collapses to "sticky only". %d1 is clobbered.
6032        mov.w           %d0, FTEMP_EX(%a0)      # insert denorm threshold
6033
6034        cmpi.w          %d1, &65                # is shift amt > 65?
6035        blt.b           case3_64                # no; it's == 64
6036        beq.b           case3_65                # no; it's == 65
6037
6038#
6039# case (d1>65)
6040#
6041# Shift value is > 65 and out of range. All bits are shifted off.
6042# Return a zero mantissa with the sticky bit set
6043#
6044        clr.l           FTEMP_HI(%a0)           # clear hi(mantissa)
6045        clr.l           FTEMP_LO(%a0)           # clear lo(mantissa)
6046        mov.l           &0x20000000, %d0        # set sticky bit
6047        rts
6048
6049#
6050# case (d1 == 64)
6051#
6052#       ---------------------------------------------------------
6053#       |     FTEMP_HI    |     FTEMP_LO     |grs000.........000|
6054#       ---------------------------------------------------------
6055#       <-------(32)------>
6056#       \                  \
6057#        \                  \
6058#         \                  \
6059#          \                  ------------------------------
6060#           -------------------------------                 \
6061#                                          \                 \
6062#                                           \                 \
6063#                                            \                 \
6064#                                             <-------(32)------>
6065#       ---------------------------------------------------------
6066#       |0...............0|0................0|grs               |
6067#       ---------------------------------------------------------
6068#
6069case3_64:
6070        mov.l           FTEMP_HI(%a0), %d0      # fetch hi(mantissa)
6071        mov.l           %d0, %d1                # make a copy
6072        and.l           &0xc0000000, %d0        # extract G,R
6073        and.l           &0x3fffffff, %d1        # extract other bits
6074
6075        bra.b           case3_complete
6076
6077#
6078# case (d1 == 65)
6079#
6080#       ---------------------------------------------------------
6081#       |     FTEMP_HI    |     FTEMP_LO     |grs000.........000|
6082#       ---------------------------------------------------------
6083#       <-------(32)------>
6084#       \                  \
6085#        \                  \
6086#         \                  \
6087#          \                  ------------------------------
6088#           --------------------------------                \
6089#                                           \                \
6090#                                            \                \
6091#                                             \                \
6092#                                              <-------(31)----->
6093#       ---------------------------------------------------------
6094#       |0...............0|0................0|0rs               |
6095#       ---------------------------------------------------------
6096#
6097case3_65:
6098        mov.l           FTEMP_HI(%a0), %d0      # fetch hi(mantissa)
# %d1 still holds the shift amount (65) here, so it must be reloaded with
# hi(mantissa) before the "other bits" mask; otherwise the mask result is
# always non-zero and case3_complete sets sticky unconditionally.
            mov.l           %d0, %d1                # make a copy
6099        and.l           &0x80000000, %d0        # extract R bit
6100        lsr.l           &0x1, %d0               # shift high bit into R bit
6101        and.l           &0x7fffffff, %d1        # extract other bits
6102
6103case3_complete:
6104# last operation done was an "and" of the bits shifted off so the condition
6105# codes are already set so branch accordingly.
6106        bne.b           case3_set_sticky        # yes; go set new sticky
6107        tst.l           FTEMP_LO(%a0)           # were any bits shifted off?
6108        bne.b           case3_set_sticky        # yes; go set new sticky
6109        tst.b           GRS(%a6)                # were any bits shifted off?
6110        bne.b           case3_set_sticky        # yes; go set new sticky
6111
6112#
6113# no bits were shifted off so don't set the sticky bit.
6114# %d0 already holds the new guard/round bits and
6115# the entire mantissa is zero.
6116#
6117        clr.l           FTEMP_HI(%a0)           # clear hi(mantissa)
6118        clr.l           FTEMP_LO(%a0)           # clear lo(mantissa)
6119        rts
6120
6121#
6122# some bits were shifted off so set the sticky bit.
6123# the entire mantissa is zero.
6124#
6125case3_set_sticky:
6126        bset            &rnd_stky_bit,%d0       # set new sticky bit
6127        clr.l           FTEMP_HI(%a0)           # clear hi(mantissa)
6128        clr.l           FTEMP_LO(%a0)           # clear lo(mantissa)
6129        rts
6130
6131#########################################################################
6132# XDEF **************************************************************** #
6133#       _round(): round result according to precision/mode              #
6134#                                                                       #
6135# XREF **************************************************************** #
6136#       None                                                            #
6137#                                                                       #
6138# INPUT *************************************************************** #
6139#       a0        = ptr to input operand in internal extended format    #
6140#       d1(hi)    = contains rounding precision:                        #
6141#                       ext = $0000xxxx                                 #
6142#                       sgl = $0004xxxx                                 #
6143#                       dbl = $0008xxxx                                 #
6144#       d1(lo)    = contains rounding mode:                             #
6145#                       RN  = $xxxx0000                                 #
6146#                       RZ  = $xxxx0001                                 #
6147#                       RM  = $xxxx0002                                 #
6148#                       RP  = $xxxx0003                                 #
6149#       d0{31:29} = contains the g,r,s bits (extended)                  #
6150#                                                                       #
6151# OUTPUT ************************************************************** #
6152#       a0 = pointer to rounded result                                  #
6153#                                                                       #
6154# ALGORITHM *********************************************************** #
6155#       On return the value pointed to by a0 is correctly rounded,      #
6156#       a0 is preserved and the g-r-s bits in d0 are cleared.           #
6157#       The result is not typed - the tag field is invalid.  The        #
6158#       result is still in the internal extended format.                #
6159#                                                                       #
6160#       The INEX bit of USER_FPSR will be set if the rounded result was #
6161#       inexact (i.e. if any of the g-r-s bits were set).               #
6162#                                                                       #
6163#########################################################################
6164
6165        global          _round
6166_round:
6167#
6168# ext_grs() looks at the rounding precision and sets the appropriate
6169# G,R,S bits.
6170# If (G,R,S == 0) then result is exact and round is done, else set
6171# the inex flag in status reg and continue.
6172#
6173        bsr.l           ext_grs                 # extract G,R,S
6174
6175        tst.l           %d0                     # are G,R,S zero?
6176        beq.w           truncate                # yes; round is complete
6177
6178        or.w            &inx2a_mask, 2+USER_FPSR(%a6) # set inex2/ainex
6179
6180#
6181# Use rounding mode as an index into a jump table for these modes.
6182# All of the following assumes grs != 0.
# %d1.w (the rounding mode; ext_grs swaps %d1 back before returning) selects
# a 16-bit entry of tbl_mode; each entry is a handler offset relative to
# tbl_mode itself. %a1 is clobbered.
6183#
6184        mov.w           (tbl_mode.b,%pc,%d1.w*2), %a1 # load jump offset
6185        jmp             (tbl_mode.b,%pc,%a1)    # jmp to rnd mode handler
6186
6187tbl_mode:
6188        short           rnd_near - tbl_mode
6189        short           truncate - tbl_mode     # RZ always truncates
6190        short           rnd_mnus - tbl_mode
6191        short           rnd_plus - tbl_mode
6192
6193#################################################################
6194#       ROUND PLUS INFINITY                                     #
6195#                                                               #
6196#       If sign of fp number = 0 (positive), then add 1 to l.   #
6197#################################################################
6198rnd_plus:
6199        tst.b           FTEMP_SGN(%a0)          # check for sign
6200        bmi.w           truncate                # negative: truncate
6201
# %d0 is forced non-zero so the "rs = 0" test in the add_* routines never
# clears the l-bit for this mode.
6202        mov.l           &0xffffffff, %d0        # force g,r,s to be all f's
6203        swap            %d1                     # set up d1 for round prec.
6204
6205        cmpi.b          %d1, &s_mode            # is prec = sgl?
6206        beq.w           add_sgl                 # yes
6207        bgt.w           add_dbl                 # no; it's dbl
6208        bra.w           add_ext                 # no; it's ext
6209
6210#################################################################
6211#       ROUND MINUS INFINITY                                    #
6212#                                                               #
6213#       If sign of fp number = 1 (negative), then add 1 to l.   #
6214#################################################################
6215rnd_mnus:
6216        tst.b           FTEMP_SGN(%a0)          # check for sign
6217        bpl.w           truncate                # positive: truncate
6218
# %d0 is forced non-zero so the "rs = 0" test in the add_* routines never
# clears the l-bit for this mode.
6219        mov.l           &0xffffffff, %d0        # force g,r,s to be all f's
6220        swap            %d1                     # set up d1 for round prec.
6221
6222        cmpi.b          %d1, &s_mode            # is prec = sgl?
6223        beq.w           add_sgl                 # yes
6224        bgt.w           add_dbl                 # no; it's dbl
6225        bra.w           add_ext                 # no; it's ext
6226
6227#################################################################
6228#       ROUND NEAREST                                           #
6229#                                                               #
6230#       If (g=1), then add 1 to l and if (r=s=0), then clear l  #
6231#       Note that this will round to even in case of a tie.     #
6232#################################################################
6233rnd_near:
# After the shift the carry holds g and %d0 holds only r,s, so the add_*
# routines can detect a tie with a plain "tst.l %d0".
6234        asl.l           &0x1, %d0               # shift g-bit to c-bit
6235        bcc.w           truncate                # g = 0: just truncate
6236
6237        swap            %d1                     # set up d1 for round prec.
6238
6239        cmpi.b          %d1, &s_mode            # is prec = sgl?
6240        beq.w           add_sgl                 # yes
6241        bgt.w           add_dbl                 # no; it's dbl
6242        bra.w           add_ext                 # no; it's ext
6243
6244# *** LOCAL EQUATES ***
6245set     ad_1_sgl,       0x00000100      # constant to add 1 to l-bit in sgl prec
6246set     ad_1_dbl,       0x00000800      # constant to add 1 to l-bit in dbl prec
6247
6248#########################
6249#       ADD SINGLE      #
6250#########################
# Round the mantissa at (%a0) up by one unit of single precision.
# %d0 == 0 on entry is treated as a tie (rs = 0) and the l-bit is cleared
# afterwards. sgl_done is also entered directly from truncate.
6251add_sgl:
6252        add.l           &ad_1_sgl, FTEMP_HI(%a0)
6253        bcc.b           scc_clr                 # no mantissa overflow
# carry out of the mantissa: rotate it back in through X, one word at a
# time, and bump the exponent.
6254        roxr.w          FTEMP_HI(%a0)           # shift v-bit back in
6255        roxr.w          FTEMP_HI+2(%a0)         # shift v-bit back in
6256        add.w           &0x1, FTEMP_EX(%a0)     # and incr exponent
6257scc_clr:
6258        tst.l           %d0                     # test for rs = 0
6259        bne.b           sgl_done
6260        and.w           &0xfe00, FTEMP_HI+2(%a0) # clear the l-bit
6261sgl_done:
6262        and.l           &0xffffff00, FTEMP_HI(%a0) # truncate bits beyond sgl limit
6263        clr.l           FTEMP_LO(%a0)           # clear lo(mantissa)
6264        rts
6265
6266#########################
6267#       ADD EXTENDED    #
6268#########################
6269add_ext:
# Round the 64-bit mantissa at (%a0) up by one unit in the last place.
# %d0 == 0 on entry is treated as a tie (rs = 0) and the l-bit is cleared.
6270        addq.l          &1,FTEMP_LO(%a0)        # add 1 to l-bit
6271        bcc.b           xcc_clr                 # test for carry out
6272        addq.l          &1,FTEMP_HI(%a0)        # propagate carry
6273        bcc.b           xcc_clr
# carry out of the full mantissa: rotate it back in through X across all
# four words and bump the exponent.
6274        roxr.w          FTEMP_HI(%a0)           # mant is 0 so restore v-bit
6275        roxr.w          FTEMP_HI+2(%a0)         # mant is 0 so restore v-bit
6276        roxr.w          FTEMP_LO(%a0)
6277        roxr.w          FTEMP_LO+2(%a0)
6278        add.w           &0x1,FTEMP_EX(%a0)      # and inc exp
6279xcc_clr:
6280        tst.l           %d0                     # test rs = 0
6281        bne.b           add_ext_done
6282        and.b           &0xfe,FTEMP_LO+3(%a0)   # clear the l bit
6283add_ext_done:
6284        rts
6285
6286#########################
6287#       ADD DOUBLE      #
6288#########################
6289add_dbl:
# Round the mantissa at (%a0) up by one unit of double precision.
# %d0 == 0 on entry is treated as a tie (rs = 0) and the l-bit is cleared.
# dbl_done is also entered directly from truncate.
6290        add.l           &ad_1_dbl, FTEMP_LO(%a0) # add 1 to lsb
6291        bcc.b           dcc_clr                 # no carry
6292        addq.l          &0x1, FTEMP_HI(%a0)     # propagate carry
6293        bcc.b           dcc_clr                 # no carry
6294
# carry out of the full mantissa: rotate it back in through X across all
# four words and bump the exponent.
6295        roxr.w          FTEMP_HI(%a0)           # mant is 0 so restore v-bit
6296        roxr.w          FTEMP_HI+2(%a0)         # mant is 0 so restore v-bit
6297        roxr.w          FTEMP_LO(%a0)
6298        roxr.w          FTEMP_LO+2(%a0)
6299        addq.w          &0x1, FTEMP_EX(%a0)     # incr exponent
6300dcc_clr:
6301        tst.l           %d0                     # test for rs = 0
6302        bne.b           dbl_done
6303        and.w           &0xf000, FTEMP_LO+2(%a0) # clear the l-bit (and the bits below it)
6304
6305dbl_done:
6306        and.l           &0xfffff800,FTEMP_LO(%a0) # truncate bits beyond dbl limit
6307        rts
6308
6309###########################
6310# Truncate all other bits #
6311###########################
6312truncate:
# Drop every mantissa bit below the selected precision without rounding up.
# Reuses the masking tails of add_sgl (sgl_done) and add_dbl (dbl_done);
# extended precision needs no masking. %d1 is left with its halves swapped.
6313        swap            %d1                     # select rnd prec
6314
6315        cmpi.b          %d1, &s_mode            # is prec sgl?
6316        beq.w           sgl_done                # yes
6317        bgt.b           dbl_done                # no; it's dbl
6318        rts                                     # no; it's ext
6319
6320
6321#
6322# ext_grs(): extract guard, round and sticky bits according to
6323#            rounding precision.
6324#
6325# INPUT
6326#       d0         = extended precision g,r,s (in d0{31:29})
6327#       d1         = {PREC,ROUND}
6328# OUTPUT
6329#       d0{31:29}  = guard, round, sticky
6330#
6331# The ext_grs extract the guard/round/sticky bits according to the
6332# selected rounding precision. It is called by the round subroutine
6333# only.  All registers except d0 are kept intact. d0 becomes an
6334# updated guard,round,sticky in d0{31:29}
6335#
6336# Notes: the ext_grs uses the round PREC, and therefore has to swap d1
6337#        prior to usage, and needs to restore d1 to original. this
6338#        routine is tightly tied to the round routine and not meant to
6339#        uphold standard subroutine calling practices.
6340#
6341
6342ext_grs:
6343        swap            %d1                     # have d1.w point to round precision
6344        tst.b           %d1                     # is rnd prec = extended?
6345        bne.b           ext_grs_not_ext         # no; go handle sgl or dbl
6346
6347#
6348# %d0 actually already hold g,r,s since _round() had it before calling
6349# this function. so, as long as we don't disturb it, we are "returning" it.
6350#
6351ext_grs_ext:
6352        swap            %d1                     # yes; return to correct positions
6353        rts
6354
6355ext_grs_not_ext:
6356        movm.l          &0x3000, -(%sp)         # make some temp registers {d2/d3}
6357
6358        cmpi.b          %d1, &s_mode            # is rnd prec = sgl?
6359        bne.b           ext_grs_dbl             # no; go handle dbl
6360
6361#
6362# sgl:
6363#       96              64        40    32              0
6364#       -----------------------------------------------------
6365#       | EXP   |XXXXXXX|         |xx   |               |grs|
6366#       -----------------------------------------------------
6367#                       <--(24)--->nn\                     /
6368#                                  ee ---------------------
6369#                                  ww           |
6370#                                               v
6371#                                  gr      new sticky
6372#
# new g,r = the two FTEMP_HI bits just below the 24-bit sgl mantissa;
# new sticky = OR of everything below them (rest of FTEMP_HI, all of
# FTEMP_LO and the incoming extended g,r,s in %d0).
6373ext_grs_sgl:
6374        bfextu          FTEMP_HI(%a0){&24:&2}, %d3 # sgl prec. g-r are 2 bits right
6375        mov.l           &30, %d2                # of the sgl prec. limits
6376        lsl.l           %d2, %d3                # shift g-r bits to MSB of d3
6377        mov.l           FTEMP_HI(%a0), %d2      # get word 2 for s-bit test
6378        and.l           &0x0000003f, %d2        # s bit is the or of all other
6379        bne.b           ext_grs_st_stky         # bits to the right of g-r
6380        tst.l           FTEMP_LO(%a0)           # test lower mantissa
6381        bne.b           ext_grs_st_stky         # if any are set, set sticky
6382        tst.l           %d0                     # test original g,r,s
6383        bne.b           ext_grs_st_stky         # if any are set, set sticky
6384        bra.b           ext_grs_end_sd          # nothing below g-r is set; exit
6385
6386#
6387# dbl:
6388#       96              64              32       11     0
6389#       -----------------------------------------------------
6390#       | EXP   |XXXXXXX|               |        |xx    |grs|
6391#       -----------------------------------------------------
6392#                                                 nn\       /
6393#                                                 ee -------
6394#                                                 ww    |
6395#                                                       v
6396#                                                 gr    new sticky
6397#
# new g,r = the two FTEMP_LO bits just below the dbl mantissa limit;
# new sticky = OR of the FTEMP_LO bits below them and the incoming
# extended g,r,s in %d0.
6398ext_grs_dbl:
6399        bfextu          FTEMP_LO(%a0){&21:&2}, %d3 # dbl-prec. g-r are 2 bits right
6400        mov.l           &30, %d2                # of the dbl prec. limits
6401        lsl.l           %d2, %d3                # shift g-r bits to the MSB of d3
6402        mov.l           FTEMP_LO(%a0), %d2      # get lower mantissa  for s-bit test
6403        and.l           &0x000001ff, %d2        # s bit is the or-ing of all
6404        bne.b           ext_grs_st_stky         # other bits to the right of g-r
6405        tst.l           %d0                     # test original g,r,s
6406        bne.b           ext_grs_st_stky         # if any are set, set sticky
6407        bra.b           ext_grs_end_sd          # if clear, exit
6408
6409ext_grs_st_stky:
6410        bset            &rnd_stky_bit, %d3      # set sticky bit
6411ext_grs_end_sd:
6412        mov.l           %d3, %d0                # return grs to d0
6413
6414        movm.l          (%sp)+, &0xc            # restore scratch registers {d2/d3}
6415
6416        swap            %d1                     # restore d1 to original
6417        rts
6418
6419#########################################################################
6420# norm(): normalize the mantissa of an extended precision input. the    #
6421#         input operand should not be normalized already.               #
6422#                                                                       #
6423# XDEF **************************************************************** #
6424#       norm()                                                          #
6425#                                                                       #
6426# XREF **************************************************************** #
6427#       none                                                            #
6428#                                                                       #
6429# INPUT *************************************************************** #
6430#       a0 = pointer fp extended precision operand to normalize         #
6431#                                                                       #
6432# OUTPUT ************************************************************** #
6433#       d0 = number of bit positions the mantissa was shifted           #
6434#       a0 = the input operand's mantissa is normalized; the exponent   #
6435#            is unchanged.                                              #
6436#                                                                       #
6437#########################################################################
6438        global          norm
6439norm:
# Left-justify the 64-bit mantissa at (%a0) and return the shift count in
# %d0. %d1 is clobbered; %d2/%d3 are saved and restored on the stack.
6440        mov.l           %d2, -(%sp)             # create some temp regs
6441        mov.l           %d3, -(%sp)
6442
6443        mov.l           FTEMP_HI(%a0), %d0      # load hi(mantissa)
6444        mov.l           FTEMP_LO(%a0), %d1      # load lo(mantissa)
6445
6446        bfffo           %d0{&0:&32}, %d2        # how many places to shift?
6447        beq.b           norm_lo                 # hi(man) is all zeroes!
6448
# hi(man) has a set bit: shift both halves left by %d2 and carry the top
# %d2 bits of lo(man) across into hi(man).
# NOTE(review): this path relies on the documented precondition that the
# operand is not already normalized; with %d2 == 0 the bfextu width would
# be zero, which the bit-field instructions treat as 32 -- confirm callers.
6449norm_hi:
6450        lsl.l           %d2, %d0                # left shift hi(man)
6451        bfextu          %d1{&0:%d2}, %d3        # extract lo bits
6452
6453        or.l            %d3, %d0                # create hi(man)
6454        lsl.l           %d2, %d1                # create lo(man)
6455
6456        mov.l           %d0, FTEMP_HI(%a0)      # store new hi(man)
6457        mov.l           %d1, FTEMP_LO(%a0)      # store new lo(man)
6458
6459        mov.l           %d2, %d0                # return shift amount
6460
6461        mov.l           (%sp)+, %d3             # restore temp regs
6462        mov.l           (%sp)+, %d2
6463
6464        rts
6465
# hi(man) is zero: the shifted lo(man) becomes the new hi(man) and the
# shift count is 32 plus the leading-zero count of lo(man).
6466norm_lo:
6467        bfffo           %d1{&0:&32}, %d2        # how many places to shift?
6468        lsl.l           %d2, %d1                # shift lo(man)
6469        add.l           &32, %d2                # add 32 to shft amount
6470
6471        mov.l           %d1, FTEMP_HI(%a0)      # store hi(man)
6472        clr.l           FTEMP_LO(%a0)           # lo(man) is now zero
6473
6474        mov.l           %d2, %d0                # return shift amount
6475
6476        mov.l           (%sp)+, %d3             # restore temp regs
6477        mov.l           (%sp)+, %d2
6478
6479        rts
6480
6481#########################################################################
6482# unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO     #
6483#               - returns corresponding optype tag                      #
6484#                                                                       #
6485# XDEF **************************************************************** #
6486#       unnorm_fix()                                                    #
6487#                                                                       #
6488# XREF **************************************************************** #
6489#       norm() - normalize the mantissa                                 #
6490#                                                                       #
6491# INPUT *************************************************************** #
6492#       a0 = pointer to unnormalized extended precision number          #
6493#                                                                       #
6494# OUTPUT ************************************************************** #
6495#       d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO  #
6496#       a0 = input operand has been converted to a norm, denorm, or     #
6497#            zero; both the exponent and mantissa are changed.          #
6498#                                                                       #
6499#########################################################################
6500
6501        global          unnorm_fix
6502unnorm_fix:
# Overview of the paths below (d0 = leading-zero count of the 64-bit
# mantissa, d1 = biased exponent with the sign stripped):
#   count <= exp : subtract count from the exponent, let norm() shift the
#                  mantissa, return NORM
#   count >  exp : shift the mantissa left by exp bits only, force the
#                  exponent to zero, return DENORM
#   mantissa = 0 : force the exponent to zero, return ZERO
# Every path keeps the sign bit of FTEMP_EX. d1 is used as scratch and is
# not saved or restored here.
6503        bfffo           FTEMP_HI(%a0){&0:&32}, %d0 # how many shifts are needed?
6504        bne.b           unnorm_shift            # hi(man) is not all zeroes
6505
6506#
6507# hi(man) is all zeroes so see if any bits in lo(man) are set
6508#
6509unnorm_chk_lo:
6510        bfffo           FTEMP_LO(%a0){&0:&32}, %d0 # is operand really a zero?
6511        beq.w           unnorm_zero             # yes
6512
# the count above only covers lo(man); the 32 zero bits of hi(man) are
# added so that d0 is the leading-zero count of the full mantissa.
6513        add.w           &32, %d0                # no; fix shift distance
6514
6515#
6516# d0 = # shifts needed for complete normalization
6517#
6518unnorm_shift:
# d1 is cleared as a long first because it is later used as a bit-field
# offset and as a shift count, not only as a 16-bit exponent.
6519        clr.l           %d1                     # clear top word
6520        mov.w           FTEMP_EX(%a0), %d1      # extract exponent
6521        and.w           &0x7fff, %d1            # strip off sgn
6522
6523        cmp.w           %d0, %d1                # will denorm push exp < 0?
6524        bgt.b           unnorm_nrm_zero         # yes; denorm only until exp = 0
6525
6526#
6527# exponent would not go < 0. Therefore, number stays normalized
# (a shift count equal to the exponent also falls through to here and
# leaves a new exponent of zero)
6528#
6529        sub.w           %d0, %d1                # shift exponent value
6530        mov.w           FTEMP_EX(%a0), %d0      # load old exponent
6531        and.w           &0x8000, %d0            # save old sign
6532        or.w            %d0, %d1                # {sgn,new exp}
6533        mov.w           %d1, FTEMP_EX(%a0)      # insert new exponent
6534
# norm() shifts the mantissa in place and returns its shift count in d0;
# that count is not used here since the tag is loaded into d0.b next.
6535        bsr.l           norm                    # normalize UNNORM
6536
6537        mov.b           &NORM, %d0              # return new optype tag
6538        rts
6539
6540#
6541# exponent would go < 0, so only denormalize until exp = 0
6542#
6543unnorm_nrm_zero:
# Here d1 = exp and exp < shift count <= 63, so a byte compare suffices.
6544        cmp.b           %d1, &32                # is exp <= 32?
6545        bgt.b           unnorm_nrm_zero_lrg     # no; go handle large exponent
6546
# Shift the 64-bit mantissa left by d1 (<= 32) bits. The bit field starts
# d1 bits past the top of hi(man) and is 32 bits wide, so for d1 > 0 it
# reaches into the longword after FTEMP_HI (assumed to be lo(man); the
# FTEMP_LO offset is defined elsewhere -- confirm).
6547        bfextu          FTEMP_HI(%a0){%d1:&32}, %d0 # extract new hi(man)
6548        mov.l           %d0, FTEMP_HI(%a0)      # save new hi(man)
6549
6550        mov.l           FTEMP_LO(%a0), %d0      # fetch old lo(man)
6551        lsl.l           %d1, %d0                # extract new lo(man)
6552        mov.l           %d0, FTEMP_LO(%a0)      # save new lo(man)
6553
6554        and.w           &0x8000, FTEMP_EX(%a0)  # set exp = 0
6555
6556        mov.b           &DENORM, %d0            # return new optype tag
6557        rts
6558
6559#
6560# only mantissa bits set are in lo(man)
# (reached when 32 < exp < shift count, so the leading-zero count exceeds
# 32 and hi(man) must be entirely zero)
6561#
6562unnorm_nrm_zero_lrg:
6563        sub.w           &32, %d1                # adjust shft amt by 32
6564
6565        mov.l           FTEMP_LO(%a0), %d0      # fetch old lo(man)
6566        lsl.l           %d1, %d0                # left shift lo(man)
6567
6568        mov.l           %d0, FTEMP_HI(%a0)      # store new hi(man)
6569        clr.l           FTEMP_LO(%a0)           # lo(man) = 0
6570
6571        and.w           &0x8000, FTEMP_EX(%a0)  # set exp = 0
6572
6573        mov.b           &DENORM, %d0            # return new optype tag
6574        rts
6575
6576#
6577# whole mantissa is zero so this UNNORM is actually a zero
6578#
6579unnorm_zero:
6580        and.w           &0x8000, FTEMP_EX(%a0)  # force exponent to zero
6581
6582        mov.b           &ZERO, %d0              # fix optype tag
6583        rts
6584
6585#########################################################################
6586# XDEF **************************************************************** #
6587#       set_tag_x(): return the optype of the input ext fp number       #
6588#                                                                       #
6589# XREF **************************************************************** #
6590#       None                                                            #
6591#                                                                       #
6592# INPUT *************************************************************** #
6593#       a0 = pointer to extended precision operand                      #
6594#                                                                       #
6595# OUTPUT ************************************************************** #
6596#       d0 = value of type tag                                          #
6597#               one of: NORM, INF, QNAN, SNAN, DENORM, UNNORM, ZERO     #
6598#                                                                       #
6599# ALGORITHM *********************************************************** #
6600#       Simply test the exponent, j-bit, and mantissa values to         #
6601# determine the type of operand.                                        #
6602#       If it's an unnormalized zero, alter the operand and force it    #
6603# to be a normal zero.                                                  #
6604#                                                                       #
6605#########################################################################
6606
6607        global          set_tag_x
set_tag_x:
# Classify the extended-precision operand at (a0); the tag comes back in
# d0.b. The only write to the operand is on the "unnormalized zero" path,
# where the exponent field is cleared (sign kept).
        mov.w           FTEMP_EX(%a0), %d0      # d0.w = {sgn,exp}
        and.w           &0x7fff, %d0            # keep the 15 exponent bits
        cmpi.w          %d0, &0x7fff            # exponent all ones?
        beq.b           inf_or_nan_x            # yes; INF or NAN
not_inf_or_nan_x:
        btst            &0x7, FTEMP_HI(%a0)     # top mantissa bit (j-bit) set?
        beq.b           not_norm_x              # no
is_norm_x:
        mov.b           &NORM, %d0
        rts

# j-bit is clear: exp == 0 means ZERO/DENORM, anything else is an UNNORM.
not_norm_x:
        tst.w           %d0                     # exponent == 0?
        bne.b           is_unnorm_x             # no
not_unnorm_x:
        tst.l           FTEMP_HI(%a0)           # any bit set in hi(man)?
        bne.b           is_denorm_x             # yes
        tst.l           FTEMP_LO(%a0)           # any bit set in lo(man)?
        beq.b           is_zero_x               # no; mantissa is all zero
is_denorm_x:
        mov.b           &DENORM, %d0
        rts
is_zero_x:
        mov.b           &ZERO, %d0
        rts

# exp != 0 with the j-bit clear. An all-zero mantissa here is an
# "unnormalized zero", which is rewritten in place as a true zero.
is_unnorm_x:
        tst.l           FTEMP_HI(%a0)
        bne.b           is_unnorm_reg_x         # real UNNORM
        tst.l           FTEMP_LO(%a0)
        bne.b           is_unnorm_reg_x         # real UNNORM
        andi.w          &0x8000, FTEMP_EX(%a0)  # exp = 0; sign untouched
        mov.b           &ZERO, %d0
        rts
is_unnorm_reg_x:
        mov.b           &UNNORM, %d0
        rts

# exp == max: a zero mantissa (ignoring its top bit) is INF, else NAN.
inf_or_nan_x:
        tst.l           FTEMP_LO(%a0)
        bne.b           is_nan_x
        mov.l           FTEMP_HI(%a0), %d0
        andi.l          &0x7fffffff, %d0        # top bit of hi(man) is ignored
        bne.b           is_nan_x
is_inf_x:
        mov.b           &INF, %d0
        rts
is_nan_x:
        btst            &0x6, FTEMP_HI(%a0)     # bit below the j-bit: 1 = quiet
        beq.b           is_snan_x
        mov.b           &QNAN, %d0
        rts
is_snan_x:
        mov.b           &SNAN, %d0
        rts
6664
6665#########################################################################
6666# XDEF **************************************************************** #
6667#       set_tag_d(): return the optype of the input dbl fp number       #
6668#                                                                       #
6669# XREF **************************************************************** #
6670#       None                                                            #
6671#                                                                       #
6672# INPUT *************************************************************** #
6673#       a0 = points to double precision operand                         #
6674#                                                                       #
6675# OUTPUT ************************************************************** #
6676#       d0 = value of type tag                                          #
6677#               one of: NORM, INF, QNAN, SNAN, DENORM, ZERO             #
6678#                                                                       #
6679# ALGORITHM *********************************************************** #
6680#       Simply test the exponent, j-bit, and mantissa values to         #
6681# determine the type of operand.                                        #
6682#                                                                       #
6683#########################################################################
6684
6685        global          set_tag_d
set_tag_d:
# Classify the double-precision operand at (a0); the tag comes back in
# d0.b. d1 is used as scratch. The operand itself is never written.
        mov.l           FTEMP(%a0), %d1         # d1 = first longword of operand
        mov.l           %d1, %d0                # d0 = copy for the exponent test

        and.l           &0x7ff00000, %d0        # isolate the exponent field
        beq.b           zero_or_denorm_d        # exp == 0

        cmpi.l          %d0, &0x7ff00000        # exp == max?
        beq.b           inf_or_nan_d            # yes

is_norm_d:
        mov.b           &NORM, %d0
        rts

# exp == 0: ZERO if all fraction bits are clear, DENORM otherwise.
zero_or_denorm_d:
        andi.l          &0x000fffff, %d1        # fraction bits in 1st longword
        bne             is_denorm_d
        tst.l           4+FTEMP(%a0)            # fraction bits in 2nd longword
        beq             is_zero_d
is_denorm_d:
        mov.b           &DENORM, %d0
        rts
is_zero_d:
        mov.b           &ZERO, %d0
        rts

# exp == max: INF if all fraction bits are clear, NAN otherwise.
inf_or_nan_d:
        andi.l          &0x000fffff, %d1        # fraction bits in 1st longword
        bne             is_nan_d
        tst.l           4+FTEMP(%a0)            # fraction bits in 2nd longword
        bne             is_nan_d
is_inf_d:
        mov.b           &INF, %d0
        rts
is_nan_d:
        btst            &19, %d1                # top fraction bit: 1 = quiet
        beq             is_snan_d
is_qnan_d:
        mov.b           &QNAN, %d0
        rts
is_snan_d:
        mov.b           &SNAN, %d0
        rts
6727
6728#########################################################################
6729# XDEF **************************************************************** #
6730#       set_tag_s(): return the optype of the input sgl fp number       #
6731#                                                                       #
6732# XREF **************************************************************** #
6733#       None                                                            #
6734#                                                                       #
6735# INPUT *************************************************************** #
6736#       a0 = pointer to single precision operand                        #
6737#                                                                       #
6738# OUTPUT ************************************************************** #
6739#       d0 = value of type tag                                          #
6740#               one of: NORM, INF, QNAN, SNAN, DENORM, ZERO             #
6741#                                                                       #
6742# ALGORITHM *********************************************************** #
6743#       Simply test the exponent, j-bit, and mantissa values to         #
6744# determine the type of operand.                                        #
6745#                                                                       #
6746#########################################################################
6747
6748        global          set_tag_s
set_tag_s:
# Classify the single-precision operand at (a0); the tag comes back in
# d0.b. d1 is used as scratch. The operand itself is never written.
        mov.l           FTEMP(%a0), %d1         # d1 = the operand longword
        mov.l           %d1, %d0                # d0 = copy for the exponent test

        and.l           &0x7f800000, %d0        # isolate the exponent field
        beq.b           zero_or_denorm_s        # exp == 0

        cmpi.l          %d0, &0x7f800000        # exp == max?
        beq.b           inf_or_nan_s            # yes

is_norm_s:
        mov.b           &NORM, %d0
        rts

# exp == 0: ZERO if the fraction is clear, DENORM otherwise.
zero_or_denorm_s:
        andi.l          &0x007fffff, %d1        # keep the fraction bits
        beq             is_zero_s
is_denorm_s:
        mov.b           &DENORM, %d0
        rts
is_zero_s:
        mov.b           &ZERO, %d0
        rts

# exp == max: INF if the fraction is clear, NAN otherwise.
inf_or_nan_s:
        andi.l          &0x007fffff, %d1        # keep the fraction bits
        beq             is_inf_s
is_nan_s:
        btst            &22, %d1                # top fraction bit: 1 = quiet
        beq             is_snan_s
is_qnan_s:
        mov.b           &QNAN, %d0
        rts
is_snan_s:
        mov.b           &SNAN, %d0
        rts
is_inf_s:
        mov.b           &INF, %d0
        rts
6786
6787#########################################################################
6788# XDEF **************************************************************** #
6789#       unf_res(): routine to produce default underflow result of a     #
6790#                  scaled extended precision number; this is used by    #
6791#                  fadd/fdiv/fmul/etc. emulation routines.              #
6792#       unf_res4(): same as above but for fsglmul/fsgldiv which use     #
6793#                   single round prec and extended prec mode.           #
6794#                                                                       #
6795# XREF **************************************************************** #
6796#       _denorm() - denormalize according to scale factor               #
6797#       _round() - round denormalized number according to rnd prec      #
6798#                                                                       #
6799# INPUT *************************************************************** #
6800#       a0 = pointer to extended precision operand                      #
6801#       d0 = scale factor                                               #
6802#       d1 = rounding precision/mode                                    #
6803#                                                                       #
6804# OUTPUT ************************************************************** #
6805#       a0 = pointer to default underflow result in extended precision  #
6806#       d0.b = result FPSR_cc which caller may or may not want to save  #
6807#                                                                       #
6808# ALGORITHM *********************************************************** #
6809#       Convert the input operand to "internal format" which means the  #
6810# exponent is extended to 16 bits and the sign is stored in the unused  #
6811# portion of the extended precision operand. Denormalize the number     #
6812# according to the scale factor passed in d0. Then, round the           #
6813# denormalized result.                                                  #
6814#       Set the FPSR_exc bits as appropriate but return the cc bits in  #
6815# d0 in case the caller doesn't want to save them (as is the case for   #
6816# fmove out).                                                           #
6817#       unf_res4() for fsglmul/fsgldiv forces the denorm to extended    #
6818# precision and the rounding mode to single.                            #
6819#                                                                       #
6820#########################################################################
6821        global          unf_res
6822unf_res:
# Stack usage: the caller's d1 (rnd prec,mode) is pushed first and the
# operand pointer second, so while both are on the stack
#   (%sp)    = saved a0
#   0x4(%sp) = saved d1 (long); 0x6(%sp) = its low word
# The final "add.l &0x4, %sp" at unf_res_end discards the saved d1.
6823        mov.l           %d1, -(%sp)             # save rnd prec,mode on stack
6824
# Move the sign out of the exponent word: FTEMP_SGN becomes 0xff when the
# sign bit (bit 7 of the first byte of FTEMP_EX) is set, 0x00 otherwise.
6825        btst            &0x7, FTEMP_EX(%a0)     # make "internal" format
6826        sne             FTEMP_SGN(%a0)
6827
# new exponent = (exponent without sign) - scale factor from d0, stored as
# a plain 16-bit value.
6828        mov.w           FTEMP_EX(%a0), %d1      # extract exponent
6829        and.w           &0x7fff, %d1
6830        sub.w           %d0, %d1
6831        mov.w           %d1, FTEMP_EX(%a0)      # insert 16 bit exponent
6832
6833        mov.l           %a0, -(%sp)             # save operand ptr during calls
6834
# _denorm argument: d0 = rnd prec field (mask 0xc0) shifted right by 4.
6835        mov.l           0x4(%sp),%d0            # pass rnd prec.
6836        andi.w          &0x00c0,%d0
6837        lsr.w           &0x4,%d0
6838        bsr.l           _denorm                 # denorm result
6839
# _round arguments: a0 = operand, hi(d1) = rnd prec (mask 0xc0 >> 4),
# lo(d1) = rnd mode (mask 0x30 >> 4). a0 is reloaded from the stack
# (presumably because _denorm may not preserve it -- confirm).
6840        mov.l           (%sp),%a0
6841        mov.w           0x6(%sp),%d1            # load prec:mode into %d1
6842        andi.w          &0xc0,%d1               # extract rnd prec
6843        lsr.w           &0x4,%d1
6844        swap            %d1
6845        mov.w           0x6(%sp),%d1
6846        andi.w          &0x30,%d1
6847        lsr.w           &0x4,%d1
6848        bsr.l           _round                  # round the denorm
6849
6850        mov.l           (%sp)+, %a0
6851
6852# result is now rounded properly. convert back to normal format
6853        bclr            &0x7, FTEMP_EX(%a0)     # clear sgn first; may have residue
6854        tst.b           FTEMP_SGN(%a0)          # is "internal result" sign set?
6855        beq.b           unf_res_chkifzero       # no; result is positive
6856        bset            &0x7, FTEMP_EX(%a0)     # set result sgn
6857        clr.b           FTEMP_SGN(%a0)          # clear temp sign
6858
6859# the number may have become zero after rounding. set ccodes accordingly.
# The ccode is built in d0 (returned to the caller) rather than written to
# FPSR_CC; d0 is zero unless the whole mantissa is zero.
6860unf_res_chkifzero:
6861        clr.l           %d0
6862        tst.l           FTEMP_HI(%a0)           # is value now a zero?
6863        bne.b           unf_res_cont            # no
6864        tst.l           FTEMP_LO(%a0)
6865        bne.b           unf_res_cont            # no
6866#       bset            &z_bit, FPSR_CC(%a6)    # yes; set zero ccode bit
6867        bset            &z_bit, %d0             # yes; set zero ccode bit
6868
6869unf_res_cont:
6870
6871#
6872# can inex1 also be set along with unfl and inex2???
6873#
6874# we know that underflow has occurred. aunfl should be set if INEX2 is also set.
6875#
6876        btst            &inex2_bit, FPSR_EXCEPT(%a6) # is INEX2 set?
6877        beq.b           unf_res_end             # no
6878        bset            &aunfl_bit, FPSR_AEXCEPT(%a6) # yes; set aunfl
6879
6880unf_res_end:
6881        add.l           &0x4, %sp               # clear stack
6882        rts
6883
6884# unf_res() for fsglmul() and fsgldiv().
6885        global          unf_res4
6886unf_res4:
# Same flow as unf_res, except that the precision passed to _denorm is
# forced to zero (extended) and the precision passed to _round is forced
# to s_mode (single); only the rounding mode comes from the caller's d1.
# Stack while both values are pushed:
#   (%sp) = saved a0, 0x4(%sp) = saved d1 (long), 0x6(%sp) = its low word
6887        mov.l           %d1,-(%sp)              # save rnd prec,mode on stack
6888
# FTEMP_SGN becomes 0xff when the sign bit of FTEMP_EX is set, else 0x00.
6889        btst            &0x7,FTEMP_EX(%a0)      # make "internal" format
6890        sne             FTEMP_SGN(%a0)
6891
# new exponent = (exponent without sign) - scale factor from d0
6892        mov.w           FTEMP_EX(%a0),%d1       # extract exponent
6893        and.w           &0x7fff,%d1
6894        sub.w           %d0,%d1
6895        mov.w           %d1,FTEMP_EX(%a0)       # insert 16 bit exponent
6896
6897        mov.l           %a0,-(%sp)              # save operand ptr during calls
6898
6899        clr.l           %d0                     # force rnd prec = ext
6900        bsr.l           _denorm                 # denorm result
6901
# _round arguments: a0 = operand, hi(d1) = s_mode, lo(d1) = rnd mode
# (mask 0x30 of the saved d1, shifted right by 4).
6902        mov.l           (%sp),%a0
6903        mov.w           &s_mode,%d1             # force rnd prec = sgl
6904        swap            %d1
6905        mov.w           0x6(%sp),%d1            # load rnd mode
6906        andi.w          &0x30,%d1               # extract rnd mode
6907        lsr.w           &0x4,%d1
6908        bsr.l           _round                  # round the denorm
6909
6910        mov.l           (%sp)+,%a0
6911
6912# result is now rounded properly. convert back to normal format
6913        bclr            &0x7,FTEMP_EX(%a0)      # clear sgn first; may have residue
6914        tst.b           FTEMP_SGN(%a0)          # is "internal result" sign set?
6915        beq.b           unf_res4_chkifzero      # no; result is positive
6916        bset            &0x7,FTEMP_EX(%a0)      # set result sgn
6917        clr.b           FTEMP_SGN(%a0)          # clear temp sign
6918
6919# the number may have become zero after rounding. set ccodes accordingly.
# The ccode is built in d0 (returned to the caller) rather than written to
# FPSR_CC; d0 is zero unless the whole mantissa is zero.
6920unf_res4_chkifzero:
6921        clr.l           %d0
6922        tst.l           FTEMP_HI(%a0)           # is value now a zero?
6923        bne.b           unf_res4_cont           # no
6924        tst.l           FTEMP_LO(%a0)
6925        bne.b           unf_res4_cont           # no
6926#       bset            &z_bit,FPSR_CC(%a6)     # yes; set zero ccode bit
6927        bset            &z_bit,%d0              # yes; set zero ccode bit
6928
6929unf_res4_cont:
6930
6931#
6932# can inex1 also be set along with unfl and inex2???
6933#
6934# we know that underflow has occurred. aunfl should be set if INEX2 is also set.
6935#
6936        btst            &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
6937        beq.b           unf_res4_end            # no
6938        bset            &aunfl_bit,FPSR_AEXCEPT(%a6) # yes; set aunfl
6939
6940unf_res4_end:
6941        add.l           &0x4,%sp                # clear stack
6942        rts
6943
6944#########################################################################
6945# XDEF **************************************************************** #
6946#       ovf_res(): routine to produce the default overflow result of    #
6947#                  an overflowing number.                               #
6948#       ovf_res2(): same as above but the rnd mode/prec are passed      #
6949#                   differently.                                        #
6950#                                                                       #
6951# XREF **************************************************************** #
6952#       none                                                            #
6953#                                                                       #
6954# INPUT *************************************************************** #
6955#       d1.b    = '-1' => (-); '0' => (+)                               #
6956#   ovf_res():                                                          #
6957#       d0      = rnd mode/prec                                         #
6958#   ovf_res2():                                                         #
6959#       hi(d0)  = rnd prec                                              #
6960#       lo(d0)  = rnd mode                                              #
6961#                                                                       #
6962# OUTPUT ************************************************************** #
6963#       a0      = points to extended precision result                   #
6964#       d0.b    = condition code bits                                   #
6965#                                                                       #
6966# ALGORITHM *********************************************************** #
6967#       The default overflow result can be determined by the sign of    #
6968# the result and the rounding mode/prec in effect. These bits are       #
6969# concatenated together to create an index into the default result      #
6970# table. A pointer to the correct result is returned in a0. The         #
6971# resulting condition codes are returned in d0 in case the caller       #
6972# doesn't want FPSR_cc altered (as is the case for fmove out).          #
6973#                                                                       #
6974#########################################################################
6975
6976        global          ovf_res
6977ovf_res:
# Both entry points build the same table index
#   idx = {sign (bit 4), rnd prec (bits 3:2), rnd mode (bits 1:0)}
# and leave d0 = idx and d1 = idx * 2 for ovf_res_load.
# Here d0 holds prec/mode in bits 7:4, so one shift right by 4 aligns both.
6978        andi.w          &0x10,%d1               # keep result sign
6979        lsr.b           &0x4,%d0                # shift prec/mode
6980        or.b            %d0,%d1                 # concat the two
6981        mov.w           %d1,%d0                 # make a copy
6982        lsl.b           &0x1,%d1                # multiply d1 by 2
6983        bra.b           ovf_res_load
6984
6985        global          ovf_res2
6986ovf_res2:
# Here the mode is in lo(d0) and the precision in hi(d0); both are or'ed in
# unshifted, so the caller is expected to pass them already positioned in
# bits 1:0 and 3:2 respectively (not checked here).
6987        and.w           &0x10, %d1              # keep result sign
6988        or.b            %d0, %d1                # insert rnd mode
6989        swap            %d0
6990        or.b            %d0, %d1                # insert rnd prec
6991        mov.w           %d1, %d0                # make a copy
6992        lsl.b           &0x1, %d1               # shift left by 1
6993
6994#
6995# use the rounding mode, precision, and result sign as an index into the
6996# two tables below to fetch the default result and the result ccodes.
6997#
# tbl_ovfl_cc is indexed by bytes (d0 * 1). tbl_ovfl_result rows are four
# longwords = 16 bytes, reached as (idx * 2) * 8. Both tables are addressed
# PC-relative with a ".b" displacement, so they should stay directly after
# this code (NOTE(review): confirm the displacement range the assembler
# allows before moving anything in between).
6998ovf_res_load:
6999        mov.b           (tbl_ovfl_cc.b,%pc,%d0.w*1), %d0 # fetch result ccodes
7000        lea             (tbl_ovfl_result.b,%pc,%d1.w*8), %a0 # return result ptr
7001
7002        rts
7003
# One ccode byte per index, four rounding modes per row. 0x2 appears
# exactly where the matching tbl_ovfl_result row is an infinity and 0x8
# exactly where it is negative (presumably the FPSR inf and neg ccode
# bits -- confirm against the FPSR definitions).
# Only 28 bytes are present: the positive side has a zero-filled row for
# precision encoding 3, the negative side has none.
7004tbl_ovfl_cc:
7005        byte            0x2, 0x0, 0x0, 0x2
7006        byte            0x2, 0x0, 0x0, 0x2
7007        byte            0x2, 0x0, 0x0, 0x2
7008        byte            0x0, 0x0, 0x0, 0x0
7009        byte            0x2+0x8, 0x8, 0x2+0x8, 0x8
7010        byte            0x2+0x8, 0x8, 0x2+0x8, 0x8
7011        byte            0x2+0x8, 0x8, 0x2+0x8, 0x8
7012
# Default results, 16 bytes per row: {sgn,exp} in the upper word of the
# first longword, then hi(man), lo(man), and a zero longword of padding.
# Rows are grouped by precision (ext, sgl, dbl), four rounding modes each;
# the all-zero group fills the slot of precision encoding 3 on the
# positive side so that the negative rows start at index 16.
7013tbl_ovfl_result:
7014        long            0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
7015        long            0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RZ
7016        long            0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RM
7017        long            0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
7018
7019        long            0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
7020        long            0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RZ
7021        long            0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RM
7022        long            0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
7023
7024        long            0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
7025        long            0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RZ
7026        long            0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RM
7027        long            0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
7028
7029        long            0x00000000,0x00000000,0x00000000,0x00000000
7030        long            0x00000000,0x00000000,0x00000000,0x00000000
7031        long            0x00000000,0x00000000,0x00000000,0x00000000
7032        long            0x00000000,0x00000000,0x00000000,0x00000000
7033
7034        long            0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
7035        long            0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RZ
7036        long            0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
7037        long            0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RP
7038
7039        long            0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
7040        long            0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RZ
7041        long            0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
7042        long            0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RP
7043
7044        long            0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
7045        long            0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RZ
7046        long            0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
7047        long            0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RP
7048
7049#########################################################################
7050# XDEF **************************************************************** #
7051#       fout(): move from fp register to memory or data register        #
7052#                                                                       #
7053# XREF **************************************************************** #
7054#       _round() - needed to create EXOP for sgl/dbl precision          #
7055#       norm() - needed to create EXOP for extended precision           #
7056#       ovf_res() - create default overflow result for sgl/dbl precision#
7057#       unf_res() - create default underflow result for sgl/dbl prec.   #
7058#       dst_dbl() - create rounded dbl precision result.                #
7059#       dst_sgl() - create rounded sgl precision result.                #
7060#       fetch_dreg() - fetch dynamic k-factor reg for packed.           #
7061#       bindec() - convert FP binary number to packed number.           #
7062#       _mem_write() - write data to memory.                            #
7063#       _mem_write2() - write data to memory unless supv mode -(a7) exc.#
7064#       _dmem_write_{byte,word,long}() - write data to memory.          #
7065#       store_dreg_{b,w,l}() - store data to data register file.        #
7066#       facc_out_{b,w,l,d,x}() - data access error occurred.            #
7067#                                                                       #
7068# INPUT *************************************************************** #
7069#       a0 = pointer to extended precision source operand               #
7070#       d0 = round prec,mode                                            #
7071#                                                                       #
7072# OUTPUT ************************************************************** #
7073#       fp0 : intermediate underflow or overflow result if              #
7074#             OVFL/UNFL occurred for a sgl or dbl operand               #
7075#                                                                       #
7076# ALGORITHM *********************************************************** #
7077#       This routine is accessed by many handlers that need to do an    #
7078# opclass three move of an operand out to memory.                       #
7079#       Decode an fmove out (opclass 3) instruction to determine if     #
7080# it's b,w,l,s,d,x, or p in size. b,w,l can be stored to either a data  #
7081# register or memory. The algorithm uses a standard "fmove" to create   #
7082# the rounded result. Also, since exceptions are disabled, this also    #
7083# creates the correct OPERR default result if appropriate.              #
7084#       For sgl or dbl precision, overflow or underflow can occur. If   #
7085# either occurs and is enabled, the EXOP must be created.               #
7086#       For extended precision, the stacked <ea> must be fixed along    #
7087# w/ the address index register as appropriate w/ _calc_ea_fout(). If   #
7088# the source is a denorm and if underflow is enabled, an EXOP must be   #
7089# created.                                                              #
7090#       For packed, the k-factor must be fetched from the instruction   #
7091# word or a data register. The <ea> must be fixed as w/ extended        #
7092# precision. Then, bindec() is called to create the appropriate         #
7093# packed result.                                                        #
7094#       If at any time an access error is flagged by one of the move-   #
7095# to-memory routines, then a special exit must be made so that the      #
7096# access error can be handled properly.                                 #
7097#                                                                       #
7098#########################################################################
7099
7100        global          fout
7101fout:
# Dispatch on the destination format: the 3-bit field at bit offset 3 of
# EXC_CMDREG selects one of the eight 16-bit entries of tbl_fout. Each
# entry is the handler's offset from tbl_fout, so the jump below is
# PC-relative to the table itself. a0 and d0 (the routine's inputs) are
# passed through untouched; d1 and a1 are used as scratch.
7102        bfextu          EXC_CMDREG(%a6){&3:&3},%d1 # extract dst fmt
7103        mov.w           (tbl_fout.b,%pc,%d1.w*2),%a1 # use as index
7104        jmp             (tbl_fout.b,%pc,%a1)    # jump to routine
7105
# Entry order follows the format field value 0..7; values 3 and 7 both
# lead to fout_pack. "swbeg &0x8" announces the 8 entries (presumably a
# switch-table marker for the assembler -- confirm).
7106        swbeg           &0x8
7107tbl_fout:
7108        short           fout_long       -       tbl_fout
7109        short           fout_sgl        -       tbl_fout
7110        short           fout_ext        -       tbl_fout
7111        short           fout_pack       -       tbl_fout
7112        short           fout_word       -       tbl_fout
7113        short           fout_dbl        -       tbl_fout
7114        short           fout_byte       -       tbl_fout
7115        short           fout_pack       -       tbl_fout
7116
7117#################################################################
7118# fmove.b out ###################################################
7119#################################################################
7120
7121# Only "Unimplemented Data Type" exceptions enter here. The operand
7122# is either a DENORM or a NORM.
#
# In:   a0 = ptr to the extended precision source operand
#       d0 = rnd prec,mode; loaded into the FPCR for the conversion
#       STAG(%a6) = source tag (zero takes the NORM path, anything
#                   else the DENORM path)
# Out:  the converted byte (left in d0) is handed to store_dreg_b when
#       the destination mode field of the opword is zero, otherwise to
#       _dmem_write_byte with a0 = EXC_EA(%a6).
#       The FPSR bits produced by the conversion are or'ed into the low
#       word of USER_FPSR(%a6).
#       A non-zero d1 after _dmem_write_byte diverts to facc_out_b.
7123fout_byte:
7124        tst.b           STAG(%a6)               # is operand normalized?
7125        bne.b           fout_byte_denorm        # no
7126
7127        fmovm.x         SRC(%a0),&0x80          # load value into fp0
7128
7129fout_byte_norm:
7130        fmov.l          %d0,%fpcr               # insert rnd prec,mode
7131
7132        fmov.b          %fp0,%d0                # exec move out w/ correct rnd mode
7133
7134        fmov.l          &0x0,%fpcr              # clear FPCR
7135        fmov.l          %fpsr,%d1               # fetch FPSR
7136        or.w            %d1,2+USER_FPSR(%a6)    # save new exc,accrued bits
7137
7138        mov.b           1+EXC_OPWORD(%a6),%d1   # extract dst mode
7139        andi.b          &0x38,%d1               # is mode == 0? (Dreg dst)
7140        beq.b           fout_byte_dn            # must save to integer regfile
7141
7142        mov.l           EXC_EA(%a6),%a0         # stacked <ea> is correct
7143        bsr.l           _dmem_write_byte        # write byte
7144
7145        tst.l           %d1                     # did dstore fail?
7146        bne.l           facc_out_b              # yes
7147
7148        rts
7149
# destination is a data register: the low 3 bits of the opword's low byte
# give the register number, passed to store_dreg_b in d1.
7150fout_byte_dn:
7151        mov.b           1+EXC_OPWORD(%a6),%d1   # extract Dn
7152        andi.w          &0x7,%d1
7153        bsr.l           store_dreg_b
7154        rts
7155
# the DENORM itself is not converted. it is replaced by the smallest
# normalized single precision value (exponent field 1, zero fraction)
# carrying the DENORM's sign, and that value goes through the NORM path.
7156fout_byte_denorm:
7157        mov.l           SRC_EX(%a0),%d1
7158        andi.l          &0x80000000,%d1         # keep DENORM sign
7159        ori.l           &0x00800000,%d1         # make smallest sgl
7160        fmov.s          %d1,%fp0
7161        bra.b           fout_byte_norm
7162
7163#################################################################
7164# fmove.w out ###################################################
7165#################################################################
7166
7167# Only "Unimplemented Data Type" exceptions enter here. The operand
7168# is either a DENORM or a NORM.
#
# In:   a0 = ptr to the extended precision source operand
#       d0 = rnd prec,mode; loaded into the FPCR for the conversion
#       STAG(%a6) = source tag (zero takes the NORM path, anything
#                   else the DENORM path)
# Out:  the converted word (left in d0) is handed to store_dreg_w when
#       the destination mode field of the opword is zero, otherwise to
#       _dmem_write_word with a0 = EXC_EA(%a6).
#       The FPSR bits produced by the conversion are or'ed into the low
#       word of USER_FPSR(%a6).
#       A non-zero d1 after _dmem_write_word diverts to facc_out_w.
7169fout_word:
7170        tst.b           STAG(%a6)               # is operand normalized?
7171        bne.b           fout_word_denorm        # no
7172
7173        fmovm.x         SRC(%a0),&0x80          # load value into fp0
7174
7175fout_word_norm:
7176        fmov.l          %d0,%fpcr               # insert rnd prec:mode
7177
7178        fmov.w          %fp0,%d0                # exec move out w/ correct rnd mode
7179
7180        fmov.l          &0x0,%fpcr              # clear FPCR
7181        fmov.l          %fpsr,%d1               # fetch FPSR
7182        or.w            %d1,2+USER_FPSR(%a6)    # save new exc,accrued bits
7183
7184        mov.b           1+EXC_OPWORD(%a6),%d1   # extract dst mode
7185        andi.b          &0x38,%d1               # is mode == 0? (Dreg dst)
7186        beq.b           fout_word_dn            # must save to integer regfile
7187
7188        mov.l           EXC_EA(%a6),%a0         # stacked <ea> is correct
7189        bsr.l           _dmem_write_word        # write word
7190
7191        tst.l           %d1                     # did dstore fail?
7192        bne.l           facc_out_w              # yes
7193
7194        rts
7195
# destination is a data register: the low 3 bits of the opword's low byte
# give the register number, passed to store_dreg_w in d1.
7196fout_word_dn:
7197        mov.b           1+EXC_OPWORD(%a6),%d1   # extract Dn
7198        andi.w          &0x7,%d1
7199        bsr.l           store_dreg_w
7200        rts
7201
# the DENORM itself is not converted. it is replaced by the smallest
# normalized single precision value (exponent field 1, zero fraction)
# carrying the DENORM's sign, and that value goes through the NORM path.
7202fout_word_denorm:
7203        mov.l           SRC_EX(%a0),%d1
7204        andi.l          &0x80000000,%d1         # keep DENORM sign
7205        ori.l           &0x00800000,%d1         # make smallest sgl
7206        fmov.s          %d1,%fp0
7207        bra.b           fout_word_norm
7208
7209#################################################################
7210# fmove.l out ###################################################
7211#################################################################
7212
7213# Only "Unimplemented Data Type" exceptions enter here. The operand
7214# is either a DENORM or a NORM.
#
# In:   a0 = ptr to the extended precision source operand
#       d0 = rnd prec,mode; loaded into the FPCR for the conversion
#       STAG(%a6) = source tag (zero takes the NORM path, anything
#                   else the DENORM path)
# Out:  the converted longword (left in d0) is handed to store_dreg_l
#       when the destination mode field of the opword is zero, otherwise
#       to _dmem_write_long with a0 = EXC_EA(%a6).
#       The FPSR bits produced by the conversion are or'ed into the low
#       word of USER_FPSR(%a6).
#       A non-zero d1 after _dmem_write_long diverts to facc_out_l.
7215fout_long:
7216        tst.b           STAG(%a6)               # is operand normalized?
7217        bne.b           fout_long_denorm        # no
7218
7219        fmovm.x         SRC(%a0),&0x80          # load value into fp0
7220
7221fout_long_norm:
7222        fmov.l          %d0,%fpcr               # insert rnd prec:mode
7223
7224        fmov.l          %fp0,%d0                # exec move out w/ correct rnd mode
7225
7226        fmov.l          &0x0,%fpcr              # clear FPCR
7227        fmov.l          %fpsr,%d1               # fetch FPSR
7228        or.w            %d1,2+USER_FPSR(%a6)    # save new exc,accrued bits
7229
# d0 holds the longword to store from here on.
7230fout_long_write:
7231        mov.b           1+EXC_OPWORD(%a6),%d1   # extract dst mode
7232        andi.b          &0x38,%d1               # is mode == 0? (Dreg dst)
7233        beq.b           fout_long_dn            # must save to integer regfile
7234
7235        mov.l           EXC_EA(%a6),%a0         # stacked <ea> is correct
7236        bsr.l           _dmem_write_long        # write long
7237
7238        tst.l           %d1                     # did dstore fail?
7239        bne.l           facc_out_l              # yes
7240
7241        rts
7242
# destination is a data register: the low 3 bits of the opword's low byte
# give the register number, passed to store_dreg_l in d1.
7243fout_long_dn:
7244        mov.b           1+EXC_OPWORD(%a6),%d1   # extract Dn
7245        andi.w          &0x7,%d1
7246        bsr.l           store_dreg_l
7247        rts
7248
# the DENORM itself is not converted. it is replaced by the smallest
# normalized single precision value (exponent field 1, zero fraction)
# carrying the DENORM's sign, and that value goes through the NORM path.
7249fout_long_denorm:
7250        mov.l           SRC_EX(%a0),%d1
7251        andi.l          &0x80000000,%d1         # keep DENORM sign
7252        ori.l           &0x00800000,%d1         # make smallest sgl
7253        fmov.s          %d1,%fp0
7254        bra.b           fout_long_norm
7255
7256#################################################################
7257# fmove.x out ###################################################
7258#################################################################
7259
7260# Only "Unimplemented Data Type" exceptions enter here. The operand
7261# is either a DENORM or a NORM.
7262# The DENORM causes an Underflow exception.
#
# In:   a0 = ptr to the extended precision source operand
#       STAG(%a6) = source tag (zero = NORM, non-zero handled as DENORM)
# Out:  fp0 = source operand
#       12 bytes (with the reserved 16-bit field zeroed) are written via
#       _dmem_write, or via _mem_write2 when SPCOND_FLG(%a6) == mda7_flg
#       for a DENORM: the underflow bit is set in FPSR_EXCEPT(%a6) and,
#       if FPCR_ENABLE(%a6) has any bit of mask 0x0a set, fp1 = EXOP
#       a failed write exits through fout_ext_err -> facc_out_x
7263fout_ext:
7264
7265# we copy the extended precision result to FP_SCR0 so that the reserved
7266# 16-bit field gets zeroed. we do this since we promise not to disturb
7267# what's at SRC(a0).
7268        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
7269        clr.w           2+FP_SCR0_EX(%a6)       # clear reserved field
7270        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
7271        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
7272
7273        fmovm.x         SRC(%a0),&0x80          # return result in fp0
7274
7275        bsr.l           _calc_ea_fout           # fix stacked <ea>; returns dst addr in a0
7276
7277        mov.l           %a0,%a1                 # pass: dst addr
7278        lea             FP_SCR0(%a6),%a0        # pass: src addr
7279        mov.l           &0xc,%d0                # pass: opsize is 12 bytes
7280
7281# we must not yet write the extended precision data to the stack
7282# in the pre-decrement case from supervisor mode or else we'll corrupt
7283# the stack frame. so, in that case the copy in FP_SCR0 is handed to
# _mem_write2() instead of _dmem_write() (see fout_ext_a7).
7284        cmpi.b          SPCOND_FLG(%a6),&mda7_flg
7285        beq.b           fout_ext_a7
7286
7287        bsr.l           _dmem_write             # write ext prec number to memory
7288
7289        tst.l           %d1                     # did dstore fail?
7290        bne.w           fout_ext_err            # yes
7291
7292        tst.b           STAG(%a6)               # is operand normalized?
7293        bne.b           fout_ext_denorm         # no
7294        rts
7295
7296# the number is a DENORM. must set the underflow exception bit
7297fout_ext_denorm:
7298        bset            &unfl_bit,FPSR_EXCEPT(%a6) # set underflow exc bit
7299
7300        mov.b           FPCR_ENABLE(%a6),%d0
7301        andi.b          &0x0a,%d0               # is UNFL or INEX enabled?
7302        bne.b           fout_ext_exc            # yes
7303        rts
7304
7305# we don't want to do the write if the exception occurred in supervisor mode
7306# so _mem_write2() handles this for us.
7307fout_ext_a7:
7308        bsr.l           _mem_write2             # write ext prec number to memory
7309
7310        tst.l           %d1                     # did dstore fail?
7311        bne.w           fout_ext_err            # yes
7312
7313        tst.b           STAG(%a6)               # is operand normalized?
7314        bne.b           fout_ext_denorm         # no
7315        rts
7316
# build the EXOP for the enabled-exception case: normalize the copy in
# FP_SCR0, use the negated shift count returned by norm() in d0 (masked
# to 15 bits) as the new exponent, keep the original sign, and return the
# result in fp1.
7317fout_ext_exc:
7318        lea             FP_SCR0(%a6),%a0
7319        bsr.l           norm                    # normalize the mantissa
7320        neg.w           %d0                     # new exp = -(shft amt)
7321        andi.w          &0x7fff,%d0
7322        andi.w          &0x8000,FP_SCR0_EX(%a6) # keep only old sign
7323        or.w            %d0,FP_SCR0_EX(%a6)     # insert new exponent
7324        fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
7325        rts
7326
# the memory write failed: restore the stacked a6 from EXC_A6 and leave
# through the extended precision access error exit.
7327fout_ext_err:
7328        mov.l           EXC_A6(%a6),(%a6)       # fix stacked a6
7329        bra.l           facc_out_x
7330
7331#########################################################################
7332# fmove.s out ###########################################################
7333#########################################################################
#
# In:   a0 = ptr to the extended precision source operand
#       d0 = rnd prec,mode as passed by the caller
# The precision field of d0 is forced to single and the result is kept in
# L_SCR3(%a6) for every sub-path. The biased source exponent is then
# compared (signed) against SGL_HI / SGL_LO:
#       exp >  SGL_HI -> fout_sgl_ovfl
#       exp == SGL_HI -> fout_sgl_may_ovfl
#       exp <  SGL_LO -> fout_sgl_unfl
#       otherwise     -> falls through to fout_sgl_exg
7334fout_sgl:
7335        andi.b          &0x30,%d0               # clear rnd prec
7336        ori.b           &s_mode*0x10,%d0        # insert sgl prec
7337        mov.l           %d0,L_SCR3(%a6)         # save rnd prec,mode on stack
7338
7339#
7340# operand is a normalized number. first, we check to see if the move out
7341# would cause either an underflow or overflow. these cases are handled
7342# separately. otherwise, set the FPCR to the proper rounding mode and
7343# execute the move.
7344#
7345        mov.w           SRC_EX(%a0),%d0         # extract exponent
7346        andi.w          &0x7fff,%d0             # strip sign
7347
7348        cmpi.w          %d0,&SGL_HI             # will operand overflow?
7349        bgt.w           fout_sgl_ovfl           # yes; go handle OVFL
7350        beq.w           fout_sgl_may_ovfl       # maybe; go handle possible OVFL
7351        cmpi.w          %d0,&SGL_LO             # will operand underflow?
7352        blt.w           fout_sgl_unfl           # yes; go handle underflow
7353
7354#
7355# NORMs(in range) can be stored out by a simple "fmov.s"
7356# Unnormalized inputs can come through this point.
7357#
# fout_sgl_may_ovfl also branches here when rounding did not raise the
# exponent. a0 must still point at the source operand.
7358fout_sgl_exg:
7359        fmovm.x         SRC(%a0),&0x80          # fetch fop from stack
7360
7361        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
7362        fmov.l          &0x0,%fpsr              # clear FPSR
7363
7364        fmov.s          %fp0,%d0                # store does convert and round
7365
7366        fmov.l          &0x0,%fpcr              # clear FPCR
7367        fmov.l          %fpsr,%d1               # save FPSR
7368
7369        or.w            %d1,2+USER_FPSR(%a6)    # set possible inex2/ainex
7370
# d0 holds the single precision bit pattern to store from here on.
7371fout_sgl_exg_write:
7372        mov.b           1+EXC_OPWORD(%a6),%d1   # extract dst mode
7373        andi.b          &0x38,%d1               # is mode == 0? (Dreg dst)
7374        beq.b           fout_sgl_exg_write_dn   # must save to integer regfile
7375
7376        mov.l           EXC_EA(%a6),%a0         # stacked <ea> is correct
7377        bsr.l           _dmem_write_long        # write long
7378
7379        tst.l           %d1                     # did dstore fail?
7380        bne.l           facc_out_l              # yes
7381
7382        rts
7383
# destination is a data register: the low 3 bits of the opword's low byte
# give the register number, passed to store_dreg_l in d1.
7384fout_sgl_exg_write_dn:
7385        mov.b           1+EXC_OPWORD(%a6),%d1   # extract Dn
7386        andi.w          &0x7,%d1
7387        bsr.l           store_dreg_l
7388        rts
7389
7390#
7391# here, we know that the operand would UNFL if moved out to single prec,
7392# so, denorm and round and then use generic store single routine to
7393# write the value to memory.
7394#
# In:   a0 = ptr to the source operand, L_SCR3(%a6) = sgl prec,rnd mode
# The source is copied to FP_SCR0 and a0 is pushed on the stack. The
# pushed a0 is either popped by fout_sd_exc_unfl (EXOP needed) or
# discarded by the addq before the rts.
# NOTE(review): on the facc_out_l exit the pushed a0 is still on the
# stack - presumably that exit does not rely on sp; confirm.
7395fout_sgl_unfl:
7396        bset            &unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
7397
7398        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
7399        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
7400        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
7401        mov.l           %a0,-(%sp)              # save src ptr for the EXOP code
7402
7403        clr.l           %d0                     # pass: S.F. = 0
7404
7405        cmpi.b          STAG(%a6),&DENORM       # fetch src optype tag
7406        bne.b           fout_sgl_unfl_cont      # let DENORMs fall through
7407
7408        lea             FP_SCR0(%a6),%a0
7409        bsr.l           norm                    # normalize the DENORM
7410
7411fout_sgl_unfl_cont:
7412        lea             FP_SCR0(%a6),%a0        # pass: ptr to operand
7413        mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
7414        bsr.l           unf_res                 # calc default underflow result
7415
7416        lea             FP_SCR0(%a6),%a0        # pass: ptr to fop
7417        bsr.l           dst_sgl                 # convert to single prec
7418
7419        mov.b           1+EXC_OPWORD(%a6),%d1   # extract dst mode
7420        andi.b          &0x38,%d1               # is mode == 0? (Dreg dst)
7421        beq.b           fout_sgl_unfl_dn        # must save to integer regfile
7422
7423        mov.l           EXC_EA(%a6),%a0         # stacked <ea> is correct
7424        bsr.l           _dmem_write_long        # write long
7425
7426        tst.l           %d1                     # did dstore fail?
7427        bne.l           facc_out_l              # yes
7428
7429        bra.b           fout_sgl_unfl_chkexc
7430
7431fout_sgl_unfl_dn:
7432        mov.b           1+EXC_OPWORD(%a6),%d1   # extract Dn
7433        andi.w          &0x7,%d1
7434        bsr.l           store_dreg_l
7435
# the result has been stored. an EXOP is only built when one of the
# enable bits in mask 0x0a is set in FPCR_ENABLE(%a6).
7436fout_sgl_unfl_chkexc:
7437        mov.b           FPCR_ENABLE(%a6),%d1
7438        andi.b          &0x0a,%d1               # is UNFL or INEX enabled?
7439        bne.w           fout_sd_exc_unfl        # yes
7440        addq.l          &0x4,%sp                # no; drop the saved src ptr
7441        rts
7442
7443#
7444# it's definitely an overflow so call ovf_res to get the correct answer
7445#
# In:   a0 = ptr to the source operand, L_SCR3(%a6) = sgl prec,rnd mode
# Out:  default overflow result stored to Dn or to EXC_EA(%a6);
#       ovfl/aovfl/ainex (plus inex2 when mantissa bits below single
#       precision are non-zero) or'ed into USER_FPSR(%a6);
#       fp1 = EXOP when the OVFL or INEX2 exception is enabled.
# a0 is pushed on the stack; it is popped by fout_sd_exc_ovfl or
# discarded by the addq before the rts.
# NOTE(review): on the facc_out_l exit the pushed a0 is still on the
# stack - presumably that exit does not rely on sp; confirm.
7446fout_sgl_ovfl:
7447        tst.b           3+SRC_HI(%a0)           # is result inexact?
7448        bne.b           fout_sgl_ovfl_inex2
7449        tst.l           SRC_LO(%a0)             # is result inexact?
7450        bne.b           fout_sgl_ovfl_inex2
7451        ori.w           &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
7452        bra.b           fout_sgl_ovfl_cont
7453fout_sgl_ovfl_inex2:
7454        ori.w           &ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
7455
7456fout_sgl_ovfl_cont:
7457        mov.l           %a0,-(%sp)              # save src ptr for the EXOP code
7458
7459# call ovf_res() w/ sgl prec and the correct rnd mode to create the default
7460# overflow result. DON'T save the returned ccodes from ovf_res() since
7461# fmove out doesn't alter them.
7462        tst.b           SRC_EX(%a0)             # is operand negative?
7463        smi             %d1                     # set if so
7464        mov.l           L_SCR3(%a6),%d0         # pass: sgl prec,rnd mode
7465        bsr.l           ovf_res                 # calc OVFL result
7466        fmovm.x         (%a0),&0x80             # load default overflow result
7467        fmov.s          %fp0,%d0                # store to single
7468
7469        mov.b           1+EXC_OPWORD(%a6),%d1   # extract dst mode
7470        andi.b          &0x38,%d1               # is mode == 0? (Dreg dst)
7471        beq.b           fout_sgl_ovfl_dn        # must save to integer regfile
7472
7473        mov.l           EXC_EA(%a6),%a0         # stacked <ea> is correct
7474        bsr.l           _dmem_write_long        # write long
7475
7476        tst.l           %d1                     # did dstore fail?
7477        bne.l           facc_out_l              # yes
7478
7479        bra.b           fout_sgl_ovfl_chkexc
7480
7481fout_sgl_ovfl_dn:
7482        mov.b           1+EXC_OPWORD(%a6),%d1   # extract Dn
7483        andi.w          &0x7,%d1
7484        bsr.l           store_dreg_l
7485
# this path only raises OVFL (and possibly INEX2), so the EXOP is needed
# when the OVFL (bit 4) or INEX2 (bit 1) enable is set. the mask used to
# be 0x0a, i.e. the UNFL enable (bit 3) taken over from the underflow
# path, which left fp1 without an EXOP when only OVFL was enabled.
7486fout_sgl_ovfl_chkexc:
7487        mov.b           FPCR_ENABLE(%a6),%d1
7488        andi.b          &0x12,%d1               # is OVFL or INEX2 enabled?
7489        bne.w           fout_sd_exc_ovfl        # yes
7490        addq.l          &0x4,%sp                # no; drop the saved src ptr
7491        rts
7492
7493#
7494# move out MAY overflow:
7495# (1) force the exp to 0x3fff
7496# (2) do a move w/ appropriate rnd mode
7497# (3) if |result| < 2 the scaled exp is still zero, so the original
7498#       operand is in range: finish it through fout_sgl_exg.
7499#     otherwise the exp became one, i.e. it overflowed: fout_sgl_ovfl.
7500#
# In:   a0 = ptr to the source operand (left untouched; both exits
#       re-read the operand through a0)
#       L_SCR3(%a6) = sgl prec,rnd mode
# Uses: d1, fp0, FP_SCR0(%a6)
7501fout_sgl_may_ovfl:
7502        mov.w           SRC_EX(%a0),%d1         # fetch current sign
7503        andi.w          &0x8000,%d1             # keep it,clear exp
7504        ori.w           &0x3fff,%d1             # insert exp = 0
7505        mov.w           %d1,FP_SCR0_EX(%a6)     # insert scaled exp
7506        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
7507        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
7508
7509        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
7510
7511        fmov.x          FP_SCR0(%a6),%fp0       # force fop to be rounded
7512        fmov.l          &0x0,%fpcr              # clear FPCR
7513
7514        fabs.x          %fp0                    # need absolute value
7515        fcmp.b          %fp0,&0x2               # did exponent increase?
7516        fblt.w          fout_sgl_exg            # no; go finish NORM
7517        bra.w           fout_sgl_ovfl           # yes; go handle overflow
7518
7519################
7520
# fout_sd_exc_unfl / fout_sd_exc_ovfl: build the EXOP for the single and
# double precision move-out paths and return it in fp1.
#
# In:   (sp) = saved ptr to the original source operand (pushed by the
#              unfl/ovfl code before branching here; popped below)
#       L_SCR3(%a6) = rnd prec,mode
#       STAG(%a6)   = source tag (unfl entry only)
# Out:  fp1 = EXOP; FP_SCR0(%a6) is overwritten
# The source is copied to FP_SCR0 again. On the unfl entry a DENORM is
# normalized first and the negated shift count from norm() becomes its
# 15-bit exponent. The value is then rounded by _round() with the sign
# moved to the byte at 2+FP_SCR0_EX, and the sign is re-inserted afterwards.
7521fout_sd_exc_unfl:
7522        mov.l           (%sp)+,%a0              # restore a0
7523
7524        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
7525        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
7526        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
7527
7528        cmpi.b          STAG(%a6),&DENORM       # was src a DENORM?
7529        bne.b           fout_sd_exc_cont        # no
7530
7531        lea             FP_SCR0(%a6),%a0
7532        bsr.l           norm                    # normalize; d0 = shift amount
7533        neg.l           %d0                     # new exp = -(shft amt)
7534        andi.w          &0x7fff,%d0
7535        bfins           %d0,FP_SCR0_EX(%a6){&1:&15} # insert new exp, sign bit untouched
7536        bra.b           fout_sd_exc_cont
7537
7538fout_sd_exc:
7539fout_sd_exc_ovfl:
7540        mov.l           (%sp)+,%a0              # restore a0
7541
7542        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
7543        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
7544        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
7545
7546fout_sd_exc_cont:
7547        bclr            &0x7,FP_SCR0_EX(%a6)    # clear sign bit
7548        sne.b           2+FP_SCR0_EX(%a6)       # set internal sign bit
7549        lea             FP_SCR0(%a6),%a0        # pass: ptr to DENORM
7550
# build d1 for _round(): the upper word receives the rnd prec bits and the
# lower word the rnd mode bits, both taken from the low byte of L_SCR3.
7551        mov.b           3+L_SCR3(%a6),%d1
7552        lsr.b           &0x4,%d1
7553        andi.w          &0x0c,%d1               # keep rnd prec bits
7554        swap            %d1                     # move them to the upper word
7555        mov.b           3+L_SCR3(%a6),%d1
7556        lsr.b           &0x4,%d1
7557        andi.w          &0x03,%d1               # keep rnd mode bits
7558        clr.l           %d0                     # pass: zero g,r,s
7559        bsr.l           _round                  # round the DENORM
7560
7561        tst.b           2+FP_SCR0_EX(%a6)       # is EXOP negative?
7562        beq.b           fout_sd_exc_done        # no
7563        bset            &0x7,FP_SCR0_EX(%a6)    # yes
7564
7565fout_sd_exc_done:
7566        fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
7567        rts
7568
7569#################################################################
7570# fmove.d out ###################################################
7571#################################################################
#
# In:   a0 = ptr to the extended precision source operand
#       d0 = rnd prec,mode as passed by the caller
# The precision field of d0 is forced to double and the result is kept in
# L_SCR3(%a6) for every sub-path. The biased source exponent is then
# compared (signed) against DBL_HI / DBL_LO:
#       exp >  DBL_HI -> fout_dbl_ovfl
#       exp == DBL_HI -> fout_dbl_may_ovfl
#       exp <  DBL_LO -> fout_dbl_unfl
#       otherwise     -> falls through to fout_dbl_exg
# Unlike the integer and single paths there is no data register case:
# the result always goes to memory at EXC_EA(%a6).
7572fout_dbl:
7573        andi.b          &0x30,%d0               # clear rnd prec
7574        ori.b           &d_mode*0x10,%d0        # insert dbl prec
7575        mov.l           %d0,L_SCR3(%a6)         # save rnd prec,mode on stack
7576
7577#
7578# operand is a normalized number. first, we check to see if the move out
7579# would cause either an underflow or overflow. these cases are handled
7580# separately. otherwise, set the FPCR to the proper rounding mode and
7581# execute the move.
7582#
7583        mov.w           SRC_EX(%a0),%d0         # extract exponent
7584        andi.w          &0x7fff,%d0             # strip sign
7585
7586        cmpi.w          %d0,&DBL_HI             # will operand overflow?
7587        bgt.w           fout_dbl_ovfl           # yes; go handle OVFL
7588        beq.w           fout_dbl_may_ovfl       # maybe; go handle possible OVFL
7589        cmpi.w          %d0,&DBL_LO             # will operand underflow?
7590        blt.w           fout_dbl_unfl           # yes; go handle underflow
7591
7592#
7593# NORMs(in range) can be stored out by a simple "fmov.d"
7594# Unnormalized inputs can come through this point.
7595#
# fout_dbl_may_ovfl also branches here when rounding did not raise the
# exponent. a0 must still point at the source operand.
7596fout_dbl_exg:
7597        fmovm.x         SRC(%a0),&0x80          # fetch fop from stack
7598
7599        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
7600        fmov.l          &0x0,%fpsr              # clear FPSR
7601
7602        fmov.d          %fp0,L_SCR1(%a6)        # store does convert and round
7603
7604        fmov.l          &0x0,%fpcr              # clear FPCR
7605        fmov.l          %fpsr,%d0               # save FPSR
7606
7607        or.w            %d0,2+USER_FPSR(%a6)    # set possible inex2/ainex
7608
7609        mov.l           EXC_EA(%a6),%a1         # pass: dst addr
7610        lea             L_SCR1(%a6),%a0         # pass: src addr
7611        movq.l          &0x8,%d0                # pass: opsize is 8 bytes
7612        bsr.l           _dmem_write             # store dbl fop to memory
7613
7614        tst.l           %d1                     # did dstore fail?
7615        bne.l           facc_out_d              # yes
7616
7617        rts                                     # no; so we're finished
7618
7619#
7620# here, we know that the operand would UNFL if moved out to double prec,
7621# so, denorm and round and then use generic store double routine to
7622# write the value to memory.
7623#
# In:   a0 = ptr to the source operand, L_SCR3(%a6) = dbl prec,rnd mode
# The source is copied to FP_SCR0 and a0 is pushed on the stack. The
# pushed a0 is either popped by fout_sd_exc_unfl (EXOP needed) or
# discarded by the addq before the rts.
# NOTE(review): on the facc_out_d exit the pushed a0 is still on the
# stack - presumably that exit does not rely on sp; confirm.
7624fout_dbl_unfl:
7625        bset            &unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
7626
7627        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
7628        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
7629        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
7630        mov.l           %a0,-(%sp)              # save src ptr for the EXOP code
7631
7632        clr.l           %d0                     # pass: S.F. = 0
7633
7634        cmpi.b          STAG(%a6),&DENORM       # fetch src optype tag
7635        bne.b           fout_dbl_unfl_cont      # let DENORMs fall through
7636
7637        lea             FP_SCR0(%a6),%a0
7638        bsr.l           norm                    # normalize the DENORM
7639
7640fout_dbl_unfl_cont:
7641        lea             FP_SCR0(%a6),%a0        # pass: ptr to operand
7642        mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
7643        bsr.l           unf_res                 # calc default underflow result
7644
7645        lea             FP_SCR0(%a6),%a0        # pass: ptr to fop
7646        bsr.l           dst_dbl                 # convert to double prec
7647        mov.l           %d0,L_SCR1(%a6)         # hi(dbl) -> first longword of buffer
7648        mov.l           %d1,L_SCR2(%a6)         # lo(dbl) -> second longword of buffer
7649
7650        mov.l           EXC_EA(%a6),%a1         # pass: dst addr
7651        lea             L_SCR1(%a6),%a0         # pass: src addr
7652        movq.l          &0x8,%d0                # pass: opsize is 8 bytes
7653        bsr.l           _dmem_write             # store dbl fop to memory
7654
7655        tst.l           %d1                     # did dstore fail?
7656        bne.l           facc_out_d              # yes
7657
7658        mov.b           FPCR_ENABLE(%a6),%d1
7659        andi.b          &0x0a,%d1               # is UNFL or INEX enabled?
7660        bne.w           fout_sd_exc_unfl        # yes
7661        addq.l          &0x4,%sp                # no; drop the saved src ptr
7662        rts
7663
7664#
7665# it's definitely an overflow so call ovf_res to get the correct answer
7666#
# In:   a0 = ptr to the source operand, L_SCR3(%a6) = dbl prec,rnd mode
# Out:  default overflow result written to memory at EXC_EA(%a6);
#       ovfl/aovfl/ainex (plus inex2 when any of the low 11 mantissa
#       bits is non-zero) or'ed into USER_FPSR(%a6);
#       fp1 = EXOP when the OVFL or INEX2 exception is enabled.
# a0 is pushed on the stack; it is popped by fout_sd_exc_ovfl or
# discarded by the addq before the rts.
# NOTE(review): on the facc_out_d exit the pushed a0 is still on the
# stack - presumably that exit does not rely on sp; confirm.
7667fout_dbl_ovfl:
7668        mov.w           2+SRC_LO(%a0),%d0
7669        andi.w          &0x7ff,%d0              # mantissa bits below dbl precision
7670        bne.b           fout_dbl_ovfl_inex2
7671
7672        ori.w           &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
7673        bra.b           fout_dbl_ovfl_cont
7674fout_dbl_ovfl_inex2:
7675        ori.w           &ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
7676
7677fout_dbl_ovfl_cont:
7678        mov.l           %a0,-(%sp)              # save src ptr for the EXOP code
7679
7680# call ovf_res() w/ dbl prec and the correct rnd mode to create the default
7681# overflow result. DON'T save the returned ccodes from ovf_res() since
7682# fmove out doesn't alter them.
7683        tst.b           SRC_EX(%a0)             # is operand negative?
7684        smi             %d1                     # set if so
7685        mov.l           L_SCR3(%a6),%d0         # pass: dbl prec,rnd mode
7686        bsr.l           ovf_res                 # calc OVFL result
7687        fmovm.x         (%a0),&0x80             # load default overflow result
7688        fmov.d          %fp0,L_SCR1(%a6)        # store to double
7689
7690        mov.l           EXC_EA(%a6),%a1         # pass: dst addr
7691        lea             L_SCR1(%a6),%a0         # pass: src addr
7692        movq.l          &0x8,%d0                # pass: opsize is 8 bytes
7693        bsr.l           _dmem_write             # store dbl fop to memory
7694
7695        tst.l           %d1                     # did dstore fail?
7696        bne.l           facc_out_d              # yes
7697
# this path only raises OVFL (and possibly INEX2), so the EXOP is needed
# when the OVFL (bit 4) or INEX2 (bit 1) enable is set. the mask used to
# be 0x0a, i.e. the UNFL enable (bit 3) taken over from the underflow
# path, which left fp1 without an EXOP when only OVFL was enabled.
7698        mov.b           FPCR_ENABLE(%a6),%d1
7699        andi.b          &0x12,%d1               # is OVFL or INEX2 enabled?
7700        bne.w           fout_sd_exc_ovfl        # yes
7701        addq.l          &0x4,%sp                # no; drop the saved src ptr
7702        rts
7703
7704#
7705# move out MAY overflow:
7706# (1) force the exp to 0x3fff
7707# (2) do a move w/ appropriate rnd mode
7708# (3) if |result| < 2 the scaled exp is still zero, so the original
7709#       operand is in range: finish it through fout_dbl_exg.
7710#     otherwise the exp became one, i.e. it overflowed: fout_dbl_ovfl.
7711#
# In:   a0 = ptr to the source operand (left untouched; both exits
#       re-read the operand through a0)
#       L_SCR3(%a6) = dbl prec,rnd mode
# Uses: d1, fp0, FP_SCR0(%a6)
7712fout_dbl_may_ovfl:
7713        mov.w           SRC_EX(%a0),%d1         # fetch current sign
7714        andi.w          &0x8000,%d1             # keep it,clear exp
7715        ori.w           &0x3fff,%d1             # insert exp = 0
7716        mov.w           %d1,FP_SCR0_EX(%a6)     # insert scaled exp
7717        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
7718        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
7719
7720        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
7721
7722        fmov.x          FP_SCR0(%a6),%fp0       # force fop to be rounded
7723        fmov.l          &0x0,%fpcr              # clear FPCR
7724
7725        fabs.x          %fp0                    # need absolute value
7726        fcmp.b          %fp0,&0x2               # did exponent increase?
7727        fblt.w          fout_dbl_exg            # no; go finish NORM
7728        bra.w           fout_dbl_ovfl           # yes; go handle overflow
7729
7730#########################################################################
7731# XDEF **************************************************************** #
7732#       dst_dbl(): create double precision value from extended prec.    #
7733#                                                                       #
7734# XREF **************************************************************** #
7735#       None                                                            #
7736#                                                                       #
7737# INPUT *************************************************************** #
7738#       a0 = pointer to source operand in extended precision            #
7739#                                                                       #
7740# OUTPUT ************************************************************** #
7741#       d0 = hi(double precision result)                                #
7742#       d1 = lo(double precision result)                                #
#       L_SCR1(%a6), L_SCR2(%a6) = overwritten (used as scratch)        #
7743#                                                                       #
7744# ALGORITHM *********************************************************** #
7745#                                                                       #
7746#  Changes extended precision to double precision.                      #
7747#  Note: no attempt is made to round the extended value to double.      #
7748#       dbl_sign = ext_sign                                             #
7749#       dbl_exp = ext_exp - $3fff(ext bias) + $3ff(dbl bias)            #
#       (one less when the integer bit is clear, i.e. for a denorm)     #
7750#       get rid of ext integer bit                                      #
7751#       dbl_mant = ext_mant{62:11}                                      #
7752#                                                                       #
7753#               ---------------   ---------------    ---------------    #
7754#  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |    #
7755#               ---------------   ---------------    ---------------    #
7756#                95         64    63 62       32      31     11   0     #
7757#                                    |                       |          #
7758#                                    |                       |          #
7759#                                    |                       |          #
7760#                                    v                       v          #
7761#                             ---------------   ---------------         #
7762#  double   ->                |s|exp| mant  |   |  mant       |         #
7763#                             ---------------   ---------------         #
7764#                             63     51   32   31              0        #
7765#                                                                       #
7766#########################################################################
7767
7768dst_dbl:
7769        clr.l           %d0                     # clear d0
7770        mov.w           FTEMP_EX(%a0),%d0       # get exponent
7771        subi.w          &EXT_BIAS,%d0           # subtract extended precision bias
7772        addi.w          &DBL_BIAS,%d0           # add double precision bias
7773        tst.b           FTEMP_HI(%a0)           # is number a denorm?
7774        bmi.b           dst_get_dupper          # no
7775        subq.w          &0x1,%d0                # yes; denorm bias = DBL_BIAS - 1
7776dst_get_dupper:
7777        swap            %d0                     # d0 now in upper word
7778        lsl.l           &0x4,%d0                # d0 in proper place for dbl prec exp
7779        tst.b           FTEMP_EX(%a0)           # test sign
7780        bpl.b           dst_get_dman            # if positive, go process mantissa
7781        bset            &0x1f,%d0               # if negative, set sign
7782dst_get_dman:
7783        mov.l           FTEMP_HI(%a0),%d1       # get ms mantissa
7784        bfextu          %d1{&1:&20},%d1         # get upper 20 bits of ms (integer bit skipped)
7785        or.l            %d1,%d0                 # put these bits in ms word of double
7786        mov.l           %d0,L_SCR1(%a6)         # park hi(dbl) in L_SCR1; d0 is reused below
7787        mov.l           FTEMP_HI(%a0),%d1       # get ms mantissa
7788        mov.l           &21,%d0                 # load shift count
7789        lsl.l           %d0,%d1                 # put lower 11 bits in upper bits
7790        mov.l           %d1,L_SCR2(%a6)         # build lower lword in memory
7791        mov.l           FTEMP_LO(%a0),%d1       # get ls mantissa
7792        bfextu          %d1{&0:&21},%d0         # get ls 21 bits of double
7793        mov.l           L_SCR2(%a6),%d1
7794        or.l            %d0,%d1                 # put them in double result
7795        mov.l           L_SCR1(%a6),%d0         # return hi(dbl) in d0
7796        rts
7797
7798#########################################################################
7799# XDEF **************************************************************** #
7800#       dst_sgl(): create single precision value from extended prec     #
7801#                                                                       #
7802# XREF **************************************************************** #
7803#                                                                       #
7804# INPUT *************************************************************** #
7805#       a0 = pointer to source operand in extended precision            #
7806#                                                                       #
7807# OUTPUT ************************************************************** #
7808#       d0 = single precision result                                    #
7809#                                                                       #
7810# ALGORITHM *********************************************************** #
7811#                                                                       #
7812# Changes extended precision to single precision.                       #
7813#       sgl_sign = ext_sign                                             #
7814#       sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias)             #
7815#       get rid of ext integer bit                                      #
7816#       sgl_mant = ext_mant{62:40}                                      #
7817#                                                                       #
7818#               ---------------   ---------------    ---------------    #
7819#  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |    #
7820#               ---------------   ---------------    ---------------    #
7821#                95         64    63 62    40 32      31     12   0     #
7822#                                    |     |                            #
7823#                                    |     |                            #
7824#                                    |     |                            #
7825#                                    v     v                            #
7826#                             ---------------                           #
7827#  single   ->                |s|exp| mant  |                           #
7828#                             ---------------                           #
7829#                             31     22     0                           #
7830#                                                                       #
7831#########################################################################
7832
7833dst_sgl:
# Result in d0 = {sign, 8-bit exponent, 23 fraction bits}. The fraction is
# taken from the ms mantissa lword only; FTEMP_LO is not read.
7834        clr.l           %d0
7835        mov.w           FTEMP_EX(%a0),%d0       # get {sgn,exp} word
7836        subi.w          &EXT_BIAS,%d0           # subtract extended precision bias
7837        addi.w          &SGL_BIAS,%d0           # add single precision bias
7838        tst.b           FTEMP_HI(%a0)           # is number a denorm? (ms mantissa bit clear)
7839        bmi.b           dst_get_supper          # no; ms mantissa bit is set
7840        subq.w          &0x1,%d0                # yes; denorm bias = SGL_BIAS - 1
7841dst_get_supper:
7842        swap            %d0                     # put exp in upper word of d0
7843        lsl.l           &0x7,%d0                # exp -> bits 30:23; old sign bit is shifted out
7844        tst.b           FTEMP_EX(%a0)           # test sign (bit 7 of first exponent byte)
7845        bpl.b           dst_get_sman            # if positive, continue
7846        bset            &0x1f,%d0               # if negative, put in sign first
7847dst_get_sman:
7848        mov.l           FTEMP_HI(%a0),%d1       # get ms mantissa
7849        andi.l          &0x7fffff00,%d1         # drop ms (integer) bit and low 8 bits
7850        lsr.l           &0x8,%d1                # and put the 23 kept bits flush right
7851        or.l            %d1,%d0                 # merge fraction into the single result
7852        rts
7853
7854##############################################################################
7855fout_pack:
# Store a 12-byte operand to the <ea>. NORM and DENORM inputs are converted
# from FP_SRC with bindec() and written from FP_SCR0; every other input type
# is written straight from FP_SRC (see fout_pack_not_norm).
# Stack use: the value left in a0 after _calc_ea_fout is pushed first and is
# popped into a1 as the destination address at fout_pack_write; the k-factor
# is pushed/popped around the bindec call.
7856        bsr.l           _calc_ea_fout           # fetch the <ea>
7857        mov.l           %a0,-(%sp)              # save it; popped into a1 at fout_pack_write
7858
7859        mov.b           STAG(%a6),%d0           # fetch input type
7860        bne.w           fout_pack_not_norm      # input is not NORM (tag is non-zero)
7861
7862fout_pack_norm:
7863        btst            &0x4,EXC_CMDREG(%a6)    # static or dynamic?
7864        beq.b           fout_pack_s             # static
7865
7866fout_pack_d:
7867        mov.b           1+EXC_CMDREG(%a6),%d1   # fetch dynamic reg
7868        lsr.b           &0x4,%d1                # move reg field down to bits 2:0
7869        andi.w          &0x7,%d1                # keep the 3-bit register number
7870
7871        bsr.l           fetch_dreg              # fetch Dn w/ k-factor (used from d0 below)
7872
7873        bra.b           fout_pack_type
7874fout_pack_s:
7875        mov.b           1+EXC_CMDREG(%a6),%d0   # fetch static field
7876
7877fout_pack_type:
7878        bfexts          %d0{&25:&7},%d0         # extract k-factor: sign-extend low 7 bits
7879        mov.l   %d0,-(%sp)                      # save k-factor across bindec
7880
7881        lea             FP_SRC(%a6),%a0         # pass: ptr to input
7882
7883# bindec is currently scrambling FP_SRC for denorm inputs.
7884# we'll have to change this, but for now, tough luck!!!
7885        bsr.l           bindec                  # convert xprec to packed
7886
7887#       andi.l          &0xcfff000f,FP_SCR0(%a6) # clear unused fields
7888        andi.l          &0xcffff00f,FP_SCR0(%a6) # clear unused fields
7889
7890        mov.l   (%sp)+,%d0                      # restore k-factor
7891
# the next three tests check whether the "mantissa" (byte 3 of the first
# lword plus both remaining lwords of FP_SCR0) is entirely zero.
7892        tst.b           3+FP_SCR0_EX(%a6)
7893        bne.b           fout_pack_set
7894        tst.l           FP_SCR0_HI(%a6)
7895        bne.b           fout_pack_set
7896        tst.l           FP_SCR0_LO(%a6)
7897        bne.b           fout_pack_set
7898
7899# add the extra condition that only if the k-factor was zero, too, should
7900# we zero the exponent
7901        tst.l           %d0
7902        bne.b           fout_pack_set
7903# "mantissa" is all zero which means that the answer is zero. but, the '040
7904# algorithm allows the exponent to be non-zero. the 881/2 do not. Therefore,
7905# if the mantissa is zero, I will zero the exponent, too.
7906# the question now is whether the exponents sign bit is allowed to be non-zero
7907# for a zero, also...
7908        andi.w          &0xf000,FP_SCR0(%a6)    # keep top nibble, clear low 12 bits of first word
7909
7910fout_pack_set:
7911
7912        lea             FP_SCR0(%a6),%a0        # pass: src addr
7913
7914fout_pack_write:
7915        mov.l           (%sp)+,%a1              # pass: dst addr
7916        mov.l           &0xc,%d0                # pass: opsize is 12 bytes
7917
7918        cmpi.b          SPCOND_FLG(%a6),&mda7_flg
7919        beq.b           fout_pack_a7
7920
7921        bsr.l           _dmem_write             # write the 12-byte operand to memory
7922
7923        tst.l           %d1                     # did dstore fail?
7924        bne.w           fout_ext_err            # yes
7925
7926        rts
7927
7928# we don't want to do the write if the exception occurred in supervisor mode
7929# so _mem_write2() handles this for us.
7930fout_pack_a7:
7931        bsr.l           _mem_write2             # write the 12-byte operand to memory
7932
7933        tst.l           %d1                     # did dstore fail?
7934        bne.w           fout_ext_err            # yes
7935
7936        rts
7937
# d0 still holds STAG here. Only the <ea> is on the stack on this path.
7938fout_pack_not_norm:
7939        cmpi.b          %d0,&DENORM             # is it a DENORM?
7940        beq.w           fout_pack_norm          # yes; convert it like a NORM
7941        lea             FP_SRC(%a6),%a0         # pass: src addr (operand written unconverted)
7942        clr.w           2+FP_SRC_EX(%a6)        # zero the word that follows {sgn,exp}
7943        cmpi.b          %d0,&SNAN               # is it an SNAN?
7944        beq.b           fout_pack_snan          # yes
7945        bra.b           fout_pack_write         # no
7946
7947fout_pack_snan:
7948        ori.w           &snaniop2_mask,FPSR_EXCEPT(%a6) # set SNAN/AIOP
7949        bset            &0x6,FP_SRC_HI(%a6)     # set snan bit (bit 6 of first mantissa byte)
7950        bra.b           fout_pack_write
7951
7952#########################################################################
7953# XDEF **************************************************************** #
7954#       fmul(): emulates the fmul instruction                           #
7955#       fsmul(): emulates the fsmul instruction                         #
7956#       fdmul(): emulates the fdmul instruction                         #
7957#                                                                       #
7958# XREF **************************************************************** #
7959#       scale_to_zero_src() - scale src exponent to zero                #
7960#       scale_to_zero_dst() - scale dst exponent to zero                #
7961#       unf_res() - return default underflow result                     #
7962#       ovf_res() - return default overflow result                      #
7963#       res_qnan() - return QNAN result                                 #
7964#       res_snan() - return SNAN result                                 #
7965#                                                                       #
7966# INPUT *************************************************************** #
7967#       a0 = pointer to extended precision source operand               #
7968#       a1 = pointer to extended precision destination operand          #
7969#       d0  rnd prec,mode                                               #
7970#                                                                       #
7971# OUTPUT ************************************************************** #
7972#       fp0 = result                                                    #
7973#       fp1 = EXOP (if exception occurred)                              #
7974#                                                                       #
7975# ALGORITHM *********************************************************** #
7976#       Handle NANs, infinities, and zeroes as special cases. Divide    #
7977# norms/denorms into ext/sgl/dbl precision.                             #
7978#       For norms/denorms, scale the exponents such that a multiply     #
7979# instruction won't cause an exception. Use the regular fmul to         #
7980# compute a result. Check if the regular operands would have taken      #
7981# an exception. If so, return the default overflow/underflow result     #
7982# and return the EXOP if exceptions are enabled. Else, scale the        #
7983# result operand to the proper exponent.                                #
7984#                                                                       #
7985#########################################################################
7986
7987        align           0x10
# Scale-factor thresholds used by fmul_norm. Each table holds one long per
# rounding precision, in the order ext, sgl, dbl, and is indexed with
# (precision * 4). The summed scale factor in d0 is compared against the
# selected entry to classify the result as overflow / underflow / normal.
7988tbl_fmul_ovfl:
7989        long            0x3fff - 0x7ffe         # ext_max
7990        long            0x3fff - 0x407e         # sgl_max
7991        long            0x3fff - 0x43fe         # dbl_max
7992tbl_fmul_unfl:
7993        long            0x3fff + 0x0001         # ext_unfl
7994        long            0x3fff - 0x3f80         # sgl_unfl
7995        long            0x3fff - 0x3c00         # dbl_unfl
7996
# fsmul/fdmul: force the rounding precision in d0 to single/double, keeping
# only the rounding-mode bits (mask 0x30) of the low byte, then run fmul.
7997        global          fsmul
7998fsmul:
7999        andi.b          &0x30,%d0               # clear rnd prec; keep rnd mode bits
8000        ori.b           &s_mode*0x10,%d0        # insert sgl prec
8001        bra.b           fmul
8002
8003        global          fdmul
8004fdmul:
8005        andi.b          &0x30,%d0               # clear rnd prec; keep rnd mode bits
8006        ori.b           &d_mode*0x10,%d0        # insert dbl prec
# no branch needed: execution falls through into the fmul entry point
8007
8008        global          fmul
8009fmul:
8010        mov.l           %d0,L_SCR3(%a6)         # store rnd info
8011
# build the dispatch index d1 = (DTAG << 3) | STAG. it is zero only when both
# tags are zero, which is the NORM x NORM case handled by the fall-through.
8012        clr.w           %d1
8013        mov.b           DTAG(%a6),%d1
8014        lsl.b           &0x3,%d1
8015        or.b            STAG(%a6),%d1           # combine src tags
8016        bne.w           fmul_not_norm           # optimize on non-norm input
8017
# work on private copies: dst operand -> FP_SCR1, src operand -> FP_SCR0.
8018fmul_norm:
8019        mov.w           DST_EX(%a1),FP_SCR1_EX(%a6)
8020        mov.l           DST_HI(%a1),FP_SCR1_HI(%a6)
8021        mov.l           DST_LO(%a1),FP_SCR1_LO(%a6)
8022
8023        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
8024        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
8025        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
8026
8027        bsr.l           scale_to_zero_src       # scale src exponent
8028        mov.l           %d0,-(%sp)              # save scale factor 1
8029
8030        bsr.l           scale_to_zero_dst       # scale dst exponent
8031
8032        add.l           %d0,(%sp)               # SCALE_FACTOR = scale1 + scale2
8033
8034        mov.w           2+L_SCR3(%a6),%d1       # fetch precision (low word of rnd info)
8035        lsr.b           &0x6,%d1                # shift prec (bits 7:6) to lo bits
8036        mov.l           (%sp)+,%d0              # load S.F.; d0 stays live through the exits below
8037        cmp.l           %d0,(tbl_fmul_ovfl.w,%pc,%d1.w*4) # would result ovfl?
8038        beq.w           fmul_may_ovfl           # result may rnd to overflow
8039        blt.w           fmul_ovfl               # result will overflow
8040
8041        cmp.l           %d0,(tbl_fmul_unfl.w,%pc,%d1.w*4) # would result unfl?
8042        beq.w           fmul_may_unfl           # result may rnd to no unfl
8043        bgt.w           fmul_unfl               # result will underflow
8044
8045#
8046# NORMAL:
8047# - the result of the multiply operation will neither overflow nor underflow.
8048# - do the multiply to the proper precision and rounding mode.
8049# - scale the result exponent using the scale factor. if both operands were
8050# normalized then we really don't need to go through this scaling. but for now,
8051# this will do.
8052#
8053fmul_normal:
8054        fmovm.x         FP_SCR1(%a6),&0x80      # load dst operand
8055
8056        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
8057        fmov.l          &0x0,%fpsr              # clear FPSR
8058
8059        fmul.x          FP_SCR0(%a6),%fp0       # execute multiply
8060
8061        fmov.l          %fpsr,%d1               # save status
8062        fmov.l          &0x0,%fpcr              # clear FPCR
8063
8064        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
8065
# entered with the scaled result in fp0 and the scale factor in d0; also the
# landing point for fmul_may_ovfl/fmul_may_unfl when no exception occurred.
8066fmul_normal_exit:
8067        fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
8068        mov.l           %d2,-(%sp)              # save d2
8069        mov.w           FP_SCR0_EX(%a6),%d1     # load {sgn,exp}
8070        mov.l           %d1,%d2                 # make a copy
8071        andi.l          &0x7fff,%d1             # strip sign
8072        andi.w          &0x8000,%d2             # keep old sign
8073        sub.l           %d0,%d1                 # new exp = exp - scale factor
8074        or.w            %d2,%d1                 # concat old sign,new exp
8075        mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
8076        mov.l           (%sp)+,%d2              # restore d2
8077        fmovm.x         FP_SCR0(%a6),&0x80      # return rescaled result in fp0
8078        rts
8079
8080#
8081# OVERFLOW:
8082# - the result of the multiply operation is an overflow.
8083# - do the multiply to the proper precision and rounding mode in order to
8084# set the inexact bits.
8085# - calculate the default result and return it in fp0.
8086# - if overflow or inexact is enabled, we need a multiply result rounded to
8087# extended precision. if the original operation was extended, then we have this
8088# result. if the original operation was single or double, we have to do another
8089# multiply using extended precision and the correct rounding mode. the result
8090# of this operation then has its exponent scaled by -0x6000 to create the
8091# exceptional operand.
8092#
8093fmul_ovfl:
8094        fmovm.x         FP_SCR1(%a6),&0x80      # load dst operand
8095
8096        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
8097        fmov.l          &0x0,%fpsr              # clear FPSR
8098
8099        fmul.x          FP_SCR0(%a6),%fp0       # execute multiply
8100
8101        fmov.l          %fpsr,%d1               # save status
8102        fmov.l          &0x0,%fpcr              # clear FPCR
8103
8104        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
8105
8106# save setting this until now because this is where fmul_may_ovfl may jump in
8107fmul_ovfl_tst:
8108        or.l            &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
8109
8110        mov.b           FPCR_ENABLE(%a6),%d1
8111        andi.b          &0x13,%d1               # is OVFL or INEX enabled?
8112        bne.b           fmul_ovfl_ena           # yes
8113
8114# calculate the default result
8115fmul_ovfl_dis:
8116        btst            &neg_bit,FPSR_CC(%a6)   # is result negative?
8117        sne             %d1                     # set sign param accordingly
8118        mov.l           L_SCR3(%a6),%d0         # pass rnd prec,mode (scale factor in d0 is dead now)
8119        bsr.l           ovf_res                 # calculate default result
8120        or.b            %d0,FPSR_CC(%a6)        # set INF,N if applicable
8121        fmovm.x         (%a0),&0x80             # return default result in fp0
8122        rts
8123
8124#
8125# OVFL is enabled; Create EXOP:
8126# - if precision is extended, then we have the EXOP. simply bias the exponent
8127# with an extra -0x6000. if the precision is single or double, we need to
8128# calculate a result rounded to extended precision.
8129#
8130fmul_ovfl_ena:
8131        mov.l           L_SCR3(%a6),%d1
8132        andi.b          &0xc0,%d1               # test the rnd prec
8133        bne.b           fmul_ovfl_ena_sd        # it's sgl or dbl
8134
# entered with the extended-precision product in fp0 and the scale factor
# still in d0 (nothing on the paths into here writes d0).
8135fmul_ovfl_ena_cont:
8136        fmovm.x         &0x80,FP_SCR0(%a6)      # move result to stack
8137
8138        mov.l           %d2,-(%sp)              # save d2
8139        mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
8140        mov.w           %d1,%d2                 # make a copy
8141        andi.l          &0x7fff,%d1             # strip sign
8142        sub.l           %d0,%d1                 # exp - scale factor
8143        subi.l          &0x6000,%d1             # subtract bias
8144        andi.w          &0x7fff,%d1             # clear sign bit
8145        andi.w          &0x8000,%d2             # keep old sign
8146        or.w            %d2,%d1                 # concat old sign,new exp
8147        mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
8148        mov.l           (%sp)+,%d2              # restore d2
8149        fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
8150        bra.b           fmul_ovfl_dis           # then build the default result in fp0
8151
# sgl/dbl: redo the multiply with only the rounding mode in the FPCR (mask
# 0x30) to obtain the product for the EXOP. the FPSR is not re-read here.
8152fmul_ovfl_ena_sd:
8153        fmovm.x         FP_SCR1(%a6),&0x80      # load dst operand
8154
8155        mov.l           L_SCR3(%a6),%d1
8156        andi.b          &0x30,%d1               # keep rnd mode only
8157        fmov.l          %d1,%fpcr               # set FPCR
8158
8159        fmul.x          FP_SCR0(%a6),%fp0       # execute multiply
8160
8161        fmov.l          &0x0,%fpcr              # clear FPCR
8162        bra.b           fmul_ovfl_ena_cont
8163
8164#
8165# may OVERFLOW:
8166# - the result of the multiply operation MAY overflow.
8167# - do the multiply to the proper precision and rounding mode in order to
8168# set the inexact bits.
8169# - calculate the default result and return it in fp0.
8170#
8171fmul_may_ovfl:
8172        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
8173
8174        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
8175        fmov.l          &0x0,%fpsr              # clear FPSR
8176
8177        fmul.x          FP_SCR0(%a6),%fp0       # execute multiply
8178
8179        fmov.l          %fpsr,%d1               # save status
8180        fmov.l          &0x0,%fpcr              # clear FPCR
8181
8182        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
8183
# the scaled product decides it: a magnitude of 2 or more is treated as
# overflow, anything smaller takes the normal exit.
8184        fabs.x          %fp0,%fp1               # fp1 = |result|; fp0 keeps the signed result
8185        fcmp.b          %fp1,&0x2               # is |result| >= 2.b?
8186        fbge.w          fmul_ovfl_tst           # yes; overflow has occurred
8187
8188# no, it didn't overflow; we have correct result
8189        bra.w           fmul_normal_exit
8190
8191#
8192# UNDERFLOW:
8193# - the result of the multiply operation is an underflow.
8194# - do the multiply to the proper precision and rounding mode in order to
8195# set the inexact bits.
8196# - calculate the default result and return it in fp0.
8197# - if underflow or inexact is enabled, we need a multiply result rounded to
8198# extended precision. if the original operation was extended, then we have this
8199# result. if the original operation was single or double, we have to do another
8200# multiply using extended precision and the correct rounding mode. the result
8201# of this operation then has its exponent scaled by +0x6000 to create the
8202# exceptional operand.
8203#
8204fmul_unfl:
8205        bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8206
8207# for fun, let's use only extended precision, round to zero. then, let
8208# the unf_res() routine figure out all the rest.
8209# will we get the correct answer.
8210        fmovm.x         FP_SCR1(%a6),&0x80      # load dst operand
8211
8212        fmov.l          &rz_mode*0x10,%fpcr     # set FPCR
8213        fmov.l          &0x0,%fpsr              # clear FPSR
8214
8215        fmul.x          FP_SCR0(%a6),%fp0       # execute multiply
8216
8217        fmov.l          %fpsr,%d1               # save status
8218        fmov.l          &0x0,%fpcr              # clear FPCR
8219
8220        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
8221
8222        mov.b           FPCR_ENABLE(%a6),%d1
8223        andi.b          &0x0b,%d1               # is UNFL or INEX enabled?
8224        bne.b           fmul_unfl_ena           # yes
8225
# fp0 holds the RZ extended-precision product from above; unf_res turns the
# copy stored in FP_SCR0 into the default result.
8226fmul_unfl_dis:
8227        fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
8228
8229        lea             FP_SCR0(%a6),%a0        # pass: result addr
8230        mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
8231        bsr.l           unf_res                 # calculate default result
8232        or.b            %d0,FPSR_CC(%a6)        # unf_res2 may have set 'Z'
8233        fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
8234        rts
8235
8236#
8237# UNFL is enabled.
8238#
# the EXOP is computed in fp1 so that the product in fp0 survives for
# fmul_unfl_dis. the scale factor is still in d0 on entry.
8239fmul_unfl_ena:
8240        fmovm.x         FP_SCR1(%a6),&0x40      # load dst op into fp1
8241
8242        mov.l           L_SCR3(%a6),%d1
8243        andi.b          &0xc0,%d1               # is precision extended?
8244        bne.b           fmul_unfl_ena_sd        # no, sgl or dbl
8245
8246# if the rnd mode is anything but RZ, then we have to re-do the above
8247# multiplication because we used RZ for all.
8248        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
8249
8250fmul_unfl_ena_cont:
8251        fmov.l          &0x0,%fpsr              # clear FPSR
8252
8253        fmul.x          FP_SCR0(%a6),%fp1       # execute multiply
8254
8255        fmov.l          &0x0,%fpcr              # clear FPCR
8256
8257        fmovm.x         &0x40,FP_SCR0(%a6)      # save result to stack
8258        mov.l           %d2,-(%sp)              # save d2
8259        mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
8260        mov.l           %d1,%d2                 # make a copy
8261        andi.l          &0x7fff,%d1             # strip sign
8262        andi.w          &0x8000,%d2             # keep old sign
8263        sub.l           %d0,%d1                 # exp - scale factor
8264        addi.l          &0x6000,%d1             # add bias
8265        andi.w          &0x7fff,%d1             # clear sign bit
8266        or.w            %d2,%d1                 # concat old sign,new exp
8267        mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
8268        mov.l           (%sp)+,%d2              # restore d2
8269        fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
8270        bra.w           fmul_unfl_dis           # then build the default result from fp0
8271
8272fmul_unfl_ena_sd:
8273        mov.l           L_SCR3(%a6),%d1
8274        andi.b          &0x30,%d1               # use only rnd mode
8275        fmov.l          %d1,%fpcr               # set FPCR
8276
8277        bra.b           fmul_unfl_ena_cont
8278
8279# MAY UNDERFLOW:
8280# -use the correct rounding mode and precision. this code favors operations
8281# that do not underflow.
8282fmul_may_unfl:
8283        fmovm.x         FP_SCR1(%a6),&0x80      # load dst operand
8284
8285        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
8286        fmov.l          &0x0,%fpsr              # clear FPSR
8287
8288        fmul.x          FP_SCR0(%a6),%fp0       # execute multiply
8289
8290        fmov.l          %fpsr,%d1               # save status
8291        fmov.l          &0x0,%fpcr              # clear FPCR
8292
8293        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
8294
8295        fabs.x          %fp0,%fp1               # fp1 = |result|; fp0 keeps the signed result
8296        fcmp.b          %fp1,&0x2               # compare |result| with 2
8297        fbgt.w          fmul_normal_exit        # |result| > 2; no underflow occurred
8298        fblt.w          fmul_unfl               # |result| < 2; underflow occurred
8299
8300#
8301# we still don't know if underflow occurred. result is ~ equal to 2. but,
8302# we don't know if the result was an underflow that rounded up to a 2 or
8303# a normalized number that rounded down to a 2. so, redo the entire operation
8304# using RZ as the rounding mode to see what the pre-rounded result is.
8305# this case should be relatively rare.
8306#
8307        fmovm.x         FP_SCR1(%a6),&0x40      # load dst operand into fp1 (fp0 is preserved)
8308
8309        mov.l           L_SCR3(%a6),%d1
8310        andi.b          &0xc0,%d1               # keep rnd prec
8311        ori.b           &rz_mode*0x10,%d1       # insert RZ
8312
8313        fmov.l          %d1,%fpcr               # set FPCR
8314        fmov.l          &0x0,%fpsr              # clear FPSR
8315
8316        fmul.x          FP_SCR0(%a6),%fp1       # execute multiply
8317
8318        fmov.l          &0x0,%fpcr              # clear FPCR
8319        fabs.x          %fp1                    # make absolute value
8320        fcmp.b          %fp1,&0x2               # is |result| < 2.b?
8321        fbge.w          fmul_normal_exit        # no; no underflow occurred
8322        bra.w           fmul_unfl               # yes, underflow occurred
8323
8324################################################################################
8325
8326#
8327# Multiply: inputs are not both normalized; what are they?
8328#
# d1 = (DTAG << 3) | STAG on entry (built in fmul). The word fetched from
# tbl_fmul_op at that index is an offset relative to tbl_fmul_op itself.
8329fmul_not_norm:
8330        mov.w           (tbl_fmul_op.b,%pc,%d1.w*2),%d1
8331        jmp             (tbl_fmul_op.b,%pc,%d1.w)
8332
# table layout: six groups of eight entries. the group is selected by DTAG
# and the entry within the group by STAG, so each comment reads "dst x src".
# the last two entries of every group hold offset 0 and are unused.
8333        swbeg           &48
8334tbl_fmul_op:
8335        short           fmul_norm       - tbl_fmul_op # NORM x NORM
8336        short           fmul_zero       - tbl_fmul_op # NORM x ZERO
8337        short           fmul_inf_src    - tbl_fmul_op # NORM x INF
8338        short           fmul_res_qnan   - tbl_fmul_op # NORM x QNAN
8339        short           fmul_norm       - tbl_fmul_op # NORM x DENORM
8340        short           fmul_res_snan   - tbl_fmul_op # NORM x SNAN
8341        short           tbl_fmul_op     - tbl_fmul_op # unused
8342        short           tbl_fmul_op     - tbl_fmul_op # unused
8343
8344        short           fmul_zero       - tbl_fmul_op # ZERO x NORM
8345        short           fmul_zero       - tbl_fmul_op # ZERO x ZERO
8346        short           fmul_res_operr  - tbl_fmul_op # ZERO x INF
8347        short           fmul_res_qnan   - tbl_fmul_op # ZERO x QNAN
8348        short           fmul_zero       - tbl_fmul_op # ZERO x DENORM
8349        short           fmul_res_snan   - tbl_fmul_op # ZERO x SNAN
8350        short           tbl_fmul_op     - tbl_fmul_op # unused
8351        short           tbl_fmul_op     - tbl_fmul_op # unused
8352
8353        short           fmul_inf_dst    - tbl_fmul_op # INF x NORM
8354        short           fmul_res_operr  - tbl_fmul_op # INF x ZERO
8355        short           fmul_inf_dst    - tbl_fmul_op # INF x INF
8356        short           fmul_res_qnan   - tbl_fmul_op # INF x QNAN
8357        short           fmul_inf_dst    - tbl_fmul_op # INF x DENORM
8358        short           fmul_res_snan   - tbl_fmul_op # INF x SNAN
8359        short           tbl_fmul_op     - tbl_fmul_op # unused
8360        short           tbl_fmul_op     - tbl_fmul_op # unused
8361
8362        short           fmul_res_qnan   - tbl_fmul_op # QNAN x NORM
8363        short           fmul_res_qnan   - tbl_fmul_op # QNAN x ZERO
8364        short           fmul_res_qnan   - tbl_fmul_op # QNAN x INF
8365        short           fmul_res_qnan   - tbl_fmul_op # QNAN x QNAN
8366        short           fmul_res_qnan   - tbl_fmul_op # QNAN x DENORM
8367        short           fmul_res_snan   - tbl_fmul_op # QNAN x SNAN
8368        short           tbl_fmul_op     - tbl_fmul_op # unused
8369        short           tbl_fmul_op     - tbl_fmul_op # unused
8370
8371        short           fmul_norm       - tbl_fmul_op # DENORM x NORM
8372        short           fmul_zero       - tbl_fmul_op # DENORM x ZERO
8373        short           fmul_inf_src    - tbl_fmul_op # DENORM x INF
8374        short           fmul_res_qnan   - tbl_fmul_op # DENORM x QNAN
8375        short           fmul_norm       - tbl_fmul_op # DENORM x DENORM
8376        short           fmul_res_snan   - tbl_fmul_op # DENORM x SNAN
8377        short           tbl_fmul_op     - tbl_fmul_op # unused
8378        short           tbl_fmul_op     - tbl_fmul_op # unused
8379
8380        short           fmul_res_snan   - tbl_fmul_op # SNAN x NORM
8381        short           fmul_res_snan   - tbl_fmul_op # SNAN x ZERO
8382        short           fmul_res_snan   - tbl_fmul_op # SNAN x INF
8383        short           fmul_res_snan   - tbl_fmul_op # SNAN x QNAN
8384        short           fmul_res_snan   - tbl_fmul_op # SNAN x DENORM
8385        short           fmul_res_snan   - tbl_fmul_op # SNAN x SNAN
8386        short           tbl_fmul_op     - tbl_fmul_op # unused
8387        short           tbl_fmul_op     - tbl_fmul_op # unused
8388
# stubs placed next to the table so the 16-bit table offsets can reach them;
# each one forwards to the shared result routine with a long branch.
8389fmul_res_operr:
8390        bra.l           res_operr
8391fmul_res_snan:
8392        bra.l           res_snan
8393fmul_res_qnan:
8394        bra.l           res_qnan
8395
8396#
8397# Multiply: (Zero x Zero) || (Zero x norm) || (Zero x denorm)
8398#
# fmul_zero: return a signed ZERO in fp0. The sign is the XOR of the operand
# signs, read from bit 7 of the first byte of each {sgn,exp} field.
8399        global          fmul_zero               # global for fsglmul
8400fmul_zero:
8401        mov.b           SRC_EX(%a0),%d0         # exclusive or the signs
8402        mov.b           DST_EX(%a1),%d1
8403        eor.b           %d0,%d1                 # bit 7 of d1 = result sign
8404        bpl.b           fmul_zero_p             # result ZERO is pos.
8405fmul_zero_n:
8406        fmov.s          &0x80000000,%fp0        # load -ZERO
8407        mov.b           &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
8408        rts
8409fmul_zero_p:
8410        fmov.s          &0x00000000,%fp0        # load +ZERO
8411        mov.b           &z_bmask,FPSR_CC(%a6)   # set Z
8412        rts
8413
8414#
8415# Multiply: (inf x inf) || (inf x norm) || (inf x denorm)
8416#
8417# Note: The j-bit for an infinity is a don't-care. However, to be
8418# strictly compatible w/ the 68881/882, we make sure to return an
8419# INF w/ the j-bit set if the input INF j-bit was set. Destination
8420# INFs take priority.
8421#
# fmul_inf_dst / fmul_inf_src: return an INF in fp0, loaded from the dst or
# src operand respectively. The result sign is the XOR of the operand signs.
# fmul_inf_src reuses the _n/_p tails of fmul_inf_dst.
8422        global          fmul_inf_dst            # global for fsglmul
8423fmul_inf_dst:
8424        fmovm.x         DST(%a1),&0x80          # return INF result in fp0
8425        mov.b           SRC_EX(%a0),%d0         # exclusive or the signs
8426        mov.b           DST_EX(%a1),%d1
8427        eor.b           %d0,%d1                 # bit 7 of d1 = result sign
8428        bpl.b           fmul_inf_dst_p          # result INF is pos.
8429fmul_inf_dst_n:
8430        fabs.x          %fp0                    # clear result sign
8431        fneg.x          %fp0                    # set result sign
8432        mov.b           &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
8433        rts
8434fmul_inf_dst_p:
8435        fabs.x          %fp0                    # clear result sign
8436        mov.b           &inf_bmask,FPSR_CC(%a6) # set INF
8437        rts
8438
8439        global          fmul_inf_src            # global for fsglmul
8440fmul_inf_src:
8441        fmovm.x         SRC(%a0),&0x80          # return INF result in fp0
8442        mov.b           SRC_EX(%a0),%d0         # exclusive or the signs
8443        mov.b           DST_EX(%a1),%d1
8444        eor.b           %d0,%d1                 # bit 7 of d1 = result sign
8445        bpl.b           fmul_inf_dst_p          # result INF is pos.
8446        bra.b           fmul_inf_dst_n          # result INF is neg.
8447
8448#########################################################################
8449# XDEF **************************************************************** #
8450#       fin(): emulates the fmove instruction                           #
8451#       fsin(): emulates the fsmove instruction                         #
8452#       fdin(): emulates the fdmove instruction                         #
8453#                                                                       #
8454# XREF **************************************************************** #
8455#       norm() - normalize mantissa for EXOP on denorm                  #
8456#       scale_to_zero_src() - scale src exponent to zero                #
8457#       ovf_res() - return default overflow result                      #
8458#       unf_res() - return default underflow result                     #
8459#       res_qnan_1op() - return QNAN result                             #
8460#       res_snan_1op() - return SNAN result                             #
8461#                                                                       #
8462# INPUT *************************************************************** #
8463#       a0 = pointer to extended precision source operand               #
8464#       d0 = round prec/mode                                            #
8465#                                                                       #
8466# OUTPUT ************************************************************** #
8467#       fp0 = result                                                    #
8468#       fp1 = EXOP (if exception occurred)                              #
8469#                                                                       #
8470# ALGORITHM *********************************************************** #
8471#       Handle NANs, infinities, and zeroes as special cases. Divide    #
8472# norms into extended, single, and double precision.                    #
8473#       Norms can be emulated w/ a regular fmove instruction. For       #
8474# sgl/dbl, must scale exponent and perform an "fmove". Check to see     #
8475# if the result would have overflowed/underflowed. If so, use unf_res() #
8476# or ovf_res() to return the default result. Also return EXOP if        #
8477# exception is enabled. If no exception, return the default result.     #
8478#       Unnorms don't pass through here.                                #
8479#                                                                       #
8480#########################################################################
8481
# fsin: entry point for the fsmove emulation.
#   in:  a0 = ptr to extended precision source operand, d0 = round prec/mode
# Keeps only the 0x30 bits of the low byte of d0, inserts the single
# precision selector and continues in fin.
8482        global          fsin
8483fsin:
8484        andi.b          &0x30,%d0               # clear rnd prec
8485        ori.b           &s_mode*0x10,%d0        # insert sgl precision
8486        bra.b           fin                     # continue with the common move-in code
8487
# fdin: entry point for the fdmove emulation.
#   in:  a0 = ptr to extended precision source operand, d0 = round prec/mode
# Keeps only the 0x30 bits of the low byte of d0 and inserts the double
# precision selector. There is no branch: execution falls through into fin,
# which immediately follows.
8488        global          fdin
8489fdin:
8490        andi.b          &0x30,%d0               # clear rnd prec
8491        ori.b           &d_mode*0x10,%d0        # insert dbl precision
8492
# fin: common body of the fmove/fsmove/fdmove emulation.
#   in:   a0 = ptr to extended precision source operand
#         d0 = round prec/mode
#         STAG(%a6) = source operand type tag; a zero tag is handled as a
#                     NORM here, anything else goes to fin_not_norm
#   out:  fp0 = result
#   uses: d1 (tag), L_SCR3(%a6) (copy of the rounding info)
8493        global          fin
8494fin:
8495        mov.l           %d0,L_SCR3(%a6)         # store rnd info
8496
8497        mov.b           STAG(%a6),%d1           # fetch src optype tag
8498        bne.w           fin_not_norm            # optimize on non-norm input
8499
8500#
8501# FP MOVE IN: NORMs and DENORMs ONLY!
8502#
8503fin_norm:
8504        andi.b          &0xc0,%d0               # is precision extended?
8505        bne.w           fin_not_ext             # no, so go handle dbl or sgl
8506
8507#
8508# precision selected is extended. so...we cannot get an underflow
8509# or overflow because of rounding to the correct precision. so...
8510# skip the scaling and unscaling...
8511#
8512        tst.b           SRC_EX(%a0)             # is the operand negative?
8513        bpl.b           fin_norm_done           # no
8514        bset            &neg_bit,FPSR_CC(%a6)   # yes, so set 'N' ccode bit
8515fin_norm_done:
8516        fmovm.x         SRC(%a0),&0x80          # return result in fp0
8517        rts
8518
8519#
8520# for an extended precision DENORM, the UNFL exception bit is set
8521# the accrued bit is NOT set in this instance(no inexactness!)
8522#
# fin_denorm: move-in of a DENORM source operand.
#   in:   a0 = ptr to source operand, d0 = round prec/mode
#   out:  fp0 = source operand (extended precision case)
# Single/double rounding precision is handed to fin_not_ext. When UNFL is
# enabled in the FPCR, control continues in fin_denorm_unfl_ena.
8523fin_denorm:
8524        andi.b          &0xc0,%d0               # is precision extended?
8525        bne.w           fin_not_ext             # no, so go handle dbl or sgl
8526
8527        bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8528        tst.b           SRC_EX(%a0)             # is the operand negative?
8529        bpl.b           fin_denorm_done         # no
8530        bset            &neg_bit,FPSR_CC(%a6)   # yes, so set 'N' ccode bit
8531fin_denorm_done:
8532        fmovm.x         SRC(%a0),&0x80          # return result in fp0
8533        btst            &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
8534        bne.b           fin_denorm_unfl_ena     # yes
8535        rts
8536
8537#
8538# the input is an extended DENORM and underflow is enabled in the FPCR.
8539# normalize the mantissa and add the bias of 0x6000 to the resulting negative
8540# exponent and insert back into the operand.
8541#
# fin_denorm_unfl_ena: build the EXOP for an extended precision DENORM.
#   in:   a0 = ptr to source operand
#   out:  fp1 = EXOP; a0 now points at FP_SCR0(%a6)
#   uses: d0, d1, FP_SCR0(%a6)
# norm() is called on the copy in FP_SCR0; the value it leaves in d0 is used
# as the shift count from which the new exponent is derived.
8542fin_denorm_unfl_ena:
8543        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6) # copy operand to FP_SCR0
8544        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
8545        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
8546        lea             FP_SCR0(%a6),%a0        # pass: ptr to operand
8547        bsr.l           norm                    # normalize result
8548        neg.w           %d0                     # new exponent = -(shft val)
8549        addi.w          &0x6000,%d0             # add new bias to exponent
8550        mov.w           FP_SCR0_EX(%a6),%d1     # fetch old sign,exp
8551        andi.w          &0x8000,%d1             # keep old sign
8552        andi.w          &0x7fff,%d0             # clear sign position
8553        or.w            %d1,%d0                 # concat new exp,old sign
8554        mov.w           %d0,FP_SCR0_EX(%a6)     # insert new exponent
8555        fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
8556        rts
8557
8558#
8559# operand is to be rounded to single or double precision
8560#
# fin_not_ext: reached from fin_norm/fin_denorm with the low byte of d0
# already masked down to the rounding precision bits (0xc0). Branches to
# fin_dbl unless single precision is selected; otherwise falls through
# into fin_sgl.
8561fin_not_ext:
8562        cmpi.b          %d0,&s_mode*0x10        # separate sgl/dbl prec
8563        bne.b           fin_dbl                 # not sgl, so handle dbl
8564
8565#
8566# operand is to be rounded to single precision
8567#
# fin_sgl: copy the source operand to FP_SCR0, let scale_to_zero_src compute
# the scale factor (returned in d0) and classify the move against the single
# precision exponent limits. Falls through into fin_sd_normal when neither
# underflow nor overflow is possible.
#   in:   a0 = ptr to source operand
#   uses: d0 (scale factor), FP_SCR0(%a6)
8568fin_sgl:
8569        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6) # copy operand to FP_SCR0
8570        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
8571        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
8572        bsr.l           scale_to_zero_src       # calculate scale factor
8573
8574        cmpi.l          %d0,&0x3fff-0x3f80      # will move in underflow?
8575        bge.w           fin_sd_unfl             # yes; go handle underflow
8576        cmpi.l          %d0,&0x3fff-0x407e      # will move in overflow?
8577        beq.w           fin_sd_may_ovfl         # maybe; go check
8578        blt.w           fin_sd_ovfl             # yes; go handle overflow
8579
8580#
8581# operand will NOT overflow or underflow when moved into the fp reg file
8582#
# fin_sd_normal: perform the move on the scaled operand in FP_SCR0 using the
# user's rounding info from L_SCR3, merge the resulting FPSR into USER_FPSR,
# then undo the scaling of the exponent.
#   in:   d0 = scale factor, FP_SCR0(%a6) = scaled operand
#   out:  fp0 = result
#   uses: d1; d2 is saved and restored on the stack
# fin_sd_normal_exit is also entered from fin_sd_may_ovfl with the rounded
# result already in fp0.
8583fin_sd_normal:
8584        fmov.l          &0x0,%fpsr              # clear FPSR
8585        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
8586
8587        fmov.x          FP_SCR0(%a6),%fp0       # perform move
8588
8589        fmov.l          %fpsr,%d1               # save FPSR
8590        fmov.l          &0x0,%fpcr              # clear FPCR
8591
8592        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
8593
8594fin_sd_normal_exit:
8595        mov.l           %d2,-(%sp)              # save d2
8596        fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
8597        mov.w           FP_SCR0_EX(%a6),%d1     # load {sgn,exp}
8598        mov.w           %d1,%d2                 # make a copy
8599        andi.l          &0x7fff,%d1             # strip sign
8600        sub.l           %d0,%d1                 # subtract scale factor (undo scaling)
8601        andi.w          &0x8000,%d2             # keep old sign
8602        or.w            %d1,%d2                 # concat old sign,new exponent
8603        mov.w           %d2,FP_SCR0_EX(%a6)     # insert new exponent
8604        mov.l           (%sp)+,%d2              # restore d2
8605        fmovm.x         FP_SCR0(%a6),&0x80      # return result in fp0
8606        rts
8607
8608#
8609# operand is to be rounded to double precision
8610#
# fin_dbl: same as fin_sgl but classifies the scale factor against the
# double precision exponent limits.
#   in:   a0 = ptr to source operand
#   uses: d0 (scale factor from scale_to_zero_src), FP_SCR0(%a6)
8611fin_dbl:
8612        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6) # copy operand to FP_SCR0
8613        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
8614        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
8615        bsr.l           scale_to_zero_src       # calculate scale factor
8616
8617        cmpi.l          %d0,&0x3fff-0x3c00      # will move in underflow?
8618        bge.w           fin_sd_unfl             # yes; go handle underflow
8619        cmpi.l          %d0,&0x3fff-0x43fe      # will move in overflow?
8620        beq.w           fin_sd_may_ovfl         # maybe; go check
8621        blt.w           fin_sd_ovfl             # yes; go handle overflow
8622        bra.w           fin_sd_normal           # no; go handle normalized op
8623
8624#
8625# operand WILL underflow when moved in to the fp register file
8626#
# fin_sd_unfl: sgl/dbl move-in that underflows.
#   in:   d0 = scale factor, FP_SCR0(%a6) = scaled operand
#   out:  fp0 = default underflow result produced by unf_res()
#   uses: d0, d1, a0
# Sets the UNFL exception bit and the 'N' ccode, then either goes straight to
# the default result (fin_sd_unfl_dis) or first builds the EXOP in
# fin_sd_unfl_ena, which branches back to fin_sd_unfl_dis.
8627fin_sd_unfl:
8628        bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8629
8630        tst.b           FP_SCR0_EX(%a6)         # is operand negative?
8631        bpl.b           fin_sd_unfl_tst         # no
8632        bset            &neg_bit,FPSR_CC(%a6)   # set 'N' ccode bit
8633
8634# if underflow or inexact is enabled, then go calculate the EXOP first.
8635fin_sd_unfl_tst:
8636        mov.b           FPCR_ENABLE(%a6),%d1
8637        andi.b          &0x0b,%d1               # is UNFL or INEX enabled?
8638        bne.b           fin_sd_unfl_ena         # yes
8639
8640fin_sd_unfl_dis:
8641        lea             FP_SCR0(%a6),%a0        # pass: result addr
8642        mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
8643        bsr.l           unf_res                 # calculate default result
8644        or.b            %d0,FPSR_CC(%a6)        # unf_res may have set 'Z'
8645        fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
8646        rts
8647
8648#
8649# operand will underflow AND underflow or inexact is enabled.
8650# Therefore, we must return the result rounded to extended precision.
8651#
# fin_sd_unfl_ena: build the EXOP in fp1 from the scaled operand.
#   in:   d0 = scale factor, FP_SCR0(%a6) = scaled operand
#   out:  fp1 = EXOP; then continues in fin_sd_unfl_dis for the default result
#   uses: d1, FP_SCR1(%a6); d2 is saved and restored on the stack
# The EXOP exponent is the operand exponent minus the scale factor plus the
# 0x6000 bias, truncated to 15 bits, with the original sign re-attached.
8652fin_sd_unfl_ena:
8653        mov.l           FP_SCR0_HI(%a6),FP_SCR1_HI(%a6) # copy mantissa to FP_SCR1
8654        mov.l           FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
8655        mov.w           FP_SCR0_EX(%a6),%d1     # load current exponent
8656
8657        mov.l           %d2,-(%sp)              # save d2
8658        mov.w           %d1,%d2                 # make a copy
8659        andi.l          &0x7fff,%d1             # strip sign
8660        sub.l           %d0,%d1                 # subtract scale factor
8661        andi.w          &0x8000,%d2             # extract old sign
8662        addi.l          &0x6000,%d1             # add new bias
8663        andi.w          &0x7fff,%d1             # keep exponent within 15 bits
8664        or.w            %d1,%d2                 # concat old sign,new exp
8665        mov.w           %d2,FP_SCR1_EX(%a6)     # insert new exponent
8666        fmovm.x         FP_SCR1(%a6),&0x40      # return EXOP in fp1
8667        mov.l           (%sp)+,%d2              # restore d2
8668        bra.b           fin_sd_unfl_dis         # now create the default result
8669
8670#
8671# operand WILL overflow.
8672#
# fin_sd_ovfl: sgl/dbl move-in that overflows.
#   in:   d0 = scale factor, FP_SCR0(%a6) = scaled operand
#   out:  fp0 = default overflow result loaded from the address that
#         ovf_res() leaves in a0
#   uses: d0, d1, a0
# The move is still performed on the scaled operand so that the FPSR bits it
# produces can be merged into USER_FPSR. fin_sd_ovfl_tst is also entered from
# fin_sd_may_ovfl, and fin_sd_ovfl_dis from fin_sd_ovfl_ena.
8673fin_sd_ovfl:
8674        fmov.l          &0x0,%fpsr              # clear FPSR
8675        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
8676
8677        fmov.x          FP_SCR0(%a6),%fp0       # perform move
8678
8679        fmov.l          &0x0,%fpcr              # clear FPCR
8680        fmov.l          %fpsr,%d1               # save FPSR
8681
8682        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
8683
8684fin_sd_ovfl_tst:
8685        or.l            &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
8686
8687        mov.b           FPCR_ENABLE(%a6),%d1
8688        andi.b          &0x13,%d1               # is OVFL or INEX enabled?
8689        bne.b           fin_sd_ovfl_ena         # yes
8690
8691#
8692# OVFL is not enabled; therefore, we must create the default result by
8693# calling ovf_res().
8694#
8695fin_sd_ovfl_dis:
8696        btst            &neg_bit,FPSR_CC(%a6)   # is result negative?
8697        sne             %d1                     # set sign param accordingly
8698        mov.l           L_SCR3(%a6),%d0         # pass: prec,mode
8699        bsr.l           ovf_res                 # calculate default result
8700        or.b            %d0,FPSR_CC(%a6)        # set INF,N if applicable
8701        fmovm.x         (%a0),&0x80             # return default result in fp0
8702        rts
8703
8704#
8705# OVFL is enabled.
8706# the INEX2 bit has already been updated by the round to the correct precision.
8707# now, round to extended(and don't alter the FPSR).
8708#
# fin_sd_ovfl_ena: build the EXOP in fp1, then continue in fin_sd_ovfl_dis.
#   in:   d0 = scale factor, FP_SCR0(%a6) = scaled operand
#   out:  fp1 = EXOP
#   uses: d1, FP_SCR0(%a6); d2 is saved and restored on the stack
# The EXOP exponent is the operand exponent minus the scale factor minus the
# 0x6000 bias, truncated to 15 bits, with the original sign re-attached.
8709fin_sd_ovfl_ena:
8710        mov.l           %d2,-(%sp)              # save d2
8711        mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
8712        mov.l           %d1,%d2                 # make a copy
8713        andi.l          &0x7fff,%d1             # strip sign
8714        andi.w          &0x8000,%d2             # keep old sign
8715        sub.l           %d0,%d1                 # subtract scale factor (undo scaling)
8716        sub.l           &0x6000,%d1             # subtract bias
8717        andi.w          &0x7fff,%d1             # keep exponent within 15 bits
8718        or.w            %d2,%d1                 # concat old sign,new exp
8719        mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
8720        mov.l           (%sp)+,%d2              # restore d2
8721        fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
8722        bra.b           fin_sd_ovfl_dis         # now create the default result
8723
8724#
8725# the move in MAY overflow. so...
8726#
# fin_sd_may_ovfl: the scale factor sits exactly on the overflow limit, so the
# move is performed on the scaled operand and the rounded magnitude decides.
#   in:   d0 = scale factor, FP_SCR0(%a6) = scaled operand
#   out:  fp0 = rounded scaled result; control continues in fin_sd_ovfl_tst
#         (overflow) or fin_sd_normal_exit (no overflow)
#   uses: d1, fp1 (holds |result| for the comparison)
8727fin_sd_may_ovfl:
8728        fmov.l          &0x0,%fpsr              # clear FPSR
8729        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
8730
8731        fmov.x          FP_SCR0(%a6),%fp0       # perform the move
8732
8733        fmov.l          %fpsr,%d1               # save status
8734        fmov.l          &0x0,%fpcr              # clear FPCR
8735
8736        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
8737
8738        fabs.x          %fp0,%fp1               # make a copy of result
8739        fcmp.b          %fp1,&0x2               # is |result| >= 2.b?
8740        fbge.w          fin_sd_ovfl_tst         # yes; overflow has occurred
8741
8742# no, it didn't overflow; we have correct result
8743        bra.w           fin_sd_normal_exit
8744
8745##########################################################################
8746
8747#
8748# operand is not a NORM: check its optype and branch accordingly
8749#
# fin_not_norm: dispatch on the source operand tag.
#   in:   d1 = source operand type tag (non-zero), a0 = ptr to source operand
#   out:  DENORM -> fin_denorm, SNAN -> res_snan_1op, QNAN -> res_qnan_1op;
#         otherwise fp0 = source operand and FPSR_CC(%a6) = ccodes of the move
#   uses: d0
8750fin_not_norm:
8751        cmpi.b          %d1,&DENORM             # weed out DENORM
8752        beq.w           fin_denorm
8753        cmpi.b          %d1,&SNAN               # weed out SNANs
8754        beq.l           res_snan_1op
8755        cmpi.b          %d1,&QNAN               # weed out QNANs
8756        beq.l           res_qnan_1op
8757
8758#
8759# do the fmove in; at this point, only possible ops are ZERO and INF.
8760# use fmov to determine ccodes.
8761# prec:mode should be zero at this point but it won't affect answer anyways.
8762#
8763        fmov.x          SRC(%a0),%fp0           # do fmove in
8764        fmov.l          %fpsr,%d0               # no exceptions possible
8765        rol.l           &0x8,%d0                # put ccodes in lo byte
8766        mov.b           %d0,FPSR_CC(%a6)        # insert correct ccodes
8767        rts
8768
8769#########################################################################
8770# XDEF **************************************************************** #
8771#       fdiv(): emulates the fdiv instruction                           #
8772#       fsdiv(): emulates the fsdiv instruction                         #
8773#       fddiv(): emulates the fddiv instruction                         #
8774#                                                                       #
8775# XREF **************************************************************** #
8776#       scale_to_zero_src() - scale src exponent to zero                #
8777#       scale_to_zero_dst() - scale dst exponent to zero                #
8778#       unf_res() - return default underflow result                     #
8779#       ovf_res() - return default overflow result                      #
8780#       res_qnan() - return QNAN result                                 #
8781#       res_snan() - return SNAN result                                 #
8782#                                                                       #
8783# INPUT *************************************************************** #
8784#       a0 = pointer to extended precision source operand               #
8785#       a1 = pointer to extended precision destination operand          #
8786#       d0 = rnd prec,mode                                              #
8787#                                                                       #
8788# OUTPUT ************************************************************** #
8789#       fp0 = result                                                    #
8790#       fp1 = EXOP (if exception occurred)                              #
8791#                                                                       #
8792# ALGORITHM *********************************************************** #
8793#       Handle NANs, infinities, and zeroes as special cases. Divide    #
8794# norms/denorms into ext/sgl/dbl precision.                             #
8795#       For norms/denorms, scale the exponents such that a divide       #
8796# instruction won't cause an exception. Use the regular fdiv to         #
8797# compute a result. Check if the regular operands would have taken      #
8798# an exception. If so, return the default overflow/underflow result     #
8799# and return the EXOP if exceptions are enabled. Else, scale the        #
8800# result operand to the proper exponent.                                #
8801#                                                                       #
8802#########################################################################
8803
# Scale factor limits used by fdiv_norm. Each table has three longword
# entries, indexed by rounding precision in the order ext, sgl, dbl
# (fdiv_norm indexes them with the precision field shifted down to the low
# bits, scaled by 4).
8804        align           0x10
8805tbl_fdiv_unfl:
8806        long            0x3fff - 0x0000         # ext_unfl
8807        long            0x3fff - 0x3f81         # sgl_unfl
8808        long            0x3fff - 0x3c01         # dbl_unfl
8809
8810tbl_fdiv_ovfl:
8811        long            0x3fff - 0x7ffe         # ext overflow exponent
8812        long            0x3fff - 0x407e         # sgl overflow exponent
8813        long            0x3fff - 0x43fe         # dbl overflow exponent
8814
# fsdiv: entry point for the fsdiv emulation.
#   in:  a0 = ptr to source operand, a1 = ptr to destination operand,
#        d0 = rnd prec,mode
# Keeps only the 0x30 bits of the low byte of d0, inserts the single
# precision selector and continues in fdiv.
8815        global          fsdiv
8816fsdiv:
8817        andi.b          &0x30,%d0               # clear rnd prec
8818        ori.b           &s_mode*0x10,%d0        # insert sgl prec
8819        bra.b           fdiv                    # continue with the common divide code
8820
# fddiv: entry point for the fddiv emulation.
#   in:  a0 = ptr to source operand, a1 = ptr to destination operand,
#        d0 = rnd prec,mode
# Keeps only the 0x30 bits of the low byte of d0 and inserts the double
# precision selector. There is no branch: execution falls through into fdiv,
# which immediately follows.
8821        global          fddiv
8822fddiv:
8823        andi.b          &0x30,%d0               # clear rnd prec
8824        ori.b           &d_mode*0x10,%d0        # insert dbl prec
8825
# fdiv: common body of the fdiv/fsdiv/fddiv emulation.
#   in:   a0 = ptr to extended precision source operand
#         a1 = ptr to extended precision destination operand
#         d0 = rnd prec,mode
#   out:  fp0 = result, fp1 = EXOP (if exception occurred)
# Builds the dispatch index d1 = (DTAG << 3) | STAG. A zero index is handled
# by fdiv_norm, which immediately follows; any other value is dispatched by
# fdiv_not_norm.
8826        global          fdiv
8827fdiv:
8828        mov.l           %d0,L_SCR3(%a6)         # store rnd info
8829
8830        clr.w           %d1                     # clear low word before loading tag byte
8831        mov.b           DTAG(%a6),%d1           # fetch dst optype tag
8832        lsl.b           &0x3,%d1                # dst tag goes above the src tag
8833        or.b            STAG(%a6),%d1           # combine src tags
8834
8835        bne.w           fdiv_not_norm           # optimize on non-norm input
8836
8837#
8838# DIVIDE: NORMs and DENORMs ONLY!
8839#
# fdiv_norm: copy both operands to scratch (dst -> FP_SCR1, src -> FP_SCR0),
# scale both exponents, and classify the combined scale factor against the
# per-precision limits in tbl_fdiv_ovfl / tbl_fdiv_unfl. Falls through into
# fdiv_normal when neither overflow nor underflow is possible.
#   in:   a0 = ptr to source operand, a1 = ptr to destination operand,
#         L_SCR3(%a6) = rnd prec,mode
#   out:  d0 = scale factor, d1 = precision index used for the table lookups
8840fdiv_norm:
8841        mov.w           DST_EX(%a1),FP_SCR1_EX(%a6) # copy dst operand to FP_SCR1
8842        mov.l           DST_HI(%a1),FP_SCR1_HI(%a6)
8843        mov.l           DST_LO(%a1),FP_SCR1_LO(%a6)
8844
8845        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6) # copy src operand to FP_SCR0
8846        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
8847        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
8848
8849        bsr.l           scale_to_zero_src       # scale src exponent
8850        mov.l           %d0,-(%sp)              # save scale factor 1
8851
8852        bsr.l           scale_to_zero_dst       # scale dst exponent
8853
8854        neg.l           (%sp)                   # SCALE FACTOR = scale2 - scale1
8855        add.l           %d0,(%sp)               # (dst scale factor minus src scale factor)
8856
8857        mov.w           2+L_SCR3(%a6),%d1       # fetch precision
8858        lsr.b           &0x6,%d1                # shift to lo bits
8859        mov.l           (%sp)+,%d0              # load S.F.
8860        cmp.l           %d0,(tbl_fdiv_ovfl.b,%pc,%d1.w*4) # will result overflow?
8861        ble.w           fdiv_may_ovfl           # result will overflow
8862
8863        cmp.l           %d0,(tbl_fdiv_unfl.w,%pc,%d1.w*4) # will result underflow?
8864        beq.w           fdiv_may_unfl           # maybe
8865        bgt.w           fdiv_unfl               # yes; go handle underflow
8866
# fdiv_normal: divide the scaled operands with the user's rounding info,
# merge the resulting FPSR into USER_FPSR, then undo the scaling of the
# result exponent.
#   in:   d0 = scale factor, FP_SCR1(%a6) = scaled dst, FP_SCR0(%a6) = scaled src
#   out:  fp0 = result
#   uses: d1, FP_SCR0(%a6); d2 is saved and restored on the stack
# fdiv_normal_exit is also entered from fdiv_no_ovfl and fdiv_may_unfl with
# the rounded scaled result already in fp0.
8867fdiv_normal:
8868        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
8869
8870        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
8871        fmov.l          &0x0,%fpsr              # clear FPSR
8872
8873        fdiv.x          FP_SCR0(%a6),%fp0       # perform divide
8874
8875        fmov.l          %fpsr,%d1               # save FPSR
8876        fmov.l          &0x0,%fpcr              # clear FPCR
8877
8878        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
8879
8880fdiv_normal_exit:
8881        fmovm.x         &0x80,FP_SCR0(%a6)      # store result on stack
8882        mov.l           %d2,-(%sp)              # store d2
8883        mov.w           FP_SCR0_EX(%a6),%d1     # load {sgn,exp}
8884        mov.l           %d1,%d2                 # make a copy
8885        andi.l          &0x7fff,%d1             # strip sign
8886        andi.w          &0x8000,%d2             # keep old sign
8887        sub.l           %d0,%d1                 # subtract scale factor (undo scaling)
8888        or.w            %d2,%d1                 # concat old sign,new exp
8889        mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
8890        mov.l           (%sp)+,%d2              # restore d2
8891        fmovm.x         FP_SCR0(%a6),&0x80      # return result in fp0
8892        rts
8893
# tbl_fdiv_ovfl2: exponent limits compared against the unscaled result
# exponent in fdiv_may_ovfl; three longword entries indexed by d1 like the
# other fdiv tables (presumably ext, sgl, dbl order -- the values parallel
# tbl_fdiv_ovfl).
8894tbl_fdiv_ovfl2:
8895        long            0x7fff
8896        long            0x407f
8897        long            0x43ff
8898
# fdiv_no_ovfl: the divide did not overflow after all; pop the scale factor
# saved by fdiv_may_ovfl and finish through fdiv_normal_exit.
8899fdiv_no_ovfl:
8900        mov.l           (%sp)+,%d0              # restore scale factor
8901        bra.b           fdiv_normal_exit
8902
# fdiv_may_ovfl: the scale factor says the divide may overflow. Perform the
# divide on the scaled operands, compute the exponent the unscaled result
# would have and compare it with tbl_fdiv_ovfl2.
#   in:   d0 = scale factor; d1 is expected to still hold the precision
#         index set up in fdiv_norm (it is not written here before the lookup)
#   out:  fp0 = rounded scaled result, d0 = scale factor; continues in
#         fdiv_no_ovfl, or falls through into fdiv_ovfl_tst on overflow
8903fdiv_may_ovfl:
8904        mov.l           %d0,-(%sp)              # save scale factor
8905
8906        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
8907
8908        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
8909        fmov.l          &0x0,%fpsr              # clear FPSR
8910
8911        fdiv.x          FP_SCR0(%a6),%fp0       # execute divide
8912
8913        fmov.l          %fpsr,%d0               # save status (d1 must stay intact)
8914        fmov.l          &0x0,%fpcr              # clear FPCR
8915
8916        or.l            %d0,USER_FPSR(%a6)      # save INEX,N
8917
8918        fmovm.x         &0x01,-(%sp)            # save result to stack
8919        mov.w           (%sp),%d0               # fetch new exponent
8920        add.l           &0xc,%sp                # clear result from stack
8921        andi.l          &0x7fff,%d0             # strip sign
8922        sub.l           (%sp),%d0               # subtract scale factor (undo scaling)
8923        cmp.l           %d0,(tbl_fdiv_ovfl2.b,%pc,%d1.w*4) # did the result overflow?
8924        blt.b           fdiv_no_ovfl            # no
8925        mov.l           (%sp)+,%d0              # yes; restore scale factor
8926
# fdiv_ovfl_tst: the divide overflowed. Record ovfl/aovfl/ainex in USER_FPSR
# and, unless OVFL or INEX is enabled (then fdiv_ovfl_ena builds the EXOP
# first), return the default overflow result from ovf_res().
#   in:   d0 = scale factor (only needed by fdiv_ovfl_ena),
#         fp0 = rounded scaled result
#   out:  fp0 = default overflow result loaded from the address that
#         ovf_res() leaves in a0
#   uses: d0, d1, a0
8927fdiv_ovfl_tst:
8928        or.l            &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
8929
8930        mov.b           FPCR_ENABLE(%a6),%d1
8931        andi.b          &0x13,%d1               # is OVFL or INEX enabled?
8932        bne.b           fdiv_ovfl_ena           # yes
8933
8934fdiv_ovfl_dis:
8935        btst            &neg_bit,FPSR_CC(%a6)   # is result negative?
8936        sne             %d1                     # set sign param accordingly
8937        mov.l           L_SCR3(%a6),%d0         # pass prec:rnd
8938        bsr.l           ovf_res                 # calculate default result
8939        or.b            %d0,FPSR_CC(%a6)        # set INF if applicable
8940        fmovm.x         (%a0),&0x80             # return default result in fp0
8941        rts
8942
# fdiv_ovfl_ena: OVFL or INEX is enabled, so build the EXOP in fp1 before the
# default result is created in fdiv_ovfl_dis.
#   in:   d0 = scale factor, fp0 = rounded scaled result
#   out:  fp1 = EXOP
#   uses: d1, FP_SCR0(%a6); d2 is saved and restored on the stack
# For sgl/dbl precision the divide is first redone in fdiv_ovfl_ena_sd, which
# re-enters at fdiv_ovfl_ena_cont. The EXOP exponent is the result exponent
# minus the scale factor minus the 0x6000 bias, truncated to 15 bits.
8943fdiv_ovfl_ena:
8944        mov.l           L_SCR3(%a6),%d1
8945        andi.b          &0xc0,%d1               # is precision extended?
8946        bne.b           fdiv_ovfl_ena_sd        # no, do sgl or dbl
8947
8948fdiv_ovfl_ena_cont:
8949        fmovm.x         &0x80,FP_SCR0(%a6)      # move result to stack
8950
8951        mov.l           %d2,-(%sp)              # save d2
8952        mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
8953        mov.w           %d1,%d2                 # make a copy
8954        andi.l          &0x7fff,%d1             # strip sign
8955        sub.l           %d0,%d1                 # subtract scale factor (undo scaling)
8956        subi.l          &0x6000,%d1             # subtract bias
8957        andi.w          &0x7fff,%d1             # clear sign bit
8958        andi.w          &0x8000,%d2             # keep old sign
8959        or.w            %d2,%d1                 # concat old sign,new exp
8960        mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
8961        mov.l           (%sp)+,%d2              # restore d2
8962        fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
8963        bra.b           fdiv_ovfl_dis           # now create the default result
8964
# fdiv_ovfl_ena_sd: sgl/dbl overflow with OVFL or INEX enabled. Redo the
# divide with only the rounding mode bits (0x30) of L_SCR3 in the FPCR, i.e.
# with the rounding precision field cleared, then rejoin fdiv_ovfl_ena_cont
# with the new result in fp0.
#   uses: d1
8965fdiv_ovfl_ena_sd:
8966        fmovm.x         FP_SCR1(%a6),&0x80      # load dst operand
8967
8968        mov.l           L_SCR3(%a6),%d1
8969        andi.b          &0x30,%d1               # keep rnd mode
8970        fmov.l          %d1,%fpcr               # set FPCR
8971
8972        fdiv.x          FP_SCR0(%a6),%fp0       # execute divide
8973
8974        fmov.l          &0x0,%fpcr              # clear FPCR
8975        bra.b           fdiv_ovfl_ena_cont
8976
# fdiv_unfl: the divide underflows. Perform the divide on the scaled operands
# with the FPCR set to rz_mode*0x10, merge the resulting FPSR into USER_FPSR,
# and return the default underflow result from unf_res(). When UNFL or INEX
# is enabled, fdiv_unfl_ena builds the EXOP first and then branches back to
# fdiv_unfl_dis.
#   in:   d0 = scale factor (only needed by fdiv_unfl_ena),
#         FP_SCR1(%a6) = scaled dst, FP_SCR0(%a6) = scaled src
#   out:  fp0 = default underflow result
#   uses: d0, d1, a0, FP_SCR0(%a6)
8977fdiv_unfl:
8978        bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8979
8980        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
8981
8982        fmov.l          &rz_mode*0x10,%fpcr     # set FPCR
8983        fmov.l          &0x0,%fpsr              # clear FPSR
8984
8985        fdiv.x          FP_SCR0(%a6),%fp0       # execute divide
8986
8987        fmov.l          %fpsr,%d1               # save status
8988        fmov.l          &0x0,%fpcr              # clear FPCR
8989
8990        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
8991
8992        mov.b           FPCR_ENABLE(%a6),%d1
8993        andi.b          &0x0b,%d1               # is UNFL or INEX enabled?
8994        bne.b           fdiv_unfl_ena           # yes
8995
8996fdiv_unfl_dis:
8997        fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
8998
8999        lea             FP_SCR0(%a6),%a0        # pass: result addr
9000        mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
9001        bsr.l           unf_res                 # calculate default result
9002        or.b            %d0,FPSR_CC(%a6)        # 'Z' may have been set
9003        fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
9004        rts
9005
9006#
9007# UNFL is enabled.
9008#
# fdiv_unfl_ena: UNFL or INEX is enabled, so build the EXOP in fp1 before the
# default result is created in fdiv_unfl_dis. fp0 is left untouched; the
# divide is redone in fp1.
#   in:   d0 = scale factor, FP_SCR1(%a6) = scaled dst, FP_SCR0(%a6) = scaled src
#   out:  fp1 = EXOP
#   uses: d1, FP_SCR0(%a6); d2 is saved and restored on the stack
# For sgl/dbl precision the FPCR is set up by fdiv_unfl_ena_sd, which
# re-enters at fdiv_unfl_ena_cont. The EXOP exponent is the result exponent
# minus the scale factor plus the 0x6000 bias, truncated to 15 bits.
9009fdiv_unfl_ena:
9010        fmovm.x         FP_SCR1(%a6),&0x40      # load dst op into fp1
9011
9012        mov.l           L_SCR3(%a6),%d1
9013        andi.b          &0xc0,%d1               # is precision extended?
9014        bne.b           fdiv_unfl_ena_sd        # no, sgl or dbl
9015
9016        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
9017
9018fdiv_unfl_ena_cont:
9019        fmov.l          &0x0,%fpsr              # clear FPSR
9020
9021        fdiv.x          FP_SCR0(%a6),%fp1       # execute divide
9022
9023        fmov.l          &0x0,%fpcr              # clear FPCR
9024
9025        fmovm.x         &0x40,FP_SCR0(%a6)      # save result to stack
9026        mov.l           %d2,-(%sp)              # save d2
9027        mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
9028        mov.l           %d1,%d2                 # make a copy
9029        andi.l          &0x7fff,%d1             # strip sign
9030        andi.w          &0x8000,%d2             # keep old sign
9031        sub.l           %d0,%d1                 # subtract scale factor (undo scaling)
9032        addi.l          &0x6000,%d1             # add bias
9033        andi.w          &0x7fff,%d1             # keep exponent within 15 bits
9034        or.w            %d2,%d1                 # concat old sign,new exp
9035        mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exp
9036        mov.l           (%sp)+,%d2              # restore d2
9037        fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
9038        bra.w           fdiv_unfl_dis           # now create the default result
9039
# fdiv_unfl_ena_sd: sgl/dbl underflow with UNFL or INEX enabled. Load the
# FPCR with only the rounding mode bits (0x30) of L_SCR3, i.e. with the
# rounding precision field cleared, then rejoin fdiv_unfl_ena_cont.
#   uses: d1
9040fdiv_unfl_ena_sd:
9041        mov.l           L_SCR3(%a6),%d1
9042        andi.b          &0x30,%d1               # use only rnd mode
9043        fmov.l          %d1,%fpcr               # set FPCR
9044
9045        bra.b           fdiv_unfl_ena_cont
9046
9047#
9048# the divide operation MAY underflow:
9049#
# fdiv_may_unfl: the scale factor sits exactly on the underflow limit, so the
# divide is performed on the scaled operands and |result| is compared with 1.
#   in:   d0 = scale factor, FP_SCR1(%a6) = scaled dst, FP_SCR0(%a6) = scaled src
#   out:  fp0 = rounded scaled result; control continues in fdiv_normal_exit
#         (no underflow) or fdiv_unfl (underflow)
#   uses: d1, fp1
9050fdiv_may_unfl:
9051        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
9052
9053        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
9054        fmov.l          &0x0,%fpsr              # clear FPSR
9055
9056        fdiv.x          FP_SCR0(%a6),%fp0       # execute divide
9057
9058        fmov.l          %fpsr,%d1               # save status
9059        fmov.l          &0x0,%fpcr              # clear FPCR
9060
9061        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
9062
9063        fabs.x          %fp0,%fp1               # make a copy of result
9064        fcmp.b          %fp1,&0x1               # compare |result| with 1.b
9065        fbgt.w          fdiv_normal_exit        # |result| > 1; no underflow occurred
9066        fblt.w          fdiv_unfl               # |result| < 1; underflow occurred
9067
9068#
9069# we still don't know if underflow occurred. result is ~ equal to 1. but,
9070# we don't know if the result was an underflow that rounded up to a 1
9071# or a normalized number that rounded down to a 1. so, redo the entire
9072# operation using RZ as the rounding mode to see what the pre-rounded
9073# result is. this case should be relatively rare.
9074#
9075        fmovm.x         FP_SCR1(%a6),&0x40      # load dst op into fp1
9076
9077        mov.l           L_SCR3(%a6),%d1
9078        andi.b          &0xc0,%d1               # keep rnd prec
9079        ori.b           &rz_mode*0x10,%d1       # insert RZ
9080
9081        fmov.l          %d1,%fpcr               # set FPCR
9082        fmov.l          &0x0,%fpsr              # clear FPSR
9083
9084        fdiv.x          FP_SCR0(%a6),%fp1       # execute divide
9085
9086        fmov.l          &0x0,%fpcr              # clear FPCR
9087        fabs.x          %fp1                    # make absolute value
9088        fcmp.b          %fp1,&0x1               # is |result| < 1.b?
9089        fbge.w          fdiv_normal_exit        # no; no underflow occurred
9090        bra.w           fdiv_unfl               # yes; underflow occurred
9091
9092############################################################################
9093
9094#
9095# Divide: inputs are not both normalized; what are they?
9096#
# fdiv_not_norm: dispatch through tbl_fdiv_op.
#   in:   d1 = (DTAG << 3) | STAG, as built in fdiv
# tbl_fdiv_op holds 16-bit offsets relative to its own start; the entry
# selected by d1 is fetched and then used as the displacement of the jump.
9097fdiv_not_norm:
9098        mov.w           (tbl_fdiv_op.b,%pc,%d1.w*2),%d1 # d1 = handler offset from table
9099        jmp             (tbl_fdiv_op.b,%pc,%d1.w*1)     # jump to tbl_fdiv_op + offset
9100
9101        swbeg           &48
9102tbl_fdiv_op:
9103        short           fdiv_norm       - tbl_fdiv_op # NORM / NORM
9104        short           fdiv_inf_load   - tbl_fdiv_op # NORM / ZERO
9105        short           fdiv_zero_load  - tbl_fdiv_op # NORM / INF
9106        short           fdiv_res_qnan   - tbl_fdiv_op # NORM / QNAN
9107        short           fdiv_norm       - tbl_fdiv_op # NORM / DENORM
9108        short           fdiv_res_snan   - tbl_fdiv_op # NORM / SNAN
9109        short           tbl_fdiv_op     - tbl_fdiv_op #
9110        short           tbl_fdiv_op     - tbl_fdiv_op #
9111
9112        short           fdiv_zero_load  - tbl_fdiv_op # ZERO / NORM
9113        short           fdiv_res_operr  - tbl_fdiv_op # ZERO / ZERO
9114        short           fdiv_zero_load  - tbl_fdiv_op # ZERO / INF
9115        short           fdiv_res_qnan   - tbl_fdiv_op # ZERO / QNAN
9116        short           fdiv_zero_load  - tbl_fdiv_op # ZERO / DENORM
9117        short           fdiv_res_snan   - tbl_fdiv_op # ZERO / SNAN
9118        short           tbl_fdiv_op     - tbl_fdiv_op #
9119        short           tbl_fdiv_op     - tbl_fdiv_op #
9120
9121        short           fdiv_inf_dst    - tbl_fdiv_op # INF / NORM
9122        short           fdiv_inf_dst    - tbl_fdiv_op # INF / ZERO
9123        short           fdiv_res_operr  - tbl_fdiv_op # INF / INF
9124        short           fdiv_res_qnan   - tbl_fdiv_op # INF / QNAN
9125        short           fdiv_inf_dst    - tbl_fdiv_op # INF / DENORM
9126        short           fdiv_res_snan   - tbl_fdiv_op # INF / SNAN
9127        short           tbl_fdiv_op     - tbl_fdiv_op #
9128        short           tbl_fdiv_op     - tbl_fdiv_op #
9129
9130        short           fdiv_res_qnan   - tbl_fdiv_op # QNAN / NORM
9131        short           fdiv_res_qnan   - tbl_fdiv_op # QNAN / ZERO
9132        short           fdiv_res_qnan   - tbl_fdiv_op # QNAN / INF
9133        short           fdiv_res_qnan   - tbl_fdiv_op # QNAN / QNAN
9134        short           fdiv_res_qnan   - tbl_fdiv_op # QNAN / DENORM
9135        short           fdiv_res_snan   - tbl_fdiv_op # QNAN / SNAN
9136        short           tbl_fdiv_op     - tbl_fdiv_op #
9137        short           tbl_fdiv_op     - tbl_fdiv_op #
9138
9139        short           fdiv_norm       - tbl_fdiv_op # DENORM / NORM
9140        short           fdiv_inf_load   - tbl_fdiv_op # DENORM / ZERO
9141        short           fdiv_zero_load  - tbl_fdiv_op # DENORM / INF
9142        short           fdiv_res_qnan   - tbl_fdiv_op # DENORM / QNAN
9143        short           fdiv_norm       - tbl_fdiv_op # DENORM / DENORM
9144        short           fdiv_res_snan   - tbl_fdiv_op # DENORM / SNAN
9145        short           tbl_fdiv_op     - tbl_fdiv_op #
9146        short           tbl_fdiv_op     - tbl_fdiv_op #
9147
9148        short           fdiv_res_snan   - tbl_fdiv_op # SNAN / NORM
9149        short           fdiv_res_snan   - tbl_fdiv_op # SNAN / ZERO
9150        short           fdiv_res_snan   - tbl_fdiv_op # SNAN / INF
9151        short           fdiv_res_snan   - tbl_fdiv_op # SNAN / QNAN
9152        short           fdiv_res_snan   - tbl_fdiv_op # SNAN / DENORM
9153        short           fdiv_res_snan   - tbl_fdiv_op # SNAN / SNAN
9154        short           tbl_fdiv_op     - tbl_fdiv_op #
9155        short           tbl_fdiv_op     - tbl_fdiv_op #
9156
# Local stubs reached through tbl_fdiv_op: each forwards one NAN/OPERR case
# to the shared result routine with a long branch.
# NOTE(review): presumably these exist so the 16-bit table offsets only
# have to reach a nearby label -- confirm.
9157fdiv_res_qnan:
9158        bra.l           res_qnan                # a QNAN operand is involved
9159fdiv_res_snan:
9160        bra.l           res_snan                # an SNAN operand is involved
9161fdiv_res_operr:
9162        bra.l           res_operr               # ZERO/ZERO or INF/INF
9163
# fdiv_zero_load: the quotient is a ZERO.
# The result sign is sign(src) XOR sign(dst), taken from the top bit of
# SRC_EX(a0) and DST_EX(a1). fp0 receives the signed ZERO and FPSR_CC is
# overwritten with Z (plus N for a negative ZERO).
9164        global          fdiv_zero_load          # global for fsgldiv
9165fdiv_zero_load:
9166        mov.b           SRC_EX(%a0),%d0         # result sign is exclusive
9167        mov.b           DST_EX(%a1),%d1         # or of input signs.
9168        eor.b           %d0,%d1                 # bit 7 = result sign
9169        bpl.b           fdiv_zero_load_p        # result is positive
9170        fmov.s          &0x80000000,%fp0        # load a -ZERO
9171        mov.b           &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
9172        rts
9173fdiv_zero_load_p:
9174        fmov.s          &0x00000000,%fp0        # load a +ZERO
9175        mov.b           &z_bmask,FPSR_CC(%a6)   # set Z
9176        rts
9177
9178#
9179# The destination was In Range and the source was a ZERO. The result,
9180# therefore, is an INF w/ the proper sign.
9181# So, determine the sign and return a new INF (w/ the j-bit cleared).
# This is a divide-by-zero, so DZ and ADZ are also recorded in USER_FPSR.
9182#
9183        global          fdiv_inf_load           # global for fsgldiv
9184fdiv_inf_load:
9185        ori.w           &dz_mask+adz_mask,2+USER_FPSR(%a6) # set DZ/ADZ (low word)
9186        mov.b           SRC_EX(%a0),%d0         # load both signs
9187        mov.b           DST_EX(%a1),%d1
9188        eor.b           %d0,%d1                 # bit 7 = result sign
9189        bpl.b           fdiv_inf_load_p         # result is positive
9190        fmov.s          &0xff800000,%fp0        # make result -INF
9191        mov.b           &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
9192        rts
9193fdiv_inf_load_p:
9194        fmov.s          &0x7f800000,%fp0        # make result +INF
9195        mov.b           &inf_bmask,FPSR_CC(%a6) # set INF
9196        rts
9197
9198#
9199# The destination was an INF w/ an In Range or ZERO source, the result is
9200# an INF w/ the proper sign.
9201# The 68881/882 returns the destination INF w/ the new sign(if the j-bit of the
9202# dst INF is set, then the j-bit of the result INF is also set).
# Hence the destination operand itself is reloaded and only its sign is
# rewritten (fabs, then fneg when the result must be negative).
9203#
9204        global          fdiv_inf_dst            # global for fsgldiv
9205fdiv_inf_dst:
9206        mov.b           DST_EX(%a1),%d0         # load both signs
9207        mov.b           SRC_EX(%a0),%d1
9208        eor.b           %d0,%d1                 # bit 7 = result sign
9209        bpl.b           fdiv_inf_dst_p          # result is positive
9210
9211        fmovm.x         DST(%a1),&0x80          # return result in fp0
9212        fabs.x          %fp0                    # clear sign bit
9213        fneg.x          %fp0                    # set sign bit
9214        mov.b           &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/NEG
9215        rts
9216
9217fdiv_inf_dst_p:
9218        fmovm.x         DST(%a1),&0x80          # return result in fp0
9219        fabs.x          %fp0                    # return positive INF
9220        mov.b           &inf_bmask,FPSR_CC(%a6) # set INF
9221        rts
9222
9223#########################################################################
9224# XDEF **************************************************************** #
9225#       fneg(): emulates the fneg instruction                           #
9226#       fsneg(): emulates the fsneg instruction                         #
9227#       fdneg(): emulates the fdneg instruction                         #
9228#                                                                       #
9229# XREF **************************************************************** #
9230#       norm() - normalize a denorm to provide EXOP                     #
9231#       scale_to_zero_src() - scale sgl/dbl source exponent             #
9232#       ovf_res() - return default overflow result                      #
9233#       unf_res() - return default underflow result                     #
9234#       res_qnan_1op() - return QNAN result                             #
9235#       res_snan_1op() - return SNAN result                             #
9236#                                                                       #
9237# INPUT *************************************************************** #
9238#       a0 = pointer to extended precision source operand               #
9239#       d0 = rnd prec,mode                                              #
9240#                                                                       #
9241# OUTPUT ************************************************************** #
9242#       fp0 = result                                                    #
9243#       fp1 = EXOP (if exception occurred)                              #
9244#                                                                       #
9245# ALGORITHM *********************************************************** #
9246#       Handle NANs, zeroes, and infinities as special cases. Separate  #
9247# norms/denorms into ext/sgl/dbl precisions. Extended precision can be  #
9248# emulated by simply setting sign bit. Sgl/dbl operands must be scaled  #
9249# and an actual fneg performed to see if overflow/underflow would have  #
9250# occurred. If so, return default underflow/overflow result. Else,      #
9251# scale the result exponent and return result. FPSR gets set based on   #
9252# the result value.                                                     #
9253#                                                                       #
9254#########################################################################
9255
# Entry points. fsneg/fdneg replace the rounding-precision field of d0
# with single/double (the 0x30 mask keeps only the rounding-mode bits) and
# then share the fneg body.
9256        global          fsneg
9257fsneg:
9258        andi.b          &0x30,%d0               # clear rnd prec
9259        ori.b           &s_mode*0x10,%d0        # insert sgl precision
9260        bra.b           fneg
9261
9262        global          fdneg
9263fdneg:
9264        andi.b          &0x30,%d0               # clear rnd prec
9265        ori.b           &d_mode*0x10,%d0        # insert dbl prec
# fdneg falls through into fneg.
9266
9267        global          fneg
9268fneg:
9269        mov.l           %d0,L_SCR3(%a6)         # store rnd info
9270        mov.b           STAG(%a6),%d1           # fetch source operand tag
9271        bne.w           fneg_not_norm           # optimize on non-norm input
9272
9273#
9274# NEGATE SIGN : norms and denorms ONLY!
9275#
9276fneg_norm:
9277        andi.b          &0xc0,%d0               # is precision extended?
9278        bne.w           fneg_not_ext            # no; go handle sgl or dbl
9279
9280#
9281# precision selected is extended. so...we can not get an underflow
9282# or overflow because of rounding to the correct precision. so...
9283# skip the scaling and unscaling...
# the result is built in FP_SCR0: the mantissa is copied unchanged and only
# the sign bit of the sign/exponent word is flipped. FPSR_CC is written
# only when the negated value is negative.
9284#
9285        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6) # copy mantissa (hi)
9286        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6) # copy mantissa (lo)
9287        mov.w           SRC_EX(%a0),%d0         # fetch sign,exponent
9288        eori.w          &0x8000,%d0             # negate sign
9289        bpl.b           fneg_norm_load          # sign is positive
9290        mov.b           &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
9291fneg_norm_load:
9292        mov.w           %d0,FP_SCR0_EX(%a6)     # insert new sign,exponent
9293        fmovm.x         FP_SCR0(%a6),&0x80      # return result in fp0
9294        rts
9295
9296#
9297# for an extended precision DENORM, the UNFL exception bit is set;
9298# the accrued bit is NOT set in this instance (no inexactness!)
# the negated denorm itself is returned in fp0; an EXOP is produced only
# when UNFL is enabled in the FPCR.
9299#
9300fneg_denorm:
9301        andi.b          &0xc0,%d0               # is precision extended?
9302        bne.b           fneg_not_ext            # no; go handle sgl or dbl
9303
9304        bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
9305
9306        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6) # copy mantissa (hi)
9307        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6) # copy mantissa (lo)
9308        mov.w           SRC_EX(%a0),%d0         # fetch sign,exponent
9309        eori.w          &0x8000,%d0             # negate sign
9310        bpl.b           fneg_denorm_done        # result is positive
9311        mov.b           &neg_bmask,FPSR_CC(%a6) # result is negative; set 'N' ccode bit
9312fneg_denorm_done:
9313        mov.w           %d0,FP_SCR0_EX(%a6)     # insert new sign,exponent
9314        fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
9315
9316        btst            &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
9317        bne.b           fneg_ext_unfl_ena       # yes
9318        rts
9319
9320#
9321# the input is an extended DENORM and underflow is enabled in the FPCR.
9322# normalize the mantissa and add the bias of 0x6000 to the resulting negative
9323# exponent and insert back into the operand.
# FP_SCR0 already holds the negated denorm (stored by fneg_denorm); it is
# normalized in place and the rebiased copy is returned as the EXOP in fp1.
9324#
9325fneg_ext_unfl_ena:
9326        lea             FP_SCR0(%a6),%a0        # pass: ptr to operand
9327        bsr.l           norm                    # normalize result
9328        neg.w           %d0                     # new exponent = -(shft val)
9329        addi.w          &0x6000,%d0             # add new bias to exponent
9330        mov.w           FP_SCR0_EX(%a6),%d1     # fetch old sign,exp
9331        andi.w          &0x8000,%d1             # keep old sign
9332        andi.w          &0x7fff,%d0             # clear sign position
9333        or.w            %d1,%d0                 # concat old sign, new exponent
9334        mov.w           %d0,FP_SCR0_EX(%a6)     # insert new exponent
9335        fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
9336        rts
9337
9338#
9339# operand is either single or double
# d0 holds only the precision bits here (masked with 0xc0 by the caller).
9340#
9341fneg_not_ext:
9342        cmpi.b          %d0,&s_mode*0x10        # separate sgl/dbl prec
9343        bne.b           fneg_dbl
9344
9345#
9346# operand is to be rounded to single precision
# the operand is copied to FP_SCR0 and scaled; the scale factor left in d0
# is then compared against the single-precision exponent limits.
# NOTE(review): assumes scale_to_zero_src() rescales FP_SCR0 and returns
# the scale factor in d0 -- confirm against its definition.
9347#
9348fneg_sgl:
9349        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
9350        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
9351        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
9352        bsr.l           scale_to_zero_src       # calculate scale factor
9353
9354        cmpi.l          %d0,&0x3fff-0x3f80      # will move in underflow?
9355        bge.w           fneg_sd_unfl            # yes; go handle underflow
9356        cmpi.l          %d0,&0x3fff-0x407e      # will move in overflow?
9357        beq.w           fneg_sd_may_ovfl        # maybe; go check
9358        blt.w           fneg_sd_ovfl            # yes; go handle overflow
# otherwise fall through into fneg_sd_normal.
9359
9360#
9361# operand will NOT overflow or underflow when moved in to the fp reg file
# perform the fneg on the scaled operand under the user's rounding
# precision/mode, then undo the scaling on the result exponent.
9362#
9363fneg_sd_normal:
9364        fmov.l          &0x0,%fpsr              # clear FPSR
9365        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
9366
9367        fneg.x          FP_SCR0(%a6),%fp0       # perform negation
9368
9369        fmov.l          %fpsr,%d1               # save FPSR
9370        fmov.l          &0x0,%fpcr              # clear FPCR
9371
9372        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
9373
# common exit (also used by fneg_sd_may_ovfl): d0 must still hold the
# scale factor; d2 is used as scratch and preserved on the stack.
9374fneg_sd_normal_exit:
9375        mov.l           %d2,-(%sp)              # save d2
9376        fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
9377        mov.w           FP_SCR0_EX(%a6),%d1     # load sgn,exp
9378        mov.w           %d1,%d2                 # make a copy
9379        andi.l          &0x7fff,%d1             # strip sign
9380        sub.l           %d0,%d1                 # subtract scale factor
9381        andi.w          &0x8000,%d2             # keep old sign
9382        or.w            %d1,%d2                 # concat old sign,new exp
9383        mov.w           %d2,FP_SCR0_EX(%a6)     # insert new exponent
9384        mov.l           (%sp)+,%d2              # restore d2
9385        fmovm.x         FP_SCR0(%a6),&0x80      # return result in fp0
9386        rts
9387
9388#
9389# operand is to be rounded to double precision
# same flow as fneg_sgl, but the scale factor is compared against the
# double-precision exponent limits.
9390#
9391fneg_dbl:
9392        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
9393        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
9394        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
9395        bsr.l           scale_to_zero_src       # calculate scale factor
9396
9397        cmpi.l          %d0,&0x3fff-0x3c00      # will move in underflow?
9398        bge.b           fneg_sd_unfl            # yes; go handle underflow
9399        cmpi.l          %d0,&0x3fff-0x43fe      # will move in overflow?
9400        beq.w           fneg_sd_may_ovfl        # maybe; go check
9401        blt.w           fneg_sd_ovfl            # yes; go handle overflow
9402        bra.w           fneg_sd_normal          # no; go handle normalized op
9403
9404#
9405# operand WILL underflow when moved in to the fp register file
# the sign of the scaled operand in FP_SCR0 is flipped directly in memory;
# no fneg instruction is executed on this path.
9406#
9407fneg_sd_unfl:
9408        bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
9409
9410        eori.b          &0x80,FP_SCR0_EX(%a6)   # negate sign
9411        bpl.b           fneg_sd_unfl_tst        # result is positive
9412        bset            &neg_bit,FPSR_CC(%a6)   # set 'N' ccode bit
9413
9414# if underflow or inexact is enabled, go calculate EXOP first.
9415fneg_sd_unfl_tst:
9416        mov.b           FPCR_ENABLE(%a6),%d1
9417        andi.b          &0x0b,%d1               # is UNFL or INEX enabled?
9418        bne.b           fneg_sd_unfl_ena        # yes
9419
# default result: unf_res() works on FP_SCR0 in place and returns ccode
# bits in d0, which are merged into FPSR_CC.
9420fneg_sd_unfl_dis:
9421        lea             FP_SCR0(%a6),%a0        # pass: result addr
9422        mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
9423        bsr.l           unf_res                 # calculate default result
9424        or.b            %d0,FPSR_CC(%a6)        # unf_res may have set 'Z'
9425        fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
9426        rts
9427
9428#
9429# operand will underflow AND underflow is enabled.
9430# Therefore, we must return the result rounded to extended precision.
# the EXOP is assembled in FP_SCR1 so that FP_SCR0 stays intact for the
# default-result calculation in fneg_sd_unfl_dis, which runs afterwards.
9431#
9432fneg_sd_unfl_ena:
9433        mov.l           FP_SCR0_HI(%a6),FP_SCR1_HI(%a6) # copy mantissa (hi)
9434        mov.l           FP_SCR0_LO(%a6),FP_SCR1_LO(%a6) # copy mantissa (lo)
9435        mov.w           FP_SCR0_EX(%a6),%d1     # load current exponent
9436
9437        mov.l           %d2,-(%sp)              # save d2
9438        mov.l           %d1,%d2                 # make a copy
9439        andi.l          &0x7fff,%d1             # strip sign
9440        andi.w          &0x8000,%d2             # keep old sign
9441        sub.l           %d0,%d1                 # subtract scale factor
9442        addi.l          &0x6000,%d1             # add new bias
9443        andi.w          &0x7fff,%d1             # clear sign position
9444        or.w            %d2,%d1                 # concat new sign,new exp
9445        mov.w           %d1,FP_SCR1_EX(%a6)     # insert new exp
9446        fmovm.x         FP_SCR1(%a6),&0x40      # return EXOP in fp1
9447        mov.l           (%sp)+,%d2              # restore d2
9448        bra.b           fneg_sd_unfl_dis        # now build the default result
9449
9450#
9451# operand WILL overflow.
# the fneg is still executed on the scaled operand so that INEX2 and N are
# captured from the FPSR before the overflow result is produced.
9452#
9453fneg_sd_ovfl:
9454        fmov.l          &0x0,%fpsr              # clear FPSR
9455        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
9456
9457        fneg.x          FP_SCR0(%a6),%fp0       # perform negation
9458
9459        fmov.l          &0x0,%fpcr              # clear FPCR
9460        fmov.l          %fpsr,%d1               # save FPSR
9461
9462        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
9463
# also entered from fneg_sd_may_ovfl once an overflow has been confirmed.
9464fneg_sd_ovfl_tst:
9465        or.l            &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
9466
9467        mov.b           FPCR_ENABLE(%a6),%d1
9468        andi.b          &0x13,%d1               # is OVFL or INEX enabled?
9469        bne.b           fneg_sd_ovfl_ena        # yes
9470
9471#
9472# OVFL is not enabled; therefore, we must create the default result by
9473# calling ovf_res().
# ovf_res() hands back ccode bits in d0 and the result is loaded through a0.
9474#
9475fneg_sd_ovfl_dis:
9476        btst            &neg_bit,FPSR_CC(%a6)   # is result negative?
9477        sne             %d1                     # set sign param accordingly
9478        mov.l           L_SCR3(%a6),%d0         # pass: prec,mode
9479        bsr.l           ovf_res                 # calculate default result
9480        or.b            %d0,FPSR_CC(%a6)        # set INF,N if applicable
9481        fmovm.x         (%a0),&0x80             # return default result in fp0
9482        rts
9483
9484#
9485# OVFL is enabled.
9486# the INEX2 bit has already been updated by the round to the correct precision.
9487# now, round to extended(and don't alter the FPSR).
# the EXOP exponent is the unscaled exponent minus the 0x6000 bias; it is
# written back into FP_SCR0 and loaded into fp1 before the default result
# is built in fneg_sd_ovfl_dis.
9488#
9489fneg_sd_ovfl_ena:
9490        mov.l           %d2,-(%sp)              # save d2
9491        mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
9492        mov.l           %d1,%d2                 # make a copy
9493        andi.l          &0x7fff,%d1             # strip sign
9494        andi.w          &0x8000,%d2             # keep old sign
9495        sub.l           %d0,%d1                 # subtract scale factor
9496        subi.l          &0x6000,%d1             # subtract bias
9497        andi.w          &0x7fff,%d1             # clear sign position
9498        or.w            %d2,%d1                 # concat sign,exp
9499        mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
9500        fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
9501        mov.l           (%sp)+,%d2              # restore d2
9502        bra.b           fneg_sd_ovfl_dis        # now build the default result
9503
9504#
9505# the move in MAY overflow. so...
# perform the fneg on the scaled operand and inspect the magnitude of the
# rounded result: reaching 2.0 means rounding carried into the exponent and
# the unscaled result overflows.
9506#
9507fneg_sd_may_ovfl:
9508        fmov.l          &0x0,%fpsr              # clear FPSR
9509        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
9510
9511        fneg.x          FP_SCR0(%a6),%fp0       # perform negation
9512
9513        fmov.l          %fpsr,%d1               # save status
9514        fmov.l          &0x0,%fpcr              # clear FPCR
9515
9516        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
9517
9518        fabs.x          %fp0,%fp1               # make a copy of result
9519        fcmp.b          %fp1,&0x2               # is |result| >= 2.b?
9520        fbge.w          fneg_sd_ovfl_tst        # yes; overflow has occurred
9521
9522# no, it didn't overflow; we have correct result
9523        bra.w           fneg_sd_normal_exit
9524
9525##########################################################################
9526
9527#
9528# input is not normalized; what is it?
# d1 = source operand tag (loaded from STAG at the fneg entry).
9529#
9530fneg_not_norm:
9531        cmpi.b          %d1,&DENORM             # weed out DENORM
9532        beq.w           fneg_denorm
9533        cmpi.b          %d1,&SNAN               # weed out SNAN
9534        beq.l           res_snan_1op
9535        cmpi.b          %d1,&QNAN               # weed out QNAN
9536        beq.l           res_qnan_1op
9537
9538#
9539# do the fneg; at this point, only possible ops are ZERO and INF.
9540# use fneg to determine ccodes.
9541# prec:mode should be zero at this point but it won't affect answer anyways.
9542#
9543        fneg.x          SRC_EX(%a0),%fp0        # do fneg
9544        fmov.l          %fpsr,%d0               # fetch resulting FPSR
9545        rol.l           &0x8,%d0                # put ccodes in lo byte
9546        mov.b           %d0,FPSR_CC(%a6)        # insert correct ccodes
9547        rts
9548
9549#########################################################################
9550# XDEF **************************************************************** #
9551#       ftst(): emulates the ftst instruction                           #
9552#                                                                       #
9553# XREF **************************************************************** #
9554#       res_{s,q}nan_1op() - set NAN result for monadic instruction     #
9555#                                                                       #
9556# INPUT *************************************************************** #
9557#       a0 = pointer to extended precision source operand               #
9558#                                                                       #
9559# OUTPUT ************************************************************** #
9560#       none                                                            #
9561#                                                                       #
9562# ALGORITHM *********************************************************** #
9563#       Check the source operand tag (STAG) and set the FPSR according  #
9564# to the operand type and sign.                                         #
9565#                                                                       #
9566#########################################################################
9567
# ftst: classify the source operand by its tag and write the matching
# condition-code bits to FPSR_CC. No floating-point result is produced.
9568        global          ftst
9569ftst:
9570        mov.b           STAG(%a6),%d1           # fetch source operand tag
9571        bne.b           ftst_not_norm           # optimize on non-norm input
9572
9573#
9574# Norm:
# only 'N' is ever written here; a positive operand returns without
# touching FPSR_CC.
# NOTE(review): presumably FPSR_CC is already clear on entry -- confirm.
9575#
9576ftst_norm:
9577        tst.b           SRC_EX(%a0)             # is operand negative?
9578        bmi.b           ftst_norm_m             # yes
9579        rts
9580ftst_norm_m:
9581        mov.b           &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
9582        rts
9583
9584#
9585# input is not normalized; what is it?
# any tag not matched below falls through to the DENORM case.
9586#
9587ftst_not_norm:
9588        cmpi.b          %d1,&ZERO               # weed out ZERO
9589        beq.b           ftst_zero
9590        cmpi.b          %d1,&INF                # weed out INF
9591        beq.b           ftst_inf
9592        cmpi.b          %d1,&SNAN               # weed out SNAN
9593        beq.l           res_snan_1op
9594        cmpi.b          %d1,&QNAN               # weed out QNAN
9595        beq.l           res_qnan_1op
9596
9597#
9598# Denorm:
# handled exactly like a NORM: only the sign is reported.
9599#
9600ftst_denorm:
9601        tst.b           SRC_EX(%a0)             # is operand negative?
9602        bmi.b           ftst_denorm_m           # yes
9603        rts
9604ftst_denorm_m:
9605        mov.b           &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
9606        rts
9607
9608#
9609# Infinity:
9610#
9611ftst_inf:
9612        tst.b           SRC_EX(%a0)             # is operand negative?
9613        bmi.b           ftst_inf_m              # yes
9614ftst_inf_p:
9615        mov.b           &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
9616        rts
9617ftst_inf_m:
9618        mov.b           &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'I','N' ccode bits
9619        rts
9620
9621#
9622# Zero:
9623#
9624ftst_zero:
9625        tst.b           SRC_EX(%a0)             # is operand negative?
9626        bmi.b           ftst_zero_m             # yes
9627ftst_zero_p:
9628        mov.b           &z_bmask,FPSR_CC(%a6)   # set 'Z' ccode bit
9629        rts
9630ftst_zero_m:
9631        mov.b           &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
9632        rts
9633
9634#########################################################################
9635# XDEF **************************************************************** #
9636#       fint(): emulates the fint instruction                           #
9637#                                                                       #
9638# XREF **************************************************************** #
9639#       res_{s,q}nan_1op() - set NAN result for monadic operation       #
9640#                                                                       #
9641# INPUT *************************************************************** #
9642#       a0 = pointer to extended precision source operand               #
9643#       d0 = round precision/mode                                       #
9644#                                                                       #
9645# OUTPUT ************************************************************** #
9646#       fp0 = result                                                    #
9647#                                                                       #
9648# ALGORITHM *********************************************************** #
9649#       Separate according to operand type. Unnorms don't pass through  #
9650# here. For norms, load the rounding mode/prec, execute a "fint", then  #
9651# store the resulting FPSR bits.                                        #
9652#       For denorms, force the j-bit to a one and do the same as for    #
9653# norms. Denorms are so low that the answer will either be a zero or a  #
9654# one.                                                                  #
9655#       For zeroes/infs/NANs, return the same while setting the FPSR    #
9656# as appropriate.                                                       #
9657#                                                                       #
9658#########################################################################
9659
# fint: dispatch on the source operand tag. NORMs (and DENORMs, after being
# turned into a tiny NORM) go through a real fint instruction; ZERO and INF
# are returned directly with the matching condition codes.
9660        global          fint
9661fint:
9662        mov.b           STAG(%a6),%d1           # fetch source operand tag
9663        bne.b           fint_not_norm           # optimize on non-norm input
9664
9665#
9666# Norm:
# the user's rounding mode is kept, the precision is forced to extended,
# and every FPSR bit produced by the fint is OR-ed into USER_FPSR.
9667#
9668fint_norm:
9669        andi.b          &0x30,%d0               # set prec = ext
9670
9671        fmov.l          %d0,%fpcr               # set FPCR
9672        fmov.l          &0x0,%fpsr              # clear FPSR
9673
9674        fint.x          SRC(%a0),%fp0           # execute fint
9675
9676        fmov.l          &0x0,%fpcr              # clear FPCR
9677        fmov.l          %fpsr,%d0               # save FPSR
9678        or.l            %d0,USER_FPSR(%a6)      # set exception bits
9679
9680        rts
9681
9682#
9683# input is not normalized; what is it?
# anything that is not ZERO, INF, DENORM or SNAN is treated as a QNAN.
9684#
9685fint_not_norm:
9686        cmpi.b          %d1,&ZERO               # weed out ZERO
9687        beq.b           fint_zero
9688        cmpi.b          %d1,&INF                # weed out INF
9689        beq.b           fint_inf
9690        cmpi.b          %d1,&DENORM             # weed out DENORM
9691        beq.b           fint_denorm
9692        cmpi.b          %d1,&SNAN               # weed out SNAN
9693        beq.l           res_snan_1op
9694        bra.l           res_qnan_1op            # weed out QNAN
9695
9696#
9697# Denorm:
9698#
9699# for DENORMs, the result will be either (+/-)ZERO or (+/-)1.
9700# also, the INEX2 and AINEX exception bits will be set.
9701# so, we could either set these manually or force the DENORM
9702# to a very small NORM and ship it to the NORM routine.
9703# I do the latter.
# only the sign/exponent word and the top mantissa byte of FP_SCR0 are
# written here; the remaining mantissa bytes keep whatever they held.
9704#
9705fint_denorm:
9706        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
9707        mov.b           &0x80,FP_SCR0_HI(%a6)   # force DENORM ==> small NORM
9708        lea             FP_SCR0(%a6),%a0        # operand is now the FP_SCR0 copy
9709        bra.b           fint_norm
9710
9711#
9712# Zero:
9713#
9714fint_zero:
9715        tst.b           SRC_EX(%a0)             # is ZERO negative?
9716        bmi.b           fint_zero_m             # yes
9717fint_zero_p:
9718        fmov.s          &0x00000000,%fp0        # return +ZERO in fp0
9719        mov.b           &z_bmask,FPSR_CC(%a6)   # set 'Z' ccode bit
9720        rts
9721fint_zero_m:
9722        fmov.s          &0x80000000,%fp0        # return -ZERO in fp0
9723        mov.b           &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
9724        rts
9725
9726#
9727# Infinity:
# the source INF itself is returned unchanged in fp0.
9728#
9729fint_inf:
9730        fmovm.x         SRC(%a0),&0x80          # return result in fp0
9731        tst.b           SRC_EX(%a0)             # is INF negative?
9732        bmi.b           fint_inf_m              # yes
9733fint_inf_p:
9734        mov.b           &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
9735        rts
9736fint_inf_m:
9737        mov.b           &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
9738        rts
9739
9740#########################################################################
9741# XDEF **************************************************************** #
9742#       fintrz(): emulates the fintrz instruction                       #
9743#                                                                       #
9744# XREF **************************************************************** #
9745#       res_{s,q}nan_1op() - set NAN result for monadic operation       #
9746#                                                                       #
9747# INPUT *************************************************************** #
9748#       a0 = pointer to extended precision source operand               #
9749#       d0 = round precision/mode                                       #
9750#                                                                       #
9751# OUTPUT ************************************************************** #
9752#       fp0 = result                                                    #
9753#                                                                       #
9754# ALGORITHM *********************************************************** #
9755#       Separate according to operand type. Unnorms don't pass through  #
9756# here. For norms, load the rounding mode/prec, execute a "fintrz",     #
9757# then store the resulting FPSR bits.                                   #
9758#       For denorms, force the j-bit to a one and do the same as for    #
9759# norms. Denorms are so low that, truncating toward zero, the answer    #
9760# will always be a zero.                                                #
9761#       For zeroes/infs/NANs, return the same while setting the FPSR    #
9762# as appropriate.                                                       #
9763#                                                                       #
9764#########################################################################
9765
9766        global          fintrz
9767fintrz:
9768        mov.b           STAG(%a6),%d1
9769        bne.b           fintrz_not_norm         # optimize on non-norm input
9770
9771#
9772# Norm:
9773#
9774fintrz_norm:
9775        fmov.l          &0x0,%fpsr              # clear FPSR
9776
9777        fintrz.x        SRC(%a0),%fp0           # execute fintrz
9778
9779        fmov.l          %fpsr,%d0               # save FPSR
9780        or.l            %d0,USER_FPSR(%a6)      # set exception bits
9781
9782        rts
9783
9784#
9785# input is not normalized; what is it?
9786#
9787fintrz_not_norm:
9788        cmpi.b          %d1,&ZERO               # weed out ZERO
9789        beq.b           fintrz_zero
9790        cmpi.b          %d1,&INF                # weed out INF
9791        beq.b           fintrz_inf
9792        cmpi.b          %d1,&DENORM             # weed out DENORM
9793        beq.b           fintrz_denorm
9794        cmpi.b          %d1,&SNAN               # weed out SNAN
9795        beq.l           res_snan_1op
9796        bra.l           res_qnan_1op            # weed out QNAN
9797
9798#
9799# Denorm:
9800#
9801# for DENORMs, the result will be (+/-)ZERO.
9802# also, the INEX2 and AINEX exception bits will be set.
9803# so, we could either set these manually or force the DENORM
9804# to a very small NORM and ship it to the NORM routine.
9805# I do the latter.
9806#
9807fintrz_denorm:
9808        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
9809        mov.b           &0x80,FP_SCR0_HI(%a6)   # force DENORM ==> small NORM
9810        lea             FP_SCR0(%a6),%a0
9811        bra.b           fintrz_norm
9812
9813#
9814# Zero:
9815#
9816fintrz_zero:
9817        tst.b           SRC_EX(%a0)             # is ZERO negative?
9818        bmi.b           fintrz_zero_m           # yes
9819fintrz_zero_p:
9820        fmov.s          &0x00000000,%fp0        # return +ZERO in fp0
9821        mov.b           &z_bmask,FPSR_CC(%a6)   # set 'Z' ccode bit
9822        rts
9823fintrz_zero_m:
9824        fmov.s          &0x80000000,%fp0        # return -ZERO in fp0
9825        mov.b           &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
9826        rts
9827
9828#
9829# Infinity:
9830#
9831fintrz_inf:
9832        fmovm.x         SRC(%a0),&0x80          # return result in fp0
9833        tst.b           SRC_EX(%a0)             # is INF negative?
9834        bmi.b           fintrz_inf_m            # yes
9835fintrz_inf_p:
9836        mov.b           &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
9837        rts
9838fintrz_inf_m:
9839        mov.b           &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
9840        rts
9841
9842#########################################################################
9843# XDEF **************************************************************** #
9844#       fabs():  emulates the fabs instruction                          #
9845#       fsabs(): emulates the fsabs instruction                         #
9846#       fdabs(): emulates the fdabs instruction                         #
9847#                                                                       #
9848# XREF **************************************************************** #
9849#       norm() - normalize denorm mantissa to provide EXOP              #
9850#       scale_to_zero_src() - make exponent = 0; get scale factor       #
9851#       unf_res() - calculate underflow result                          #
9852#       ovf_res() - calculate overflow result                           #
9853#       res_{s,q}nan_1op() - set NAN result for monadic operation       #
9854#                                                                       #
9855# INPUT *************************************************************** #
9856#       a0 = pointer to extended precision source operand               #
9857#       d0 = rnd precision/mode                                         #
9858#                                                                       #
9859# OUTPUT ************************************************************** #
9860#       fp0 = result                                                    #
9861#       fp1 = EXOP (if exception occurred)                              #
9862#                                                                       #
9863# ALGORITHM *********************************************************** #
9864#       Handle NANs, infinities, and zeroes as special cases. Divide    #
9865# norms into extended, single, and double precision.                    #
9866#       Simply clear sign for extended precision norm. Ext prec denorm  #
9867# gets an EXOP created for it since it's an underflow.                  #
9868#       Double and single precision can overflow and underflow. First,  #
9869# scale the operand such that the exponent is zero. Perform an "fabs"   #
9870# using the correct rnd mode/prec. Check to see if the original         #
9871# exponent would take an exception. If so, use unf_res() or ovf_res()   #
9872# to calculate the default result. Also, create the EXOP for the        #
9873# exceptional case. If no exception should occur, insert the correct    #
9874# result exponent and return.                                           #
9875#       Unnorms don't pass through here.                                #
9876#                                                                       #
9877#########################################################################
9878
9879        global          fsabs
9880fsabs:
9881        andi.b          &0x30,%d0               # clear rnd prec (keeps bits 5:4 only)
9882        ori.b           &s_mode*0x10,%d0        # insert sgl precision
9883        bra.b           fabs
9884
9885        global          fdabs
9886fdabs:
9887        andi.b          &0x30,%d0               # clear rnd prec (keeps bits 5:4 only)
9888        ori.b           &d_mode*0x10,%d0        # insert dbl precision
9889# (fdabs falls through into fabs)
9890        global          fabs
9891fabs:
9892        mov.l           %d0,L_SCR3(%a6)         # store rnd info
9893        mov.b           STAG(%a6),%d1           # d1 = source operand type tag
9894        bne.w           fabs_not_norm           # optimize on non-norm input
9895
9896#
9897# ABSOLUTE VALUE: norms and denorms ONLY!
9898#
9899fabs_norm:
9900        andi.b          &0xc0,%d0               # is precision extended?
9901        bne.b           fabs_not_ext            # no; go handle sgl or dbl
9902
9903#
9904# precision selected is extended. so...we can not get an underflow
9905# or overflow because of rounding to the correct precision. so...
9906# skip the scaling and unscaling...
9907#
9908        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
9909        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
9910        mov.w           SRC_EX(%a0),%d1
9911        bclr            &15,%d1                 # force absolute value
9912        mov.w           %d1,FP_SCR0_EX(%a6)     # insert exponent
9913        fmovm.x         FP_SCR0(%a6),&0x80      # return result in fp0
9914        rts
9915
9916#
9917# for an extended precision DENORM, the UNFL exception bit is set
9918# the accrued bit is NOT set in this instance(no inexactness!)
9919#
9920fabs_denorm:
9921        andi.b          &0xc0,%d0               # is precision extended?
9922        bne.b           fabs_not_ext            # no
9923
9924        bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
9925
9926        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
9927        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
9928        mov.w           SRC_EX(%a0),%d0
9929        bclr            &15,%d0                 # clear sign
9930        mov.w           %d0,FP_SCR0_EX(%a6)     # insert exponent
9931
9932        fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
9933
9934        btst            &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
9935        bne.b           fabs_ext_unfl_ena
9936        rts
9937
9938#
9939# the input is an extended DENORM and underflow is enabled in the FPCR.
9940# normalize the mantissa and add the bias of 0x6000 to the resulting negative
9941# exponent and insert back into the operand.
9942#
9943fabs_ext_unfl_ena:
9944        lea             FP_SCR0(%a6),%a0        # pass: ptr to operand
9945        bsr.l           norm                    # normalize result (d0 = shift amount, per use below)
9946        neg.w           %d0                     # new exponent = -(shft val)
9947        addi.w          &0x6000,%d0             # add new bias to exponent
9948        mov.w           FP_SCR0_EX(%a6),%d1     # fetch old sign,exp
9949        andi.w          &0x8000,%d1             # keep old sign
9950        andi.w          &0x7fff,%d0             # clear sign position
9951        or.w            %d1,%d0                 # concat old sign, new exponent
9952        mov.w           %d0,FP_SCR0_EX(%a6)     # insert new exponent
9953        fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
9954        rts
9955
9956#
9957# operand is either single or double
9958#
9959fabs_not_ext:
9960        cmpi.b          %d0,&s_mode*0x10        # separate sgl/dbl prec
9961        bne.b           fabs_dbl
9962
9963#
9964# operand is to be rounded to single precision
9965#
9966fabs_sgl:
9967        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
9968        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
9969        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
9970        bsr.l           scale_to_zero_src       # calculate scale factor (tested in d0 below)
9971
9972        cmpi.l          %d0,&0x3fff-0x3f80      # will move in underflow?
9973        bge.w           fabs_sd_unfl            # yes; go handle underflow
9974        cmpi.l          %d0,&0x3fff-0x407e      # will move in overflow?
9975        beq.w           fabs_sd_may_ovfl        # maybe; go check
9976        blt.w           fabs_sd_ovfl            # yes; go handle overflow
9977
9978#
9979# operand will NOT overflow or underflow when moved in to the fp reg file
9980#
9981fabs_sd_normal:
9982        fmov.l          &0x0,%fpsr              # clear FPSR
9983        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
9984
9985        fabs.x          FP_SCR0(%a6),%fp0       # perform absolute
9986
9987        fmov.l          %fpsr,%d1               # save FPSR
9988        fmov.l          &0x0,%fpcr              # clear FPCR
9989
9990        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
9991
9992fabs_sd_normal_exit:
9993        mov.l           %d2,-(%sp)              # save d2
9994        fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
9995        mov.w           FP_SCR0_EX(%a6),%d1     # load sgn,exp
9996        mov.l           %d1,%d2                 # make a copy
9997        andi.l          &0x7fff,%d1             # strip sign
9998        sub.l           %d0,%d1                 # subtract scale factor
9999        andi.w          &0x8000,%d2             # keep old sign
10000        or.w            %d1,%d2                 # concat old sign,new exp
10001        mov.w           %d2,FP_SCR0_EX(%a6)     # insert new exponent
10002        mov.l           (%sp)+,%d2              # restore d2
10003        fmovm.x         FP_SCR0(%a6),&0x80      # return result in fp0
10004        rts
10005
10006#
10007# operand is to be rounded to double precision
10008#
10009fabs_dbl:
10010        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
10011        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
10012        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
10013        bsr.l           scale_to_zero_src       # calculate scale factor (tested in d0 below)
10014
10015        cmpi.l          %d0,&0x3fff-0x3c00      # will move in underflow?
10016        bge.b           fabs_sd_unfl            # yes; go handle underflow
10017        cmpi.l          %d0,&0x3fff-0x43fe      # will move in overflow?
10018        beq.w           fabs_sd_may_ovfl        # maybe; go check
10019        blt.w           fabs_sd_ovfl            # yes; go handle overflow
10020        bra.w           fabs_sd_normal          # no; go handle normalized op
10021
10022#
10023# operand WILL underflow when moved in to the fp register file
10024#
10025fabs_sd_unfl:
10026        bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
10027
10028        bclr            &0x7,FP_SCR0_EX(%a6)    # force absolute value (bit 7 of the first EX byte)
10029
10030# if underflow or inexact is enabled, go calculate EXOP first.
10031        mov.b           FPCR_ENABLE(%a6),%d1
10032        andi.b          &0x0b,%d1               # is UNFL or INEX enabled?
10033        bne.b           fabs_sd_unfl_ena        # yes
10034
10035fabs_sd_unfl_dis:
10036        lea             FP_SCR0(%a6),%a0        # pass: result addr
10037        mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
10038        bsr.l           unf_res                 # calculate default result
10039        or.b            %d0,FPSR_CC(%a6)        # set possible 'Z' ccode
10040        fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
10041        rts
10042
10043#
10044# operand will underflow AND underflow (or inexact) is enabled.
10045# Build the EXOP (exponent rebiased by 0x6000) in fp1, then produce the
10046# default result through fabs_sd_unfl_dis as well.
10047fabs_sd_unfl_ena:
10048        mov.l           FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
10049        mov.l           FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
10050        mov.w           FP_SCR0_EX(%a6),%d1     # load current exponent
10051
10052        mov.l           %d2,-(%sp)              # save d2
10053        mov.l           %d1,%d2                 # make a copy
10054        andi.l          &0x7fff,%d1             # strip sign
10055        andi.w          &0x8000,%d2             # keep old sign
10056        sub.l           %d0,%d1                 # subtract scale factor
10057        addi.l          &0x6000,%d1             # add new bias
10058        andi.w          &0x7fff,%d1             # keep the exponent within 15 bits
10059        or.w            %d2,%d1                 # concat new sign,new exp
10060        mov.w           %d1,FP_SCR1_EX(%a6)     # insert new exp
10061        fmovm.x         FP_SCR1(%a6),&0x40      # return EXOP in fp1
10062        mov.l           (%sp)+,%d2              # restore d2
10063        bra.b           fabs_sd_unfl_dis
10064
10065#
10066# operand WILL overflow.
10067#
10068fabs_sd_ovfl:
10069        fmov.l          &0x0,%fpsr              # clear FPSR
10070        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
10071
10072        fabs.x          FP_SCR0(%a6),%fp0       # perform absolute
10073
10074        fmov.l          &0x0,%fpcr              # clear FPCR
10075        fmov.l          %fpsr,%d1               # save FPSR
10076
10077        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
10078
10079fabs_sd_ovfl_tst:
10080        or.l            &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
10081
10082        mov.b           FPCR_ENABLE(%a6),%d1
10083        andi.b          &0x13,%d1               # is OVFL or INEX enabled?
10084        bne.b           fabs_sd_ovfl_ena        # yes
10085
10086#
10087# create the default result by calling ovf_res(). the OVFL/INEX-enabled
10088# path (fabs_sd_ovfl_ena) also comes here after building the EXOP.
10089#
10090fabs_sd_ovfl_dis:
10091        btst            &neg_bit,FPSR_CC(%a6)   # is result negative?
10092        sne             %d1                     # set sign param accordingly
10093        mov.l           L_SCR3(%a6),%d0         # pass: prec,mode
10094        bsr.l           ovf_res                 # calculate default result
10095        or.b            %d0,FPSR_CC(%a6)        # set INF,N if applicable
10096        fmovm.x         (%a0),&0x80             # return default result in fp0
10097        rts
10098
10099#
10100# OVFL or INEX is enabled.
10101# the INEX2 bit has already been updated by the round to the correct precision.
10102# now, build the EXOP (exponent rebiased by -0x6000) and return it in fp1.
10103#
10104fabs_sd_ovfl_ena:
10105        mov.l           %d2,-(%sp)              # save d2
10106        mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
10107        mov.l           %d1,%d2                 # make a copy
10108        andi.l          &0x7fff,%d1             # strip sign
10109        andi.w          &0x8000,%d2             # keep old sign
10110        sub.l           %d0,%d1                 # subtract scale factor
10111        subi.l          &0x6000,%d1             # subtract bias
10112        andi.w          &0x7fff,%d1             # keep the exponent within 15 bits
10113        or.w            %d2,%d1                 # concat sign,exp
10114        mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
10115        fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
10116        mov.l           (%sp)+,%d2              # restore d2
10117        bra.b           fabs_sd_ovfl_dis
10118
10119#
10120# the move in MAY overflow. so...
10121#
10122fabs_sd_may_ovfl:
10123        fmov.l          &0x0,%fpsr              # clear FPSR
10124        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
10125
10126        fabs.x          FP_SCR0(%a6),%fp0       # perform absolute
10127
10128        fmov.l          %fpsr,%d1               # save status
10129        fmov.l          &0x0,%fpcr              # clear FPCR
10130
10131        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
10132
10133        fabs.x          %fp0,%fp1               # make a copy of result
10134        fcmp.b          %fp1,&0x2               # is |result| >= 2.b?
10135        fbge.w          fabs_sd_ovfl_tst        # yes; overflow has occurred
10136
10137# no, it didn't overflow; we have correct result
10138        bra.w           fabs_sd_normal_exit
10139
10140##########################################################################
10141
10142#
10143# input is not normalized; what is it? (d1 = source tag loaded at fabs)
10144#
10145fabs_not_norm:
10146        cmpi.b          %d1,&DENORM             # weed out DENORM
10147        beq.w           fabs_denorm
10148        cmpi.b          %d1,&SNAN               # weed out SNAN
10149        beq.l           res_snan_1op
10150        cmpi.b          %d1,&QNAN               # weed out QNAN
10151        beq.l           res_qnan_1op
10152
10153        fabs.x          SRC(%a0),%fp0           # force absolute value
10154
10155        cmpi.b          %d1,&INF                # weed out INF
10156        beq.b           fabs_inf
10157fabs_zero:
10158        mov.b           &z_bmask,FPSR_CC(%a6)   # set 'Z' ccode bit
10159        rts
10160fabs_inf:
10161        mov.b           &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
10162        rts
10163
10164#########################################################################
10165# XDEF **************************************************************** #
10166#       fcmp(): fp compare op routine                                   #
10167#                                                                       #
10168# XREF **************************************************************** #
10169#       res_qnan() - return QNAN result                                 #
10170#       res_snan() - return SNAN result                                 #
10171#                                                                       #
10172# INPUT *************************************************************** #
10173#       a0 = pointer to extended precision source operand               #
10174#       a1 = pointer to extended precision destination operand          #
10175#       d0 = round prec/mode                                            #
10176#                                                                       #
10177# OUTPUT ************************************************************** #
10178#       None                                                            #
10179#                                                                       #
10180# ALGORITHM *********************************************************** #
10181#       Handle NANs and denorms as special cases. For everything else,  #
10182# just use the actual fcmp instruction to produce the correct condition #
10183# codes.                                                                #
10184#                                                                       #
10185#########################################################################
10186
10187        global          fcmp
10188fcmp:
10189        clr.w           %d1
10190        mov.b           DTAG(%a6),%d1
10191        lsl.b           &0x3,%d1                # d1 = DTAG * 8
10192        or.b            STAG(%a6),%d1           # d1 = (DTAG << 3) | STAG
10193        bne.b           fcmp_not_norm           # optimize on non-norm input
10194
10195#
10196# COMPARE FP OPs : NORMs, ZEROs, INFs, and "corrected" DENORMs
10197#
10198fcmp_norm:
10199        fmovm.x         DST(%a1),&0x80          # load dst op
10200
10201        fcmp.x          %fp0,SRC(%a0)           # do compare
10202
10203        fmov.l          %fpsr,%d0               # save FPSR
10204        rol.l           &0x8,%d0                # rotate top FPSR byte (ccodes) into low byte
10205        mov.b           %d0,FPSR_CC(%a6)        # set ccode bits(no exc bits are set)
10206
10207        rts
10208
10209#
10210# fcmp: inputs are not both normalized; what are they?
10211# table entries below are annotated "DST type - SRC type".
10212fcmp_not_norm:
10213        mov.w           (tbl_fcmp_op.b,%pc,%d1.w*2),%d1 # fetch 16-bit handler offset
10214        jmp             (tbl_fcmp_op.b,%pc,%d1.w*1) # jump to tbl_fcmp_op + offset
10215
10216        swbeg           &48                     # 6 dst rows x 8 src slots; index = (DTAG<<3)|STAG
10217tbl_fcmp_op:
10218        short           fcmp_norm       - tbl_fcmp_op # NORM - NORM
10219        short           fcmp_norm       - tbl_fcmp_op # NORM - ZERO
10220        short           fcmp_norm       - tbl_fcmp_op # NORM - INF
10221        short           fcmp_res_qnan   - tbl_fcmp_op # NORM - QNAN
10222        short           fcmp_nrm_dnrm   - tbl_fcmp_op # NORM - DENORM
10223        short           fcmp_res_snan   - tbl_fcmp_op # NORM - SNAN
10224        short           tbl_fcmp_op     - tbl_fcmp_op # (unused slot)
10225        short           tbl_fcmp_op     - tbl_fcmp_op # (unused slot)
10226
10227        short           fcmp_norm       - tbl_fcmp_op # ZERO - NORM
10228        short           fcmp_norm       - tbl_fcmp_op # ZERO - ZERO
10229        short           fcmp_norm       - tbl_fcmp_op # ZERO - INF
10230        short           fcmp_res_qnan   - tbl_fcmp_op # ZERO - QNAN
10231        short           fcmp_dnrm_s     - tbl_fcmp_op # ZERO - DENORM
10232        short           fcmp_res_snan   - tbl_fcmp_op # ZERO - SNAN
10233        short           tbl_fcmp_op     - tbl_fcmp_op # (unused slot)
10234        short           tbl_fcmp_op     - tbl_fcmp_op # (unused slot)
10235
10236        short           fcmp_norm       - tbl_fcmp_op # INF - NORM
10237        short           fcmp_norm       - tbl_fcmp_op # INF - ZERO
10238        short           fcmp_norm       - tbl_fcmp_op # INF - INF
10239        short           fcmp_res_qnan   - tbl_fcmp_op # INF - QNAN
10240        short           fcmp_dnrm_s     - tbl_fcmp_op # INF - DENORM
10241        short           fcmp_res_snan   - tbl_fcmp_op # INF - SNAN
10242        short           tbl_fcmp_op     - tbl_fcmp_op # (unused slot)
10243        short           tbl_fcmp_op     - tbl_fcmp_op # (unused slot)
10244
10245        short           fcmp_res_qnan   - tbl_fcmp_op # QNAN - NORM
10246        short           fcmp_res_qnan   - tbl_fcmp_op # QNAN - ZERO
10247        short           fcmp_res_qnan   - tbl_fcmp_op # QNAN - INF
10248        short           fcmp_res_qnan   - tbl_fcmp_op # QNAN - QNAN
10249        short           fcmp_res_qnan   - tbl_fcmp_op # QNAN - DENORM
10250        short           fcmp_res_snan   - tbl_fcmp_op # QNAN - SNAN
10251        short           tbl_fcmp_op     - tbl_fcmp_op # (unused slot)
10252        short           tbl_fcmp_op     - tbl_fcmp_op # (unused slot)
10253
10254        short           fcmp_dnrm_nrm   - tbl_fcmp_op # DENORM - NORM
10255        short           fcmp_dnrm_d     - tbl_fcmp_op # DENORM - ZERO
10256        short           fcmp_dnrm_d     - tbl_fcmp_op # DENORM - INF
10257        short           fcmp_res_qnan   - tbl_fcmp_op # DENORM - QNAN
10258        short           fcmp_dnrm_sd    - tbl_fcmp_op # DENORM - DENORM
10259        short           fcmp_res_snan   - tbl_fcmp_op # DENORM - SNAN
10260        short           tbl_fcmp_op     - tbl_fcmp_op # (unused slot)
10261        short           tbl_fcmp_op     - tbl_fcmp_op # (unused slot)
10262
10263        short           fcmp_res_snan   - tbl_fcmp_op # SNAN - NORM
10264        short           fcmp_res_snan   - tbl_fcmp_op # SNAN - ZERO
10265        short           fcmp_res_snan   - tbl_fcmp_op # SNAN - INF
10266        short           fcmp_res_snan   - tbl_fcmp_op # SNAN - QNAN
10267        short           fcmp_res_snan   - tbl_fcmp_op # SNAN - DENORM
10268        short           fcmp_res_snan   - tbl_fcmp_op # SNAN - SNAN
10269        short           tbl_fcmp_op     - tbl_fcmp_op # (unused slot)
10270        short           tbl_fcmp_op     - tbl_fcmp_op # (unused slot)
10271
10272# unlike all other functions for QNAN and SNAN, fcmp does NOT set the
10273# 'N' bit for a negative QNAN or SNAN input so we must squelch it here.
10274fcmp_res_qnan:
10275        bsr.l           res_qnan
10276        andi.b          &0xf7,FPSR_CC(%a6)      # clear bit 3 of the ccode byte (the 'N' bit)
10277        rts
10278fcmp_res_snan:
10279        bsr.l           res_snan
10280        andi.b          &0xf7,FPSR_CC(%a6)      # clear bit 3 of the ccode byte (the 'N' bit)
10281        rts
10282
10283#
10284# DENORMs are a little more difficult.
10285# If you have 2 DENORMs, then you can just force the j-bit to a one
10286# and use the fcmp_norm routine.
10287# If you have a DENORM and an INF or ZERO, just force the DENORM's j-bit to a one
10288# and use the fcmp_norm routine.
10289# If you have a DENORM and a NORM with opposite signs, then use fcmp_norm, also.
10290# But with a DENORM and a NORM of the same sign, the neg bit is set if the
10291# (1) signs are (+) and the DENORM is the dst or
10292# (2) signs are (-) and the DENORM is the src
10293#
10294
10295fcmp_dnrm_s:
10296        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
10297        mov.l           SRC_HI(%a0),%d0
10298        bset            &31,%d0                 # DENORM src; make into small norm
10299        mov.l           %d0,FP_SCR0_HI(%a6)
10300        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
10301        lea             FP_SCR0(%a6),%a0        # a0 = ptr to corrected src
10302        bra.w           fcmp_norm
10303
10304fcmp_dnrm_d:
10305        mov.l           DST_EX(%a1),FP_SCR0_EX(%a6) # NOTE(review): .l here; sibling routines copy EX with mov.w
10306        mov.l           DST_HI(%a1),%d0
10307        bset            &31,%d0                 # DENORM dst; make into small norm
10308        mov.l           %d0,FP_SCR0_HI(%a6)
10309        mov.l           DST_LO(%a1),FP_SCR0_LO(%a6)
10310        lea             FP_SCR0(%a6),%a1        # a1 = ptr to corrected dst
10311        bra.w           fcmp_norm
10312
10313fcmp_dnrm_sd:
10314        mov.w           DST_EX(%a1),FP_SCR1_EX(%a6)
10315        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
10316        mov.l           DST_HI(%a1),%d0
10317        bset            &31,%d0                 # DENORM dst; make into small norm
10318        mov.l           %d0,FP_SCR1_HI(%a6)
10319        mov.l           SRC_HI(%a0),%d0
10320        bset            &31,%d0                 # DENORM src; make into small norm
10321        mov.l           %d0,FP_SCR0_HI(%a6)
10322        mov.l           DST_LO(%a1),FP_SCR1_LO(%a6)
10323        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
10324        lea             FP_SCR1(%a6),%a1        # a1 = ptr to corrected dst
10325        lea             FP_SCR0(%a6),%a0        # a0 = ptr to corrected src
10326        bra.w           fcmp_norm
10327
10328fcmp_nrm_dnrm:
10329        mov.b           SRC_EX(%a0),%d0         # determine if like signs
10330        mov.b           DST_EX(%a1),%d1
10331        eor.b           %d0,%d1
10332        bmi.w           fcmp_dnrm_s             # signs differ; fix up src and compare
10333
10334# signs are the same, so must determine the answer ourselves.
10335        tst.b           %d0                     # is src op negative?
10336        bmi.b           fcmp_nrm_dnrm_m         # yes
10337        rts                                     # no; FPSR_CC is left untouched
10338fcmp_nrm_dnrm_m:
10339        mov.b           &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
10340        rts
10341
10342fcmp_dnrm_nrm:
10343        mov.b           SRC_EX(%a0),%d0         # determine if like signs
10344        mov.b           DST_EX(%a1),%d1
10345        eor.b           %d0,%d1
10346        bmi.w           fcmp_dnrm_d             # signs differ; fix up dst and compare
10347
10348# signs are the same, so must determine the answer ourselves.
10349        tst.b           %d0                     # is src op negative?
10350        bpl.b           fcmp_dnrm_nrm_m         # no
10351        rts                                     # yes; FPSR_CC is left untouched
10352fcmp_dnrm_nrm_m:
10353        mov.b           &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
10354        rts
10355
10356#########################################################################
10357# XDEF **************************************************************** #
10358#       fsglmul(): emulates the fsglmul instruction                     #
10359#                                                                       #
10360# XREF **************************************************************** #
10361#       scale_to_zero_src() - scale src exponent to zero                #
10362#       scale_to_zero_dst() - scale dst exponent to zero                #
10363#       unf_res4() - return default underflow result for sglop          #
10364#       ovf_res() - return default overflow result                      #
10365#       res_qnan() - return QNAN result                                 #
10366#       res_snan() - return SNAN result                                 #
10367#                                                                       #
10368# INPUT *************************************************************** #
10369#       a0 = pointer to extended precision source operand               #
10370#       a1 = pointer to extended precision destination operand          #
10371#       d0 = rnd prec,mode                                              #
10372#                                                                       #
10373# OUTPUT ************************************************************** #
10374#       fp0 = result                                                    #
10375#       fp1 = EXOP (if exception occurred)                              #
10376#                                                                       #
10377# ALGORITHM *********************************************************** #
10378#       Handle NANs, infinities, and zeroes as special cases. Divide    #
10379# norms/denorms into ext/sgl/dbl precision.                             #
10380#       For norms/denorms, scale the exponents such that a multiply     #
10381# instruction won't cause an exception. Use the regular fsglmul to      #
10382# compute a result. Check if the regular operands would have taken      #
10383# an exception. If so, return the default overflow/underflow result     #
10384# and return the EXOP if exceptions are enabled. Else, scale the        #
10385# result operand to the proper exponent.                                #
10386#                                                                       #
10387#########################################################################
10388
# fsglmul: entry point, plus the path taken when both operands are tagged
# NORM (the table entries for DENORM operands also land on fsglmul_norm).
#   a0 -> src operand, a1 -> dst operand, d0 = rounding precision/mode.
#   The product is returned in fp0.
10389        global          fsglmul
10390fsglmul:
10391        mov.l           %d0,L_SCR3(%a6)         # store rnd info
10392
10393        clr.w           %d1                     # clear the word later used as table index
10394        mov.b           DTAG(%a6),%d1           # dst operand tag ...
10395        lsl.b           &0x3,%d1                # ... selects a row of 8 table entries
10396        or.b            STAG(%a6),%d1           # src operand tag selects the entry in that row
10397
10398        bne.w           fsglmul_not_norm        # optimize on non-norm input
10399
# Work on private copies so the caller's operands are left untouched:
# FP_SCR1 receives the dst operand, FP_SCR0 the src operand.
10400fsglmul_norm:
10401        mov.w           DST_EX(%a1),FP_SCR1_EX(%a6)
10402        mov.l           DST_HI(%a1),FP_SCR1_HI(%a6)
10403        mov.l           DST_LO(%a1),FP_SCR1_LO(%a6)
10404
10405        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
10406        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
10407        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
10408
10409        bsr.l           scale_to_zero_src       # scale exponent
10410        mov.l           %d0,-(%sp)              # save scale factor 1
10411
10412        bsr.l           scale_to_zero_dst       # scale dst exponent
10413
10414        add.l           (%sp)+,%d0              # SCALE_FACTOR = scale1 + scale2
10415
# From here on d0 must stay intact: every exit path either subtracts it from
# the result exponent or has already branched on it.
10416        cmpi.l          %d0,&0x3fff-0x7ffe      # would result ovfl?
10417        beq.w           fsglmul_may_ovfl        # result may rnd to overflow
10418        blt.w           fsglmul_ovfl            # result will overflow
10419
10420        cmpi.l          %d0,&0x3fff+0x0001      # would result unfl?
10421        beq.w           fsglmul_may_unfl        # result may rnd to no unfl
10422        bgt.w           fsglmul_unfl            # result will underflow
10423
# Neither overflow nor underflow is possible: multiply the scaled copies
# under the user's rounding control.
10424fsglmul_normal:
10425        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
10426
10427        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
10428        fmov.l          &0x0,%fpsr              # clear FPSR
10429
10430        fsglmul.x       FP_SCR0(%a6),%fp0       # execute sgl multiply
10431
10432        fmov.l          %fpsr,%d1               # save status
10433        fmov.l          &0x0,%fpcr              # clear FPCR
10434
10435        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
10436
# Shared exit: fp0 holds the scaled result and d0 the scale factor. Also
# reached from fsglmul_may_ovfl and fsglmul_may_unfl once they have decided
# that no exception occurred. The exponent is patched in memory (FP_SCR0)
# and the corrected value is reloaded into fp0.
10437fsglmul_normal_exit:
10438        fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
10439        mov.l           %d2,-(%sp)              # save d2
10440        mov.w           FP_SCR0_EX(%a6),%d1     # load {sgn,exp}
10441        mov.l           %d1,%d2                 # make a copy
10442        andi.l          &0x7fff,%d1             # strip sign
10443        andi.w          &0x8000,%d2             # keep old sign
10444        sub.l           %d0,%d1                 # exp - SCALE_FACTOR undoes the scaling
10445        or.w            %d2,%d1                 # concat old sign,new exp
10446        mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
10447        mov.l           (%sp)+,%d2              # restore d2
10448        fmovm.x         FP_SCR0(%a6),&0x80      # return result in fp0
10449        rts
10450
# fsglmul_ovfl: the scale factor shows the product must overflow. The
# multiply is still executed on the scaled operands so that INEX2/N are
# recorded and so that an EXOP can be built from fp0 if a trap is enabled.
# Expects d0 = scale factor, FP_SCR0/FP_SCR1 = scaled src/dst.
10451fsglmul_ovfl:
10452        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
10453
10454        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
10455        fmov.l          &0x0,%fpsr              # clear FPSR
10456
10457        fsglmul.x       FP_SCR0(%a6),%fp0       # execute sgl multiply
10458
10459        fmov.l          %fpsr,%d1               # save status
10460        fmov.l          &0x0,%fpcr              # clear FPCR
10461
10462        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
10463
10464fsglmul_ovfl_tst:
10465
10466# save setting this until now because this is where fsglmul_may_ovfl may jump in
10467        or.l            &ovfl_inx_mask, USER_FPSR(%a6) # set ovfl/aovfl/ainex
10468
10469        mov.b           FPCR_ENABLE(%a6),%d1
10470        andi.b          &0x13,%d1               # is OVFL or INEX enabled?
10471        bne.b           fsglmul_ovfl_ena        # yes
10472
# Build the default overflow result. This is also the tail of the enabled
# path, which branches back here after placing the EXOP in fp1.
10473fsglmul_ovfl_dis:
10474        btst            &neg_bit,FPSR_CC(%a6)   # is result negative?
10475        sne             %d1                     # set sign param accordingly
10476        mov.l           L_SCR3(%a6),%d0         # pass prec:rnd
10477        andi.b          &0x30,%d0               # force prec = ext (clears the 0xc0 prec bits)
10478        bsr.l           ovf_res                 # calculate default result
10479        or.b            %d0,FPSR_CC(%a6)        # set INF,N if applicable
10480        fmovm.x         (%a0),&0x80             # return default result in fp0
10481        rts
10482
# OVFL or INEX trap enabled: derive the EXOP from the scaled product in fp0
# by undoing the scaling and subtracting 0x6000 from the exponent, and
# return it in fp1. d0 must still hold the scale factor here.
10483fsglmul_ovfl_ena:
10484        fmovm.x         &0x80,FP_SCR0(%a6)      # move result to stack
10485
10486        mov.l           %d2,-(%sp)              # save d2
10487        mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
10488        mov.l           %d1,%d2                 # make a copy
10489        andi.l          &0x7fff,%d1             # strip sign
10490        sub.l           %d0,%d1                 # exp - SCALE_FACTOR undoes the scaling
10491        subi.l          &0x6000,%d1             # subtract bias
10492        andi.w          &0x7fff,%d1             # keep the 15 exponent bits only
10493        andi.w          &0x8000,%d2             # keep old sign
10494        or.w            %d2,%d1                 # concat old sign,new exp
10495        mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
10496        mov.l           (%sp)+,%d2              # restore d2
10497        fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
10498        bra.b           fsglmul_ovfl_dis        # fp0 still gets the default result
10499
# fsglmul_may_ovfl: the scale factor sits exactly on the overflow boundary,
# so whether the product overflows depends on the rounded magnitude of the
# scaled result. Multiply first, then compare |result| against 2.
10500fsglmul_may_ovfl:
10501        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
10502
10503        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
10504        fmov.l          &0x0,%fpsr              # clear FPSR
10505
10506        fsglmul.x       FP_SCR0(%a6),%fp0       # execute sgl multiply
10507
10508        fmov.l          %fpsr,%d1               # save status
10509        fmov.l          &0x0,%fpcr              # clear FPCR
10510
10511        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
10512
10513        fabs.x          %fp0,%fp1               # fp1 = |result|; fp0 keeps the signed result
10514        fcmp.b          %fp1,&0x2               # is |result| >= 2.b?
10515        fbge.w          fsglmul_ovfl_tst        # yes; overflow has occurred
10516
10517# no, it didn't overflow; we have correct result
10518        bra.w           fsglmul_normal_exit     # fp0 and d0 are what that exit expects
10519
# fsglmul_unfl: the product underflows (reached directly from the scale
# factor test, or from fsglmul_may_unfl). The scaled multiply is done with
# round-to-zero and the result is handed to unf_res4 to form the default
# underflow result. Expects d0 = scale factor, FP_SCR0/FP_SCR1 = scaled ops.
10520fsglmul_unfl:
10521        bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
10522
10523        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
10524
10525        fmov.l          &rz_mode*0x10,%fpcr     # set FPCR: RZ rounding, nothing else
10526        fmov.l          &0x0,%fpsr              # clear FPSR
10527
10528        fsglmul.x       FP_SCR0(%a6),%fp0       # execute sgl multiply
10529
10530        fmov.l          %fpsr,%d1               # save status
10531        fmov.l          &0x0,%fpcr              # clear FPCR
10532
10533        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
10534
10535        mov.b           FPCR_ENABLE(%a6),%d1
10536        andi.b          &0x0b,%d1               # is UNFL or INEX enabled?
10537        bne.b           fsglmul_unfl_ena        # yes
10538
# Default result: fp0 (the RZ product) is written to FP_SCR0 and fixed up
# in place by unf_res4. Also the tail of the enabled path below.
10539fsglmul_unfl_dis:
10540        fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
10541
10542        lea             FP_SCR0(%a6),%a0        # pass: result addr
10543        mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
10544        bsr.l           unf_res4                # calculate default result
10545        or.b            %d0,FPSR_CC(%a6)        # 'Z' bit may have been set
10546        fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
10547        rts
10548
10549#
10550# UNFL is enabled.
10551#
# The EXOP is computed separately in fp1 using the user's rounding control;
# fp0 keeps the RZ product for fsglmul_unfl_dis. FP_SCR0 (the scaled src) is
# reused as scratch once the multiply has consumed it.
10552fsglmul_unfl_ena:
10553        fmovm.x         FP_SCR1(%a6),&0x40      # load dst op
10554
10555        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
10556        fmov.l          &0x0,%fpsr              # clear FPSR
10557
10558        fsglmul.x       FP_SCR0(%a6),%fp1       # execute sgl multiply
10559
10560        fmov.l          &0x0,%fpcr              # clear FPCR
10561
10562        fmovm.x         &0x40,FP_SCR0(%a6)      # save result to stack
10563        mov.l           %d2,-(%sp)              # save d2
10564        mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
10565        mov.l           %d1,%d2                 # make a copy
10566        andi.l          &0x7fff,%d1             # strip sign
10567        andi.w          &0x8000,%d2             # keep old sign
10568        sub.l           %d0,%d1                 # exp - SCALE_FACTOR undoes the scaling
10569        addi.l          &0x6000,%d1             # add bias
10570        andi.w          &0x7fff,%d1             # keep the 15 exponent bits only
10571        or.w            %d2,%d1                 # concat old sign,new exp
10572        mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
10573        mov.l           (%sp)+,%d2              # restore d2
10574        fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
10575        bra.w           fsglmul_unfl_dis        # then build the default result in fp0
10576
# fsglmul_may_unfl: the scale factor sits exactly on the underflow boundary.
# Multiply with the user's rounding and compare |result| with 2: above 2 is
# a normal result, below 2 is an underflow, and exactly 2 needs a second
# multiply in RZ mode to find out which side the unrounded value was on.
10577fsglmul_may_unfl:
10578        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
10579
10580        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
10581        fmov.l          &0x0,%fpsr              # clear FPSR
10582
10583        fsglmul.x       FP_SCR0(%a6),%fp0       # execute sgl multiply
10584
10585        fmov.l          %fpsr,%d1               # save status
10586        fmov.l          &0x0,%fpcr              # clear FPCR
10587
10588        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
10589
10590        fabs.x          %fp0,%fp1               # fp1 = |result|; fp0 keeps the signed result
10591        fcmp.b          %fp1,&0x2               # compare |result| with 2.b
10592        fbgt.w          fsglmul_normal_exit     # |result| > 2; no underflow occurred
10593        fblt.w          fsglmul_unfl            # |result| < 2; underflow occurred
10594
10595#
10596# we still don't know if underflow occurred. result is ~ equal to 2. but,
10597# we don't know if the result was an underflow that rounded up to a 2 or
10598# a normalized number that rounded down to a 2. so, redo the entire operation
10599# using RZ as the rounding mode to see what the pre-rounded result is.
10600# this case should be relatively rare.
10601#
# The redo uses fp1 only, so fp0 still holds the first (user-rounded) result
# if control ends up at fsglmul_normal_exit.
10602        fmovm.x         FP_SCR1(%a6),&0x40      # load dst op into fp1
10603
10604        mov.l           L_SCR3(%a6),%d1
10605        andi.b          &0xc0,%d1               # keep rnd prec
10606        ori.b           &rz_mode*0x10,%d1       # insert RZ
10607
10608        fmov.l          %d1,%fpcr               # set FPCR
10609        fmov.l          &0x0,%fpsr              # clear FPSR
10610
10611        fsglmul.x       FP_SCR0(%a6),%fp1       # execute sgl multiply
10612
10613        fmov.l          &0x0,%fpcr              # clear FPCR
10614        fabs.x          %fp1                    # make absolute value
10615        fcmp.b          %fp1,&0x2               # is |result| < 2.b?
10616        fbge.w          fsglmul_normal_exit     # no; no underflow occurred
10617        bra.w           fsglmul_unfl            # yes, underflow occurred
10618
10619##############################################################################
10620
10621#
10622# Single Precision Multiply: inputs are not both normalized; what are they?
10623#
# fsglmul_not_norm: dispatch on the combined operand tags. On entry
# d1.w = (DTAG << 3) | STAG. Each table entry is a 16-bit offset from
# tbl_fsglmul_op to the handler; rows are the dst tag, columns the src tag.
# Two entries per row are padding (offset 0) for tag values 6 and 7.
10624fsglmul_not_norm:
10625        mov.w           (tbl_fsglmul_op.b,%pc,%d1.w*2),%d1 # d1 = offset stored in entry d1
10626        jmp             (tbl_fsglmul_op.b,%pc,%d1.w*1)     # jump to tbl_fsglmul_op + offset
10627
10628        swbeg           &48                     # 6 rows x 8 entries follow
10629tbl_fsglmul_op:
10630        short           fsglmul_norm            - tbl_fsglmul_op # NORM x NORM
10631        short           fsglmul_zero            - tbl_fsglmul_op # NORM x ZERO
10632        short           fsglmul_inf_src         - tbl_fsglmul_op # NORM x INF
10633        short           fsglmul_res_qnan        - tbl_fsglmul_op # NORM x QNAN
10634        short           fsglmul_norm            - tbl_fsglmul_op # NORM x DENORM
10635        short           fsglmul_res_snan        - tbl_fsglmul_op # NORM x SNAN
10636        short           tbl_fsglmul_op          - tbl_fsglmul_op # (unused tag)
10637        short           tbl_fsglmul_op          - tbl_fsglmul_op # (unused tag)
10638
10639        short           fsglmul_zero            - tbl_fsglmul_op # ZERO x NORM
10640        short           fsglmul_zero            - tbl_fsglmul_op # ZERO x ZERO
10641        short           fsglmul_res_operr       - tbl_fsglmul_op # ZERO x INF
10642        short           fsglmul_res_qnan        - tbl_fsglmul_op # ZERO x QNAN
10643        short           fsglmul_zero            - tbl_fsglmul_op # ZERO x DENORM
10644        short           fsglmul_res_snan        - tbl_fsglmul_op # ZERO x SNAN
10645        short           tbl_fsglmul_op          - tbl_fsglmul_op # (unused tag)
10646        short           tbl_fsglmul_op          - tbl_fsglmul_op # (unused tag)
10647
10648        short           fsglmul_inf_dst         - tbl_fsglmul_op # INF x NORM
10649        short           fsglmul_res_operr       - tbl_fsglmul_op # INF x ZERO
10650        short           fsglmul_inf_dst         - tbl_fsglmul_op # INF x INF
10651        short           fsglmul_res_qnan        - tbl_fsglmul_op # INF x QNAN
10652        short           fsglmul_inf_dst         - tbl_fsglmul_op # INF x DENORM
10653        short           fsglmul_res_snan        - tbl_fsglmul_op # INF x SNAN
10654        short           tbl_fsglmul_op          - tbl_fsglmul_op # (unused tag)
10655        short           tbl_fsglmul_op          - tbl_fsglmul_op # (unused tag)
10656
10657        short           fsglmul_res_qnan        - tbl_fsglmul_op # QNAN x NORM
10658        short           fsglmul_res_qnan        - tbl_fsglmul_op # QNAN x ZERO
10659        short           fsglmul_res_qnan        - tbl_fsglmul_op # QNAN x INF
10660        short           fsglmul_res_qnan        - tbl_fsglmul_op # QNAN x QNAN
10661        short           fsglmul_res_qnan        - tbl_fsglmul_op # QNAN x DENORM
10662        short           fsglmul_res_snan        - tbl_fsglmul_op # QNAN x SNAN
10663        short           tbl_fsglmul_op          - tbl_fsglmul_op # (unused tag)
10664        short           tbl_fsglmul_op          - tbl_fsglmul_op # (unused tag)
10665
10666        short           fsglmul_norm            - tbl_fsglmul_op # DENORM x NORM
10667        short           fsglmul_zero            - tbl_fsglmul_op # DENORM x ZERO
10668        short           fsglmul_inf_src         - tbl_fsglmul_op # DENORM x INF
10669        short           fsglmul_res_qnan        - tbl_fsglmul_op # DENORM x QNAN
10670        short           fsglmul_norm            - tbl_fsglmul_op # DENORM x DENORM
10671        short           fsglmul_res_snan        - tbl_fsglmul_op # DENORM x SNAN
10672        short           tbl_fsglmul_op          - tbl_fsglmul_op # (unused tag)
10673        short           tbl_fsglmul_op          - tbl_fsglmul_op # (unused tag)
10674
10675        short           fsglmul_res_snan        - tbl_fsglmul_op # SNAN x NORM
10676        short           fsglmul_res_snan        - tbl_fsglmul_op # SNAN x ZERO
10677        short           fsglmul_res_snan        - tbl_fsglmul_op # SNAN x INF
10678        short           fsglmul_res_snan        - tbl_fsglmul_op # SNAN x QNAN
10679        short           fsglmul_res_snan        - tbl_fsglmul_op # SNAN x DENORM
10680        short           fsglmul_res_snan        - tbl_fsglmul_op # SNAN x SNAN
10681        short           tbl_fsglmul_op          - tbl_fsglmul_op # (unused tag)
10682        short           tbl_fsglmul_op          - tbl_fsglmul_op # (unused tag)
10682        short           tbl_fsglmul_op          - tbl_fsglmul_op #
10683
# Local trampolines named by tbl_fsglmul_op. Each one long-branches to a
# handler defined elsewhere in the file; the zero/infinity cases go to the
# fmul_* routines. NOTE(review): presumably these stubs exist so the table's
# 16-bit offsets always reach their target - confirm.
10684fsglmul_res_operr:
10685        bra.l           res_operr               # ZERO x INF, INF x ZERO
10686fsglmul_res_snan:
10687        bra.l           res_snan                # either operand is an SNAN
10688fsglmul_res_qnan:
10689        bra.l           res_qnan                # a QNAN operand and no SNAN
10690fsglmul_zero:
10691        bra.l           fmul_zero               # a ZERO operand, other one NORM/DENORM/ZERO
10692fsglmul_inf_src:
10693        bra.l           fmul_inf_src            # src is INF, dst is NORM/DENORM
10694fsglmul_inf_dst:
10695        bra.l           fmul_inf_dst            # dst is INF, src is NORM/DENORM/INF
10696
10697#########################################################################
10698# XDEF **************************************************************** #
10699#       fsgldiv(): emulates the fsgldiv instruction                     #
10700#                                                                       #
10701# XREF **************************************************************** #
10702#       scale_to_zero_src() - scale src exponent to zero                #
10703#       scale_to_zero_dst() - scale dst exponent to zero                #
10704#       unf_res4() - return default underflow result for sglop          #
10705#       ovf_res() - return default overflow result                      #
10706#       res_qnan() - return QNAN result                                 #
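10707#       res_snan() - return SNAN result                                 #
#       res_operr() - set OPERR result                                  #
#       fdiv_inf_load(), fdiv_zero_load(), fdiv_inf_dst() - special     #
#               case targets reached through tbl_fsgldiv_op             #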
10708#                                                                       #
10709# INPUT *************************************************************** #
10710#       a0 = pointer to extended precision source operand               #
10711#       a1 = pointer to extended precision destination operand          #
10712#       d0  rnd prec,mode                                               #
10713#                                                                       #
10714# OUTPUT ************************************************************** #
10715#       fp0 = result                                                    #
10716#       fp1 = EXOP (if exception occurred)                              #
10717#                                                                       #
10718# ALGORITHM *********************************************************** #
10719#       Handle NANs, infinities, and zeroes as special cases. Divide    #
10720# norms/denorms into ext/sgl/dbl precision.                             #
10721#       For norms/denorms, scale the exponents such that a divide       #
10722# instruction won't cause an exception. Use the regular fsgldiv to      #
10723# compute a result. Check if the regular operands would have taken      #
10724# an exception. If so, return the default overflow/underflow result     #
10725# and return the EXOP if exceptions are enabled. Else, scale the        #
10726# result operand to the proper exponent.                                #
10727#                                                                       #
10728#########################################################################
10729
# fsgldiv: entry point, plus the path taken when both operands are tagged
# NORM (the table entries for DENORM operands also land on fsgldiv_norm).
#   a0 -> src operand (divisor), a1 -> dst operand (dividend),
#   d0 = rounding precision/mode. The quotient is returned in fp0.
10730        global          fsgldiv
10731fsgldiv:
10732        mov.l           %d0,L_SCR3(%a6)         # store rnd info
10733
10734        clr.w           %d1                     # clear the word later used as table index
10735        mov.b           DTAG(%a6),%d1           # dst operand tag ...
10736        lsl.b           &0x3,%d1                # ... selects a row of 8 table entries
10737        or.b            STAG(%a6),%d1           # combine src tags
10738
10739        bne.w           fsgldiv_not_norm        # optimize on non-norm input
10740
10741#
10742# DIVIDE: NORMs and DENORMs ONLY!
10743#
# Work on private copies: FP_SCR1 receives the dst operand, FP_SCR0 the src.
10744fsgldiv_norm:
10745        mov.w           DST_EX(%a1),FP_SCR1_EX(%a6)
10746        mov.l           DST_HI(%a1),FP_SCR1_HI(%a6)
10747        mov.l           DST_LO(%a1),FP_SCR1_LO(%a6)
10748
10749        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
10750        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
10751        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
10752
10753        bsr.l           scale_to_zero_src       # calculate scale factor 1
10754        mov.l           %d0,-(%sp)              # save scale factor 1
10755
10756        bsr.l           scale_to_zero_dst       # calculate scale factor 2
10757
10758        neg.l           (%sp)                   # (%sp) = -scale1
10759        add.l           %d0,(%sp)               # S.F. = scale2 - scale1 (dst minus src)
10760
# NOTE(review): d1 is not read after the next two instructions - every path
# below reloads it from %fpsr first - so this fetch looks vestigial.
10761        mov.w           2+L_SCR3(%a6),%d1       # fetch precision,mode
10762        lsr.b           &0x6,%d1
10763        mov.l           (%sp)+,%d0              # d0 = scale factor; must stay intact below
10764        cmpi.l          %d0,&0x3fff-0x7ffe      # could result overflow?
10765        ble.w           fsgldiv_may_ovfl        # maybe; decided after the divide
10766
10767        cmpi.l          %d0,&0x3fff-0x0000      # will result underflow?
10768        beq.w           fsgldiv_may_unfl        # maybe
10769        bgt.w           fsgldiv_unfl            # yes; go handle underflow
10770
# Neither overflow nor underflow is possible: divide the scaled copies under
# the user's rounding control.
10771fsgldiv_normal:
10772        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
10773
10774        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
10775        fmov.l          &0x0,%fpsr              # clear FPSR
10776
10777        fsgldiv.x       FP_SCR0(%a6),%fp0       # perform sgl divide
10778
10779        fmov.l          %fpsr,%d1               # save FPSR
10780        fmov.l          &0x0,%fpcr              # clear FPCR
10781
10782        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
10783
# Shared exit: fp0 holds the scaled result and d0 the scale factor. Also
# reached from fsgldiv_may_ovfl and fsgldiv_may_unfl once they have decided
# that no exception occurred.
10784fsgldiv_normal_exit:
10785        fmovm.x         &0x80,FP_SCR0(%a6)      # store result on stack
10786        mov.l           %d2,-(%sp)              # save d2
10787        mov.w           FP_SCR0_EX(%a6),%d1     # load {sgn,exp}
10788        mov.l           %d1,%d2                 # make a copy
10789        andi.l          &0x7fff,%d1             # strip sign
10790        andi.w          &0x8000,%d2             # keep old sign
10791        sub.l           %d0,%d1                 # exp - scale factor undoes the scaling
10792        or.w            %d2,%d1                 # concat old sign,new exp
10793        mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
10794        mov.l           (%sp)+,%d2              # restore d2
10795        fmovm.x         FP_SCR0(%a6),&0x80      # return result in fp0
10796        rts
10797
# fsgldiv_may_ovfl: the scale factor allows an overflow. Unlike fsglmul there
# is no "definitely overflows" entry; the divide is executed on the scaled
# operands and the unscaled result exponent is checked against 0x7fff.
# Expects d0 = scale factor, FP_SCR0/FP_SCR1 = scaled src/dst.
10798fsgldiv_may_ovfl:
10799        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
10800
10801        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
10802        fmov.l          &0x0,%fpsr              # clear FPSR
10803
10804        fsgldiv.x       FP_SCR0(%a6),%fp0       # execute divide
10805
10806        fmov.l          %fpsr,%d1               # save status
10807        fmov.l          &0x0,%fpcr              # clear FPCR
10808
10809        or.l            %d1,USER_FPSR(%a6)      # save INEX,N
10810
# Peek at the result exponent through the stack. NOTE(review): with the
# predecrement form the fmovm register mask is bit-reversed, so 0x01 should
# select fp0 here - confirm against the FMOVEM encoding.
10811        fmovm.x         &0x01,-(%sp)            # save result to stack
10812        mov.w           (%sp),%d1               # fetch new exponent
10813        add.l           &0xc,%sp                # pop the 12-byte temporary again
10814        andi.l          &0x7fff,%d1             # strip sign
10815        sub.l           %d0,%d1                 # exp - scale factor = true exponent
10816        cmp.l           %d1,&0x7fff             # did divide overflow?
10817        blt.b           fsgldiv_normal_exit     # no; fp0 and d0 are what that exit expects
10818
10819fsgldiv_ovfl_tst:
10820        or.w            &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
10821
10822        mov.b           FPCR_ENABLE(%a6),%d1
10823        andi.b          &0x13,%d1               # is OVFL or INEX enabled?
10824        bne.b           fsgldiv_ovfl_ena        # yes
10825
# Build the default overflow result. Also the tail of the enabled path,
# which branches back here after placing the EXOP in fp1.
10826fsgldiv_ovfl_dis:
10827        btst            &neg_bit,FPSR_CC(%a6)   # is result negative
10828        sne             %d1                     # set sign param accordingly
10829        mov.l           L_SCR3(%a6),%d0         # pass prec:rnd
10830        andi.b          &0x30,%d0               # kill precision
10831        bsr.l           ovf_res                 # calculate default result
10832        or.b            %d0,FPSR_CC(%a6)        # set INF if applicable
10833        fmovm.x         (%a0),&0x80             # return default result in fp0
10834        rts
10835
# OVFL or INEX trap enabled: derive the EXOP from the scaled quotient in fp0
# by undoing the scaling and subtracting 0x6000 from the exponent, and
# return it in fp1. d0 must still hold the scale factor here.
10836fsgldiv_ovfl_ena:
10837        fmovm.x         &0x80,FP_SCR0(%a6)      # move result to stack
10838
10839        mov.l           %d2,-(%sp)              # save d2
10840        mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
10841        mov.l           %d1,%d2                 # make a copy
10842        andi.l          &0x7fff,%d1             # strip sign
10843        andi.w          &0x8000,%d2             # keep old sign
10844        sub.l           %d0,%d1                 # exp - scale factor undoes the scaling
10845        subi.l          &0x6000,%d1             # subtract new bias
10846        andi.w          &0x7fff,%d1             # clear ms bit
10847        or.w            %d2,%d1                 # concat old sign,new exp
10848        mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
10849        mov.l           (%sp)+,%d2              # restore d2
10850        fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
10851        bra.b           fsgldiv_ovfl_dis        # fp0 still gets the default result
10852
# fsgldiv_unfl: the quotient underflows (reached directly from the scale
# factor test, or from fsgldiv_may_unfl). The scaled divide is done with
# round-to-zero and the result is handed to unf_res4 to form the default
# underflow result. Expects d0 = scale factor, FP_SCR0/FP_SCR1 = scaled ops.
10853fsgldiv_unfl:
10854        bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
10855
10856        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
10857
10858        fmov.l          &rz_mode*0x10,%fpcr     # set FPCR: RZ rounding, nothing else
10859        fmov.l          &0x0,%fpsr              # clear FPSR
10860
10861        fsgldiv.x       FP_SCR0(%a6),%fp0       # execute sgl divide
10862
10863        fmov.l          %fpsr,%d1               # save status
10864        fmov.l          &0x0,%fpcr              # clear FPCR
10865
10866        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
10867
10868        mov.b           FPCR_ENABLE(%a6),%d1
10869        andi.b          &0x0b,%d1               # is UNFL or INEX enabled?
10870        bne.b           fsgldiv_unfl_ena        # yes
10871
# Default result: fp0 (the RZ quotient) is written to FP_SCR0 and fixed up
# in place by unf_res4. Also the tail of the enabled path below.
10872fsgldiv_unfl_dis:
10873        fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
10874
10875        lea             FP_SCR0(%a6),%a0        # pass: result addr
10876        mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
10877        bsr.l           unf_res4                # calculate default result
10878        or.b            %d0,FPSR_CC(%a6)        # 'Z' bit may have been set
10879        fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
10880        rts
10881
10882#
10883# UNFL is enabled.
10884#
# The EXOP is computed separately in fp1 using the user's rounding control;
# fp0 keeps the RZ quotient for fsgldiv_unfl_dis. FP_SCR0 (the scaled src) is
# reused as scratch once the divide has consumed it.
10885fsgldiv_unfl_ena:
10886        fmovm.x         FP_SCR1(%a6),&0x40      # load dst op
10887
10888        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
10889        fmov.l          &0x0,%fpsr              # clear FPSR
10890
10891        fsgldiv.x       FP_SCR0(%a6),%fp1       # execute sgl divide
10892
10893        fmov.l          &0x0,%fpcr              # clear FPCR
10894
10895        fmovm.x         &0x40,FP_SCR0(%a6)      # save result to stack
10896        mov.l           %d2,-(%sp)              # save d2
10897        mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
10898        mov.l           %d1,%d2                 # make a copy
10899        andi.l          &0x7fff,%d1             # strip sign
10900        andi.w          &0x8000,%d2             # keep old sign
10901        sub.l           %d0,%d1                 # exp - scale factor undoes the scaling
10902        addi.l          &0x6000,%d1             # add bias
10903        andi.w          &0x7fff,%d1             # clear top bit
10904        or.w            %d2,%d1                 # concat old sign, new exp
10905        mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
10906        mov.l           (%sp)+,%d2              # restore d2
10907        fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
10908        bra.b           fsgldiv_unfl_dis        # then build the default result in fp0
10909
10910#
10911# the divide operation MAY underflow:
10912#
# fsgldiv_may_unfl: the scale factor sits exactly on the underflow boundary.
# Divide with the user's rounding and compare |result| with 1: above 1 is a
# normal result, below 1 is an underflow, and exactly 1 needs a second
# divide in RZ mode to find out which side the unrounded value was on.
10913fsgldiv_may_unfl:
10914        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
10915
10916        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
10917        fmov.l          &0x0,%fpsr              # clear FPSR
10918
10919        fsgldiv.x       FP_SCR0(%a6),%fp0       # execute sgl divide
10920
10921        fmov.l          %fpsr,%d1               # save status
10922        fmov.l          &0x0,%fpcr              # clear FPCR
10923
10924        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
10925
10926        fabs.x          %fp0,%fp1               # fp1 = |result|; fp0 keeps the signed result
10927        fcmp.b          %fp1,&0x1               # compare |result| with 1.b
10928        fbgt.w          fsgldiv_normal_exit     # |result| > 1; no underflow occurred
10929        fblt.w          fsgldiv_unfl            # |result| < 1; underflow occurred
10930
10931#
10932# we still don't know if underflow occurred. result is ~ equal to 1. but,
10933# we don't know if the result was an underflow that rounded up to a 1
10934# or a normalized number that rounded down to a 1. so, redo the entire
10935# operation using RZ as the rounding mode to see what the pre-rounded
10936# result is. this case should be relatively rare.
10937#
# The redo uses fp1 only, so fp0 still holds the first (user-rounded) result
# if control ends up at fsgldiv_normal_exit. Unlike the fsglmul version, the
# FPCR used here carries RZ alone and no precision bits from L_SCR3.
10938        fmovm.x         FP_SCR1(%a6),&0x40      # load dst op into %fp1
10939
10940        clr.l           %d1                     # clear scratch register
10941        ori.b           &rz_mode*0x10,%d1       # force RZ rnd mode
10942
10943        fmov.l          %d1,%fpcr               # set FPCR
10944        fmov.l          &0x0,%fpsr              # clear FPSR
10945
10946        fsgldiv.x       FP_SCR0(%a6),%fp1       # execute sgl divide
10947
10948        fmov.l          &0x0,%fpcr              # clear FPCR
10949        fabs.x          %fp1                    # make absolute value
10950        fcmp.b          %fp1,&0x1               # is |result| < 1.b?
10951        fbge.w          fsgldiv_normal_exit     # no; no underflow occurred
10952        bra.w           fsgldiv_unfl            # yes; underflow occurred
10953
10954############################################################################
10955
10956#
10957# Divide: inputs are not both normalized; what are they?
10958#
# fsgldiv_not_norm: dispatch on the combined operand tags. On entry
# d1.w = (DTAG << 3) | STAG. Each table entry is a 16-bit offset from
# tbl_fsgldiv_op to the handler; rows are the dst tag (dividend), columns
# the src tag (divisor). Two entries per row are padding (offset 0) for tag
# values 6 and 7.
10959fsgldiv_not_norm:
10960        mov.w           (tbl_fsgldiv_op.b,%pc,%d1.w*2),%d1 # d1 = offset stored in entry d1
10961        jmp             (tbl_fsgldiv_op.b,%pc,%d1.w*1)     # jump to tbl_fsgldiv_op + offset
10962
10963        swbeg           &48                     # 6 rows x 8 entries follow
10964tbl_fsgldiv_op:
10965        short           fsgldiv_norm            - tbl_fsgldiv_op # NORM / NORM
10966        short           fsgldiv_inf_load        - tbl_fsgldiv_op # NORM / ZERO
10967        short           fsgldiv_zero_load       - tbl_fsgldiv_op # NORM / INF
10968        short           fsgldiv_res_qnan        - tbl_fsgldiv_op # NORM / QNAN
10969        short           fsgldiv_norm            - tbl_fsgldiv_op # NORM / DENORM
10970        short           fsgldiv_res_snan        - tbl_fsgldiv_op # NORM / SNAN
10971        short           tbl_fsgldiv_op          - tbl_fsgldiv_op # (unused tag)
10972        short           tbl_fsgldiv_op          - tbl_fsgldiv_op # (unused tag)
10973
10974        short           fsgldiv_zero_load       - tbl_fsgldiv_op # ZERO / NORM
10975        short           fsgldiv_res_operr       - tbl_fsgldiv_op # ZERO / ZERO
10976        short           fsgldiv_zero_load       - tbl_fsgldiv_op # ZERO / INF
10977        short           fsgldiv_res_qnan        - tbl_fsgldiv_op # ZERO / QNAN
10978        short           fsgldiv_zero_load       - tbl_fsgldiv_op # ZERO / DENORM
10979        short           fsgldiv_res_snan        - tbl_fsgldiv_op # ZERO / SNAN
10980        short           tbl_fsgldiv_op          - tbl_fsgldiv_op # (unused tag)
10981        short           tbl_fsgldiv_op          - tbl_fsgldiv_op # (unused tag)
10982
10983        short           fsgldiv_inf_dst         - tbl_fsgldiv_op # INF / NORM
10984        short           fsgldiv_inf_dst         - tbl_fsgldiv_op # INF / ZERO
10985        short           fsgldiv_res_operr       - tbl_fsgldiv_op # INF / INF
10986        short           fsgldiv_res_qnan        - tbl_fsgldiv_op # INF / QNAN
10987        short           fsgldiv_inf_dst         - tbl_fsgldiv_op # INF / DENORM
10988        short           fsgldiv_res_snan        - tbl_fsgldiv_op # INF / SNAN
10989        short           tbl_fsgldiv_op          - tbl_fsgldiv_op # (unused tag)
10990        short           tbl_fsgldiv_op          - tbl_fsgldiv_op # (unused tag)
10991
10992        short           fsgldiv_res_qnan        - tbl_fsgldiv_op # QNAN / NORM
10993        short           fsgldiv_res_qnan        - tbl_fsgldiv_op # QNAN / ZERO
10994        short           fsgldiv_res_qnan        - tbl_fsgldiv_op # QNAN / INF
10995        short           fsgldiv_res_qnan        - tbl_fsgldiv_op # QNAN / QNAN
10996        short           fsgldiv_res_qnan        - tbl_fsgldiv_op # QNAN / DENORM
10997        short           fsgldiv_res_snan        - tbl_fsgldiv_op # QNAN / SNAN
10998        short           tbl_fsgldiv_op          - tbl_fsgldiv_op # (unused tag)
10999        short           tbl_fsgldiv_op          - tbl_fsgldiv_op # (unused tag)
11000
11001        short           fsgldiv_norm            - tbl_fsgldiv_op # DENORM / NORM
11002        short           fsgldiv_inf_load        - tbl_fsgldiv_op # DENORM / ZERO
11003        short           fsgldiv_zero_load       - tbl_fsgldiv_op # DENORM / INF
11004        short           fsgldiv_res_qnan        - tbl_fsgldiv_op # DENORM / QNAN
11005        short           fsgldiv_norm            - tbl_fsgldiv_op # DENORM / DENORM
11006        short           fsgldiv_res_snan        - tbl_fsgldiv_op # DENORM / SNAN
11007        short           tbl_fsgldiv_op          - tbl_fsgldiv_op # (unused tag)
11008        short           tbl_fsgldiv_op          - tbl_fsgldiv_op # (unused tag)
11009
11010        short           fsgldiv_res_snan        - tbl_fsgldiv_op # SNAN / NORM
11011        short           fsgldiv_res_snan        - tbl_fsgldiv_op # SNAN / ZERO
11012        short           fsgldiv_res_snan        - tbl_fsgldiv_op # SNAN / INF
11013        short           fsgldiv_res_snan        - tbl_fsgldiv_op # SNAN / QNAN
11014        short           fsgldiv_res_snan        - tbl_fsgldiv_op # SNAN / DENORM
11015        short           fsgldiv_res_snan        - tbl_fsgldiv_op # SNAN / SNAN
11016        short           tbl_fsgldiv_op          - tbl_fsgldiv_op # (unused tag)
11017        short           tbl_fsgldiv_op          - tbl_fsgldiv_op # (unused tag)
11017        short           tbl_fsgldiv_op          - tbl_fsgldiv_op #
11018
# fsgldiv special-case exits. Each label is a one-instruction trampoline
# (bra.l) to the routine named in its operand, so that the 16-bit
# offsets in tbl_fsgldiv_op can reach handlers via a nearby label.
11019fsgldiv_res_qnan:
11020        bra.l           res_qnan                # QNAN operand
11021fsgldiv_res_snan:
11022        bra.l           res_snan                # SNAN operand
11023fsgldiv_res_operr:
11024        bra.l           res_operr               # operand error
11025fsgldiv_inf_load:
11026        bra.l           fdiv_inf_load           # share fdiv's handler
11027fsgldiv_zero_load:
11028        bra.l           fdiv_zero_load          # share fdiv's handler
11029fsgldiv_inf_dst:
11030        bra.l           fdiv_inf_dst            # share fdiv's handler
11031
11032#########################################################################
11033# XDEF **************************************************************** #
11034#       fadd(): emulates the fadd instruction                           #
11035#       fsadd(): emulates the fsadd instruction                         #
11036#       fdadd(): emulates the fdadd instruction                         #
11037#                                                                       #
11038# XREF **************************************************************** #
11039#       addsub_scaler2() - scale the operands so they won't take exc    #
11040#       ovf_res() - return default overflow result                      #
11041#       unf_res() - return default underflow result                     #
11042#       res_qnan() - set QNAN result                                    #
11043#       res_snan() - set SNAN result                                    #
11044#       res_operr() - set OPERR result                                  #
11045#       scale_to_zero_src() - set src operand exponent equal to zero    #
11046#       scale_to_zero_dst() - set dst operand exponent equal to zero    #
11047#                                                                       #
11048# INPUT *************************************************************** #
11049#       a0 = pointer to extended precision source operand               #
11050#       a1 = pointer to extended precision destination operand          #
11051#                                                                       #
11052# OUTPUT ************************************************************** #
11053#       fp0 = result                                                    #
11054#       fp1 = EXOP (if exception occurred)                              #
11055#                                                                       #
11056# ALGORITHM *********************************************************** #
11057#       Handle NANs, infinities, and zeroes as special cases. Divide    #
11058# norms into extended, single, and double precision.                    #
11059#       Do addition after scaling exponents such that exception won't   #
11060# occur. Then, check result exponent to see if exception would have     #
11061# occurred. If so, return default result and maybe EXOP. Else, insert   #
11062# the correct result exponent and return. Set FPSR bits as appropriate. #
11063#                                                                       #
11064#########################################################################
11065
# fsadd/fdadd/fadd entry points.
# fsadd and fdadd keep only the bits selected by mask 0x30 in the low
# byte of d0, OR in the single/double precision code, and then share the
# fadd body (fsadd branches to it, fdadd falls through into it).
# fadd stores d0 in L_SCR3, builds a dispatch index from DTAG/STAG and,
# when that index is zero, performs the add on scaled operands and then
# checks the unscaled exponent against the overflow/underflow tables.
11066        global          fsadd
11067fsadd:
11068        andi.b          &0x30,%d0               # clear rnd prec
11069        ori.b           &s_mode*0x10,%d0        # insert sgl prec
11070        bra.b           fadd
11071
11072        global          fdadd
11073fdadd:
11074        andi.b          &0x30,%d0               # clear rnd prec
11075        ori.b           &d_mode*0x10,%d0        # insert dbl prec
11076
11077        global          fadd
11078fadd:
11079        mov.l           %d0,L_SCR3(%a6)         # store rnd info
11080
11081        clr.w           %d1                     # start with a clean index word
11082        mov.b           DTAG(%a6),%d1           # fetch dst tag
11083        lsl.b           &0x3,%d1                # dst tag selects the table row
11084        or.b            STAG(%a6),%d1           # combine dst,src tags
11085
11086        bne.w           fadd_not_norm           # optimize on non-norm input
11087
11088#
11089# ADD: norms and denorms
11090#
11091fadd_norm:
11092        bsr.l           addsub_scaler2          # scale exponents
11093
# NOTE(review): d0 is used below as the scale factor; presumably it is
# returned by addsub_scaler2 (or scale_to_zero_src/dst for the callers
# that enter at fadd_zero_entry) -- confirm against those routines.
11094fadd_zero_entry:
11095        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
11096
11097        fmov.l          &0x0,%fpsr              # clear FPSR
11098        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
11099
11100        fadd.x          FP_SCR0(%a6),%fp0       # execute add
11101
11102        fmov.l          &0x0,%fpcr              # clear FPCR
11103        fmov.l          %fpsr,%d1               # fetch INEX2,N,Z
11104
11105        or.l            %d1,USER_FPSR(%a6)      # save exc and ccode bits
11106
11107        fbeq.w          fadd_zero_exit          # if result is zero, end now
11108
11109        mov.l           %d2,-(%sp)              # save d2
11110
11111        fmovm.x         &0x01,-(%sp)            # save result to stack
11112
11113        mov.w           2+L_SCR3(%a6),%d1       # fetch low word of rnd info
11114        lsr.b           &0x6,%d1                # top 2 bits of low byte = table index
11115
11116        mov.w           (%sp),%d2               # fetch new sign, exp
11117        andi.l          &0x7fff,%d2             # strip sign
11118        sub.l           %d0,%d2                 # add scale factor
11119
11120        cmp.l           %d2,(tbl_fadd_ovfl.b,%pc,%d1.w*4) # is it an overflow?
11121        bge.b           fadd_ovfl               # yes
11122
11123        cmp.l           %d2,(tbl_fadd_unfl.b,%pc,%d1.w*4) # is it an underflow?
11124        blt.w           fadd_unfl               # yes
11125        beq.w           fadd_may_unfl           # maybe; go find out
11126
# In-range result: put the unscaled exponent (d2) back under the
# original sign, reload fp0 from the stack image and restore d2.
11127fadd_normal:
11128        mov.w           (%sp),%d1               # fetch {sgn,exp} of saved result
11129        andi.w          &0x8000,%d1             # keep sign
11130        or.w            %d2,%d1                 # concat sign,new exp
11131        mov.w           %d1,(%sp)               # insert new exponent
11132
11133        fmovm.x         (%sp)+,&0x80            # return result in fp0
11134
11135        mov.l           (%sp)+,%d2              # restore d2
11136        rts
11137
# Zero result: fp0 is returned as produced by the fadd.x above; nothing
# was pushed on the stack on this path.
11138fadd_zero_exit:
11139#       fmov.s          &0x00000000,%fp0        # return zero in fp0
11140        rts
11141
# Exponent thresholds for fadd, one long per rounding precision. Both
# tables are read PC-relative with index d1.w*4 and compared against the
# unscaled result exponent in d2.
11142tbl_fadd_ovfl:
11143        long            0x7fff                  # ext ovfl
11144        long            0x407f                  # sgl ovfl
11145        long            0x43ff                  # dbl ovfl
11146
11147tbl_fadd_unfl:
11148        long            0x0000                  # ext unfl
11149        long            0x3f81                  # sgl unfl
11150        long            0x3c01                  # dbl unfl
11151
# fadd overflow handling.
# On entry the stack holds the 12-byte result image pushed by fadd with
# the saved d2 beneath it; d2 holds the unscaled result exponent.
# If neither OVFL nor INEX is enabled only the default result from
# ovf_res is returned; otherwise an EXOP with the exponent biased by
# -0x6000 is first built in fp1.
11152fadd_ovfl:
11153        or.l            &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
11154
11155        mov.b           FPCR_ENABLE(%a6),%d1
11156        andi.b          &0x13,%d1               # is OVFL or INEX enabled?
11157        bne.b           fadd_ovfl_ena           # yes
11158
11159        add.l           &0xc,%sp                # discard saved result image
11160fadd_ovfl_dis:
11161        btst            &neg_bit,FPSR_CC(%a6)   # is result negative?
11162        sne             %d1                     # set sign param accordingly
11163        mov.l           L_SCR3(%a6),%d0         # pass prec:rnd
11164        bsr.l           ovf_res                 # calculate default result
11165        or.b            %d0,FPSR_CC(%a6)        # set INF,N if applicable
11166        fmovm.x         (%a0),&0x80             # return default result in fp0
11167        mov.l           (%sp)+,%d2              # restore d2
11168        rts
11169
# NOTE(review): the byte load below reads offset 0 of L_SCR3, whereas
# fadd_unfl_ena applies the same 0xc0 mask after a mov.l and
# fadd_zero_2_chk_rm reads 3+L_SCR3 for the mode bits. Confirm that
# offset 0 is the intended byte before relying on fadd_ovfl_ena_sd.
11170fadd_ovfl_ena:
11171        mov.b           L_SCR3(%a6),%d1
11172        andi.b          &0xc0,%d1               # is precision extended?
11173        bne.b           fadd_ovfl_ena_sd        # no; prec = sgl or dbl
11174
11175fadd_ovfl_ena_cont:
11176        mov.w           (%sp),%d1               # fetch {sgn,exp} of saved result
11177        andi.w          &0x8000,%d1             # keep sign
11178        subi.l          &0x6000,%d2             # subtract extra bias
11179        andi.w          &0x7fff,%d2             # clear top bit
11180        or.w            %d2,%d1                 # concat sign,new exp
11181        mov.w           %d1,(%sp)               # insert new exponent
11182
11183        fmovm.x         (%sp)+,&0x40            # return EXOP in fp1
11184        bra.b           fadd_ovfl_dis
11185
# sgl/dbl precision: redo the add with only the rnd mode bits in the
# FPCR, then replace the stacked result image with the new fp0.
11186fadd_ovfl_ena_sd:
11187        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
11188
11189        mov.l           L_SCR3(%a6),%d1
11190        andi.b          &0x30,%d1               # keep rnd mode
11191        fmov.l          %d1,%fpcr               # set FPCR
11192
11193        fadd.x          FP_SCR0(%a6),%fp0       # execute add
11194
11195        fmov.l          &0x0,%fpcr              # clear FPCR
11196
11197        add.l           &0xc,%sp                # drop old result image
11198        fmovm.x         &0x01,-(%sp)            # push re-computed result
11199        bra.b           fadd_ovfl_ena_cont
11200
# fadd underflow handling.
# Sets the UNFL exception bit, discards the stacked result image and
# redoes the add with the RZ rounding mode. The default result comes
# from unf_res; when UNFL or INEX is enabled an EXOP with the exponent
# biased by +0x6000 is built in fp1 first.
11201fadd_unfl:
11202        bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
11203
11204        add.l           &0xc,%sp                # discard saved result image
11205
11206        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
11207
11208        fmov.l          &rz_mode*0x10,%fpcr     # set FPCR
11209        fmov.l          &0x0,%fpsr              # clear FPSR
11210
11211        fadd.x          FP_SCR0(%a6),%fp0       # execute add
11212
11213        fmov.l          &0x0,%fpcr              # clear FPCR
11214        fmov.l          %fpsr,%d1               # save status
11215
11216        or.l            %d1,USER_FPSR(%a6)      # save INEX,N
11217
11218        mov.b           FPCR_ENABLE(%a6),%d1
11219        andi.b          &0x0b,%d1               # is UNFL or INEX enabled?
11220        bne.b           fadd_unfl_ena           # yes
11221
11222fadd_unfl_dis:
11223        fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
11224
11225        lea             FP_SCR0(%a6),%a0        # pass: result addr
11226        mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
11227        bsr.l           unf_res                 # calculate default result
11228        or.b            %d0,FPSR_CC(%a6)        # 'Z' bit may have been set
11229        fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
11230        mov.l           (%sp)+,%d2              # restore d2
11231        rts
11232
11233fadd_unfl_ena:
11234        fmovm.x         FP_SCR1(%a6),&0x40      # load dst op into fp1
11235
11236        mov.l           L_SCR3(%a6),%d1
11237        andi.b          &0xc0,%d1               # is precision extended?
11238        bne.b           fadd_unfl_ena_sd        # no; sgl or dbl
11239
11240        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
11241
11242fadd_unfl_ena_cont:
11243        fmov.l          &0x0,%fpsr              # clear FPSR
11244
11245        fadd.x          FP_SCR0(%a6),%fp1       # execute add
11246
11247        fmov.l          &0x0,%fpcr              # clear FPCR
11248
11249        fmovm.x         &0x40,FP_SCR0(%a6)      # save result to stack
11250        mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
11251        mov.l           %d1,%d2                 # make a copy
11252        andi.l          &0x7fff,%d1             # strip sign
11253        andi.w          &0x8000,%d2             # keep old sign
11254        sub.l           %d0,%d1                 # add scale factor
11255        addi.l          &0x6000,%d1             # add new bias
11256        andi.w          &0x7fff,%d1             # clear top bit
11257        or.w            %d2,%d1                 # concat sign,new exp
11258        mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
11259        fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
11260        bra.w           fadd_unfl_dis
11261
11262fadd_unfl_ena_sd:
11263        mov.l           L_SCR3(%a6),%d1
11264        andi.b          &0x30,%d1               # use only rnd mode
11265        fmov.l          %d1,%fpcr               # set FPCR
11266
11267        bra.b           fadd_unfl_ena_cont
11268
11269#
11270# result is equal to the smallest normalized number in the selected precision
11271# if the precision is extended, this result could not have come from an
11272# underflow that rounded up.
11273#
# Reached from fadd when the unscaled exponent equals the underflow
# threshold. Exits to fadd_normal unless the precision is sgl/dbl, the
# stacked mantissa is exactly 0x80000000_00000000 and INEX2 is set; in
# that case the add is repeated in RZ mode to decide.
11274fadd_may_unfl:
11275        mov.l           L_SCR3(%a6),%d1
11276        andi.b          &0xc0,%d1               # is precision extended?
11277        beq.w           fadd_normal             # yes; no underflow occurred
11278
11279        mov.l           0x4(%sp),%d1            # extract hi(man)
11280        cmpi.l          %d1,&0x80000000         # is hi(man) = 0x80000000?
11281        bne.w           fadd_normal             # no; no underflow occurred
11282
11283        tst.l           0x8(%sp)                # is lo(man) = 0x0?
11284        bne.w           fadd_normal             # no; no underflow occurred
11285
11286        btst            &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
11287        beq.w           fadd_normal             # no; no underflow occurred
11288
11289#
11290# ok, so now the result has an exponent equal to the smallest normalized
11291# exponent for the selected precision. also, the mantissa is equal to
11292# 0x8000000000000000 and this mantissa is the result of rounding non-zero
11293# g,r,s.
11294# now, we must determine whether the pre-rounded result was an underflow
11295# rounded "up" or a normalized number rounded "down".
11296# so, we do this by re-executing the add using RZ as the rounding mode and
11297# seeing if the new result is smaller or equal to the current result.
11298#
11299        fmovm.x         FP_SCR1(%a6),&0x40      # load dst op into fp1
11300
11301        mov.l           L_SCR3(%a6),%d1
11302        andi.b          &0xc0,%d1               # keep rnd prec
11303        ori.b           &rz_mode*0x10,%d1       # insert rnd mode
11304        fmov.l          %d1,%fpcr               # set FPCR
11305        fmov.l          &0x0,%fpsr              # clear FPSR
11306
11307        fadd.x          FP_SCR0(%a6),%fp1       # execute add
11308
11309        fmov.l          &0x0,%fpcr              # clear FPCR
11310
11311        fabs.x          %fp0                    # compare absolute values
11312        fabs.x          %fp1
11313        fcmp.x          %fp0,%fp1               # is first result > second?
11314
11315        fbgt.w          fadd_unfl               # yes; it's an underflow
11316        bra.w           fadd_normal             # no; it's not an underflow
11317
11318##########################################################################
11319
11320#
11321# Add: inputs are not both normalized; what are they?
11322#
# Dispatch on the combined operand tags. d1 holds (DTAG << 3) | STAG as
# built in fadd; it indexes tbl_fadd_op (16-bit entries) to fetch an
# offset relative to tbl_fadd_op, which is then jumped to.
# Table rows are the dst tag, columns the src tag; comments read
# "DST + SRC". The last two slots of every row are unused and hold 0.
11323fadd_not_norm:
11324        mov.w           (tbl_fadd_op.b,%pc,%d1.w*2),%d1 # fetch handler offset
11325        jmp             (tbl_fadd_op.b,%pc,%d1.w*1) # go to handler
11326
11327        swbeg           &48
11328tbl_fadd_op:
11329        short           fadd_norm       - tbl_fadd_op # NORM + NORM
11330        short           fadd_zero_src   - tbl_fadd_op # NORM + ZERO
11331        short           fadd_inf_src    - tbl_fadd_op # NORM + INF
11332        short           fadd_res_qnan   - tbl_fadd_op # NORM + QNAN
11333        short           fadd_norm       - tbl_fadd_op # NORM + DENORM
11334        short           fadd_res_snan   - tbl_fadd_op # NORM + SNAN
11335        short           tbl_fadd_op     - tbl_fadd_op #
11336        short           tbl_fadd_op     - tbl_fadd_op #
11337
11338        short           fadd_zero_dst   - tbl_fadd_op # ZERO + NORM
11339        short           fadd_zero_2     - tbl_fadd_op # ZERO + ZERO
11340        short           fadd_inf_src    - tbl_fadd_op # ZERO + INF
11341        short           fadd_res_qnan   - tbl_fadd_op # ZERO + QNAN
11342        short           fadd_zero_dst   - tbl_fadd_op # ZERO + DENORM
11343        short           fadd_res_snan   - tbl_fadd_op # ZERO + SNAN
11344        short           tbl_fadd_op     - tbl_fadd_op #
11345        short           tbl_fadd_op     - tbl_fadd_op #
11346
11347        short           fadd_inf_dst    - tbl_fadd_op # INF + NORM
11348        short           fadd_inf_dst    - tbl_fadd_op # INF + ZERO
11349        short           fadd_inf_2      - tbl_fadd_op # INF + INF
11350        short           fadd_res_qnan   - tbl_fadd_op # INF + QNAN
11351        short           fadd_inf_dst    - tbl_fadd_op # INF + DENORM
11352        short           fadd_res_snan   - tbl_fadd_op # INF + SNAN
11353        short           tbl_fadd_op     - tbl_fadd_op #
11354        short           tbl_fadd_op     - tbl_fadd_op #
11355
11356        short           fadd_res_qnan   - tbl_fadd_op # QNAN + NORM
11357        short           fadd_res_qnan   - tbl_fadd_op # QNAN + ZERO
11358        short           fadd_res_qnan   - tbl_fadd_op # QNAN + INF
11359        short           fadd_res_qnan   - tbl_fadd_op # QNAN + QNAN
11360        short           fadd_res_qnan   - tbl_fadd_op # QNAN + DENORM
11361        short           fadd_res_snan   - tbl_fadd_op # QNAN + SNAN
11362        short           tbl_fadd_op     - tbl_fadd_op #
11363        short           tbl_fadd_op     - tbl_fadd_op #
11364
11365        short           fadd_norm       - tbl_fadd_op # DENORM + NORM
11366        short           fadd_zero_src   - tbl_fadd_op # DENORM + ZERO
11367        short           fadd_inf_src    - tbl_fadd_op # DENORM + INF
11368        short           fadd_res_qnan   - tbl_fadd_op # DENORM + QNAN
11369        short           fadd_norm       - tbl_fadd_op # DENORM + DENORM
11370        short           fadd_res_snan   - tbl_fadd_op # DENORM + SNAN
11371        short           tbl_fadd_op     - tbl_fadd_op #
11372        short           tbl_fadd_op     - tbl_fadd_op #
11373
11374        short           fadd_res_snan   - tbl_fadd_op # SNAN + NORM
11375        short           fadd_res_snan   - tbl_fadd_op # SNAN + ZERO
11376        short           fadd_res_snan   - tbl_fadd_op # SNAN + INF
11377        short           fadd_res_snan   - tbl_fadd_op # SNAN + QNAN
11378        short           fadd_res_snan   - tbl_fadd_op # SNAN + DENORM
11379        short           fadd_res_snan   - tbl_fadd_op # SNAN + SNAN
11380        short           tbl_fadd_op     - tbl_fadd_op #
11381        short           tbl_fadd_op     - tbl_fadd_op #
11382
# NAN exits for fadd: local trampolines referenced from tbl_fadd_op that
# branch (bra.l) to the shared res_qnan / res_snan routines.
11383fadd_res_qnan:
11384        bra.l           res_qnan                # QNAN operand
11385fadd_res_snan:
11386        bra.l           res_snan                # SNAN operand
11387
11388#
11389# both operands are ZEROes
11390#
# ZERO + ZERO. a0/a1 point at the src/dst operands. Returns a signed
# zero in fp0 and writes the matching condition codes to FPSR_CC:
# same signs keep that sign; opposite signs give -ZERO only in RM.
11391fadd_zero_2:
11392        mov.b           SRC_EX(%a0),%d0         # are the signs opposite?
11393        mov.b           DST_EX(%a1),%d1
11394        eor.b           %d0,%d1                 # bit 7 set if signs differ
11395        bmi.w           fadd_zero_2_chk_rm      # weed out (-ZERO)+(+ZERO)
11396
11397# the signs are the same. so determine whether they are positive or negative
11398# and return the appropriately signed zero.
11399        tst.b           %d0                     # are ZEROes positive or negative?
11400        bmi.b           fadd_zero_rm            # negative
11401        fmov.s          &0x00000000,%fp0        # return +ZERO
11402        mov.b           &z_bmask,FPSR_CC(%a6)   # set Z
11403        rts
11404
11405#
11406# the ZEROes have opposite signs:
11407# - Therefore, we return +ZERO if the rounding modes are RN,RZ, or RP.
11408# - -ZERO is returned in the case of RM.
11409#
11410fadd_zero_2_chk_rm:
11411        mov.b           3+L_SCR3(%a6),%d1       # low byte of stored rnd info
11412        andi.b          &0x30,%d1               # extract rnd mode
11413        cmpi.b          %d1,&rm_mode*0x10       # is rnd mode == RM?
11414        beq.b           fadd_zero_rm            # yes
11415        fmov.s          &0x00000000,%fp0        # return +ZERO
11416        mov.b           &z_bmask,FPSR_CC(%a6)   # set Z
11417        rts
11418
11419fadd_zero_rm:
11420        fmov.s          &0x80000000,%fp0        # return -ZERO
11421        mov.b           &neg_bmask+z_bmask,FPSR_CC(%a6) # set NEG/Z
11422        rts
11423
11424#
11425# one operand is a ZERO and the other is a DENORM or NORM. scale
11426# the DENORM or NORM and jump to the regular fadd routine.
11427#
# dst is ZERO (tbl_fadd_op rows "ZERO + NORM/DENORM"): copy the src
# operand into FP_SCR0, scale it, clear FP_SCR1 and join the regular
# add at fadd_zero_entry.
11428fadd_zero_dst:
11429        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
11430        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
11431        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
11432        bsr.l           scale_to_zero_src       # scale the operand
11433        clr.w           FP_SCR1_EX(%a6)         # dst image = all zero bits
11434        clr.l           FP_SCR1_HI(%a6)
11435        clr.l           FP_SCR1_LO(%a6)
11436        bra.w           fadd_zero_entry         # go execute fadd
11437
# src is ZERO (tbl_fadd_op entries "NORM/DENORM + ZERO"): mirror image
# of fadd_zero_dst using the dst operand and FP_SCR1.
11438fadd_zero_src:
11439        mov.w           DST_EX(%a1),FP_SCR1_EX(%a6)
11440        mov.l           DST_HI(%a1),FP_SCR1_HI(%a6)
11441        mov.l           DST_LO(%a1),FP_SCR1_LO(%a6)
11442        bsr.l           scale_to_zero_dst       # scale the operand
11443        clr.w           FP_SCR0_EX(%a6)         # src image = all zero bits
11444        clr.l           FP_SCR0_HI(%a6)
11445        clr.l           FP_SCR0_LO(%a6)
11446        bra.w           fadd_zero_entry         # go execute fadd
11447
11448#
11449# both operands are INFs. an OPERR will result if the INFs have
11450# different signs. else, an INF of the same sign is returned
11451#
# INF + INF: opposite signs branch to res_operr; same signs fall
# through to fadd_inf_src so that the src INF is the one returned.
11452fadd_inf_2:
11453        mov.b           SRC_EX(%a0),%d0         # exclusive or the signs
11454        mov.b           DST_EX(%a1),%d1
11455        eor.b           %d1,%d0                 # bit 7 set if signs differ
11456        bmi.l           res_operr               # weed out (-INF)+(+INF)
11457
11458# ok, so it's not an OPERR. but, we do have to remember to return the
11459# src INF since that's where the 881/882 gets the j-bit from...
11460
11461#
11462# src operand is INF and dst is one of {ZERO, INF, DENORM, NORM}
11463#
11464fadd_inf_src:
11465        fmovm.x         SRC(%a0),&0x80          # return src INF
11466        tst.b           SRC_EX(%a0)             # is INF positive?
11467        bpl.b           fadd_inf_done           # yes; we're done
11468        mov.b           &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11469        rts
11470
11471#
11472# dst operand is INF and src is one of {ZERO, DENORM, NORM}
11473#
11474fadd_inf_dst:
11475        fmovm.x         DST(%a1),&0x80          # return dst INF
11476        tst.b           DST_EX(%a1)             # is INF positive?
11477        bpl.b           fadd_inf_done           # yes; we're done
11478        mov.b           &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11479        rts
11480
# shared exit for a positive INF result
11481fadd_inf_done:
11482        mov.b           &inf_bmask,FPSR_CC(%a6) # set INF
11483        rts
11484
11485#########################################################################
11486# XDEF **************************************************************** #
11487#       fsub(): emulates the fsub instruction                           #
11488#       fssub(): emulates the fssub instruction                         #
11489#       fdsub(): emulates the fdsub instruction                         #
11490#                                                                       #
11491# XREF **************************************************************** #
11492#       addsub_scaler2() - scale the operands so they won't take exc    #
11493#       ovf_res() - return default overflow result                      #
11494#       unf_res() - return default underflow result                     #
11495#       res_qnan() - set QNAN result                                    #
11496#       res_snan() - set SNAN result                                    #
11497#       res_operr() - set OPERR result                                  #
11498#       scale_to_zero_src() - set src operand exponent equal to zero    #
11499#       scale_to_zero_dst() - set dst operand exponent equal to zero    #
11500#                                                                       #
11501# INPUT *************************************************************** #
11502#       a0 = pointer to extended precision source operand               #
11503#       a1 = pointer to extended precision destination operand          #
11504#                                                                       #
11505# OUTPUT ************************************************************** #
11506#       fp0 = result                                                    #
11507#       fp1 = EXOP (if exception occurred)                              #
11508#                                                                       #
11509# ALGORITHM *********************************************************** #
11510#       Handle NANs, infinities, and zeroes as special cases. Divide    #
11511# norms into extended, single, and double precision.                    #
11512#       Do subtraction after scaling exponents such that exception won't#
11513# occur. Then, check result exponent to see if exception would have     #
11514# occurred. If so, return default result and maybe EXOP. Else, insert   #
11515# the correct result exponent and return. Set FPSR bits as appropriate. #
11516#                                                                       #
11517#########################################################################
11518
# fssub/fdsub/fsub entry points.
# fssub and fdsub keep only the bits selected by mask 0x30 in the low
# byte of d0, OR in the single/double precision code, and then share the
# fsub body (fssub branches to it, fdsub falls through into it).
# fsub stores d0 in L_SCR3, builds a dispatch index from DTAG/STAG and,
# when that index is zero, performs the subtract on scaled operands and
# then checks the unscaled exponent against the ovfl/unfl tables.
11519        global          fssub
11520fssub:
11521        andi.b          &0x30,%d0               # clear rnd prec
11522        ori.b           &s_mode*0x10,%d0        # insert sgl prec
11523        bra.b           fsub
11524
11525        global          fdsub
11526fdsub:
11527        andi.b          &0x30,%d0               # clear rnd prec
11528        ori.b           &d_mode*0x10,%d0        # insert dbl prec
11529
11530        global          fsub
11531fsub:
11532        mov.l           %d0,L_SCR3(%a6)         # store rnd info
11533
11534        clr.w           %d1                     # start with a clean index word
11535        mov.b           DTAG(%a6),%d1           # fetch dst tag
11536        lsl.b           &0x3,%d1                # dst tag selects the table row
11537        or.b            STAG(%a6),%d1           # combine dst,src tags
11538
11539        bne.w           fsub_not_norm           # optimize on non-norm input
11540
11541#
11542# SUB: norms and denorms
11543#
11544fsub_norm:
11545        bsr.l           addsub_scaler2          # scale exponents
11546
# NOTE(review): d0 is used below as the scale factor; presumably it is
# returned by addsub_scaler2 (or by whichever routine precedes a jump to
# fsub_zero_entry) -- confirm against those routines.
11547fsub_zero_entry:
11548        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
11549
11550        fmov.l          &0x0,%fpsr              # clear FPSR
11551        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
11552
11553        fsub.x          FP_SCR0(%a6),%fp0       # execute subtract
11554
11555        fmov.l          &0x0,%fpcr              # clear FPCR
11556        fmov.l          %fpsr,%d1               # fetch INEX2, N, Z
11557
11558        or.l            %d1,USER_FPSR(%a6)      # save exc and ccode bits
11559
11560        fbeq.w          fsub_zero_exit          # if result zero, end now
11561
11562        mov.l           %d2,-(%sp)              # save d2
11563
11564        fmovm.x         &0x01,-(%sp)            # save result to stack
11565
11566        mov.w           2+L_SCR3(%a6),%d1       # fetch low word of rnd info
11567        lsr.b           &0x6,%d1                # top 2 bits of low byte = table index
11568
11569        mov.w           (%sp),%d2               # fetch new exponent
11570        andi.l          &0x7fff,%d2             # strip sign
11571        sub.l           %d0,%d2                 # add scale factor
11572
11573        cmp.l           %d2,(tbl_fsub_ovfl.b,%pc,%d1.w*4) # is it an overflow?
11574        bge.b           fsub_ovfl               # yes
11575
11576        cmp.l           %d2,(tbl_fsub_unfl.b,%pc,%d1.w*4) # is it an underflow?
11577        blt.w           fsub_unfl               # yes
11578        beq.w           fsub_may_unfl           # maybe; go find out
11579
# In-range result: put the unscaled exponent (d2) back under the
# original sign, reload fp0 from the stack image and restore d2.
11580fsub_normal:
11581        mov.w           (%sp),%d1               # fetch {sgn,exp} of saved result
11582        andi.w          &0x8000,%d1             # keep sign
11583        or.w            %d2,%d1                 # concat sign,new exp
11584        mov.w           %d1,(%sp)               # insert new exponent
11585
11586        fmovm.x         (%sp)+,&0x80            # return result in fp0
11587
11588        mov.l           (%sp)+,%d2              # restore d2
11589        rts
11590
# Zero result: fp0 is returned as produced by the fsub.x above; nothing
# was pushed on the stack on this path.
11591fsub_zero_exit:
11592#       fmov.s          &0x00000000,%fp0        # return zero in fp0
11593        rts
11594
# Exponent thresholds for fsub, one long per rounding precision. Both
# tables are read PC-relative with index d1.w*4 and compared against the
# unscaled result exponent in d2.
11595tbl_fsub_ovfl:
11596        long            0x7fff                  # ext ovfl
11597        long            0x407f                  # sgl ovfl
11598        long            0x43ff                  # dbl ovfl
11599
11600tbl_fsub_unfl:
11601        long            0x0000                  # ext unfl
11602        long            0x3f81                  # sgl unfl
11603        long            0x3c01                  # dbl unfl
11604
# fsub overflow handling.
# On entry the stack holds the 12-byte result image pushed by fsub with
# the saved d2 beneath it; d2 holds the unscaled result exponent.
# If neither OVFL nor INEX is enabled only the default result from
# ovf_res is returned; otherwise an EXOP with the exponent biased by
# -0x6000 is first built in fp1.
11605fsub_ovfl:
11606        or.l            &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
11607
11608        mov.b           FPCR_ENABLE(%a6),%d1
11609        andi.b          &0x13,%d1               # is OVFL or INEX enabled?
11610        bne.b           fsub_ovfl_ena           # yes
11611
11612        add.l           &0xc,%sp                # discard saved result image
11613fsub_ovfl_dis:
11614        btst            &neg_bit,FPSR_CC(%a6)   # is result negative?
11615        sne             %d1                     # set sign param accordingly
11616        mov.l           L_SCR3(%a6),%d0         # pass prec:rnd
11617        bsr.l           ovf_res                 # calculate default result
11618        or.b            %d0,FPSR_CC(%a6)        # set INF,N if applicable
11619        fmovm.x         (%a0),&0x80             # return default result in fp0
11620        mov.l           (%sp)+,%d2              # restore d2
11621        rts
11622
# NOTE(review): the byte load below reads offset 0 of L_SCR3, whereas
# fsub_unfl_ena applies the same 0xc0 mask after a mov.l. Confirm that
# offset 0 is the intended byte before relying on fsub_ovfl_ena_sd.
11623fsub_ovfl_ena:
11624        mov.b           L_SCR3(%a6),%d1
11625        andi.b          &0xc0,%d1               # is precision extended?
11626        bne.b           fsub_ovfl_ena_sd        # no
11627
11628fsub_ovfl_ena_cont:
11629        mov.w           (%sp),%d1               # fetch {sgn,exp}
11630        andi.w          &0x8000,%d1             # keep sign
11631        subi.l          &0x6000,%d2             # subtract new bias
11632        andi.w          &0x7fff,%d2             # clear top bit
11633        or.w            %d2,%d1                 # concat sign,exp
11634        mov.w           %d1,(%sp)               # insert new exponent
11635
11636        fmovm.x         (%sp)+,&0x40            # return EXOP in fp1
11637        bra.b           fsub_ovfl_dis
11638
# sgl/dbl precision: redo the subtract with only the rnd mode bits in
# the FPCR, then replace the stacked result image with the new fp0.
11639fsub_ovfl_ena_sd:
11640        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
11641
11642        mov.l           L_SCR3(%a6),%d1
11643        andi.b          &0x30,%d1               # clear rnd prec
11644        fmov.l          %d1,%fpcr               # set FPCR
11645
11646        fsub.x          FP_SCR0(%a6),%fp0       # execute subtract
11647
11648        fmov.l          &0x0,%fpcr              # clear FPCR
11649
11650        add.l           &0xc,%sp                # drop old result image
11651        fmovm.x         &0x01,-(%sp)            # push re-computed result
11652        bra.b           fsub_ovfl_ena_cont
11653
# fsub underflow handling.
# Sets the UNFL exception bit, discards the stacked result image and
# redoes the subtract with the FPCR set to rz_mode*0x10. The default
# result comes from unf_res; when UNFL or INEX is enabled an EXOP with
# the exponent biased by +0x6000 is built in fp1 first.
11654fsub_unfl:
11655        bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
11656
11657        add.l           &0xc,%sp                # discard saved result image
11658
11659        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
11660
11661        fmov.l          &rz_mode*0x10,%fpcr     # set FPCR
11662        fmov.l          &0x0,%fpsr              # clear FPSR
11663
11664        fsub.x          FP_SCR0(%a6),%fp0       # execute subtract
11665
11666        fmov.l          &0x0,%fpcr              # clear FPCR
11667        fmov.l          %fpsr,%d1               # save status
11668
11669        or.l            %d1,USER_FPSR(%a6)      # merge status into user FPSR
11670
11671        mov.b           FPCR_ENABLE(%a6),%d1
11672        andi.b          &0x0b,%d1               # is UNFL or INEX enabled?
11673        bne.b           fsub_unfl_ena           # yes
11674
11675fsub_unfl_dis:
11676        fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
11677
11678        lea             FP_SCR0(%a6),%a0        # pass: result addr
11679        mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
11680        bsr.l           unf_res                 # calculate default result
11681        or.b            %d0,FPSR_CC(%a6)        # 'Z' may have been set
11682        fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
11683        mov.l           (%sp)+,%d2              # restore d2
11684        rts
11685
11686fsub_unfl_ena:
11687        fmovm.x         FP_SCR1(%a6),&0x40      # load dst op into fp1
11688
11689        mov.l           L_SCR3(%a6),%d1
11690        andi.b          &0xc0,%d1               # is precision extended?
11691        bne.b           fsub_unfl_ena_sd        # no
11692
11693        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
11694
11695fsub_unfl_ena_cont:
11696        fmov.l          &0x0,%fpsr              # clear FPSR
11697
11698        fsub.x          FP_SCR0(%a6),%fp1       # execute subtract
11699
11700        fmov.l          &0x0,%fpcr              # clear FPCR
11701
11702        fmovm.x         &0x40,FP_SCR0(%a6)      # store result to stack
11703        mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
11704        mov.l           %d1,%d2                 # make a copy
11705        andi.l          &0x7fff,%d1             # strip sign
11706        andi.w          &0x8000,%d2             # keep old sign
11707        sub.l           %d0,%d1                 # add scale factor
11708        addi.l          &0x6000,%d1             # add new bias
11709        andi.w          &0x7fff,%d1             # clear top bit
11710        or.w            %d2,%d1                 # concat sgn,exp
11711        mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
11712        fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
11713        bra.w           fsub_unfl_dis
11714
11715fsub_unfl_ena_sd:
11716        mov.l           L_SCR3(%a6),%d1
11717        andi.b          &0x30,%d1               # clear rnd prec
11718        fmov.l          %d1,%fpcr               # set FPCR
11719
11720        bra.b           fsub_unfl_ena_cont
11721
11722#
11723# result is equal to the smallest normalized number in the selected precision
11724# if the precision is extended, this result could not have come from an
11725# underflow that rounded up.
11726#
11727fsub_may_unfl:
11728        mov.l           L_SCR3(%a6),%d1
11729        andi.b          &0xc0,%d1               # is precision extended?
11730        beq.w           fsub_normal             # yes; no underflow occurred
11731
11732        mov.l           0x4(%sp),%d1            # fetch hi(man) from the stack
11733        cmpi.l          %d1,&0x80000000         # is hi(man) = 0x80000000?
11734        bne.w           fsub_normal             # no; no underflow occurred
11735
11736        tst.l           0x8(%sp)                # is lo(man) = 0x0?
11737        bne.w           fsub_normal             # no; no underflow occurred
11738
11739        btst            &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
11740        beq.w           fsub_normal             # no; no underflow occurred
11741
11742#
11743# ok, so now the result has an exponent equal to the smallest normalized
11744# exponent for the selected precision. also, the mantissa is equal to
11745# 0x8000000000000000 and this mantissa is the result of rounding non-zero
11746# g,r,s.
11747# now, we must determine whether the pre-rounded result was an underflow
11748# rounded "up" or a normalized number rounded "down".
11749# so, we do this by re-executing the subtract using RZ as the rounding mode
11750# and seeing if the new result is smaller or equal to the current result.
11751#
11752        fmovm.x         FP_SCR1(%a6),&0x40      # load dst op into fp1
11753
11754        mov.l           L_SCR3(%a6),%d1
11755        andi.b          &0xc0,%d1               # keep rnd prec
11756        ori.b           &rz_mode*0x10,%d1       # insert rnd mode
11757        fmov.l          %d1,%fpcr               # set FPCR
11758        fmov.l          &0x0,%fpsr              # clear FPSR
11759
11760        fsub.x          FP_SCR0(%a6),%fp1       # execute subtract
11761
11762        fmov.l          &0x0,%fpcr              # clear FPCR
11763
11764        fabs.x          %fp0                    # compare absolute values
11765        fabs.x          %fp1
11766        fcmp.x          %fp0,%fp1               # is first result > second?
11767
11768        fbgt.w          fsub_unfl               # yes; it's an underflow
11769        bra.w           fsub_normal             # no; it's not an underflow
11770
11771##########################################################################
11772
11773#
11774# Sub: inputs are not both normalized; what are they?
11775#
# d1 selects an entry of tbl_fsub_op; each entry is a 16-bit offset from the
# start of the table to the handler. Per the entry comments, every row of
# eight entries shares one dst type and the columns step through the src
# type (NORM, ZERO, INF, QNAN, DENORM, SNAN, then two unused slots whose
# offset is zero).
11776fsub_not_norm:
11777        mov.w           (tbl_fsub_op.b,%pc,%d1.w*2),%d1
11778        jmp             (tbl_fsub_op.b,%pc,%d1.w*1)
11779
11780        swbeg           &48
11781tbl_fsub_op:
11782        short           fsub_norm       - tbl_fsub_op # NORM - NORM
11783        short           fsub_zero_src   - tbl_fsub_op # NORM - ZERO
11784        short           fsub_inf_src    - tbl_fsub_op # NORM - INF
11785        short           fsub_res_qnan   - tbl_fsub_op # NORM - QNAN
11786        short           fsub_norm       - tbl_fsub_op # NORM - DENORM
11787        short           fsub_res_snan   - tbl_fsub_op # NORM - SNAN
11788        short           tbl_fsub_op     - tbl_fsub_op #
11789        short           tbl_fsub_op     - tbl_fsub_op #
11790
11791        short           fsub_zero_dst   - tbl_fsub_op # ZERO - NORM
11792        short           fsub_zero_2     - tbl_fsub_op # ZERO - ZERO
11793        short           fsub_inf_src    - tbl_fsub_op # ZERO - INF
11794        short           fsub_res_qnan   - tbl_fsub_op # ZERO - QNAN
11795        short           fsub_zero_dst   - tbl_fsub_op # ZERO - DENORM
11796        short           fsub_res_snan   - tbl_fsub_op # ZERO - SNAN
11797        short           tbl_fsub_op     - tbl_fsub_op #
11798        short           tbl_fsub_op     - tbl_fsub_op #
11799
11800        short           fsub_inf_dst    - tbl_fsub_op # INF - NORM
11801        short           fsub_inf_dst    - tbl_fsub_op # INF - ZERO
11802        short           fsub_inf_2      - tbl_fsub_op # INF - INF
11803        short           fsub_res_qnan   - tbl_fsub_op # INF - QNAN
11804        short           fsub_inf_dst    - tbl_fsub_op # INF - DENORM
11805        short           fsub_res_snan   - tbl_fsub_op # INF - SNAN
11806        short           tbl_fsub_op     - tbl_fsub_op #
11807        short           tbl_fsub_op     - tbl_fsub_op #
11808
11809        short           fsub_res_qnan   - tbl_fsub_op # QNAN - NORM
11810        short           fsub_res_qnan   - tbl_fsub_op # QNAN - ZERO
11811        short           fsub_res_qnan   - tbl_fsub_op # QNAN - INF
11812        short           fsub_res_qnan   - tbl_fsub_op # QNAN - QNAN
11813        short           fsub_res_qnan   - tbl_fsub_op # QNAN - DENORM
11814        short           fsub_res_snan   - tbl_fsub_op # QNAN - SNAN
11815        short           tbl_fsub_op     - tbl_fsub_op #
11816        short           tbl_fsub_op     - tbl_fsub_op #
11817
11818        short           fsub_norm       - tbl_fsub_op # DENORM - NORM
11819        short           fsub_zero_src   - tbl_fsub_op # DENORM - ZERO
11820        short           fsub_inf_src    - tbl_fsub_op # DENORM - INF
11821        short           fsub_res_qnan   - tbl_fsub_op # DENORM - QNAN
11822        short           fsub_norm       - tbl_fsub_op # DENORM - DENORM
11823        short           fsub_res_snan   - tbl_fsub_op # DENORM - SNAN
11824        short           tbl_fsub_op     - tbl_fsub_op #
11825        short           tbl_fsub_op     - tbl_fsub_op #
11826
11827        short           fsub_res_snan   - tbl_fsub_op # SNAN - NORM
11828        short           fsub_res_snan   - tbl_fsub_op # SNAN - ZERO
11829        short           fsub_res_snan   - tbl_fsub_op # SNAN - INF
11830        short           fsub_res_snan   - tbl_fsub_op # SNAN - QNAN
11831        short           fsub_res_snan   - tbl_fsub_op # SNAN - DENORM
11832        short           fsub_res_snan   - tbl_fsub_op # SNAN - SNAN
11833        short           tbl_fsub_op     - tbl_fsub_op #
11834        short           tbl_fsub_op     - tbl_fsub_op #
11835
# stubs referenced from tbl_fsub_op; they forward to the shared res_qnan and
# res_snan handlers with a long branch.
11836fsub_res_qnan:
11837        bra.l           res_qnan
11838fsub_res_snan:
11839        bra.l           res_snan
11840
11841#
11842# both operands are ZEROes
11843#
# fsub computes dst - src, so:
#   - opposite signs: the result is a ZERO with the sign of the dst ZERO
#   - same signs:     +ZERO, except -ZERO when the rounding mode is RM
# a0 = pointer to src operand, a1 = pointer to dst operand.
# fp0 and FPSR_CC are set on return; d0/d1 are used as scratch.
#
11844fsub_zero_2:
11845        mov.b           SRC_EX(%a0),%d0
11846        mov.b           DST_EX(%a1),%d1
11847        eor.b           %d1,%d0                 # d0 = src ^ dst; d1 still = dst
11848        bpl.b           fsub_zero_2_chk_rm      # signs are the same
11849
11850# the signs are opposite, so, return a ZERO w/ the sign of the dst ZERO
# (d0 now holds the XOR, whose sign bit is always set here, so the dst sign
# must be tested in d1.)
11851        tst.b           %d1                     # is dst negative?
11852        bmi.b           fsub_zero_2_rm          # yes
11853        fmov.s          &0x00000000,%fp0        # no; return +ZERO
11854        mov.b           &z_bmask,FPSR_CC(%a6)   # set Z
11855        rts
11856
11857#
11858# the ZEROes have the same signs:
11859# - Therefore, we return +ZERO if the rounding mode is RN,RZ, or RP
11860# - -ZERO is returned in the case of RM.
11861#
11862fsub_zero_2_chk_rm:
11863        mov.b           3+L_SCR3(%a6),%d1
11864        andi.b          &0x30,%d1               # extract rnd mode
11865        cmpi.b          %d1,&rm_mode*0x10       # is rnd mode = RM?
11866        beq.b           fsub_zero_2_rm          # yes
11867        fmov.s          &0x00000000,%fp0        # no; return +ZERO
11868        mov.b           &z_bmask,FPSR_CC(%a6)   # set Z
11869        rts
11870
11871fsub_zero_2_rm:
11872        fmov.s          &0x80000000,%fp0        # return -ZERO
11873        mov.b           &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/NEG
11874        rts
11875
11876#
11877# one operand is a ZERO and the other is a DENORM or a NORM.
11878# scale the DENORM or NORM and jump to the regular fsub routine.
11879#
# fsub_zero_dst: the dst is the ZERO. copy the src into FP_SCR0, scale it,
# and use an all-zero image in FP_SCR1 for the dst.
11880fsub_zero_dst:
11881        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
11882        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
11883        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
11884        bsr.l           scale_to_zero_src       # scale the operand
11885        clr.w           FP_SCR1_EX(%a6)
11886        clr.l           FP_SCR1_HI(%a6)
11887        clr.l           FP_SCR1_LO(%a6)
11888        bra.w           fsub_zero_entry         # go execute fsub
11889
# fsub_zero_src: the src is the ZERO. copy the dst into FP_SCR1, scale it,
# and use an all-zero image in FP_SCR0 for the src.
11890fsub_zero_src:
11891        mov.w           DST_EX(%a1),FP_SCR1_EX(%a6)
11892        mov.l           DST_HI(%a1),FP_SCR1_HI(%a6)
11893        mov.l           DST_LO(%a1),FP_SCR1_LO(%a6)
11894        bsr.l           scale_to_zero_dst       # scale the operand
11895        clr.w           FP_SCR0_EX(%a6)
11896        clr.l           FP_SCR0_HI(%a6)
11897        clr.l           FP_SCR0_LO(%a6)
11898        bra.w           fsub_zero_entry         # go execute fsub
11899
11900#
11901# both operands are INFs. an OPERR will result if the INFs have the
11902# same signs. else, fall through and return the negated src INF.
11903#
11904fsub_inf_2:
11905        mov.b           SRC_EX(%a0),%d0         # exclusive or the signs
11906        mov.b           DST_EX(%a1),%d1
11907        eor.b           %d1,%d0
11908        bpl.l           res_operr               # same signs: INF - INF is an OPERR
11909
11910# ok, so it's not an OPERR. but we do have to remember to return
11911# the src INF since that's where the 881/882 gets the j-bit.
11912
# the src is an INF: the result is the src INF with its sign inverted.
11913fsub_inf_src:
11914        fmovm.x         SRC(%a0),&0x80          # return src INF
11915        fneg.x          %fp0                    # invert sign
11916        fbge.w          fsub_inf_done           # sign is now positive
11917        mov.b           &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11918        rts
11919
# the dst is an INF (and the src is not): the result is the dst INF unchanged.
11920fsub_inf_dst:
11921        fmovm.x         DST(%a1),&0x80          # return dst INF
11922        tst.b           DST_EX(%a1)             # is INF negative?
11923        bpl.b           fsub_inf_done           # no
11924        mov.b           &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11925        rts
11926
# common exit for a positive INF result
11927fsub_inf_done:
11928        mov.b           &inf_bmask,FPSR_CC(%a6) # set INF
11929        rts
11930
11931#########################################################################
11932# XDEF **************************************************************** #
11933#       fsqrt(): emulates the fsqrt instruction                         #
11934#       fssqrt(): emulates the fssqrt instruction                       #
11935#       fdsqrt(): emulates the fdsqrt instruction                       #
11936#                                                                       #
11937# XREF **************************************************************** #
11938#       scale_sqrt() - scale the source operand                         #
11939#       unf_res() - return default underflow result                     #
11940#       ovf_res() - return default overflow result                      #
11941#       res_qnan_1op() - return QNAN result                             #
11942#       res_snan_1op() - return SNAN result                             #
11943#                                                                       #
11944# INPUT *************************************************************** #
11945#       a0 = pointer to extended precision source operand               #
11946#       d0 = rnd prec,mode                                              #
11947#                                                                       #
11948# OUTPUT ************************************************************** #
11949#       fp0 = result                                                    #
11950#       fp1 = EXOP (if exception occurred)                              #
11951#                                                                       #
11952# ALGORITHM *********************************************************** #
11953#       Handle NANs, infinities, and zeroes as special cases. Divide    #
11954# norms/denorms into ext/sgl/dbl precision.                             #
11955#       For norms/denorms, scale the exponents such that a sqrt         #
11956# instruction won't cause an exception. Use the regular fsqrt to        #
11957# compute a result. Check if the regular operands would have taken      #
11958# an exception. If so, return the default overflow/underflow result     #
11959# and return the EXOP if exceptions are enabled. Else, scale the        #
11960# result operand to the proper exponent.                                #
11961#                                                                       #
11962#########################################################################
11963
# fssqrt: replace the precision field of d0 with single precision (the
# rounding mode bits are kept) and continue in fsqrt.
11964        global          fssqrt
11965fssqrt:
11966        andi.b          &0x30,%d0               # clear rnd prec
11967        ori.b           &s_mode*0x10,%d0        # insert sgl precision
11968        bra.b           fsqrt
11969
# fdsqrt: replace the precision field of d0 with double precision (the
# rounding mode bits are kept) and fall through into fsqrt.
11970        global          fdsqrt
11971fdsqrt:
11972        andi.b          &0x30,%d0               # clear rnd prec
11973        ori.b           &d_mode*0x10,%d0        # insert dbl precision
11974
# fsqrt: common entry. a0 = pointer to the source operand, d0 = rnd prec,mode.
# d0 is saved in L_SCR3; a non-zero STAG dispatches to fsqrt_not_norm.
11975        global          fsqrt
11976fsqrt:
11977        mov.l           %d0,L_SCR3(%a6)         # store rnd info
11978        clr.w           %d1
11979        mov.b           STAG(%a6),%d1
11980        bne.w           fsqrt_not_norm          # optimize on non-norm input
11981
11982#
11983# SQUARE ROOT: norms and denorms ONLY!
11984#
# a negative operand is an OPERR. for extended precision the square root is
# executed directly on the operand; sgl/dbl go through fsqrt_not_ext.
# NOTE(review): unlike the sgl/dbl paths, this path returns without writing
# zero back to the FPCR - confirm the callers do not depend on that.
11985fsqrt_norm:
11986        tst.b           SRC_EX(%a0)             # is operand negative?
11987        bmi.l           res_operr               # yes
11988
11989        andi.b          &0xc0,%d0               # is precision extended?
11990        bne.b           fsqrt_not_ext           # no; go handle sgl or dbl
11991
11992        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
11993        fmov.l          &0x0,%fpsr              # clear FPSR
11994
11995        fsqrt.x         (%a0),%fp0              # execute square root
11996
11997        fmov.l          %fpsr,%d1
11998        or.l            %d1,USER_FPSR(%a6)      # set N,INEX
11999
12000        rts
12001
# fsqrt_denorm: the source is a DENORM. a negative operand is an OPERR and
# sgl/dbl precision is handled by fsqrt_not_ext. for extended precision the
# operand is copied to FP_SCR0, scaled, and then handled by fsqrt_sd_normal,
# which applies the scale factor returned in d0 to the result.
12002fsqrt_denorm:
12003        tst.b           SRC_EX(%a0)             # is operand negative?
12004        bmi.l           res_operr               # yes
12005
12006        andi.b          &0xc0,%d0               # is precision extended?
12007        bne.b           fsqrt_not_ext           # no; go handle sgl or dbl
12008
12009        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
12010        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
12011        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
12012
12013        bsr.l           scale_sqrt              # calculate scale factor
12014
12015        bra.w           fsqrt_sd_normal
12016
12017#
12018# operand is either single or double
12019#
# d0 holds only the precision field here (masked with 0xc0 by the caller).
12020fsqrt_not_ext:
12021        cmpi.b          %d0,&s_mode*0x10        # separate sgl/dbl prec
12022        bne.w           fsqrt_dbl
12023
12024#
12025# operand is to be rounded to single precision
12026#
# the operand is copied to FP_SCR0 and scaled; the scale factor returned in
# d0 is then compared against the underflow and overflow thresholds.
# (0x3f81 and 0x407f are presumably the smallest and largest sgl-precision
# exponents in extended-format bias - confirm.)
12027fsqrt_sgl:
12028        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
12029        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
12030        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
12031
12032        bsr.l           scale_sqrt              # calculate scale factor
12033
12034        cmpi.l          %d0,&0x3fff-0x3f81      # will move in underflow?
12035        beq.w           fsqrt_sd_may_unfl       # maybe; go check
12036        bgt.w           fsqrt_sd_unfl           # yes; go handle underflow
12037        cmpi.l          %d0,&0x3fff-0x407f      # will move in overflow?
12038        beq.w           fsqrt_sd_may_ovfl       # maybe; go check
12039        blt.w           fsqrt_sd_ovfl           # yes; go handle overflow
12040
12041#
12042# operand will NOT overflow or underflow when moved in to the fp reg file
12043#
# take the square root of the scaled operand in FP_SCR0 under the user's
# rounding precision/mode, then subtract the scale factor in d0 from the
# exponent of the result.
12044fsqrt_sd_normal:
12045        fmov.l          &0x0,%fpsr              # clear FPSR
12046        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
12047
12048        fsqrt.x         FP_SCR0(%a6),%fp0       # perform square root
12049
12050        fmov.l          %fpsr,%d1               # save FPSR
12051        fmov.l          &0x0,%fpcr              # clear FPCR
12052
12053        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
12054
# also entered from fsqrt_sd_may_ovfl once the result is known not to overflow
12055fsqrt_sd_normal_exit:
12056        mov.l           %d2,-(%sp)              # save d2
12057        fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
12058        mov.w           FP_SCR0_EX(%a6),%d1     # load sgn,exp
12059        mov.l           %d1,%d2                 # make a copy
12060        andi.l          &0x7fff,%d1             # strip sign
12061        sub.l           %d0,%d1                 # subtract scale factor
12062        andi.w          &0x8000,%d2             # keep old sign
12063        or.w            %d1,%d2                 # concat old sign,new exp
12064        mov.w           %d2,FP_SCR0_EX(%a6)     # insert new exponent
12065        mov.l           (%sp)+,%d2              # restore d2
12066        fmovm.x         FP_SCR0(%a6),&0x80      # return result in fp0
12067        rts
12068
12069#
12070# operand is to be rounded to double precision
12071#
# same flow as fsqrt_sgl, with the dbl-precision thresholds.
# (0x3c01 and 0x43ff are presumably the smallest and largest dbl-precision
# exponents in extended-format bias - confirm.)
12072fsqrt_dbl:
12073        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
12074        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
12075        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
12076
12077        bsr.l           scale_sqrt              # calculate scale factor
12078
12079        cmpi.l          %d0,&0x3fff-0x3c01      # will move in underflow?
12080        beq.w           fsqrt_sd_may_unfl       # maybe; go check
12081        bgt.b           fsqrt_sd_unfl           # yes; go handle underflow
12082        cmpi.l          %d0,&0x3fff-0x43ff      # will move in overflow?
12083        beq.w           fsqrt_sd_may_ovfl       # maybe; go check
12084        blt.w           fsqrt_sd_ovfl           # yes; go handle overflow
12085        bra.w           fsqrt_sd_normal         # no; go handle normalized op
12086
12087# we're on the line here and the distinguishing characteristic is whether
12088# the exponent is 3fff or 3ffe. if it's 3fff (low exponent bit set), then
12089# it's a safe number; otherwise fall through to underflow.
12090fsqrt_sd_may_unfl:
12091        btst            &0x0,1+FP_SCR0_EX(%a6)  # is exponent 0x3fff?
12092        bne.w           fsqrt_sd_normal         # yes, so no underflow
12093
12094#
12095# operand WILL underflow when moved in to the fp register file
12096#
# set UNFL, take the square root with the FPCR holding only the round-to-zero
# mode (precision field zero), and then either return the default underflow
# result or, if UNFL or INEX is enabled, build the EXOP first.
12097fsqrt_sd_unfl:
12098        bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12099
12100        fmov.l          &rz_mode*0x10,%fpcr     # set FPCR
12101        fmov.l          &0x0,%fpsr              # clear FPSR
12102
12103        fsqrt.x         FP_SCR0(%a6),%fp0       # execute square root
12104
12105        fmov.l          %fpsr,%d1               # save status
12106        fmov.l          &0x0,%fpcr              # clear FPCR
12107
12108        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
12109
12110# if underflow or inexact is enabled, go calculate EXOP first.
12111        mov.b           FPCR_ENABLE(%a6),%d1
12112        andi.b          &0x0b,%d1               # is UNFL or INEX enabled?
12113        bne.b           fsqrt_sd_unfl_ena       # yes
12114
# default-result path; also the common exit of fsqrt_sd_unfl_ena.
12115fsqrt_sd_unfl_dis:
12116        fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
12117
12118        lea             FP_SCR0(%a6),%a0        # pass: result addr
12119        mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
12120        bsr.l           unf_res                 # calculate default result
12121        or.b            %d0,FPSR_CC(%a6)        # set possible 'Z' ccode
12122        fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
12123        rts
12124
12125#
12126# operand will underflow AND underflow is enabled.
12127# Therefore, we must return the result rounded to extended precision.
12128#
# the EXOP is assembled in FP_SCR1 from the mantissa and exponent currently
# in FP_SCR0, with the scale factor in d0 subtracted and 0x6000 added to the
# exponent, and is returned in fp1.
# NOTE(review): on the path into this label the sqrt result in fp0 has not
# yet been stored to FP_SCR0, so the mantissa copied here is the scaled
# operand's - confirm this is intended.
12129fsqrt_sd_unfl_ena:
12130        mov.l           FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
12131        mov.l           FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
12132        mov.w           FP_SCR0_EX(%a6),%d1     # load current exponent
12133
12134        mov.l           %d2,-(%sp)              # save d2
12135        mov.l           %d1,%d2                 # make a copy
12136        andi.l          &0x7fff,%d1             # strip sign
12137        andi.w          &0x8000,%d2             # keep old sign
12138        sub.l           %d0,%d1                 # subtract scale factor
12139        addi.l          &0x6000,%d1             # add new bias
12140        andi.w          &0x7fff,%d1             # clear top bit
12141        or.w            %d2,%d1                 # concat new sign,new exp
12142        mov.w           %d1,FP_SCR1_EX(%a6)     # insert new exp
12143        fmovm.x         FP_SCR1(%a6),&0x40      # return EXOP in fp1
12144        mov.l           (%sp)+,%d2              # restore d2
12145        bra.b           fsqrt_sd_unfl_dis
12146
12147#
12148# operand WILL overflow.
12149#
# take the square root under the user's rounding precision/mode to collect
# INEX2/N, then set the overflow status bits and choose between the default
# result and the EXOP path.
12150fsqrt_sd_ovfl:
12151        fmov.l          &0x0,%fpsr              # clear FPSR
12152        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
12153
12154        fsqrt.x         FP_SCR0(%a6),%fp0       # perform square root
12155
12156        fmov.l          &0x0,%fpcr              # clear FPCR
12157        fmov.l          %fpsr,%d1               # save FPSR
12158
12159        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
12160
# also entered from fsqrt_sd_may_ovfl when the rounded result reached 1.0
12161fsqrt_sd_ovfl_tst:
12162        or.l            &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
12163
12164        mov.b           FPCR_ENABLE(%a6),%d1
12165        andi.b          &0x13,%d1               # is OVFL or INEX enabled?
12166        bne.b           fsqrt_sd_ovfl_ena       # yes
12167
12168#
12169# OVFL is not enabled; therefore, we must create the default result by
12170# calling ovf_res().
12171#
# ovf_res is passed the sign in d1 and prec,mode in d0; on return d0 is
# or'ed into the condition codes and the result is loaded from (%a0).
# this is also the common exit of fsqrt_sd_ovfl_ena.
12172fsqrt_sd_ovfl_dis:
12173        btst            &neg_bit,FPSR_CC(%a6)   # is result negative?
12174        sne             %d1                     # set sign param accordingly
12175        mov.l           L_SCR3(%a6),%d0         # pass: prec,mode
12176        bsr.l           ovf_res                 # calculate default result
12177        or.b            %d0,FPSR_CC(%a6)        # set INF,N if applicable
12178        fmovm.x         (%a0),&0x80             # return default result in fp0
12179        rts
12180
12181#
12182# OVFL is enabled.
12183# the INEX2 bit has already been updated by the round to the correct precision.
12184# now, round to extended(and don't alter the FPSR).
12185#
# the EXOP is built in place in FP_SCR0: the scale factor in d0 and the
# 0x6000 bias are subtracted from the exponent, and the value is loaded
# into fp1.
# NOTE(review): on the paths into this label the sqrt result in fp0 has not
# been stored to FP_SCR0, so the mantissa used here is the scaled operand's
# - confirm this is intended.
12186fsqrt_sd_ovfl_ena:
12187        mov.l           %d2,-(%sp)              # save d2
12188        mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
12189        mov.l           %d1,%d2                 # make a copy
12190        andi.l          &0x7fff,%d1             # strip sign
12191        andi.w          &0x8000,%d2             # keep old sign
12192        sub.l           %d0,%d1                 # subtract scale factor
12193        subi.l          &0x6000,%d1             # subtract bias
12194        andi.w          &0x7fff,%d1             # clear top bit
12195        or.w            %d2,%d1                 # concat sign,exp
12196        mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
12197        fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
12198        mov.l           (%sp)+,%d2              # restore d2
12199        bra.b           fsqrt_sd_ovfl_dis
12200
12201#
12202# the move in MAY overflow. so...
12203#
# an odd scaled exponent (0x3fff) is treated as a definite overflow.
# otherwise the square root is taken and the rounded result is compared
# against 1: if it reached 1 the result overflowed, else it is in range.
12204fsqrt_sd_may_ovfl:
12205        btst            &0x0,1+FP_SCR0_EX(%a6)  # is exponent 0x3fff?
12206        bne.w           fsqrt_sd_ovfl           # yes, so overflow
12207
12208        fmov.l          &0x0,%fpsr              # clear FPSR
12209        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
12210
12211        fsqrt.x         FP_SCR0(%a6),%fp0       # perform square root
12212
12213        fmov.l          %fpsr,%d1               # save status
12214        fmov.l          &0x0,%fpcr              # clear FPCR
12215
12216        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
12217
12218        fmov.x          %fp0,%fp1               # make a copy of result
12219        fcmp.b          %fp1,&0x1               # is |result| >= 1.b?
12220        fbge.w          fsqrt_sd_ovfl_tst       # yes; overflow has occurred
12221
12222# no, it didn't overflow; we have correct result
12223        bra.w           fsqrt_sd_normal_exit
12224
12225##########################################################################
12226
12227#
12228# input is not normalized; what is it?
12229#
# d1 holds the source tag loaded from STAG in fsqrt. anything that is not a
# DENORM, ZERO, INF or SNAN is handled as a QNAN.
12230fsqrt_not_norm:
12231        cmpi.b          %d1,&DENORM             # weed out DENORM
12232        beq.w           fsqrt_denorm
12233        cmpi.b          %d1,&ZERO               # weed out ZERO
12234        beq.b           fsqrt_zero
12235        cmpi.b          %d1,&INF                # weed out INF
12236        beq.b           fsqrt_inf
12237        cmpi.b          %d1,&SNAN               # weed out SNAN
12238        beq.l           res_snan_1op
12239        bra.l           res_qnan_1op
12240
12241#
12242#       fsqrt(+0) = +0
12243#       fsqrt(-0) = -0
12244#       fsqrt(+INF) = +INF
12245#       fsqrt(-INF) = OPERR
12246#
# fsqrt_zero: return a ZERO with the sign of the source and set the matching
# condition codes.
12247fsqrt_zero:
12248        tst.b           SRC_EX(%a0)             # is ZERO positive or negative?
12249        bmi.b           fsqrt_zero_m            # negative
12250fsqrt_zero_p:
12251        fmov.s          &0x00000000,%fp0        # return +ZERO
12252        mov.b           &z_bmask,FPSR_CC(%a6)   # set 'Z' ccode bit
12253        rts
12254fsqrt_zero_m:
12255        fmov.s          &0x80000000,%fp0        # return -ZERO
12256        mov.b           &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
12257        rts
12258
# fsqrt_inf: a negative INF is an OPERR; a positive INF is returned as is.
12259fsqrt_inf:
12260        tst.b           SRC_EX(%a0)             # is INF positive or negative?
12261        bmi.l           res_operr               # negative
12262fsqrt_inf_p:
12263        fmovm.x         SRC(%a0),&0x80          # return +INF in fp0
12264        mov.b           &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
12265        rts
12266
12267#########################################################################
12268# XDEF **************************************************************** #
12269#       fetch_dreg(): fetch register according to index in d1           #
12270#                                                                       #
12271# XREF **************************************************************** #
12272#       None                                                            #
12273#                                                                       #
12274# INPUT *************************************************************** #
12275#       d1 = index of register to fetch from                            #
12276#                                                                       #
12277# OUTPUT ************************************************************** #
12278#       d0 = value of register fetched                                  #
12279#                                                                       #
12280# ALGORITHM *********************************************************** #
12281#       According to the index value in d1 which can range from zero    #
12282# to fifteen, load the corresponding register file value (where         #
12283# address register indexes start at 8). D0/D1/A0/A1/A6/A7 are on the    #
12284# stack. The rest should still be in their original places.             #
12285#                                                                       #
12286#########################################################################
12287
12288# this routine leaves d1 intact for subsequent store_dreg calls.
# d0 is used as scratch for the table offset before it receives the result.
# indexes 0-7 select d0-d7 and 8-15 select a0-a7.
12289        global          fetch_dreg
12290fetch_dreg:
12291        mov.w           (tbl_fdreg.b,%pc,%d1.w*2),%d0
12292        jmp             (tbl_fdreg.b,%pc,%d0.w*1)
12293
# 16-bit offsets from tbl_fdreg to the per-register fetch stubs
12294tbl_fdreg:
12295        short           fdreg0 - tbl_fdreg
12296        short           fdreg1 - tbl_fdreg
12297        short           fdreg2 - tbl_fdreg
12298        short           fdreg3 - tbl_fdreg
12299        short           fdreg4 - tbl_fdreg
12300        short           fdreg5 - tbl_fdreg
12301        short           fdreg6 - tbl_fdreg
12302        short           fdreg7 - tbl_fdreg
12303        short           fdreg8 - tbl_fdreg
12304        short           fdreg9 - tbl_fdreg
12305        short           fdrega - tbl_fdreg
12306        short           fdregb - tbl_fdreg
12307        short           fdregc - tbl_fdreg
12308        short           fdregd - tbl_fdreg
12309        short           fdrege - tbl_fdreg
12310        short           fdregf - tbl_fdreg
12311
12312fdreg0:
12313        mov.l           EXC_DREGS+0x0(%a6),%d0  # d0: saved copy on the stack
12314        rts
12315fdreg1:
12316        mov.l           EXC_DREGS+0x4(%a6),%d0  # d1: saved copy on the stack
12317        rts
12318fdreg2:
12319        mov.l           %d2,%d0                 # d2-d7 are still live
12320        rts
12321fdreg3:
12322        mov.l           %d3,%d0
12323        rts
12324fdreg4:
12325        mov.l           %d4,%d0
12326        rts
12327fdreg5:
12328        mov.l           %d5,%d0
12329        rts
12330fdreg6:
12331        mov.l           %d6,%d0
12332        rts
12333fdreg7:
12334        mov.l           %d7,%d0
12335        rts
12336fdreg8:
12337        mov.l           EXC_DREGS+0x8(%a6),%d0  # a0: saved copy on the stack
12338        rts
12339fdreg9:
12340        mov.l           EXC_DREGS+0xc(%a6),%d0  # a1: saved copy on the stack
12341        rts
12342fdrega:
12343        mov.l           %a2,%d0                 # a2-a5 are still live
12344        rts
12345fdregb:
12346        mov.l           %a3,%d0
12347        rts
12348fdregc:
12349        mov.l           %a4,%d0
12350        rts
12351fdregd:
12352        mov.l           %a5,%d0
12353        rts
12354fdrege:
12355        mov.l           (%a6),%d0               # a6: saved copy at (%a6)
12356        rts
12357fdregf:
12358        mov.l           EXC_A7(%a6),%d0         # a7: saved copy on the stack
12359        rts
12360
12361#########################################################################
12362# XDEF **************************************************************** #
12363#       store_dreg_l(): store longword to data register specified by d1 #
12364#                                                                       #
12365# XREF **************************************************************** #
12366#       None                                                            #
12367#                                                                       #
12368# INPUT *************************************************************** #
12369#       d0 = longword value to store                                    #
12370#       d1 = index of register to store to                              #
12371#                                                                       #
12372# OUTPUT ************************************************************** #
12373#       (data register is updated)                                      #
12374#                                                                       #
12375# ALGORITHM *********************************************************** #
12376#       According to the index value in d1, store the longword value    #
12377# in d0 to the corresponding data register. D0/D1 are on the stack      #
12378# while the rest are in their initial places.                           #
12379#                                                                       #
12380#########################################################################
12381
12382        global          store_dreg_l
12383store_dreg_l:
12384        mov.w           (tbl_sdregl.b,%pc,%d1.w*2),%d1 # d1 = word at tbl_sdregl[d1]: handler offset from tbl_sdregl
12385        jmp             (tbl_sdregl.b,%pc,%d1.w*1) # jump to tbl_sdregl + that offset
12386
12387tbl_sdregl:
12388        short           sdregl0 - tbl_sdregl # table of 16-bit offsets, one entry per index 0-7
12389        short           sdregl1 - tbl_sdregl
12390        short           sdregl2 - tbl_sdregl
12391        short           sdregl3 - tbl_sdregl
12392        short           sdregl4 - tbl_sdregl
12393        short           sdregl5 - tbl_sdregl
12394        short           sdregl6 - tbl_sdregl
12395        short           sdregl7 - tbl_sdregl
12396
12397sdregl0:
12398        mov.l           %d0,EXC_DREGS+0x0(%a6) # index 0: write the frame slot at EXC_DREGS+0x0 (d0 is on the stack per header)
12399        rts
12400sdregl1:
12401        mov.l           %d0,EXC_DREGS+0x4(%a6) # index 1: write the frame slot at EXC_DREGS+0x4 (d1 is on the stack per header)
12402        rts
12403sdregl2:
12404        mov.l           %d0,%d2 # indices 2-7: the live register is written directly
12405        rts
12406sdregl3:
12407        mov.l           %d0,%d3
12408        rts
12409sdregl4:
12410        mov.l           %d0,%d4
12411        rts
12412sdregl5:
12413        mov.l           %d0,%d5
12414        rts
12415sdregl6:
12416        mov.l           %d0,%d6
12417        rts
12418sdregl7:
12419        mov.l           %d0,%d7
12420        rts
12421
12422#########################################################################
12423# XDEF **************************************************************** #
12424#       store_dreg_w(): store word to data register specified by d1     #
12425#                                                                       #
12426# XREF **************************************************************** #
12427#       None                                                            #
12428#                                                                       #
12429# INPUT *************************************************************** #
12430#       d0 = word value to store                                        #
12431#       d1 = index of register to store to                              #
12432#                                                                       #
12433# OUTPUT ************************************************************** #
12434#       (data register is updated)                                      #
12435#                                                                       #
12436# ALGORITHM *********************************************************** #
12437#       According to the index value in d1, store the word value        #
12438# in d0 to the corresponding data register. D0/D1 are on the stack      #
12439# while the rest are in their initial places.                           #
12440#                                                                       #
12441#########################################################################
12442
12443        global          store_dreg_w
12444store_dreg_w:
12445        mov.w           (tbl_sdregw.b,%pc,%d1.w*2),%d1 # d1 = word at tbl_sdregw[d1]: handler offset from tbl_sdregw
12446        jmp             (tbl_sdregw.b,%pc,%d1.w*1) # jump to tbl_sdregw + that offset
12447
12448tbl_sdregw:
12449        short           sdregw0 - tbl_sdregw # table of 16-bit offsets, one entry per index 0-7
12450        short           sdregw1 - tbl_sdregw
12451        short           sdregw2 - tbl_sdregw
12452        short           sdregw3 - tbl_sdregw
12453        short           sdregw4 - tbl_sdregw
12454        short           sdregw5 - tbl_sdregw
12455        short           sdregw6 - tbl_sdregw
12456        short           sdregw7 - tbl_sdregw
12457
12458sdregw0:
12459        mov.w           %d0,2+EXC_DREGS+0x0(%a6) # index 0: frame slot; +2 addresses the low-order word of the big-endian longword
12460        rts
12461sdregw1:
12462        mov.w           %d0,2+EXC_DREGS+0x4(%a6) # index 1: frame slot; +2 addresses the low-order word
12463        rts
12464sdregw2:
12465        mov.w           %d0,%d2 # indices 2-7: word move into the live register (upper word untouched)
12466        rts
12467sdregw3:
12468        mov.w           %d0,%d3
12469        rts
12470sdregw4:
12471        mov.w           %d0,%d4
12472        rts
12473sdregw5:
12474        mov.w           %d0,%d5
12475        rts
12476sdregw6:
12477        mov.w           %d0,%d6
12478        rts
12479sdregw7:
12480        mov.w           %d0,%d7
12481        rts
12482
12483#########################################################################
12484# XDEF **************************************************************** #
12485#       store_dreg_b(): store byte to data register specified by d1     #
12486#                                                                       #
12487# XREF **************************************************************** #
12488#       None                                                            #
12489#                                                                       #
12490# INPUT *************************************************************** #
12491#       d0 = byte value to store                                        #
12492#       d1 = index of register to store to                              #
12493#                                                                       #
12494# OUTPUT ************************************************************** #
12495#       (data register is updated)                                      #
12496#                                                                       #
12497# ALGORITHM *********************************************************** #
12498#       According to the index value in d1, store the byte value        #
12499# in d0 to the corresponding data register. D0/D1 are on the stack      #
12500# while the rest are in their initial places.                           #
12501#                                                                       #
12502#########################################################################
12503
12504        global          store_dreg_b
12505store_dreg_b:
12506        mov.w           (tbl_sdregb.b,%pc,%d1.w*2),%d1 # d1 = word at tbl_sdregb[d1]: handler offset from tbl_sdregb
12507        jmp             (tbl_sdregb.b,%pc,%d1.w*1) # jump to tbl_sdregb + that offset
12508
12509tbl_sdregb:
12510        short           sdregb0 - tbl_sdregb # table of 16-bit offsets, one entry per index 0-7
12511        short           sdregb1 - tbl_sdregb
12512        short           sdregb2 - tbl_sdregb
12513        short           sdregb3 - tbl_sdregb
12514        short           sdregb4 - tbl_sdregb
12515        short           sdregb5 - tbl_sdregb
12516        short           sdregb6 - tbl_sdregb
12517        short           sdregb7 - tbl_sdregb
12518
12519sdregb0:
12520        mov.b           %d0,3+EXC_DREGS+0x0(%a6) # index 0: frame slot; +3 addresses the low-order byte of the big-endian longword
12521        rts
12522sdregb1:
12523        mov.b           %d0,3+EXC_DREGS+0x4(%a6) # index 1: frame slot; +3 addresses the low-order byte
12524        rts
12525sdregb2:
12526        mov.b           %d0,%d2 # indices 2-7: byte move into the live register (upper 24 bits untouched)
12527        rts
12528sdregb3:
12529        mov.b           %d0,%d3
12530        rts
12531sdregb4:
12532        mov.b           %d0,%d4
12533        rts
12534sdregb5:
12535        mov.b           %d0,%d5
12536        rts
12537sdregb6:
12538        mov.b           %d0,%d6
12539        rts
12540sdregb7:
12541        mov.b           %d0,%d7
12542        rts
12543
12544#########################################################################
12545# XDEF **************************************************************** #
12546#       inc_areg(): increment an address register by the value in d0    #
12547#                                                                       #
12548# XREF **************************************************************** #
12549#       None                                                            #
12550#                                                                       #
12551# INPUT *************************************************************** #
12552#       d0 = amount to increment by                                     #
12553#       d1 = index of address register to increment                     #
12554#                                                                       #
12555# OUTPUT ************************************************************** #
12556#       (address register is updated)                                   #
12557#                                                                       #
12558# ALGORITHM *********************************************************** #
12559#       Typically used for an instruction w/ a post-increment <ea>,     #
12560# this routine adds the increment value in d0 to the address register   #
12561# specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside     #
12562# in their original places.                                             #
12563#       For a7, if the increment amount is one, then we have to         #
12564# increment by two. For any a7 update, set the mia7_flg so that if      #
12565# an access error exception occurs later in emulation, this address     #
12566# register update can be undone.                                        #
12567#                                                                       #
12568#########################################################################
12569
12570        global          inc_areg
12571inc_areg:
12572        mov.w           (tbl_iareg.b,%pc,%d1.w*2),%d1 # d1 = word at tbl_iareg[d1]: handler offset from tbl_iareg
12573        jmp             (tbl_iareg.b,%pc,%d1.w*1) # jump to tbl_iareg + that offset
12574
12575tbl_iareg:
12576        short           iareg0 - tbl_iareg # table of 16-bit offsets, one entry per index 0-7
12577        short           iareg1 - tbl_iareg
12578        short           iareg2 - tbl_iareg
12579        short           iareg3 - tbl_iareg
12580        short           iareg4 - tbl_iareg
12581        short           iareg5 - tbl_iareg
12582        short           iareg6 - tbl_iareg
12583        short           iareg7 - tbl_iareg
12584
12585iareg0: add.l           %d0,EXC_DREGS+0x8(%a6) # index 0: update the frame slot at EXC_DREGS+0x8 (a0 is on the stack per header)
12586        rts
12587iareg1: add.l           %d0,EXC_DREGS+0xc(%a6) # index 1: update the frame slot at EXC_DREGS+0xc (a1 is on the stack per header)
12588        rts
12589iareg2: add.l           %d0,%a2 # indices 2-5: the live register is updated directly
12590        rts
12591iareg3: add.l           %d0,%a3
12592        rts
12593iareg4: add.l           %d0,%a4
12594        rts
12595iareg5: add.l           %d0,%a5
12596        rts
12597iareg6: add.l           %d0,(%a6) # index 6: update the longword at (a6) (a6 is on the stack per header)
12598        rts
12599iareg7: mov.b           &mia7_flg,SPCOND_FLG(%a6) # record the a7 post-increment so it can be undone later (see header)
12600        cmpi.b          %d0,&0x1 # is the low byte of the increment equal to one?
12601        beq.b           iareg7b # yes; a7 is bumped by two instead
12602        add.l           %d0,EXC_A7(%a6) # no; add d0 to the a7 image at EXC_A7
12603        rts
12604iareg7b:
12605        addq.l          &0x2,EXC_A7(%a6) # increment of one becomes two for a7
12606        rts
12607
12608#########################################################################
12609# XDEF **************************************************************** #
12610#       dec_areg(): decrement an address register by the value in d0    #
12611#                                                                       #
12612# XREF **************************************************************** #
12613#       None                                                            #
12614#                                                                       #
12615# INPUT *************************************************************** #
12616#       d0 = amount to decrement by                                     #
12617#       d1 = index of address register to decrement                     #
12618#                                                                       #
12619# OUTPUT ************************************************************** #
12620#       (address register is updated)                                   #
12621#                                                                       #
12622# ALGORITHM *********************************************************** #
12623#       Typically used for an instruction w/ a pre-decrement <ea>,      #
12624# this routine subtracts the value in d0 from the address register      #
12625# specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside     #
12626# in their original places.                                             #
12627#       For a7, if the decrement amount is one, then we have to         #
12628# decrement by two. For any a7 update, set the mda7_flg so that if      #
12629# an access error exception occurs later in emulation, this address     #
12630# register update can be undone.                                        #
12631#                                                                       #
12632#########################################################################
12633
12634        global          dec_areg
12635dec_areg:
12636        mov.w           (tbl_dareg.b,%pc,%d1.w*2),%d1 # d1 = word at tbl_dareg[d1]: handler offset from tbl_dareg
12637        jmp             (tbl_dareg.b,%pc,%d1.w*1) # jump to tbl_dareg + that offset
12638
12639tbl_dareg:
12640        short           dareg0 - tbl_dareg # table of 16-bit offsets, one entry per index 0-7
12641        short           dareg1 - tbl_dareg
12642        short           dareg2 - tbl_dareg
12643        short           dareg3 - tbl_dareg
12644        short           dareg4 - tbl_dareg
12645        short           dareg5 - tbl_dareg
12646        short           dareg6 - tbl_dareg
12647        short           dareg7 - tbl_dareg
12648
12649dareg0: sub.l           %d0,EXC_DREGS+0x8(%a6) # index 0: update the frame slot at EXC_DREGS+0x8 (a0 is on the stack per header)
12650        rts
12651dareg1: sub.l           %d0,EXC_DREGS+0xc(%a6) # index 1: update the frame slot at EXC_DREGS+0xc (a1 is on the stack per header)
12652        rts
12653dareg2: sub.l           %d0,%a2 # indices 2-5: the live register is updated directly
12654        rts
12655dareg3: sub.l           %d0,%a3
12656        rts
12657dareg4: sub.l           %d0,%a4
12658        rts
12659dareg5: sub.l           %d0,%a5
12660        rts
12661dareg6: sub.l           %d0,(%a6) # index 6: update the longword at (a6) (a6 is on the stack per header)
12662        rts
12663dareg7: mov.b           &mda7_flg,SPCOND_FLG(%a6) # record the a7 pre-decrement so it can be undone later (see header)
12664        cmpi.b          %d0,&0x1 # is the low byte of the decrement equal to one?
12665        beq.b           dareg7b # yes; a7 is dropped by two instead
12666        sub.l           %d0,EXC_A7(%a6) # no; subtract d0 from the a7 image at EXC_A7
12667        rts
12668dareg7b:
12669        subq.l          &0x2,EXC_A7(%a6) # decrement of one becomes two for a7
12670        rts
12671
12672##############################################################################
12673
12674#########################################################################
12675# XDEF **************************************************************** #
12676#       load_fpn1(): load FP register value into FP_SRC(a6).            #
12677#                                                                       #
12678# XREF **************************************************************** #
12679#       None                                                            #
12680#                                                                       #
12681# INPUT *************************************************************** #
12682#       d0 = index of FP register to load                               #
12683#                                                                       #
12684# OUTPUT ************************************************************** #
12685#       FP_SRC(a6) = value loaded from FP register file                 #
12686#                                                                       #
12687# ALGORITHM *********************************************************** #
12688#       Using the index in d0, load FP_SRC(a6) with a number from the   #
12689# FP register file.                                                     #
12690#                                                                       #
12691#########################################################################
12692
12693        global          load_fpn1
12694load_fpn1:
12695        mov.w           (tbl_load_fpn1.b,%pc,%d0.w*2), %d0 # d0 = word at tbl_load_fpn1[d0]: handler offset (index in d0 is overwritten)
12696        jmp             (tbl_load_fpn1.b,%pc,%d0.w*1) # jump to tbl_load_fpn1 + that offset
12697
12698tbl_load_fpn1:
12699        short           load_fpn1_0 - tbl_load_fpn1 # table of 16-bit offsets, one entry per index 0-7
12700        short           load_fpn1_1 - tbl_load_fpn1
12701        short           load_fpn1_2 - tbl_load_fpn1
12702        short           load_fpn1_3 - tbl_load_fpn1
12703        short           load_fpn1_4 - tbl_load_fpn1
12704        short           load_fpn1_5 - tbl_load_fpn1
12705        short           load_fpn1_6 - tbl_load_fpn1
12706        short           load_fpn1_7 - tbl_load_fpn1
12707
12708load_fpn1_0:
12709        mov.l           0+EXC_FP0(%a6), 0+FP_SRC(%a6) # index 0: copy the 12-byte image at EXC_FP0 in the frame,
12710        mov.l           4+EXC_FP0(%a6), 4+FP_SRC(%a6) # one longword at a time
12711        mov.l           8+EXC_FP0(%a6), 8+FP_SRC(%a6)
12712        lea             FP_SRC(%a6), %a0 # every path returns a0 = address of FP_SRC(a6)
12713        rts
12714load_fpn1_1:
12715        mov.l           0+EXC_FP1(%a6), 0+FP_SRC(%a6) # index 1: copy the 12-byte image at EXC_FP1 in the frame
12716        mov.l           4+EXC_FP1(%a6), 4+FP_SRC(%a6)
12717        mov.l           8+EXC_FP1(%a6), 8+FP_SRC(%a6)
12718        lea             FP_SRC(%a6), %a0
12719        rts
12720load_fpn1_2:
12721        fmovm.x         &0x20, FP_SRC(%a6) # indices 2-7 come from the live FP register file; mask 0x20 = fp2
12722        lea             FP_SRC(%a6), %a0
12723        rts
12724load_fpn1_3:
12725        fmovm.x         &0x10, FP_SRC(%a6) # mask 0x10 = fp3
12726        lea             FP_SRC(%a6), %a0
12727        rts
12728load_fpn1_4:
12729        fmovm.x         &0x08, FP_SRC(%a6) # mask 0x08 = fp4
12730        lea             FP_SRC(%a6), %a0
12731        rts
12732load_fpn1_5:
12733        fmovm.x         &0x04, FP_SRC(%a6) # mask 0x04 = fp5
12734        lea             FP_SRC(%a6), %a0
12735        rts
12736load_fpn1_6:
12737        fmovm.x         &0x02, FP_SRC(%a6) # mask 0x02 = fp6
12738        lea             FP_SRC(%a6), %a0
12739        rts
12740load_fpn1_7:
12741        fmovm.x         &0x01, FP_SRC(%a6) # mask 0x01 = fp7
12742        lea             FP_SRC(%a6), %a0
12743        rts
12744
12745#############################################################################
12746
12747#########################################################################
12748# XDEF **************************************************************** #
12749#       load_fpn2(): load FP register value into FP_DST(a6).            #
12750#                                                                       #
12751# XREF **************************************************************** #
12752#       None                                                            #
12753#                                                                       #
12754# INPUT *************************************************************** #
12755#       d0 = index of FP register to load                               #
12756#                                                                       #
12757# OUTPUT ************************************************************** #
12758#       FP_DST(a6) = value loaded from FP register file                 #
12759#                                                                       #
12760# ALGORITHM *********************************************************** #
12761#       Using the index in d0, load FP_DST(a6) with a number from the   #
12762# FP register file.                                                     #
12763#                                                                       #
12764#########################################################################
12765
12766        global          load_fpn2
12767load_fpn2:
12768        mov.w           (tbl_load_fpn2.b,%pc,%d0.w*2), %d0 # d0 = word at tbl_load_fpn2[d0]: handler offset (index in d0 is overwritten)
12769        jmp             (tbl_load_fpn2.b,%pc,%d0.w*1) # jump to tbl_load_fpn2 + that offset
12770
12771tbl_load_fpn2:
12772        short           load_fpn2_0 - tbl_load_fpn2 # table of 16-bit offsets, one entry per index 0-7
12773        short           load_fpn2_1 - tbl_load_fpn2
12774        short           load_fpn2_2 - tbl_load_fpn2
12775        short           load_fpn2_3 - tbl_load_fpn2
12776        short           load_fpn2_4 - tbl_load_fpn2
12777        short           load_fpn2_5 - tbl_load_fpn2
12778        short           load_fpn2_6 - tbl_load_fpn2
12779        short           load_fpn2_7 - tbl_load_fpn2
12780
12781load_fpn2_0:
12782        mov.l           0+EXC_FP0(%a6), 0+FP_DST(%a6) # index 0: copy the 12-byte image at EXC_FP0 in the frame,
12783        mov.l           4+EXC_FP0(%a6), 4+FP_DST(%a6) # one longword at a time
12784        mov.l           8+EXC_FP0(%a6), 8+FP_DST(%a6)
12785        lea             FP_DST(%a6), %a0 # every path returns a0 = address of FP_DST(a6)
12786        rts
12787load_fpn2_1:
12788        mov.l           0+EXC_FP1(%a6), 0+FP_DST(%a6) # index 1: copy the 12-byte image at EXC_FP1 in the frame
12789        mov.l           4+EXC_FP1(%a6), 4+FP_DST(%a6)
12790        mov.l           8+EXC_FP1(%a6), 8+FP_DST(%a6)
12791        lea             FP_DST(%a6), %a0
12792        rts
12793load_fpn2_2:
12794        fmovm.x         &0x20, FP_DST(%a6) # indices 2-7 come from the live FP register file; mask 0x20 = fp2
12795        lea             FP_DST(%a6), %a0
12796        rts
12797load_fpn2_3:
12798        fmovm.x         &0x10, FP_DST(%a6) # mask 0x10 = fp3
12799        lea             FP_DST(%a6), %a0
12800        rts
12801load_fpn2_4:
12802        fmovm.x         &0x08, FP_DST(%a6) # mask 0x08 = fp4
12803        lea             FP_DST(%a6), %a0
12804        rts
12805load_fpn2_5:
12806        fmovm.x         &0x04, FP_DST(%a6) # mask 0x04 = fp5
12807        lea             FP_DST(%a6), %a0
12808        rts
12809load_fpn2_6:
12810        fmovm.x         &0x02, FP_DST(%a6) # mask 0x02 = fp6
12811        lea             FP_DST(%a6), %a0
12812        rts
12813load_fpn2_7:
12814        fmovm.x         &0x01, FP_DST(%a6) # mask 0x01 = fp7
12815        lea             FP_DST(%a6), %a0
12816        rts
12817
12818#############################################################################
12819
12820#########################################################################
12821# XDEF **************************************************************** #
12822#       store_fpreg(): store an fp value to the fpreg designated d0.    #
12823#                                                                       #
12824# XREF **************************************************************** #
12825#       None                                                            #
12826#                                                                       #
12827# INPUT *************************************************************** #
12828#       fp0 = extended precision value to store                         #
12829#       d0  = index of floating-point register                          #
12830#                                                                       #
12831# OUTPUT ************************************************************** #
12832#       None                                                            #
12833#                                                                       #
12834# ALGORITHM *********************************************************** #
12835#       Store the value in fp0 to the FP register designated by the     #
12836# value in d0. The FP number can be DENORM or SNAN so we have to be     #
12837# careful that we don't take an exception here.                         #
12838#                                                                       #
12839#########################################################################
12840
12841        global          store_fpreg
12842store_fpreg:
12843        mov.w           (tbl_store_fpreg.b,%pc,%d0.w*2), %d0 # d0 = word at tbl_store_fpreg[d0]: handler offset (index in d0 is overwritten)
12844        jmp             (tbl_store_fpreg.b,%pc,%d0.w*1) # jump to tbl_store_fpreg + that offset
12845
12846tbl_store_fpreg:
12847        short           store_fpreg_0 - tbl_store_fpreg # table of 16-bit offsets, one entry per index 0-7
12848        short           store_fpreg_1 - tbl_store_fpreg
12849        short           store_fpreg_2 - tbl_store_fpreg
12850        short           store_fpreg_3 - tbl_store_fpreg
12851        short           store_fpreg_4 - tbl_store_fpreg
12852        short           store_fpreg_5 - tbl_store_fpreg
12853        short           store_fpreg_6 - tbl_store_fpreg
12854        short           store_fpreg_7 - tbl_store_fpreg
12855
12856store_fpreg_0:
12857        fmovm.x         &0x80, EXC_FP0(%a6) # index 0: write fp0 (mask 0x80) to the frame image at EXC_FP0
12858        rts
12859store_fpreg_1:
12860        fmovm.x         &0x80, EXC_FP1(%a6) # index 1: write fp0 to the frame image at EXC_FP1
12861        rts
12862store_fpreg_2:
12863        fmovm.x         &0x01, -(%sp) # push fp0; the mask bit order is reversed for -(An), so 0x01 = fp0
12864        fmovm.x         (%sp)+, &0x20 # pop into fp2; fmovm presumably chosen so a DENORM/SNAN takes no exception (see header)
12865        rts
12866store_fpreg_3:
12867        fmovm.x         &0x01, -(%sp) # push fp0
12868        fmovm.x         (%sp)+, &0x10 # pop into fp3
12869        rts
12870store_fpreg_4:
12871        fmovm.x         &0x01, -(%sp) # push fp0
12872        fmovm.x         (%sp)+, &0x08 # pop into fp4
12873        rts
12874store_fpreg_5:
12875        fmovm.x         &0x01, -(%sp) # push fp0
12876        fmovm.x         (%sp)+, &0x04 # pop into fp5
12877        rts
12878store_fpreg_6:
12879        fmovm.x         &0x01, -(%sp) # push fp0
12880        fmovm.x         (%sp)+, &0x02 # pop into fp6
12881        rts
12882store_fpreg_7:
12883        fmovm.x         &0x01, -(%sp) # push fp0
12884        fmovm.x         (%sp)+, &0x01 # pop into fp7
12885        rts
12886
12887#########################################################################
12888# XDEF **************************************************************** #
12889#       get_packed(): fetch a packed operand from memory and then       #
12890#                     convert it to a floating-point binary number.     #
12891#                                                                       #
12892# XREF **************************************************************** #
12893#       _dcalc_ea() - calculate the correct <ea>                        #
12894#       _dmem_read() - fetch the packed operand from memory             #
12895#       facc_in_x() - the fetch failed so jump to special exit code     #
12896#       decbin()    - convert packed to binary extended precision       #
12897#                                                                       #
12898# INPUT *************************************************************** #
12899#       None                                                            #
12900#                                                                       #
12901# OUTPUT ************************************************************** #
12902#       If no failure on _dmem_read():                                  #
12903#       FP_SRC(a6) = packed operand now as a binary FP number           #
12904#                                                                       #
12905# ALGORITHM *********************************************************** #
12906#       Get the correct <ea> which is the value on the exception stack  #
12907# frame w/ maybe a correction factor if the <ea> is -(an) or (an)+.     #
12908# Then, fetch the operand from memory. If the fetch fails, exit         #
12909# through facc_in_x().                                                  #
12910#       If the packed operand is a ZERO,NAN, or INF, convert it to      #
12911# its binary representation here. Else, call decbin() which will        #
12912# convert the packed value to an extended precision binary value.       #
12913#                                                                       #
12914#########################################################################
12915
12916# the stacked <ea> for packed is correct except for -(An).
12917# the base reg must be updated for both -(An) and (An)+.
12918        global          get_packed
12919get_packed:
12920        mov.l           &0xc,%d0                # pass: operand size; packed is 12 bytes
12921        bsr.l           _dcalc_ea               # fetch <ea>; correct An
12922
12923        lea             FP_SRC(%a6),%a1         # pass: ptr to super dst (FP_SRC receives the operand)
12924        mov.l           &0xc,%d0                # pass: 12 bytes
12925        bsr.l           _dmem_read              # read packed operand
12926
12927        tst.l           %d1                     # did _dmem_read fail (d1 != 0)?
12928        bne.l           facc_in_x               # yes; leave through the access-error exit
12929
12930# The packed operand is an INF or a NAN if the exponent field is all ones.
12931        bfextu          FP_SRC(%a6){&1:&15},%d0 # get exp: 15 bits starting at bit offset 1 of FP_SRC
12932        cmpi.w          %d0,&0x7fff             # INF or NAN?
12933        bne.b           gp_try_zero             # no
12934        rts                                     # operand is an INF or NAN; FP_SRC is returned as read
12935
12936# The packed operand is a zero if the mantissa is all zero, else it's
12937# a normal packed op.
12938gp_try_zero:
12939        mov.b           3+FP_SRC(%a6),%d0       # get byte 4 (offset 3) of the operand
12940        andi.b          &0x0f,%d0               # clear all but last nybble
12941        bne.b           gp_not_spec             # not a zero
12942        tst.l           FP_SRC_HI(%a6)          # is lw 2 zero?
12943        bne.b           gp_not_spec             # not a zero
12944        tst.l           FP_SRC_LO(%a6)          # is lw 3 zero?
12945        bne.b           gp_not_spec             # not a zero
12946        rts                                     # operand is a ZERO; FP_SRC is returned as read
12947gp_not_spec:
12948        lea             FP_SRC(%a6),%a0         # pass: ptr to packed op
12949        bsr.l           decbin                  # convert to extended; result comes back in fp0
12950        fmovm.x         &0x80,FP_SRC(%a6)       # make this the srcop: overwrite FP_SRC with fp0
12951        rts
12952
12953#########################################################################
12954# decbin(): Converts normalized packed bcd value pointed to by register #
12955#           a0 to extended-precision value in fp0.                      #
12956#                                                                       #
12957# INPUT *************************************************************** #
12958#       a0 = pointer to normalized packed bcd value                     #
12959#                                                                       #
12960# OUTPUT ************************************************************** #
12961#       fp0 = exact fp representation of the packed bcd value.          #
12962#                                                                       #
12963# ALGORITHM *********************************************************** #
12964#       Expected is a normal bcd (i.e. non-exceptional; all inf, zero,  #
12965#       and NaN operands are dispatched without entering this routine)  #
12966#       value in 68881/882 format at location (a0).                     #
12967#                                                                       #
12968#       A1. Convert the bcd exponent to binary by successive adds and   #
12969#       muls. Set the sign according to SE. Subtract 16 to compensate   #
12970#       for the mantissa which is to be interpreted as 17 integer       #
12971#       digits, rather than 1 integer and 16 fraction digits.           #
12972#       Note: this operation can never overflow.                        #
12973#                                                                       #
12974#       A2. Convert the bcd mantissa to binary by successive            #
12975#       adds and muls in FP0. Set the sign according to SM.             #
12976#       The mantissa digits will be converted with the decimal point    #
12977#       assumed following the least-significant digit.                  #
12978#       Note: this operation can never overflow.                        #
12979#                                                                       #
12980#       A3. Count the number of leading/trailing zeros in the           #
12981#       bcd string.  If SE is positive, count the leading zeros;        #
12982#       if negative, count the trailing zeros.  Set the adjusted        #
12983#       exponent equal to the exponent from A1 and the zero count       #
12984#       added if SM = 1 and subtracted if SM = 0.  Scale the            #
12985#       mantissa the equivalent of forcing in the bcd value:            #
12986#                                                                       #
12987#       SM = 0  a non-zero digit in the integer position                #
12988#       SM = 1  a non-zero digit in Mant0, lsd of the fraction          #
12989#                                                                       #
12990#       this will insure that any value, regardless of its              #
12991#       representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted     #
12992#       consistently.                                                   #
12993#                                                                       #
12994#       A4. Calculate the factor 10^exp in FP1 using a table of         #
12995#       10^(2^n) values.  To reduce the error in forming factors        #
12996#       greater than 10^27, a directed rounding scheme is used with     #
12997#       tables rounded to RN, RM, and RP, according to the table        #
12998#       in the comments of the pwrten section.                          #
12999#                                                                       #
13000#       A5. Form the final binary number by scaling the mantissa by     #
13001#       the exponent factor.  This is done by multiplying the           #
13002#       mantissa in FP0 by the factor in FP1 if the adjusted            #
13003#       exponent sign is positive, and dividing FP0 by FP1 if           #
13004#       it is negative.                                                 #
13005#                                                                       #
13006#       Clean up and return. Check if the final mul or div was inexact. #
13007#       If so, set INEX1 in USER_FPSR.                                  #
13008#                                                                       #
13009#########################################################################
13010
13011#
13012#       PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded
13013#       to nearest, minus, and plus, respectively.  The tables include
13014#       10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}.  No rounding
13015#       is required until the power is greater than 27, however, all
13016#       tables include the first 5 for ease of indexing.
13017#
# RTABLE maps the user's rounding mode and the operand signs to the
# rounding mode pwrten uses while building 10^exp.
#   index = (FPCR rounding-mode field << 2) + {SM,SE}   (see pwrten)
#   value = 0 -> RN (PTENRN), 2 -> RM (PTENRM), 3 -> RP (PTENRP)
# Columns within a row are {SM,SE} = 00, 01, 10, 11.
13018RTABLE:
13019        byte            0,0,0,0                 # user mode RN: always RN
13020        byte            2,3,2,3                 # user mode RZ: RM,RP,RM,RP
13021        byte            2,3,3,2                 # user mode RM: RM,RP,RP,RM
13022        byte            3,2,2,3                 # user mode RP: RP,RM,RM,RP
13023
13024        set             FNIBS,7                 # dbf count: 8 digits per mantissa longword
13025        set             FSTRT,0                 # bit-field offset of first digit in a mantissa longword
13026
13027        set             ESTRT,4                 # bit-field offset of first exponent digit in longword 0
13028        set             EDIGITS,2               # dbf count: 3 exponent digits
13029
13030        global          decbin
13031decbin:
# Entry: a0 -> 12-byte packed decimal operand, a6 -> frame holding FP_SCR0.
# Work on a private copy in FP_SCR0 so that the SE-bit updates made by the
# later stages never touch the caller's operand.
13032        mov.l           0x0(%a0),FP_SCR0_EX(%a6) # make a copy of input
13033        mov.l           0x4(%a0),FP_SCR0_HI(%a6) # so we don't alter it
13034        mov.l           0x8(%a0),FP_SCR0_LO(%a6)
13035
13036        lea             FP_SCR0(%a6),%a0        # a0 -> working copy from here on
13037
13038        movm.l          &0x3c00,-(%sp)          # save d2-d5
# Predecrement fmovm masks use bit n for fpn (cf. bindec's &0x7 = fp0-fp2),
# so fp1 is &0x2.  &0x1 would push fp0 instead, and the &0x40 pop in
# no_exc would then load that fp0 image into fp1.
13039        fmovm.x         &0x2,-(%sp)             # save fp1
13040#
13041# Calculate exponent:
13042#  1. Copy bcd value in memory for use as a working copy.
13043#  2. Calculate absolute value of exponent in d1 by mul and add.
13044#  3. Correct for exponent sign.
13045#  4. Subtract 16 to compensate for interpreting the mant as all integer digits.
13046#     (i.e., all digits assumed left of the decimal point.)
13047#
13048# Register usage:
13049#
13050#  calc_e:
13051#       (*)  d0: temp digit storage
13052#       (*)  d1: accumulator for binary exponent
13053#       (*)  d2: digit count
13054#       (*)  d3: offset pointer
13055#       ( )  d4: first word of bcd
13056#       ( )  a0: pointer to working bcd value
13057#       ( )  a6: pointer to original bcd value
13058#       (*)  FP_SCR0: working copy of original bcd value (a0 points here)
13059#       (*)  (%sp): absolute value of the adjusted exponent, pushed at e_save
13060#
# calc_e: convert the three bcd exponent digits to binary, apply SE, then
# subtract 16 because the mantissa is treated as a 17-digit integer.
# Leaves abs(adjusted exponent) in d1 and on the stack; its sign lives in
# the SE bit (bit 30) of d4 and of the working copy.
13061calc_e:
13062        mov.l           &EDIGITS,%d2            # dbf count for the exponent digits (EDIGITS+1 = 3)
13063        mov.l           &ESTRT,%d3              # bit offset of the first exponent digit
13064        mov.l           (%a0),%d4               # get first longword of bcd
13065        clr.l           %d1                     # zero d1 for accumulator
13066e_gd:
13067        mulu.l          &0xa,%d1                # mul partial product by one digit place
13068        bfextu          %d4{%d3:&4},%d0         # get the digit and zero extend into d0
13069        add.l           %d0,%d1                 # d1 = d1 + d0
13070        addq.b          &4,%d3                  # advance d3 to the next digit
13071        dbf.w           %d2,e_gd                # loop until all 3 digits have been used
13072        btst            &30,%d4                 # get SE
13073        beq.b           e_pos                   # don't negate if pos
13074        neg.l           %d1                     # negate before subtracting
13075e_pos:
13076        sub.l           &16,%d1                 # sub to compensate for shift of mant
13077        bge.b           e_save                  # if still pos, do not neg
13078        neg.l           %d1                     # now negative, make pos and set SE
13079        or.l            &0x40000000,%d4         # set SE in d4,
13080        or.l            &0x40000000,(%a0)       # and in working bcd
13081e_save:
13082        mov.l           %d1,-(%sp)              # save abs(adjusted exp) on stack
13083#
13084#
13085# Calculate mantissa:
13086#  1. Calculate absolute value of mantissa in fp0 by mul and add.
13087#  2. Correct for mantissa sign.
13088#     (i.e., all digits assumed left of the decimal point.)
13089#
13090# Register usage:
13091#
13092#  calc_m:
13093#       (*)  d0: temp digit storage
13094#       (*)  d1: lword counter
13095#       (*)  d2: digit count
13096#       (*)  d3: offset pointer
13097#       ( )  d4: words 2 and 3 of bcd
13098#       ( )  a0: pointer to working bcd value
13099#       ( )  a6: base register for FP_SCR0
13100#       (*) fp0: mantissa accumulator
13101#       ( )  FP_SCR0: working copy of original bcd value (a0 points here)
13102#       ( )  (%sp): absolute value of the adjusted exponent
13103#
# calc_m: accumulate the 17 mantissa digits into fp0 as an integer
# (fp0 = fp0*10 + digit), then give fp0 the sign held in SM (bit 31).
13104calc_m:
13105        mov.l           &1,%d1                  # longword index into bcd, init to 1
13106        fmov.s          &0x00000000,%fp0        # accumulator
13107#
13108#
13109#  The integer digit sits in the low nibble of the first long word, apart
13110#  from the other 16 digits held in the next two long words.  Get the
13111#  integer digit first (the loop unrolled once), then loop over the rest.
13112#
13113        bfextu          (%a0){&28:&4},%d0       # integer part is ls digit in long word
13114        fadd.b          %d0,%fp0                # add digit to sum in fp0
13115#
13116#
13117#  Get the rest of the mantissa.
13118#
13119loadlw:
13120        mov.l           (%a0,%d1.L*4),%d4       # load mantissa longword into d4
13121        mov.l           &FSTRT,%d3              # bit offset used to pick up digits
13122        mov.l           &FNIBS,%d2              # reset dbf digit count for this longword
13123md2b:
13124        fmul.s          &0x41200000,%fp0        # fp0 = fp0 * 10
13125        bfextu          %d4{%d3:&4},%d0         # get the digit and zero extend
13126        fadd.b          %d0,%fp0                # fp0 = fp0 + digit
13127#
13128#
13129#  If all the digits (8) in that long word have been converted (d2=0),
13130#  then inc d1 (=2) to point to the next long word and reset d3 to 0
13131#  to initialize the digit offset, and set d2 to 7 for the digit count;
13132#  else continue with this long word.
13133#
13134        addq.b          &4,%d3                  # advance d3 to the next digit
13135        dbf.w           %d2,md2b                # check for last digit in this lw
13136nextlw:
13137        addq.l          &1,%d1                  # inc lw pointer in mantissa
13138        cmp.l           %d1,&2                  # test for last lw
13139        ble.b           loadlw                  # loop while index <= 2
13140#
13141#  Check the sign of the mant and make the value in fp0 the same sign.
13142#
13143m_sign:
13144        btst            &31,(%a0)               # test sign of the mantissa
13145        beq.b           ap_st_z                 # if clear, go to append/strip zeros
13146        fneg.x          %fp0                    # if set, negate fp0; falls through to ap_st_z
13147#
13148# Append/strip zeros:
13149#
13150#  For adjusted exponents which have an absolute value greater than 27*,
13151#  this routine calculates the amount needed to normalize the mantissa
13152#  for the adjusted exponent.  That number is subtracted from the exp
13153#  if the exp was positive, and added if it was negative.  The purpose
13154#  of this is to reduce the value of the exponent and the possibility
13155#  of error in calculation of pwrten.
13156#
13157#  1. Branch on the sign of the adjusted exponent.
13158#  2p.(positive exp)
13159#   2. Check M16 and the digits in lwords 2 and 3 in descending order.
13160#   3. Add one for each zero encountered until a non-zero digit.
13161#   4. Subtract the count from the exp.
13162#   5. Check if the exp has crossed zero in #3 above; make the exp abs
13163#          and set SE.
13164#       6. Multiply the mantissa by 10**count.
13165#  2n.(negative exp)
13166#   2. Check the digits in lwords 3 and 2 in descending order.
13167#   3. Add one for each zero encountered until a non-zero digit.
13168#   4. Add the count to the exp.
13169#   5. Check if the exp has crossed zero in #3 above; clear SE.
13170#   6. Divide the mantissa by 10**count.
13171#
13172#  *Why 27?  If the adjusted exponent is within -28 < expA < 28, then
13173#   any adjustment due to append/strip zeros will drive the resultant
13174#   exponent towards zero.  Since all pwrten constants with a power
13175#   of 27 or less are exact, there is no need to use this routine to
13176#   attempt to lessen the resultant exponent.
13177#
13178# Register usage:
13179#
13180#  ap_st_z:
13181#       (*)  d0: temp digit storage
13182#       (*)  d1: zero count
13183#       (*)  d2: digit count
13184#       (*)  d3: offset pointer
13185#       ( )  d4: first word of bcd
13186#       (*)  d5: lword counter
13187#       ( )  a0: pointer to working bcd value
13188#       (*)  FP_SCR0: working copy of original bcd value (SE bit may be updated)
13189#       ( )  (%sp): absolute value of the adjusted exponent
13190#
13191#
13192# First check the absolute value of the exponent to see if this
13193# routine is necessary.  If so, then check the sign of the exponent
13194# and do append (+) or strip (-) zeros accordingly.
13195# This section handles a positive adjusted exponent.
13196#
# ap_st_z: if abs(expA) > 27, count zero digits and fold that count into
# the exponent and the mantissa.  This half handles SE = 0: it counts
# leading zeros starting at M16, subtracts the count from the exponent and
# multiplies fp0 by 10**count.  On exit d1 holds the exponent for pwrten.
13197ap_st_z:
13198        mov.l           (%sp),%d1               # load expA for range test
13199        cmp.l           %d1,&27                 # test is with 27
13200        ble.w           pwrten                  # if abs(expA) <28, skip ap/st zeros
13201        btst            &30,(%a0)               # check sign of exp
13202        bne.b           ap_st_n                 # if neg, go to neg side
13203        clr.l           %d1                     # zero count reg
13204        mov.l           (%a0),%d4               # load lword 1 to d4
13205        bfextu          %d4{&28:&4},%d0         # get M16 in d0
13206        bne.b           ap_p_fx                 # if M16 is non-zero, go fix exp
13207        addq.l          &1,%d1                  # inc zero count
13208        mov.l           &1,%d5                  # init lword counter
13209        mov.l           (%a0,%d5.L*4),%d4       # get lword 2 to d4
13210        bne.b           ap_p_cl                 # if lw 2 is non-zero, scan its digits
13211        addq.l          &8,%d1                  # lw 2 is all zeros: inc count by 8
13212        addq.l          &1,%d5                  # inc lword counter
13213        mov.l           (%a0,%d5.L*4),%d4       # get lword 3 to d4
13214ap_p_cl:
13215        clr.l           %d3                     # init offset reg
13216        mov.l           &7,%d2                  # init digit counter
13217ap_p_gd:
13218        bfextu          %d4{%d3:&4},%d0         # get digit
13219        bne.b           ap_p_fx                 # if non-zero, go to fix exp
13220        addq.l          &4,%d3                  # point to next digit
13221        addq.l          &1,%d1                  # inc zero count
13222        dbf.w           %d2,ap_p_gd             # get next digit
13223ap_p_fx:
13224        mov.l           %d1,%d0                 # copy zero count to d0
13225        mov.l           (%sp),%d1               # get adjusted exp from memory
13226        sub.l           %d0,%d1                 # subtract count from exp
13227        bge.b           ap_p_fm                 # if still pos, go fix mantissa
13228        neg.l           %d1                     # now its neg; get abs
13229        mov.l           (%a0),%d4               # load lword 1 to d4
13230        or.l            &0x40000000,%d4         # and set SE in d4
13231        or.l            &0x40000000,(%a0)       # and in memory
13232#
13233# Calculate the mantissa multiplier to compensate for the stripping of
13234# zeros from the mantissa.
13235#
13236ap_p_fm:
13237        lea.l           PTENRN(%pc),%a1         # get address of power-of-ten table
13238        clr.l           %d3                     # init table index
13239        fmov.s          &0x3f800000,%fp1        # init fp1 to 1
13240        mov.l           &3,%d2                  # d2 is not read by the loop below
13241ap_p_el:
13242        asr.l           &1,%d0                  # shift lsb into carry
13243        bcc.b           ap_p_en                 # if bit was 0, skip the mul
13244        fmul.x          (%a1,%d3),%fp1          # mul by 10**(d3_bit_no)
13245ap_p_en:
13246        add.l           &12,%d3                 # inc d3 to next 12-byte table entry
13247        tst.l           %d0                     # check if d0 is zero
13248        bne.b           ap_p_el                 # if not, get next bit
13249        fmul.x          %fp1,%fp0               # mul mantissa by 10**(zero count)
13250        bra.b           pwrten                  # go calc pwrten
13251#
13252# This section handles a negative adjusted exponent.
13253#
# ap_st_n: SE = 1 half of append/strip zeros.  Counts trailing zeros from
# the last digit of lword 3 backwards, reduces abs(exp) by the count
# (clearing SE if the exponent reaches or crosses zero) and divides fp0 by
# 10**count.  Falls through into pwrten with the exponent in d1.
13254ap_st_n:
13255        clr.l           %d1                     # clr counter
13256        mov.l           &2,%d5                  # set up d5 to point to lword 3
13257        mov.l           (%a0,%d5.L*4),%d4       # get lword 3
13258        bne.b           ap_n_cl                 # if not zero, check digits
13259        sub.l           &1,%d5                  # dec d5 to point to lword 2
13260        addq.l          &8,%d1                  # inc counter by 8
13261        mov.l           (%a0,%d5.L*4),%d4       # get lword 2
13262ap_n_cl:
13263        mov.l           &28,%d3                 # point to last digit
13264        mov.l           &7,%d2                  # init digit counter
13265ap_n_gd:
13266        bfextu          %d4{%d3:&4},%d0         # get digit
13267        bne.b           ap_n_fx                 # if non-zero, go to exp fix
13268        subq.l          &4,%d3                  # point to previous digit
13269        addq.l          &1,%d1                  # inc zero count
13270        dbf.w           %d2,ap_n_gd             # get next digit
13271ap_n_fx:
13272        mov.l           %d1,%d0                 # copy zero count to d0
13273        mov.l           (%sp),%d1               # get abs(adjusted exp) from memory
13274        sub.l           %d0,%d1                 # subtract count from abs(exp)
13275        bgt.b           ap_n_fm                 # if still pos, go fix mantissa
13276        neg.l           %d1                     # take abs of exp and clr SE
13277        mov.l           (%a0),%d4               # load lword 1 to d4
13278        and.l           &0xbfffffff,%d4         # and clr SE in d4
13279        and.l           &0xbfffffff,(%a0)       # and in memory
13280#
13281# Calculate the mantissa divisor to compensate for the removal of
13282# trailing zeros from the mantissa.
13283#
13284ap_n_fm:
13285        lea.l           PTENRN(%pc),%a1         # get address of power-of-ten table
13286        clr.l           %d3                     # init table index
13287        fmov.s          &0x3f800000,%fp1        # init fp1 to 1
13288        mov.l           &3,%d2                  # d2 is not read by the loop below
13289ap_n_el:
13290        asr.l           &1,%d0                  # shift lsb into carry
13291        bcc.b           ap_n_en                 # if bit was 0, skip the mul
13292        fmul.x          (%a1,%d3),%fp1          # mul by 10**(d3_bit_no)
13293ap_n_en:
13294        add.l           &12,%d3                 # inc d3 to next 12-byte table entry
13295        tst.l           %d0                     # check if d0 is zero
13296        bne.b           ap_n_el                 # if not, get next bit
13297        fdiv.x          %fp1,%fp0               # div mantissa by 10**(zero count)
13298#
13299#
13300# Calculate power-of-ten factor from adjusted and shifted exponent.
13301#
13302# Register usage:
13303#
13304#  pwrten:
13305#       (*)  d0: temp
13306#       ( )  d1: exponent
13307#       (*)  d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp
13308#       (*)  d3: FPCR work copy
13309#       ( )  d4: first word of bcd
13310#       (*)  a1: RTABLE pointer
13311#  calc_p:
13312#       (*)  d0: temp
13313#       ( )  d1: exponent
13314#       (*)  d3: PWRTxx table index
13315#       ( )  a0: pointer to working copy of bcd
13316#       (*)  a1: PWRTxx pointer
13317#       (*) fp1: power-of-ten accumulator
13318#
13319# Pwrten calculates the exponent factor in the selected rounding mode
13320# according to the following table:
13321#
13322#       Sign of Mant  Sign of Exp  Rounding Mode  PWRTEN Rounding Mode
13323#
13324#       ANY       ANY   RN      RN
13325#
13326#        +         +    RP      RP
13327#        -         +    RP      RM
13328#        +         -    RP      RM
13329#        -         -    RP      RP
13330#
13331#        +         +    RM      RM
13332#        -         +    RM      RP
13333#        +         -    RM      RP
13334#        -         -    RM      RM
13335#
13336#        +         +    RZ      RM
13337#        -         +    RZ      RM
13338#        +         -    RZ      RP
13339#        -         -    RZ      RP
13340#
13341#
# pwrten: pick the rounding mode for the power-of-ten factor from RTABLE,
# load it into the FPCR, select the matching PTENxx table, then build
# fp1 = 10**d1 by multiplying the 10**(2**n) entries for each set bit of d1.
13342pwrten:
13343        mov.l           USER_FPCR(%a6),%d3      # get user's FPCR
13344        bfextu          %d3{&26:&2},%d2         # isolate rounding mode bits
13345        mov.l           (%a0),%d4               # reload 1st bcd word to d4
13346        asl.l           &2,%d2                  # format d2 to be
13347        bfextu          %d4{&0:&2},%d0          # {FPCR[6],FPCR[5],SM,SE}
13348        add.l           %d0,%d2                 # in d2 as index into RTABLE
13349        lea.l           RTABLE(%pc),%a1         # load rtable base
13350        mov.b           (%a1,%d2),%d0           # load new rounding bits from table
13351        clr.l           %d3                     # clear d3 to force no exc and extended
13352        bfins           %d0,%d3{&26:&2}         # stuff new rounding bits in FPCR
13353        fmov.l          %d3,%fpcr               # write new FPCR
13354        asr.l           &1,%d0                  # table value bit 0 set -> RP (3)
13355        bcc.b           not_rp                  # else keep checking
13356        lea.l           PTENRP(%pc),%a1         # it is RP
13357        bra.b           calc_p                  # go to init section
13358not_rp:
13359        asr.l           &1,%d0                  # table value bit 1 set -> RM (2)
13360        bcc.b           not_rm                  # neither bit set -> RN (0)
13361        lea.l           PTENRM(%pc),%a1         # it is RM
13362        bra.b           calc_p                  # go to init section
13363not_rm:
13364        lea.l           PTENRN(%pc),%a1         # it is RN
13365calc_p:
13366        mov.l           %d1,%d0                 # copy exp to d0;use d0
13367        bpl.b           no_neg                  # if exp is negative,
13368        neg.l           %d0                     # invert it
13369        or.l            &0x40000000,(%a0)       # and set SE bit
13370no_neg:
13371        clr.l           %d3                     # table index
13372        fmov.s          &0x3f800000,%fp1        # init fp1 to 1
13373e_loop:
13374        asr.l           &1,%d0                  # shift next bit into carry
13375        bcc.b           e_next                  # if zero, skip the mul
13376        fmul.x          (%a1,%d3),%fp1          # mul by 10**(d3_bit_no)
13377e_next:
13378        add.l           &12,%d3                 # inc d3 to next 12-byte table entry
13379        tst.l           %d0                     # check if d0 is zero
13380        bne.b           e_loop                  # not zero, continue shifting
13381#
13382#
13383#  Check the sign of the adjusted exp and make the value in fp0 the
13384#  same sign. If the exp was pos then multiply fp1*fp0;
13385#  else divide fp0/fp1.
13386#
13387# Register Usage:
13388#  norm:
13389#       ( )  a0: pointer to working bcd value
13390#       (*) fp0: mantissa accumulator
13391#       ( ) fp1: scaling factor - 10**(abs(exp))
13392#
# pnorm: apply the power-of-ten factor in fp1 to the mantissa in fp0,
# multiplying when SE = 0 and dividing when SE = 1.  Then report any
# inexact result as INEX1/AINEX, unwind the stack and return with the
# converted value in fp0.
13393pnorm:
13394        btst            &30,(%a0)               # test the sign of the exponent
13395        beq.b           mul                     # if clear, go to multiply
13396div:
13397        fdiv.x          %fp1,%fp0               # exp is negative, so divide mant by exp
13398        bra.b           end_dec
13399mul:
13400        fmul.x          %fp1,%fp0               # exp is positive, so multiply by exp
13401#
13402#
13403# Clean up and return with result in fp0.
13404#
13405# If the final mul/div in decbin incurred an inex exception,
13406# it will be inex2, but will be reported as inex1 by get_op.
13407#
13408end_dec:
13409        fmov.l          %fpsr,%d0               # get status register
13410        bclr            &inex2_bit+8,%d0        # test for inex2 and clear it
13411        beq.b           no_exc                  # skip this if no exc
13412        ori.w           &inx1a_mask,2+USER_FPSR(%a6) # set INEX1/AINEX
13413no_exc:
13414        add.l           &0x4,%sp                # drop the exponent pushed at e_save
13415        fmovm.x         (%sp)+,&0x40            # restore fp1
13416        movm.l          (%sp)+,&0x3c            # restore d2-d5
13417        fmov.l          &0x0,%fpcr              # leave FPCR cleared for the caller
13418        fmov.l          &0x0,%fpsr              # and discard the status from the conversion
13419        rts
13420
13421#########################################################################
13422# bindec(): Converts an input in extended precision format to bcd format#
13423#                                                                       #
13424# INPUT *************************************************************** #
13425#       a0 = pointer to the input extended precision value in memory.   #
13426#            the input may be either normalized, unnormalized, or       #
13427#            denormalized.                                              #
13428#       d0 = contains the k-factor sign-extended to 32-bits.            #
13429#                                                                       #
13430# OUTPUT ************************************************************** #
13431#       FP_SCR0(a6) = bcd format result on the stack.                   #
13432#                                                                       #
13433# ALGORITHM *********************************************************** #
13434#                                                                       #
13435#       A1.     Set RM and size ext;  Set SIGMA = sign of input.        #
13436#               The k-factor is saved for use in d7. Clear the          #
13437#               BINDEC_FLG for separating normalized/denormalized       #
13438#               input.  If input is unnormalized or denormalized,       #
13439#               normalize it.                                           #
13440#                                                                       #
13441#       A2.     Set X = abs(input).                                     #
13442#                                                                       #
13443#       A3.     Compute ILOG.                                           #
13444#               ILOG is the log base 10 of the input value.  It is      #
13445#               approximated by adding e + 0.f when the original        #
13446#               value is viewed as 2^^e * 1.f in extended precision.    #
13447#               This value is stored in d6.                             #
13448#                                                                       #
13449#       A4.     Clr INEX bit.                                           #
13450#               The operation in A3 above may have set INEX2.           #
13451#                                                                       #
13452#       A5.     Set ICTR = 0;                                           #
13453#               ICTR is a flag used in A13.  It must be set before the  #
13454#               loop entry A6.                                          #
13455#                                                                       #
13456#       A6.     Calculate LEN.                                          #
13457#               LEN is the number of digits to be displayed.  The       #
13458#               k-factor can dictate either the total number of digits, #
13459#               if it is a positive number, or the number of digits     #
13460#               after the decimal point which are to be included as     #
13461#               significant.  See the 68882 manual for examples.        #
13462#               If LEN is computed to be greater than 17, set OPERR in  #
13463#               USER_FPSR.  LEN is stored in d4.                        #
13464#                                                                       #
13465#       A7.     Calculate SCALE.                                        #
13466#               SCALE is equal to 10^ISCALE, where ISCALE is the number #
13467#               of decimal places needed to insure LEN integer digits   #
13468#               in the output before conversion to bcd. LAMBDA is the   #
13469#               sign of ISCALE, used in A9. Fp1 contains                #
13470#               10^^(abs(ISCALE)) using a rounding mode which is a      #
13471#               function of the original rounding mode and the signs    #
13472#               of ISCALE and X.  A table is given in the code.         #
13473#                                                                       #
13474#       A8.     Clr INEX; Force RZ.                                     #
13475#               The operation in A3 above may have set INEX2.           #
13476#               RZ mode is forced for the scaling operation to insure   #
13477#               only one rounding error.  The grs bits are collected in #
13478#               the INEX flag for use in A10.                           #
13479#                                                                       #
13480#       A9.     Scale X -> Y.                                           #
13481#               The mantissa is scaled to the desired number of         #
13482#               significant digits.  The excess digits are collected    #
13483#               in INEX2.                                               #
13484#                                                                       #
13485#       A10.    Or in INEX.                                             #
13486#               If INEX is set, round error occurred.  This is          #
13487#               compensated for by 'or-ing' in the INEX2 flag to        #
13488#               the lsb of Y.                                           #
13489#                                                                       #
13490#       A11.    Restore original FPCR; set size ext.                    #
13491#               Perform FINT operation in the user's rounding mode.     #
13492#               Keep the size to extended.                              #
13493#                                                                       #
13494#       A12.    Calculate YINT = FINT(Y) according to user's rounding   #
13495#               mode.  The FPSP routine sintd0 is used.  The output     #
13496#               is in fp0.                                              #
13497#                                                                       #
13498#       A13.    Check for LEN digits.                                   #
13499#               If the int operation results in more than LEN digits,   #
13500#               or less than LEN -1 digits, adjust ILOG and repeat from #
13501#               A6.  This test occurs only on the first pass.  If the   #
13502#               result is exactly 10^LEN, decrement ILOG and divide     #
13503#               the mantissa by 10.                                     #
13504#                                                                       #
13505#       A14.    Convert the mantissa to bcd.                            #
13506#               The binstr routine is used to convert the LEN digit     #
13507#               mantissa to bcd in memory.  The input to binstr is      #
13508#               to be a fraction; i.e. (mantissa)/10^LEN and adjusted   #
13509#               such that the decimal point is to the left of bit 63.   #
13510#               The bcd digits are stored in the correct position in    #
13511#               the final string area in memory.                        #
13512#                                                                       #
13513#       A15.    Convert the exponent to bcd.                            #
13514#               As in A14 above, the exp is converted to bcd and the    #
13515#               digits are stored in the final string.                  #
13516#               Test the length of the final exponent string.  If the   #
13517#               length is 4, set operr.                                 #
13518#                                                                       #
13519#       A16.    Write sign bits to final string.                        #
13520#                                                                       #
13521#########################################################################
13522
13523set     BINDEC_FLG,     EXC_TEMP        # DENORM flag
13524
13525# Constants in extended precision
13526PLOG2:
13527        long            0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000     # log10(2) ~= 0.30103
13528PLOG2UP1:
13529        long            0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000     # PLOG2 with the mantissa lsb incremented
13530
13531# Constants in single precision (value in the first long, rest is zero padding)
13532FONE:
13533        long            0x3F800000,0x00000000,0x00000000,0x00000000     # 1.0
13534FTWO:
13535        long            0x40000000,0x00000000,0x00000000,0x00000000     # 2.0
13536FTEN:
13537        long            0x41200000,0x00000000,0x00000000,0x00000000     # 10.0
13538F4933:
13539        long            0x459A2800,0x00000000,0x00000000,0x00000000     # 4933.0
13540
# NOTE(review): the lookup that uses RBDTBL is outside this section; the
# layout (4 rows of 4 bytes, values 0/2/3) resembles decbin's RTABLE, so it
# presumably maps a rounding mode plus two sign bits to a rounding mode --
# confirm the index order at the point of use.
13541RBDTBL:
13542        byte            0,0,0,0
13543        byte            3,3,2,2
13544        byte            3,2,2,3
13545        byte            2,3,3,2
13546
13547#       Implementation Notes:
13548#
13549#       The registers are used as follows:
13550#
13551#               d0: scratch; LEN input to binstr
13552#               d1: scratch
13553#               d2: upper 32-bits of mantissa for binstr
13554#               d3: scratch;lower 32-bits of mantissa for binstr
13555#               d4: LEN
13556#               d5: LAMBDA/ICTR
13557#               d6: ILOG
13558#               d7: k-factor
13559#               a0: ptr for original operand/final result
13560#               a1: scratch pointer
13561#               a2: pointer to FP_X; abs(original value) in ext
13562#               fp0: scratch
13563#               fp1: scratch
13564#               fp2: scratch
13565#               F_SCR1:
13566#               F_SCR2:
13567#               L_SCR1:
13568#               L_SCR2:
13569
13570        global          bindec
13571bindec:
13572        movm.l          &0x3f20,-(%sp)  #  {%d2-%d7/%a2}
13573        fmovm.x         &0x7,-(%sp)     #  {%fp0-%fp2}
13574
13575# A1. Set RM and size ext. Set SIGMA = sign input;
13576#     The k-factor is saved for use in d7.  Clear BINDEC_FLG for
13577#     separating  normalized/denormalized input.  If the input
13578#     is a denormalized number, set the BINDEC_FLG memory word
13579#     to signal denorm.  If the input is unnormalized, normalize
13580#     the input and test for denormalized result.
13581#
13582        fmov.l          &rm_mode*0x10,%fpcr     # set RM and ext
13583        mov.l           (%a0),L_SCR2(%a6)       # save exponent for sign check
13584        mov.l           %d0,%d7         # move k-factor to d7
13585
13586        clr.b           BINDEC_FLG(%a6) # clr norm/denorm flag
13587        cmpi.b          STAG(%a6),&DENORM # is input a DENORM?
13588        bne.w           A2_str          # no; input is a NORM
13589
13590#
13591# Normalize the denorm
13592#
13593un_de_norm:
13594        mov.w           (%a0),%d0
13595        and.w           &0x7fff,%d0     # strip sign of normalized exp
13596        mov.l           4(%a0),%d1
13597        mov.l           8(%a0),%d2
13598norm_loop:
13599        sub.w           &1,%d0
13600        lsl.l           &1,%d2
13601        roxl.l          &1,%d1
13602        tst.l           %d1
13603        bge.b           norm_loop
13604#
13605# Test if the normalized input is denormalized
13606#
13607        tst.w           %d0
13608        bgt.b           pos_exp         # if greater than zero, it is a norm
13609        st              BINDEC_FLG(%a6) # set flag for denorm
13610pos_exp:
13611        and.w           &0x7fff,%d0     # strip sign of normalized exp
13612        mov.w           %d0,(%a0)
13613        mov.l           %d1,4(%a0)
13614        mov.l           %d2,8(%a0)
13615
13616# A2. Set X = abs(input).
13617#
13618A2_str:
13619        mov.l           (%a0),FP_SCR1(%a6)      # move input to work space
13620        mov.l           4(%a0),FP_SCR1+4(%a6)   # move input to work space
13621        mov.l           8(%a0),FP_SCR1+8(%a6)   # move input to work space
13622        and.l           &0x7fffffff,FP_SCR1(%a6)        # create abs(X)
13623
13624# A3. Compute ILOG.
13625#     ILOG is the log base 10 of the input value.  It is approx-
13626#     imated by adding e + 0.f when the original value is viewed
13627#     as 2^^e * 1.f in extended precision.  This value is stored
13628#     in d6.
13629#
13630# Register usage:
13631#       Input/Output
13632#       d0: k-factor/exponent
13633#       d2: x/x
13634#       d3: x/x
13635#       d4: x/x
13636#       d5: x/x
13637#       d6: x/ILOG
13638#       d7: k-factor/Unchanged
13639#       a0: ptr for original operand/final result
13640#       a1: x/x
13641#       a2: x/x
13642#       fp0: x/float(ILOG)
13643#       fp1: x/x
13644#       fp2: x/x
13645#       F_SCR1:x/x
13646#       F_SCR2:Abs(X)/Abs(X) with $3fff exponent
13647#       L_SCR1:x/x
13648#       L_SCR2:first word of X packed/Unchanged
13649
13650        tst.b           BINDEC_FLG(%a6) # check for denorm
13651        beq.b           A3_cont         # if clr, continue with norm
13652        mov.l           &-4933,%d6      # force ILOG = -4933
13653        bra.b           A4_str
13654A3_cont:
13655        mov.w           FP_SCR1(%a6),%d0        # move exp to d0
13656        mov.w           &0x3fff,FP_SCR1(%a6)    # replace exponent with 0x3fff
13657        fmov.x          FP_SCR1(%a6),%fp0       # now fp0 has 1.f
13658        sub.w           &0x3fff,%d0     # strip off bias
13659        fadd.w          %d0,%fp0        # add in exp
13660        fsub.s          FONE(%pc),%fp0  # subtract off 1.0
13661        fbge.w          pos_res         # if pos, branch
13662        fmul.x          PLOG2UP1(%pc),%fp0      # if neg, mul by LOG2UP1
13663        fmov.l          %fp0,%d6        # put ILOG in d6 as a lword
13664        bra.b           A4_str          # go move out ILOG
13665pos_res:
13666        fmul.x          PLOG2(%pc),%fp0 # if pos, mul by LOG2
13667        fmov.l          %fp0,%d6        # put ILOG in d6 as a lword
13668
13669
13670# A4. Clr INEX bit.
13671#     The operation in A3 above may have set INEX2.
13672
13673A4_str:
13674        fmov.l          &0,%fpsr        # zero all of fpsr - nothing needed
13675
13676
13677# A5. Set ICTR = 0;
13678#     ICTR is a flag used in A13.  It must be set before the
13679#     loop entry A6. The lower word of d5 is used for ICTR.
13680
13681        clr.w           %d5             # clear ICTR
13682
13683# A6. Calculate LEN.
13684#     LEN is the number of digits to be displayed.  The k-factor
13685#     can dictate either the total number of digits, if it is
13686#     a positive number, or the number of digits after the
13687#     original decimal point which are to be included as
13688#     significant.  See the 68882 manual for examples.
13689#     If LEN is computed to be greater than 17, it is clamped to 17
13690#     and, only when k > 0, OPERR and AIOP are set in USER_FPSR.  LEN is stored in d4.
13691#
13692# Register usage:
13693#       Input/Output
13694#       d0: exponent/Unchanged
13695#       d2: x/x/scratch
13696#       d3: x/x
13697#       d4: exc picture/LEN
13698#       d5: ICTR/Unchanged
13699#       d6: ILOG/Unchanged
13700#       d7: k-factor/Unchanged
13701#       a0: ptr for original operand/final result
13702#       a1: x/x
13703#       a2: x/x
13704#       fp0: float(ILOG)/Unchanged
13705#       fp1: x/x
13706#       fp2: x/x
13707#       F_SCR1:x/x
13708#       F_SCR2:Abs(X) with $3fff exponent/Unchanged
13709#       L_SCR1:x/x
13710#       L_SCR2:first word of X packed/Unchanged
13711
13712A6_str:
13713        tst.l           %d7             # branch on sign of k
13714        ble.b           k_neg           # if k <= 0, LEN = ILOG + 1 - k
13715        mov.l           %d7,%d4         # if k > 0, LEN = k
13716        bra.b           len_ck          # skip to LEN check
13717k_neg:
13718        mov.l           %d6,%d4         # first load ILOG to d4
13719        sub.l           %d7,%d4         # subtract off k
13720        addq.l          &1,%d4          # add in the 1
13721len_ck:
13722        tst.l           %d4             # LEN check: branch on sign of LEN
13723        ble.b           LEN_ng          # if neg, set LEN = 1
13724        cmp.l           %d4,&17         # test if LEN > 17
13725        ble.b           A7_str          # if not, forget it
13726        mov.l           &17,%d4         # set max LEN = 17
13727        tst.l           %d7             # if negative, never set OPERR
13728        ble.b           A7_str          # if positive, continue
13729        or.l            &opaop_mask,USER_FPSR(%a6)      # set OPERR & AIOP in USER_FPSR
13730        bra.b           A7_str          # finished here
13731LEN_ng:
13732        mov.l           &1,%d4          # min LEN is 1
13733
13734
13735# A7. Calculate SCALE.
13736#     SCALE is equal to 10^ISCALE, where ISCALE is the number
13737#     of decimal places needed to ensure LEN integer digits
13738#     in the output before conversion to bcd. LAMBDA is the sign
13739#     of ISCALE, used in A9.  Fp1 contains 10^^(abs(ISCALE)) using
13740#     the rounding mode as given in the following table (see
13741#     Coonen, p. 7.23 as ref.; however, the SCALE variable is
13742#     of opposite sign in bindec.sa from Coonen).
13743#
13744#       Initial                                 USE
13745#       FPCR[6:5]       LAMBDA  SIGN(X)         FPCR[6:5]
13746#       ----------------------------------------------
13747#        RN     00         0       0            00/0    RN
13748#        RN     00         0       1            00/0    RN
13749#        RN     00         1       0            00/0    RN
13750#        RN     00         1       1            00/0    RN
13751#        RZ     01         0       0            11/3    RP
13752#        RZ     01         0       1            11/3    RP
13753#        RZ     01         1       0            10/2    RM
13754#        RZ     01         1       1            10/2    RM
13755#        RM     10         0       0            11/3    RP
13756#        RM     10         0       1            10/2    RM
13757#        RM     10         1       0            10/2    RM
13758#        RM     10         1       1            11/3    RP
13759#        RP     11         0       0            10/2    RM
13760#        RP     11         0       1            11/3    RP
13761#        RP     11         1       0            11/3    RP
13762#        RP     11         1       1            10/2    RM
13763#
13764# Register usage:
13765#       Input/Output
13766#       d0: exponent/scratch - final is 0
13767#       d2: x/0 or 24 for A9
13768#       d3: x/scratch - offset ptr into PTENRM array
13769#       d4: LEN/Unchanged
13770#       d5: 0/ICTR:LAMBDA
13771#       d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
13772#       d7: k-factor/Unchanged
13773#       a0: ptr for original operand/final result
13774#       a1: x/ptr to PTENRM array
13775#       a2: x/x
13776#       fp0: float(ILOG)/Unchanged
13777#       fp1: x/10^ISCALE
13778#       fp2: x/x
13779#       F_SCR1:x/x
13780#       F_SCR2:Abs(X) with $3fff exponent/Unchanged
13781#       L_SCR1:x/x
13782#       L_SCR2:first word of X packed/Unchanged
13783
13784A7_str:
13785        tst.l           %d7             # test sign of k
13786        bgt.b           k_pos           # if pos and > 0, skip this
13787        cmp.l           %d7,%d6         # test k - ILOG
13788        blt.b           k_pos           # if ILOG >= k, skip this
13789        mov.l           %d7,%d6         # if ((k<0) & (ILOG < k)) ILOG = k
13790k_pos:
13791        mov.l           %d6,%d0         # calc ILOG + 1 - LEN in d0
13792        addq.l          &1,%d0          # add the 1
13793        sub.l           %d4,%d0         # sub off LEN
13794        swap            %d5             # use upper word of d5 for LAMBDA
13795        clr.w           %d5             # set it zero initially
13796        clr.w           %d2             # set up d2 for very small case
13797        tst.l           %d0             # test sign of ISCALE
13798        bge.b           iscale          # if pos, skip next inst
13799        addq.w          &1,%d5          # if neg, set LAMBDA true
13800        cmp.l           %d0,&0xffffecd4 # test iscale <= -4908
13801        bgt.b           no_inf          # if false, skip rest
13802        add.l           &24,%d0         # add in 24 to iscale
13803        mov.l           &24,%d2         # put 24 in d2 for A9
13804no_inf:
13805        neg.l           %d0             # and take abs of ISCALE
13806iscale:
13807        fmov.s          FONE(%pc),%fp1  # init fp1 to 1
13808        bfextu          USER_FPCR(%a6){&26:&2},%d1      # get initial rmode bits
13809        lsl.w           &1,%d1          # put them in bits 2:1
13810        add.w           %d5,%d1         # add in LAMBDA
13811        lsl.w           &1,%d1          # put them in bits 3:1
13812        tst.l           L_SCR2(%a6)     # test sign of original x
13813        bge.b           x_pos           # if pos, don't set bit 0
13814        addq.l          &1,%d1          # if neg, set bit 0
13815x_pos:
13816        lea.l           RBDTBL(%pc),%a2 # load rbdtbl base
13817        mov.b           (%a2,%d1),%d3   # load d3 with new rmode
13818        lsl.l           &4,%d3          # put bits in proper position
13819        fmov.l          %d3,%fpcr       # load bits into fpu
13820        lsr.l           &4,%d3          # put bits in proper position
13821        tst.b           %d3             # decode new rmode for pten table
13822        bne.b           not_rn          # if zero, it is RN
13823        lea.l           PTENRN(%pc),%a1 # load a1 with RN table base
13824        bra.b           rmode           # exit decode
13825not_rn:
13826        lsr.b           &1,%d3          # get lsb in carry
13827        bcc.b           not_rp2         # if carry clear, it is RM
13828        lea.l           PTENRP(%pc),%a1 # load a1 with RP table base
13829        bra.b           rmode           # exit decode
13830not_rp2:
13831        lea.l           PTENRM(%pc),%a1 # load a1 with RM table base
13832rmode:
13833        clr.l           %d3             # clr table index
13834e_loop2:
13835        lsr.l           &1,%d0          # shift next bit into carry
13836        bcc.b           e_next2         # if zero, skip the mul
13837        fmul.x          (%a1,%d3),%fp1  # mul by 10**(d3_bit_no)
13838e_next2:
13839        add.l           &12,%d3         # inc d3 to next pwrten table entry
13840        tst.l           %d0             # test if ISCALE is zero
13841        bne.b           e_loop2         # if not, loop
13842
13843# A8. Clr INEX; Force RZ.
13844#     The operations in A7 above (building 10^ISCALE) may have set INEX2.
13845#     RZ mode is forced for the scaling operation to ensure
13846#     only one rounding error.  The grs bits are collected in
13847#     the INEX flag for use in A10.
13848#
13849# Register usage:
13850#       Input/Output
13851
13852        fmov.l          &0,%fpsr        # clr INEX
13853        fmov.l          &rz_mode*0x10,%fpcr     # set RZ rounding mode
13854
13855# A9. Scale X -> Y.
13856#     The mantissa is scaled to the desired number of significant
13857#     digits.  The excess digits are collected in INEX2. If mul,
13858#     Check d2 for excess 10 exponential value.  If not zero,
13859#     the iscale value would have caused the pwrten calculation
13860#     to overflow.  Only a negative iscale can cause this, so
13861#     multiply by 10^(d2), which is now only allowed to be 24,
13862#     with a multiply by 10^8 and 10^16, which is exact since
13863#     10^24 is exact.  If the input was denormalized, the
13864#     exponents and mantissas are multiplied separately (see
13865#     sc_mul below) so that no intermediate result underflows.
13866#
13867# Register usage:
13868#       Input/Output
13869#       d0: FPCR with RZ mode/Unchanged
13870#       d2: 0 or 24/unchanged
13871#       d3: x/x
13872#       d4: LEN/Unchanged
13873#       d5: ICTR:LAMBDA
13874#       d6: ILOG/Unchanged
13875#       d7: k-factor/Unchanged
13876#       a0: ptr for original operand/final result
13877#       a1: ptr to PTENRM array/Unchanged
13878#       a2: x/x
13879#       fp0: float(ILOG)/X adjusted for SCALE (Y)
13880#       fp1: 10^ISCALE/Unchanged
13881#       fp2: x/x
13882#       F_SCR1:x/x
13883#       F_SCR2:Abs(X) with $3fff exponent/Unchanged
13884#       L_SCR1:x/x
13885#       L_SCR2:first word of X packed/Unchanged
13886
13887A9_str:
13888        fmov.x          (%a0),%fp0      # load X from memory
13889        fabs.x          %fp0            # use abs(X)
13890        tst.w           %d5             # LAMBDA is in lower word of d5
13891        bne.b           sc_mul          # if neg (LAMBDA = 1), scale by mul
13892        fdiv.x          %fp1,%fp0       # calculate X / SCALE -> Y to fp0
13893        bra.w           A10_st          # branch to A10
13894
13895sc_mul:
13896        tst.b           BINDEC_FLG(%a6) # check for denorm
13897        beq.w           A9_norm         # if norm, continue with mul
13898
13899# for DENORM, we must calculate:
13900#       fp0 = input_op * 10^ISCALE * 10^24
13901# since the input operand is a DENORM, we can't multiply it directly.
13902# so, we do the multiplication of the exponents and mantissas separately.
13903# in this way, we avoid underflow on intermediate stages of the
13904# multiplication and guarantee a result without exception.
13905        fmovm.x         &0x2,-(%sp)     # save 10^ISCALE to stack
13906
13907        mov.w           (%sp),%d3       # grab exponent
13908        andi.w          &0x7fff,%d3     # clear sign
13909        ori.w           &0x8000,(%a0)   # make DENORM exp negative
13910        add.w           (%a0),%d3       # add DENORM exp to 10^ISCALE exp
13911        subi.w          &0x3fff,%d3     # subtract BIAS
13912        add.w           36(%a1),%d3
13913        subi.w          &0x3fff,%d3     # subtract BIAS
13914        add.w           48(%a1),%d3
13915        subi.w          &0x3fff,%d3     # subtract BIAS
13916
13917        bmi.w           sc_mul_err      # if result is DENORM, punt!!!
13918
13919        andi.w          &0x8000,(%sp)   # keep sign
13920        or.w            %d3,(%sp)       # insert new exponent
13921        andi.w          &0x7fff,(%a0)   # clear sign bit on DENORM again
13922        mov.l           0x8(%a0),-(%sp) # put input op mantissa on stk
13923        mov.l           0x4(%a0),-(%sp)
13924        mov.l           &0x3fff0000,-(%sp) # force exp to zero
13925        fmovm.x         (%sp)+,&0x80    # load normalized DENORM into fp0
13926        fmul.x          (%sp)+,%fp0
13927
13928#       fmul.x  36(%a1),%fp0    # multiply fp0 by 10^8
13929#       fmul.x  48(%a1),%fp0    # multiply fp0 by 10^16
13930        mov.l           36+8(%a1),-(%sp) # get 10^8 mantissa
13931        mov.l           36+4(%a1),-(%sp)
13932        mov.l           &0x3fff0000,-(%sp) # force exp to zero
13933        mov.l           48+8(%a1),-(%sp) # get 10^16 mantissa
13934        mov.l           48+4(%a1),-(%sp)
13935        mov.l           &0x3fff0000,-(%sp)# force exp to zero
13936        fmul.x          (%sp)+,%fp0     # multiply fp0 by 10^8
13937        fmul.x          (%sp)+,%fp0     # multiply fp0 by 10^16
13938        bra.b           A10_st
13939
13940sc_mul_err:
13941        bra.b           sc_mul_err
13942
13943A9_norm:
13944        tst.w           %d2             # test for small exp case
13945        beq.b           A9_con          # if zero, continue as normal
13946        fmul.x          36(%a1),%fp0    # multiply fp0 by 10^8
13947        fmul.x          48(%a1),%fp0    # multiply fp0 by 10^16
13948A9_con:
13949        fmul.x          %fp1,%fp0       # calculate X * SCALE -> Y to fp0
13950
13951# A10. Or in INEX.
13952#      If INEX is set, round error occurred.  This is compensated
13953#      for by 'or-ing' in the INEX2 flag to the lsb of Y.
13954#
13955# Register usage:
13956#       Input/Output
13957#       d0: FPCR with RZ mode/FPSR with INEX2 isolated
13958#       d2: x/x
13959#       d3: x/x
13960#       d4: LEN/Unchanged
13961#       d5: ICTR:LAMBDA
13962#       d6: ILOG/Unchanged
13963#       d7: k-factor/Unchanged
13964#       a0: ptr for original operand/final result
13965#       a1: ptr to PTENxx array/Unchanged
13966#       a2: x/ptr to FP_SCR1(a6)
13967#       fp0: Y/Y with lsb adjusted
13968#       fp1: 10^ISCALE/Unchanged
13969#       fp2: x/x
13970
13971A10_st:
13972        fmov.l          %fpsr,%d0       # get FPSR
13973        fmov.x          %fp0,FP_SCR1(%a6)       # move Y to memory
13974        lea.l           FP_SCR1(%a6),%a2        # load a2 with ptr to FP_SCR1
13975        btst            &9,%d0          # check if INEX2 set
13976        beq.b           A11_st          # if clear, skip rest
13977        or.l            &1,8(%a2)       # or in 1 to lsb of mantissa
13978        fmov.x          FP_SCR1(%a6),%fp0       # write adjusted Y back to fpu
13979
13980
13981# A11. Restore original FPCR; set size ext.
13982#      Perform FINT operation in the user's rounding mode.  Keep
13983#      the size to extended.  The sintdo entry point in the sint
13984#      routine expects the FPCR value to be in USER_FPCR for
13985#      mode and precision.  The original FPCR is saved in L_SCR1.
13986
13987A11_st:
13988        mov.l           USER_FPCR(%a6),L_SCR1(%a6)      # save it for later
13989        and.l           &0x00000030,USER_FPCR(%a6)      # set size to ext,
13990#                                       ;block exceptions
13991
13992
13993# A12. Calculate YINT = FINT(Y) according to user's rounding mode.
13994#      fint.x is used inline instead of sintdo.  The output is in fp0.
13995#
13996# Register usage:
13997#       Input/Output
13998#       d0: FPSR with AINEX cleared/FPCR with size set to ext
13999#       d2: x/x/scratch
14000#       d3: x/x
14001#       d4: LEN/Unchanged
14002#       d5: ICTR:LAMBDA/Unchanged
14003#       d6: ILOG/Unchanged
14004#       d7: k-factor/Unchanged
14005#       a0: ptr for original operand/src ptr for sintdo
14006#       a1: ptr to PTENxx array/Unchanged
14007#       a2: ptr to FP_SCR1(a6)/Unchanged
14008#       a6: temp pointer to FP_SCR1(a6) - orig value saved and restored
14009#       fp0: Y/YINT
14010#       fp1: 10^ISCALE/Unchanged
14011#       fp2: x/x
14012#       F_SCR1:x/x
14013#       F_SCR2:Y adjusted for inex/Y with original exponent
14014#       L_SCR1:x/original USER_FPCR
14015#       L_SCR2:first word of X packed/Unchanged
14016
14017A12_st:
14018        movm.l  &0xc0c0,-(%sp)  # save regs used by sintd0       {%d0-%d1/%a0-%a1}
14019        mov.l   L_SCR1(%a6),-(%sp)
14020        mov.l   L_SCR2(%a6),-(%sp)
14021
14022        lea.l           FP_SCR1(%a6),%a0        # a0 is ptr to FP_SCR1(a6)
14023        fmov.x          %fp0,(%a0)      # move Y to memory at FP_SCR1(a6)
14024        tst.l           L_SCR2(%a6)     # test sign of original operand
14025        bge.b           do_fint12               # if pos, use Y
14026        or.l            &0x80000000,(%a0)       # if neg, use -Y
14027do_fint12:
14028        mov.l   USER_FPSR(%a6),-(%sp)
14029#       bsr     sintdo          # sint routine returns int in fp0
14030
14031        fmov.l  USER_FPCR(%a6),%fpcr
14032        fmov.l  &0x0,%fpsr                      # clear the AEXC bits!!!
14033##      mov.l           USER_FPCR(%a6),%d0      # ext prec/keep rnd mode
14034##      andi.l          &0x00000030,%d0
14035##      fmov.l          %d0,%fpcr
14036        fint.x          FP_SCR1(%a6),%fp0       # do fint()
14037        fmov.l  %fpsr,%d0
14038        or.w    %d0,FPSR_EXCEPT(%a6)
14039##      fmov.l          &0x0,%fpcr
14040##      fmov.l          %fpsr,%d0               # don't keep ccodes
14041##      or.w            %d0,FPSR_EXCEPT(%a6)
14042
14043        mov.b   (%sp),USER_FPSR(%a6)
14044        add.l   &4,%sp
14045
14046        mov.l   (%sp)+,L_SCR2(%a6)
14047        mov.l   (%sp)+,L_SCR1(%a6)
14048        movm.l  (%sp)+,&0x303   # restore regs used by sint      {%d0-%d1/%a0-%a1}
14049
14050        mov.l   L_SCR2(%a6),FP_SCR1(%a6)        # restore original exponent
14051        mov.l   L_SCR1(%a6),USER_FPCR(%a6)      # restore user's FPCR
14052
14053# A13. Check for LEN digits.
14054#      If the int operation results in more than LEN digits,
14055#      or less than LEN -1 digits, adjust ILOG and repeat from
14056#      A6.  This test occurs only on the first pass.  If the
14057#      result is exactly 10^LEN, decrement ILOG and divide
14058#      the mantissa by 10.  The calculation of 10^LEN cannot
14059#      be inexact, since all powers of ten up to 10^27 are exact
14060#      in extended precision, so the use of a previous power-of-ten
14061#      table will introduce no error.
14062#
14063#
14064# Register usage:
14065#       Input/Output
14066#       d0: FPCR with size set to ext/scratch final = 0
14067#       d2: x/x
14068#       d3: x/scratch final = x
14069#       d4: LEN/LEN adjusted
14070#       d5: ICTR:LAMBDA/LAMBDA:ICTR
14071#       d6: ILOG/ILOG adjusted
14072#       d7: k-factor/Unchanged
14073#       a0: pointer into memory for packed bcd string formation
14074#       a1: ptr to PTENxx array/Unchanged
14075#       a2: ptr to FP_SCR1(a6)/Unchanged
14076#       fp0: int portion of Y/abs(YINT) adjusted
14077#       fp1: 10^ISCALE/Unchanged
14078#       fp2: x/10^LEN
14079#       F_SCR1:x/x
14080#       F_SCR2:Y with original exponent/Unchanged
14081#       L_SCR1:original USER_FPCR/Unchanged
14082#       L_SCR2:first word of X packed/Unchanged
14083
14084A13_st:
14085        swap            %d5             # put ICTR in lower word of d5
14086        tst.w           %d5             # check if ICTR = 0
14087        bne             not_zr          # if non-zero, go to second test
14088#
14089# Compute 10^(LEN-1)
14090#
14091        fmov.s          FONE(%pc),%fp2  # init fp2 to 1.0
14092        mov.l           %d4,%d0         # put LEN in d0
14093        subq.l          &1,%d0          # d0 = LEN -1
14094        clr.l           %d3             # clr table index
14095l_loop:
14096        lsr.l           &1,%d0          # shift next bit into carry
14097        bcc.b           l_next          # if zero, skip the mul
14098        fmul.x          (%a1,%d3),%fp2  # mul by 10**(d3_bit_no)
14099l_next:
14100        add.l           &12,%d3         # inc d3 to next pwrten table entry
14101        tst.l           %d0             # test if LEN is zero
14102        bne.b           l_loop          # if not, loop
14103#
14104# 10^(LEN-1) is computed for this test and A14.  If the input was
14105# denormalized, check only the case in which YINT > 10^LEN.
14106#
14107        tst.b           BINDEC_FLG(%a6) # check if input was norm
14108        beq.b           A13_con         # if norm, continue with checking
14109        fabs.x          %fp0            # take abs of YINT
14110        bra             test_2
14111#
14112# Compare abs(YINT) to 10^(LEN-1) and 10^LEN
14113#
14114A13_con:
14115        fabs.x          %fp0            # take abs of YINT
14116        fcmp.x          %fp0,%fp2       # compare abs(YINT) with 10^(LEN-1)
14117        fbge.w          test_2          # if greater, do next test
14118        subq.l          &1,%d6          # subtract 1 from ILOG
14119        mov.w           &1,%d5          # set ICTR
14120        fmov.l          &rm_mode*0x10,%fpcr     # set rmode to RM
14121        fmul.s          FTEN(%pc),%fp2  # compute 10^LEN
14122        bra.w           A6_str          # return to A6 and recompute YINT
14123test_2:
14124        fmul.s          FTEN(%pc),%fp2  # compute 10^LEN
14125        fcmp.x          %fp0,%fp2       # compare abs(YINT) with 10^LEN
14126        fblt.w          A14_st          # if less, all is ok, go to A14
14127        fbgt.w          fix_ex          # if greater, fix and redo
14128        fdiv.s          FTEN(%pc),%fp0  # if equal, divide by 10
14129        addq.l          &1,%d6          # and inc ILOG
14130        bra.b           A14_st          # and continue elsewhere
14131fix_ex:
14132        addq.l          &1,%d6          # increment ILOG by 1
14133        mov.w           &1,%d5          # set ICTR
14134        fmov.l          &rm_mode*0x10,%fpcr     # set rmode to RM
14135        bra.w           A6_str          # return to A6 and recompute YINT
14136#
14137# Since ICTR <> 0, we have already been through one adjustment,
14138# and shouldn't have another; this is to check if abs(YINT) = 10^LEN
14139# 10^LEN is again computed using whatever table is in a1 since the
14140# value calculated cannot be inexact.
14141#
14142not_zr:
14143        fmov.s          FONE(%pc),%fp2  # init fp2 to 1.0
14144        mov.l           %d4,%d0         # put LEN in d0
14145        clr.l           %d3             # clr table index
14146z_loop:
14147        lsr.l           &1,%d0          # shift next bit into carry
14148        bcc.b           z_next          # if zero, skip the mul
14149        fmul.x          (%a1,%d3),%fp2  # mul by 10**(d3_bit_no)
14150z_next:
14151        add.l           &12,%d3         # inc d3 to next pwrten table entry
14152        tst.l           %d0             # test if LEN is zero
14153        bne.b           z_loop          # if not, loop
14154        fabs.x          %fp0            # get abs(YINT)
14155        fcmp.x          %fp0,%fp2       # check if abs(YINT) = 10^LEN
14156        fbneq.w         A14_st          # if not, skip this
14157        fdiv.s          FTEN(%pc),%fp0  # divide abs(YINT) by 10
14158        addq.l          &1,%d6          # and inc ILOG by 1
14159        addq.l          &1,%d4          # and inc LEN
14160        fmul.s          FTEN(%pc),%fp2  # if LEN++, then get 10^^LEN
14161
14162# A14. Convert the mantissa to bcd.
14163#      The binstr routine is used to convert the LEN digit
14164#      mantissa to bcd in memory.  The input to binstr is
14165#      to be a fraction; i.e. (mantissa)/10^LEN and adjusted
14166#      such that the decimal point is to the left of bit 63.
14167#      The bcd digits are stored in the correct position in
14168#      the final string area in memory.
14169#
14170#
14171# Register usage:
14172#       Input/Output
14173#       d0: x/LEN call to binstr - final is 0
14174#       d1: x/0
14175#       d2: x/ms 32-bits of mant of abs(YINT)
14176#       d3: x/ls 32-bits of mant of abs(YINT)
14177#       d4: LEN/Unchanged
14178#       d5: ICTR:LAMBDA/LAMBDA:ICTR
14179#       d6: ILOG
14180#       d7: k-factor/Unchanged
14181#       a0: pointer into memory for packed bcd string formation
14182#           /ptr to first mantissa byte in result string
14183#       a1: ptr to PTENxx array/Unchanged
14184#       a2: ptr to FP_SCR1(a6)/Unchanged
14185#       fp0: int portion of Y/abs(YINT) adjusted
14186#       fp1: 10^ISCALE/Unchanged
14187#       fp2: 10^LEN/Unchanged
14188#       F_SCR1:x/Work area for final result
14189#       F_SCR2:Y with original exponent/Unchanged
14190#       L_SCR1:original USER_FPCR/Unchanged
14191#       L_SCR2:first word of X packed/Unchanged
14192
14193A14_st:
14194        fmov.l          &rz_mode*0x10,%fpcr     # force rz for conversion
14195        fdiv.x          %fp2,%fp0       # divide abs(YINT) by 10^LEN
14196        lea.l           FP_SCR0(%a6),%a0
14197        fmov.x          %fp0,(%a0)      # move abs(YINT)/10^LEN to memory
14198        mov.l           4(%a0),%d2      # move 2nd word of FP_RES to d2
14199        mov.l           8(%a0),%d3      # move 3rd word of FP_RES to d3
14200        clr.l           4(%a0)          # zero word 2 of FP_RES
14201        clr.l           8(%a0)          # zero word 3 of FP_RES
14202        mov.l           (%a0),%d0       # move exponent to d0
14203        swap            %d0             # put exponent in lower word
14204        beq.b           no_sft          # if zero, don't shift
14205        sub.l           &0x3ffd,%d0     # sub bias less 2 to make fract
14206        tst.l           %d0             # check if > 1
14207        bgt.b           no_sft          # if so, don't shift
14208        neg.l           %d0             # make exp positive
14209m_loop:
14210        lsr.l           &1,%d2          # shift d2:d3 right, add 0s
14211        roxr.l          &1,%d3          # the number of places
14212        dbf.w           %d0,m_loop      # given in d0
14213no_sft:
14214        tst.l           %d2             # check for mantissa of zero
14215        bne.b           no_zr           # if not, go on
14216        tst.l           %d3             # continue zero check
14217        beq.b           zer_m           # if zero, go directly to binstr
14218no_zr:
14219        clr.l           %d1             # put zero in d1 for addx
14220        add.l           &0x00000080,%d3 # inc at bit 7
14221        addx.l          %d1,%d2         # continue inc
14222        and.l           &0xffffff80,%d3 # strip off lsb not used by 882
14223zer_m:
14224        mov.l           %d4,%d0         # put LEN in d0 for binstr call
14225        addq.l          &3,%a0          # a0 points to M16 byte in result
14226        bsr             binstr          # call binstr to convert mant
14227
14228
14229# A15. Convert the exponent to bcd.
14230#      As in A14 above, the exp is converted to bcd and the
14231#      digits are stored in the final string.
14232#
14233#      Digits are stored in L_SCR1(a6) on return from BINDEC as:
14234#
14235#        31               16 15                0
14236#       -----------------------------------------
14237#       |  0 | e3 | e2 | e1 | e4 |  X |  X |  X |
14238#       -----------------------------------------
14239#
14240# And are moved into their proper places in FP_SCR0.  If digit e4
14241# is non-zero, OPERR is signaled.  In all cases, all 4 digits are
14242# written as specified in the 881/882 manual for packed decimal.
14243#
14244# Register usage:
14245#       Input/Output
14246#       d0: x/LEN call to binstr - final is 0
14247#       d1: x/scratch (0);shift count for final exponent packing
14248#       d2: x/ms 32-bits of exp fraction/scratch
14249#       d3: x/ls 32-bits of exp fraction
14250#       d4: LEN/Unchanged
14251#       d5: ICTR:LAMBDA/LAMBDA:ICTR
14252#       d6: ILOG
14253#       d7: k-factor/Unchanged
14254#       a0: ptr to result string/ptr to L_SCR1(a6)
14255#       a1: ptr to PTENxx array/Unchanged
14256#       a2: ptr to FP_SCR1(a6)/Unchanged
14257#       fp0: abs(YINT) adjusted/float(ILOG)
14258#       fp1: 10^ISCALE/Unchanged
14259#       fp2: 10^LEN/Unchanged
14260#       F_SCR1:Work area for final result/BCD result
14261#       F_SCR2:Y with original exponent/ILOG/10^4
14262#       L_SCR1:original USER_FPCR/Exponent digits on return from binstr
14263#       L_SCR2:first word of X packed/Unchanged
14264
14265A15_st:
14266        tst.b           BINDEC_FLG(%a6) # check for denorm
14267        beq.b           not_denorm
14268        ftest.x         %fp0            # test for zero
14269        fbeq.w          den_zero        # if zero, use k-factor or 4933
14270        fmov.l          %d6,%fp0        # float ILOG
14271        fabs.x          %fp0            # get abs of ILOG
14272        bra.b           convrt
14273den_zero:
14274        tst.l           %d7             # check sign of the k-factor
14275        blt.b           use_ilog        # if negative, use ILOG
14276        fmov.s          F4933(%pc),%fp0 # force exponent to 4933
14277        bra.b           convrt          # do it
14278use_ilog:
14279        fmov.l          %d6,%fp0        # float ILOG
14280        fabs.x          %fp0            # get abs of ILOG
14281        bra.b           convrt
14282not_denorm:
14283        ftest.x         %fp0            # test for zero
14284        fbneq.w         not_zero        # if zero, force exponent
14285        fmov.s          FONE(%pc),%fp0  # force exponent to 1
14286        bra.b           convrt          # do it
14287not_zero:
14288        fmov.l          %d6,%fp0        # float ILOG
14289        fabs.x          %fp0            # get abs of ILOG
14290convrt:
14291        fdiv.x          24(%a1),%fp0    # compute ILOG/10^4
14292        fmov.x          %fp0,FP_SCR1(%a6)       # store fp0 in memory
14293        mov.l           4(%a2),%d2      # move word 2 to d2
14294        mov.l           8(%a2),%d3      # move word 3 to d3
14295        mov.w           (%a2),%d0       # move exp to d0
14296        beq.b           x_loop_fin      # if zero, skip the shift
14297        sub.w           &0x3ffd,%d0     # subtract off bias
14298        neg.w           %d0             # make exp positive
14299x_loop:
14300        lsr.l           &1,%d2          # shift d2:d3 right
14301        roxr.l          &1,%d3          # the number of places
14302        dbf.w           %d0,x_loop      # given in d0
14303x_loop_fin:
14304        clr.l           %d1             # put zero in d1 for addx
14305        add.l           &0x00000080,%d3 # inc at bit 6
14306        addx.l          %d1,%d2         # continue inc
14307        and.l           &0xffffff80,%d3 # strip off lsb not used by 882
14308        mov.l           &4,%d0          # put 4 in d0 for binstr call
14309        lea.l           L_SCR1(%a6),%a0 # a0 is ptr to L_SCR1 for exp digits
14310        bsr             binstr          # call binstr to convert exp
14311        mov.l           L_SCR1(%a6),%d0 # load L_SCR1 lword to d0
14312        mov.l           &12,%d1         # use d1 for shift count
14313        lsr.l           %d1,%d0         # shift d0 right by 12
14314        bfins           %d0,FP_SCR0(%a6){&4:&12}        # put e3:e2:e1 in FP_SCR0
14315        lsr.l           %d1,%d0         # shift d0 right by 12
14316        bfins           %d0,FP_SCR0(%a6){&16:&4}        # put e4 in FP_SCR0
14317        tst.b           %d0             # check if e4 is zero
14318        beq.b           A16_st          # if zero, skip rest
14319        or.l            &opaop_mask,USER_FPSR(%a6)      # set OPERR & AIOP in USER_FPSR
14320
14321
14322# A16. Write sign bits to final string.
14323#          Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
14324#
14325# Register usage:
14326#       Input/Output
14327#       d0: x/scratch - final is x
14328#       d2: x/x
14329#       d3: x/x
14330#       d4: LEN/Unchanged
14331#       d5: ICTR:LAMBDA/LAMBDA:ICTR
14332#       d6: ILOG/ILOG adjusted
14333#       d7: k-factor/Unchanged
14334#       a0: ptr to L_SCR1(a6)/Unchanged
14335#       a1: ptr to PTENxx array/Unchanged
14336#       a2: ptr to FP_SCR1(a6)/Unchanged
14337#       fp0: float(ILOG)/Unchanged
14338#       fp1: 10^ISCALE/Unchanged
14339#       fp2: 10^LEN/Unchanged
14340#       F_SCR1:BCD result with correct signs
14341#       F_SCR2:ILOG/10^4
14342#       L_SCR1:Exponent digits on return from binstr
14343#       L_SCR2:first word of X packed/Unchanged
14344
14345A16_st:
# Build a 2-bit value in d0: bit 1 = SM (mantissa sign), bit 0 = SE
# (exponent sign), then insert it into the top two bits of FP_SCR0.
14346        clr.l           %d0             # clr d0 for collection of signs
14347        and.b           &0x0f,FP_SCR0(%a6)      # clear first nibble of FP_SCR0
14348        tst.l           L_SCR2(%a6)     # check sign of original mantissa
14349        bge.b           mant_p          # if pos, don't set SM
14350        mov.l           &2,%d0          # move 2 in to d0 for SM
14351mant_p:
14352        tst.l           %d6             # check sign of ILOG
14353        bge.b           wr_sgn          # if pos, don't set SE
14354        addq.l          &1,%d0          # set bit 0 in d0 for SE
14355wr_sgn:
14356        bfins           %d0,FP_SCR0(%a6){&0:&2} # insert SM and SE into FP_SCR0
14357
14358# Clean up and restore all registers used.
# The two pops below presumably mirror saves made at routine entry,
# which lies outside this section -- confirm against the prologue.
14359
14360        fmov.l          &0,%fpsr        # write 0 to fpsr: clear possible inex2/ainex bits
14361        fmovm.x         (%sp)+,&0xe0    #  {%fp0-%fp2}
14362        movm.l          (%sp)+,&0x4fc   #  {%d2-%d7/%a2}
14363        rts
14364
# PTENRN: powers of ten 10^(2^n), n = 0..12, one three-long (12-byte)
# entry per power, so 10^(2^n) sits at byte offset 12*n.  Step A15 of the
# decimal conversion above reads an entry with fdiv.x (10^4 at offset 24
# of whichever PTENxx table a1 points to).
# PTENRP and PTENRM hold the same powers; some of their entries differ
# from this table by one unit in the last place of the mantissa.
# Presumably RN/RP/RM = rounded to nearest / toward plus infinity /
# toward minus infinity -- confirm against the code that selects the table.
14365        global          PTENRN
14366PTENRN:
14367        long            0x40020000,0xA0000000,0x00000000        # 10 ^ 1
14368        long            0x40050000,0xC8000000,0x00000000        # 10 ^ 2
14369        long            0x400C0000,0x9C400000,0x00000000        # 10 ^ 4
14370        long            0x40190000,0xBEBC2000,0x00000000        # 10 ^ 8
14371        long            0x40340000,0x8E1BC9BF,0x04000000        # 10 ^ 16
14372        long            0x40690000,0x9DC5ADA8,0x2B70B59E        # 10 ^ 32
14373        long            0x40D30000,0xC2781F49,0xFFCFA6D5        # 10 ^ 64
14374        long            0x41A80000,0x93BA47C9,0x80E98CE0        # 10 ^ 128
14375        long            0x43510000,0xAA7EEBFB,0x9DF9DE8E        # 10 ^ 256
14376        long            0x46A30000,0xE319A0AE,0xA60E91C7        # 10 ^ 512
14377        long            0x4D480000,0xC9767586,0x81750C17        # 10 ^ 1024
14378        long            0x5A920000,0x9E8B3B5D,0xC53D5DE5        # 10 ^ 2048
14379        long            0x75250000,0xC4605202,0x8A20979B        # 10 ^ 4096
14380
# PTENRP: powers of ten 10^(2^n), n = 0..12, one three-long (12-byte)
# entry per power (same layout as PTENRN).  Compared with PTENRN, only
# the 10^64 and 10^1024 entries differ: their last mantissa long is one
# larger here.  Presumably this is the table rounded toward plus
# infinity -- confirm against the code that selects the table.
14381        global          PTENRP
14382PTENRP:
14383        long            0x40020000,0xA0000000,0x00000000        # 10 ^ 1
14384        long            0x40050000,0xC8000000,0x00000000        # 10 ^ 2
14385        long            0x400C0000,0x9C400000,0x00000000        # 10 ^ 4
14386        long            0x40190000,0xBEBC2000,0x00000000        # 10 ^ 8
14387        long            0x40340000,0x8E1BC9BF,0x04000000        # 10 ^ 16
14388        long            0x40690000,0x9DC5ADA8,0x2B70B59E        # 10 ^ 32
14389        long            0x40D30000,0xC2781F49,0xFFCFA6D6        # 10 ^ 64
14390        long            0x41A80000,0x93BA47C9,0x80E98CE0        # 10 ^ 128
14391        long            0x43510000,0xAA7EEBFB,0x9DF9DE8E        # 10 ^ 256
14392        long            0x46A30000,0xE319A0AE,0xA60E91C7        # 10 ^ 512
14393        long            0x4D480000,0xC9767586,0x81750C18        # 10 ^ 1024
14394        long            0x5A920000,0x9E8B3B5D,0xC53D5DE5        # 10 ^ 2048
14395        long            0x75250000,0xC4605202,0x8A20979B        # 10 ^ 4096
14396
# PTENRM: powers of ten 10^(2^n), n = 0..12, one three-long (12-byte)
# entry per power (same layout as PTENRN).  Compared with PTENRN, the
# 10^32, 10^128, 10^256, 10^512, 10^2048 and 10^4096 entries have a last
# mantissa long that is one smaller here.  Presumably this is the table
# rounded toward minus infinity -- confirm against the code that selects
# the table.
14397        global          PTENRM
14398PTENRM:
14399        long            0x40020000,0xA0000000,0x00000000        # 10 ^ 1
14400        long            0x40050000,0xC8000000,0x00000000        # 10 ^ 2
14401        long            0x400C0000,0x9C400000,0x00000000        # 10 ^ 4
14402        long            0x40190000,0xBEBC2000,0x00000000        # 10 ^ 8
14403        long            0x40340000,0x8E1BC9BF,0x04000000        # 10 ^ 16
14404        long            0x40690000,0x9DC5ADA8,0x2B70B59D        # 10 ^ 32
14405        long            0x40D30000,0xC2781F49,0xFFCFA6D5        # 10 ^ 64
14406        long            0x41A80000,0x93BA47C9,0x80E98CDF        # 10 ^ 128
14407        long            0x43510000,0xAA7EEBFB,0x9DF9DE8D        # 10 ^ 256
14408        long            0x46A30000,0xE319A0AE,0xA60E91C6        # 10 ^ 512
14409        long            0x4D480000,0xC9767586,0x81750C17        # 10 ^ 1024
14410        long            0x5A920000,0x9E8B3B5D,0xC53D5DE4        # 10 ^ 2048
14411        long            0x75250000,0xC4605202,0x8A20979A        # 10 ^ 4096
14412
14413#########################################################################
14414# binstr(): Converts a 64-bit binary integer to bcd.                    #
14415#                                                                       #
14416# INPUT *************************************************************** #
14417#       d2:d3 = 64-bit binary integer                                   #
14418#       d0    = desired length (LEN)                                    #
14419#       a0    = pointer to start in memory for bcd characters           #
14420#               (This pointer must point to byte 4 of the first         #
14421#                lword of the packed decimal memory string.)            #
14422#                                                                       #
14423# OUTPUT ************************************************************** #
14424#       a0 = pointer to LEN bcd digits representing the 64-bit integer. #
14425#                                                                       #
14426# ALGORITHM *********************************************************** #
14427#       The 64-bit binary is assumed to have a decimal point before     #
14428#       bit 63.  The fraction is multiplied by 10 using a mul by 2      #
14429#       shift and a mul by 8 shift.  The bits shifted out of the        #
14430#       msb form a decimal digit.  This process is iterated until       #
14431#       LEN digits are formed.                                          #
14432#                                                                       #
14433# A1. Init d7 to 1.  D7 is the byte digit counter, and if 1, the        #
14434#     digit formed will be assumed the least significant.  This is      #
14435#     to force the first byte formed to have a 0 in the upper 4 bits.   #
14436#                                                                       #
14437# A2. Beginning of the loop:                                            #
14438#     Copy the fraction in d2:d3 to d4:d5.                              #
14439#                                                                       #
14440# A3. Multiply the fraction in d2:d3 by 8 using bit-field               #
14441#     extracts and shifts.  The three msbs from d2 will go into d1.     #
14442#                                                                       #
14443# A4. Multiply the fraction in d4:d5 by 2 using shifts.  The msb        #
14444#     will be collected by the carry.                                   #
14445#                                                                       #
14446# A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5      #
14447#     into d2:d3.  D1 will contain the bcd digit formed.                #
14448#                                                                       #
14449# A6. Test d7.  If zero, the digit formed is the ms digit.  If non-     #
14450#     zero, it is the ls digit.  Put the digit in its place in the      #
14451#     upper word of d7.  If it is the ls digit, write the byte          #
14452#     from d7 to memory.                                                #
14453#                                                                       #
14454# A7. Decrement d0 (LEN counter) and repeat the loop until zero.        #
14455#                                                                       #
14456#########################################################################
14457
14458#       Implementation Notes:
14459#
14460#       The registers are used as follows:
14461#
14462#               d0: LEN counter
14463#               d1: temp used to form the digit
14464#               d2: upper 32-bits of fraction for mul by 8
14465#               d3: lower 32-bits of fraction for mul by 8
14466#               d4: upper 32-bits of fraction for mul by 2
14467#               d5: lower 32-bits of fraction for mul by 2
14468#               d6: temp for bit-field extracts
14469#               d7: byte digit formation word;digit count {0,1}
14470#               a0: pointer into memory for packed bcd string formation
14471#
14472
14473        global          binstr
14474binstr:
14475        movm.l          &0xff00,-(%sp)  #  {%d0-%d7}
14476
14477#
14478# A1: Init d7
14479#
# The low word of d7 is the digit-pairing flag (non-zero: the next digit
# completes a byte); the high word holds the pending first digit of a
# pair.  Starting with flag = 1 and high word = 0 makes the first byte
# written 0:digit.
14480        mov.l           &1,%d7          # init d7 for second digit
14481        subq.l          &1,%d0          # dbf runs count+1 passes, so use LEN-1
14482#
14483# A2. Copy d2:d3 to d4:d5.  Start loop.
14484#
14485loop:
14486        mov.l           %d2,%d4         # copy the fraction before muls
14487        mov.l           %d3,%d5         # to d4:d5
14488#
14489# A3. Multiply d2:d3 by 8; extract msbs into d1.
14490#
14491        bfextu          %d2{&0:&3},%d1  # copy 3 msbs of d2 into d1
14492        asl.l           &3,%d2          # shift d2 left by 3 places
14493        bfextu          %d3{&0:&3},%d6  # copy 3 msbs of d3 into d6
14494        asl.l           &3,%d3          # shift d3 left by 3 places
14495        or.l            %d6,%d2         # or in msbs from d3 into d2
14496#
14497# A4. Multiply d4:d5 by 2; add carry out to d1.
14498#
14499        asl.l           &1,%d5          # mul d5 by 2
14500        roxl.l          &1,%d4          # mul d4 by 2
14501        swap            %d6             # put 0 in d6 lower word
14502        addx.w          %d6,%d1         # add in extend from mul by 2
14503#
14504# A5. Add mul by 8 to mul by 2.  D1 contains the digit formed.
14505#
14506        add.l           %d5,%d3         # add lower 32 bits
14507        nop                             # ERRATA FIX #13 (Rev. 1.2 6/6/90)
14508        addx.l          %d4,%d2         # add with extend upper 32 bits
14509        nop                             # ERRATA FIX #13 (Rev. 1.2 6/6/90)
14510        addx.w          %d6,%d1         # add in extend from add to d1
14511        swap            %d6             # with d6 = 0; put 0 in upper word
14512#
14513# A6. Test d7 and branch.
14514#
14515        tst.w           %d7             # if zero, store digit & to loop
14516        beq.b           first_d         # if non-zero, form byte & write
14517sec_d:
14518        swap            %d7             # bring first digit to word d7b
14519        asl.w           &4,%d7          # first digit in upper 4 bits d7b
14520        add.w           %d1,%d7         # add in ls digit to d7b
14521        mov.b           %d7,(%a0)+      # store d7b byte in memory
14522        swap            %d7             # bring pairing flag back to word d7a
14523        clr.w           %d7             # set d7a to signal no digits done
14524        dbf.w           %d0,loop        # do loop some more!
14525        bra.b           end_bstr        # finished, so exit
14526first_d:
14527        swap            %d7             # put digit word in d7b
14528        mov.w           %d1,%d7         # put new digit in d7b
14529        swap            %d7             # bring pairing flag back to word d7a
14530        addq.w          &1,%d7          # set d7a to signal first digit done
14531        dbf.w           %d0,loop        # do loop some more!
# Loop ended with an unpaired digit: write it alone in the upper nibble
# (the lower nibble of that byte is written as 0).
14532        swap            %d7             # put last digit in string
14533        lsl.w           &4,%d7          # move it to upper 4 bits
14534        mov.b           %d7,(%a0)+      # store it in memory string
14535#
14536# Clean up and return; the bcd digits were written to memory through a0.
# d0-d7 are restored; a0 is not saved here, so it is left pointing past
# the last byte written.
14537#
14538end_bstr:
14539        movm.l          (%sp)+,&0xff    #  {%d0-%d7}
14540        rts
14541
14542#########################################################################
14543# XDEF **************************************************************** #
14544#       facc_in_b(): dmem_read_byte failed                              #
14545#       facc_in_w(): dmem_read_word failed                              #
14546#       facc_in_l(): dmem_read_long failed                              #
14547#       facc_in_d(): dmem_read of dbl prec failed                       #
14548#       facc_in_x(): dmem_read of ext prec failed                       #
14549#                                                                       #
14550#       facc_out_b(): dmem_write_byte failed                            #
14551#       facc_out_w(): dmem_write_word failed                            #
14552#       facc_out_l(): dmem_write_long failed                            #
14553#       facc_out_d(): dmem_write of dbl prec failed                     #
14554#       facc_out_x(): dmem_write of ext prec failed                     #
14555#                                                                       #
14556# XREF **************************************************************** #
14557#       _real_access() - exit through access error handler              #
14558#                                                                       #
14559# INPUT *************************************************************** #
14560#       None                                                            #
14561#                                                                       #
14562# OUTPUT ************************************************************** #
14563#       None                                                            #
14564#                                                                       #
14565# ALGORITHM *********************************************************** #
14566#       Flow jumps here when an FP data fetch call gets an error        #
14567# result. This means the operating system wants an access error frame   #
14568# made out of the current exception stack frame.                        #
14569#       So, we first call restore() which makes sure that any updated   #
14570# -(an)+ register gets returned to its pre-exception value and then     #
14571# we change the stack to an access error stack frame.                   #
14572#                                                                       #
14573#########################################################################
14574
# facc_in_{b,w,l,d,x}: entry points for a failed data READ of the given
# size.  Each stub:
#   1. loads d0 with the operand size in bytes (the adjustment restore()
#      applies to An for (An)+ / -(An) addressing),
#   2. calls restore(),
#   3. stores a size-specific word into EXC_VOFF(%a6) and joins
#      facc_finish, which copies a word into the upper half of the FSLW
#      (presumably this same stack word -- confirm against the EXC_VOFF
#      definition).
# Note: facc_in_d and facc_in_x store the same word (0x0161).
14575facc_in_b:
14576        movq.l          &0x1,%d0                        # one byte
14577        bsr.w           restore                         # fix An
14578
14579        mov.w           &0x0121,EXC_VOFF(%a6)           # set FSLW
14580        bra.w           facc_finish
14581
14582facc_in_w:
14583        movq.l          &0x2,%d0                        # two bytes
14584        bsr.w           restore                         # fix An
14585
14586        mov.w           &0x0141,EXC_VOFF(%a6)           # set FSLW
14587        bra.b           facc_finish
14588
14589facc_in_l:
14590        movq.l          &0x4,%d0                        # four bytes
14591        bsr.w           restore                         # fix An
14592
14593        mov.w           &0x0101,EXC_VOFF(%a6)           # set FSLW
14594        bra.b           facc_finish
14595
14596facc_in_d:
14597        movq.l          &0x8,%d0                        # eight bytes
14598        bsr.w           restore                         # fix An
14599
14600        mov.w           &0x0161,EXC_VOFF(%a6)           # set FSLW
14601        bra.b           facc_finish
14602
14603facc_in_x:
14604        movq.l          &0xc,%d0                        # twelve bytes
14605        bsr.w           restore                         # fix An
14606
14607        mov.w           &0x0161,EXC_VOFF(%a6)           # set FSLW
14608        bra.b           facc_finish
14609
14610################################################################
14611
# facc_out_{b,w,l,d,x}: entry points for a failed data WRITE of the given
# size.  Same shape as the facc_in_* stubs: d0 = operand size in bytes
# for restore(), then a size-specific word is stored in EXC_VOFF(%a6)
# before joining facc_finish.
# Note: facc_out_d and facc_out_x store the same word (0x00e1).
14612facc_out_b:
14613        movq.l          &0x1,%d0                        # one byte
14614        bsr.w           restore                         # restore An
14615
14616        mov.w           &0x00a1,EXC_VOFF(%a6)           # set FSLW
14617        bra.b           facc_finish
14618
14619facc_out_w:
14620        movq.l          &0x2,%d0                        # two bytes
14621        bsr.w           restore                         # restore An
14622
14623        mov.w           &0x00c1,EXC_VOFF(%a6)           # set FSLW
14624        bra.b           facc_finish
14625
14626facc_out_l:
14627        movq.l          &0x4,%d0                        # four bytes
14628        bsr.w           restore                         # restore An
14629
14630        mov.w           &0x0081,EXC_VOFF(%a6)           # set FSLW
14631        bra.b           facc_finish
14632
14633facc_out_d:
14634        movq.l          &0x8,%d0                        # eight bytes
14635        bsr.w           restore                         # restore An
14636
14637        mov.w           &0x00e1,EXC_VOFF(%a6)           # set FSLW
14638        bra.b           facc_finish
14639
14640facc_out_x:
# NOTE(review): this stub uses mov.l where its siblings use movq.l; the
# value placed in d0 is the same.
14641        mov.l           &0xc,%d0                        # twelve bytes
14642        bsr.w           restore                         # restore An
14643
14644        mov.w           &0x00e1,EXC_VOFF(%a6)           # set FSLW
# no branch: falls through into facc_finish
14645
14646# here's where we actually create the access error frame from the
14647# current exception stack frame.
#
# After the registers are restored and a6 is unlinked, the frame on the
# stack is opened up by one longword and rewritten as:
#
#       0x0(sp)  SR, hi(PC)
#       0x4(sp)  lo(PC), voff word (set to 0x4008)
#       0x8(sp)  EA
#       0xc(sp)  FSLW = (word previously at the voff slot) : 0x0001
#
# i.e. the word the facc_* stub stored (labelled "set FSLW" there)
# becomes the upper half of the FSLW.  If the S bit of the stacked SR
# is set, bit 2 of the byte at 0xd(sp) is also set (the "supervisor TM
# bit").  Control then leaves through _real_access.
14648facc_finish:
14649        mov.l           USER_FPIAR(%a6),EXC_PC(%a6) # store current PC
14650
14651        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
14652        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
14653        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
14654
14655        unlk            %a6
14656
14657        mov.l           (%sp),-(%sp)            # store SR, hi(PC)
14658        mov.l           0x8(%sp),0x4(%sp)       # store lo(PC)
14659        mov.l           0xc(%sp),0x8(%sp)       # store EA
14660        mov.l           &0x00000001,0xc(%sp)    # store FSLW
14661        mov.w           0x6(%sp),0xc(%sp)       # fix FSLW (size)
14662        mov.w           &0x4008,0x6(%sp)        # store voff
14663
14664        btst            &0x5,(%sp)              # supervisor or user mode?
14665        beq.b           facc_out2               # user
14666        bset            &0x2,0xd(%sp)           # set supervisor TM bit
14667
14668facc_out2:
14669        bra.l           _real_access
14670
14671##################################################################
14672
14673# if the effective addressing mode was predecrement or postincrement,
14674# the emulation has already changed its value to the correct post-
14675# instruction value. but since we're exiting to the access error
14676# handler, then AN must be returned to its pre-instruction value.
14677# we do that here.
14678restore:
# In:  d0 = operand size in bytes (loaded by the facc_* caller).
# Uses d1 as scratch.  Returns directly if the <ea> mode is neither
# postincrement nor predecrement.
14679        mov.b           EXC_OPWORD+0x1(%a6),%d1
14680        andi.b          &0x38,%d1               # extract <ea> mode field (bits 5-3)
14681        cmpi.b          %d1,&0x18               # postinc?
14682        beq.w           rest_inc
14683        cmpi.b          %d1,&0x20               # predec?
14684        beq.w           rest_dec
14685        rts
14686
14687rest_inc:
# Dispatch on the address register number (low 3 bits of the opword) to
# the ri_aN handler, which subtracts d0 from that register.  The table
# holds 16-bit offsets relative to tbl_rest_inc.
14688        mov.b           EXC_OPWORD+0x1(%a6),%d1
14689        andi.w          &0x0007,%d1             # fetch An register number
14690
14691        mov.w           (tbl_rest_inc.b,%pc,%d1.w*2),%d1
14692        jmp             (tbl_rest_inc.b,%pc,%d1.w*1)
14693
14694tbl_rest_inc:
14695        short           ri_a0 - tbl_rest_inc
14696        short           ri_a1 - tbl_rest_inc
14697        short           ri_a2 - tbl_rest_inc
14698        short           ri_a3 - tbl_rest_inc
14699        short           ri_a4 - tbl_rest_inc
14700        short           ri_a5 - tbl_rest_inc
14701        short           ri_a6 - tbl_rest_inc
14702        short           ri_a7 - tbl_rest_inc
14703
# ri_a0..ri_a7: subtract d0 from the selected address register.
# a0/a1 are adjusted in their stacked copies (facc_finish reloads
# d0-d1/a0-a1 from EXC_DREGS), a2-a5 directly in the live registers,
# a6 in the longword at (a6), and a7 via the USP (see ri_a7).
14704ri_a0:
14705        sub.l           %d0,EXC_DREGS+0x8(%a6)  # fix stacked a0
14706        rts
14707ri_a1:
14708        sub.l           %d0,EXC_DREGS+0xc(%a6)  # fix stacked a1
14709        rts
14710ri_a2:
14711        sub.l           %d0,%a2                 # fix a2
14712        rts
14713ri_a3:
14714        sub.l           %d0,%a3                 # fix a3
14715        rts
14716ri_a4:
14717        sub.l           %d0,%a4                 # fix a4
14718        rts
14719ri_a5:
14720        sub.l           %d0,%a5                 # fix a5
14721        rts
14722ri_a6:
14723        sub.l           %d0,(%a6)               # fix stacked a6
14724        rts
14725# if it's a fmove out instruction, we don't have to fix a7
14726# because we hadn't changed it yet. if it's an opclass two
14727# instruction (data moved in) and the exception was in supervisor
14728# mode, then a7 also wasn't updated. if it was user mode, then
14729# restore the correct a7 which is in the USP currently.
14730ri_a7:
14731        cmpi.b          EXC_VOFF(%a6),&0x30     # move in or out?
14732        bne.b           ri_a7_done              # out
14733
14734        btst            &0x5,EXC_SR(%a6)        # user or supervisor?
14735        bne.b           ri_a7_done              # supervisor
14736        movc            %usp,%a0                # restore USP (a0 used as scratch)
14737        sub.l           %d0,%a0
14738        movc            %a0,%usp
14739ri_a7_done:
14740        rts
14741
14742# need to invert adjustment value if the <ea> was predec
# (rest_inc subtracts d0, so negating d0 first makes it add the size back)
14743rest_dec:
14744        neg.l           %d0
14745        bra.b           rest_inc
14746