linux/arch/m68k/ifpsp060/src/ilsp.S
<<
>>
Prefs
   1~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   2MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
   3M68000 Hi-Performance Microprocessor Division
   4M68060 Software Package
   5Production Release P1.00 -- October 10, 1994
   6
   7M68060 Software Package Copyright © 1993, 1994 Motorola Inc.  All rights reserved.
   8
   9THE SOFTWARE is provided on an "AS IS" basis and without warranty.
  10To the maximum extent permitted by applicable law,
  11MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
  12INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
  13and any warranty against infringement with regard to the SOFTWARE
  14(INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials.
  15
  16To the maximum extent permitted by applicable law,
  17IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
  18(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
  19BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
  20ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
  21Motorola assumes no responsibility for the maintenance and support of the SOFTWARE.
  22
  23You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE
  24so long as this entire notice is retained without alteration in any modified and/or
  25redistributed versions, and that such modified versions are clearly identified as such.
  26No licenses are granted by implication, estoppel or otherwise under any patents
  27or trademarks of Motorola, Inc.
  28~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  29# litop.s:
  30#       This file is appended to the top of the 060FPLSP package
  31# and contains the entry points into the package. The user, in
  32# effect, branches to one of the branch table entries located here.
  33#
  34
  35        bra.l   _060LSP__idivs64_       # slot 0: signed 64-bit divide
  36        short   0x0000                  # filler word; presumably pads each slot to 8 bytes - confirm
  37        bra.l   _060LSP__idivu64_       # slot 1: unsigned 64-bit divide
  38        short   0x0000
  39
  40        bra.l   _060LSP__imuls64_       # slot 2: signed 64-bit multiply
  41        short   0x0000
  42        bra.l   _060LSP__imulu64_       # slot 3: unsigned 64-bit multiply
  43        short   0x0000
  44
  45        bra.l   _060LSP__cmp2_Ab_       # slots 4-9: cmp2 handlers; their
  46        short   0x0000                  # definitions are not in this part of the file
  47        bra.l   _060LSP__cmp2_Aw_
  48        short   0x0000
  49        bra.l   _060LSP__cmp2_Al_
  50        short   0x0000
  51        bra.l   _060LSP__cmp2_Db_
  52        short   0x0000
  53        bra.l   _060LSP__cmp2_Dw_
  54        short   0x0000
  55        bra.l   _060LSP__cmp2_Dl_
  56        short   0x0000
  57
  58# leave room for future possible additions.
  59        align   0x200
  60
  61#########################################################################
  62# XDEF **************************************************************** #
  63#       _060LSP__idivu64_(): Emulate 64-bit unsigned div instruction.   #
  64#       _060LSP__idivs64_(): Emulate 64-bit signed div instruction.     #
  65#                                                                       #
  66#       This is the library version which is accessed as a subroutine   #
  67#       and therefore does not work exactly like the 680X0 div{s,u}.l   #
  68#       64-bit divide instruction.                                      #
  69#                                                                       #
  70# XREF **************************************************************** #
  71#       None.                                                           #
  72#                                                                       #
  73# INPUT *************************************************************** #
  74#       0x4(sp)  = divisor                                              #
  75#       0x8(sp)  = hi(dividend)                                         #
  76#       0xc(sp)  = lo(dividend)                                         #
  77#       0x10(sp) = pointer to location to place quotient/remainder      #
  78#                                                                       #
  79# OUTPUT ************************************************************** #
  80#       0x10(sp) = points to location of remainder/quotient.            #
  81#                  remainder is in first longword, quotient is in 2nd.  #
  82#                                                                       #
  83# ALGORITHM *********************************************************** #
  84#       If the operands are signed, make them unsigned and save the     #
  85# sign info for later. Separate out special cases like divide-by-zero   #
  86# or 32-bit divides if possible. Else, use a special math algorithm     #
  87# to calculate the result.                                              #
  88#       Restore sign info if signed instruction. Set the condition      #
  89# codes before performing the final "rts". If the divisor was equal to  #
  90# zero, then perform a divide-by-zero using a 16-bit implemented        #
  91# divide instruction. This way, the operating system can record that    #
  92# the event occurred even though it may not point to the correct place. #
  93#                                                                       #
  94#########################################################################
  95
  96set     POSNEG,         -1              # byte: 0xff = signed (st), 0x00 = unsigned (sf)
  97set     NDIVISOR,       -2              # byte: set by slt when the divisor is negative
  98set     NDIVIDEND,      -3              # byte: set by slt when the dividend is negative
  99set     DDSECOND,       -4              # byte: non-zero once the first quotient word is stored
 100set     DDNORMAL,       -8              # long: number of normalization shifts applied
 101set     DDQUOTIENT,     -12             # long: quotient, written one word at a time
 102set     DIV64_CC,       -16             # word: condition codes saved at entry
 103
 104##########
 105# divs.l #
 106##########
 107        global          _060LSP__idivs64_
    # Signed divide entry point: builds the frame, saves the caller's
    # condition codes, marks the operation as signed and joins the code
    # shared with the unsigned entry at ldiv64_cont.
 108_060LSP__idivs64_:
 109# PROLOGUE BEGIN ########################################################
 110        link.w          %a6,&-16                # 16 bytes of locals, addressed via the set offsets
 111        movm.l          &0x3f00,-(%sp)          # save d2-d7
 112#       fmovm.l         &0x0,-(%sp)             # save no fpregs
 113# PROLOGUE END ##########################################################
 114
 115        mov.w           %cc,DIV64_CC(%a6)       # save incoming ccodes
 116        st              POSNEG(%a6)             # signed operation
 117        bra.b           ldiv64_cont             # continue in the shared divide code
 118
 119##########
 120# divu.l #
 121##########
 122        global          _060LSP__idivu64_
    # Unsigned divide entry point: same prologue as the signed entry, but
    # clears POSNEG and then falls through into ldiv64_cont.
 123_060LSP__idivu64_:
 124# PROLOGUE BEGIN ########################################################
 125        link.w          %a6,&-16                # 16 bytes of locals, addressed via the set offsets
 126        movm.l          &0x3f00,-(%sp)          # save d2-d7
 127#       fmovm.l         &0x0,-(%sp)             # save no fpregs
 128# PROLOGUE END ##########################################################
 129
 130        mov.w           %cc,DIV64_CC(%a6)       # save incoming ccodes
 131        sf              POSNEG(%a6)             # unsigned operation
 132
 133ldiv64_cont:
    # Common divide path. Loads divisor (%d7) and dividend (%d5:%d6) from
    # the caller's arguments, strips the signs for a signed divide, and
    # handles the cheap cases (zero divisor, zero dividend, dividend that
    # fits in 32 bits) before the general algorithm is needed.
 134        mov.l           0x8(%a6),%d7            # fetch divisor
 135
 136        beq.w           ldiv64eq0               # divisor is = 0!!!
 137
 138        mov.l           0xc(%a6), %d5           # get dividend hi
 139        mov.l           0x10(%a6), %d6          # get dividend lo
 140
 141# separate signed and unsigned divide
 142        tst.b           POSNEG(%a6)             # signed or unsigned?
 143        beq.b           ldspecialcases          # use positive divide
 144
 145# save the sign of the divisor
 146# make divisor unsigned if it's negative
 147        tst.l           %d7                     # chk sign of divisor
 148        slt             NDIVISOR(%a6)           # save sign of divisor
 149        bpl.b           ldsgndividend           # relies on slt leaving the tst.l ccodes intact
 150        neg.l           %d7                     # complement negative divisor
 151
 152# save the sign of the dividend
 153# make dividend unsigned if it's negative
 154ldsgndividend:
 155        tst.l           %d5                     # chk sign of hi(dividend)
 156        slt             NDIVIDEND(%a6)          # save sign of dividend
 157        bpl.b           ldspecialcases          # relies on slt leaving the tst.l ccodes intact
 158
 159        mov.w           &0x0, %cc               # clear 'X' cc bit
 160        negx.l          %d6                     # complement signed dividend
 161        negx.l          %d5                     # hi half takes the borrow from the lo half via 'X'
 162
 163# extract some special cases:
 164#       - is (dividend == 0) ?
 165#       - is (hi(dividend) == 0 && (divisor <= lo(dividend))) ? (32-bit div)
 166ldspecialcases:
 167        tst.l           %d5                     # is (hi(dividend) == 0)
 168        bne.b           ldnormaldivide          # no, so try it the long way
 169
 170        tst.l           %d6                     # is (lo(dividend) == 0), too
 171        beq.w           lddone                  # yes, so (dividend == 0)
 172
 173        cmp.l           %d7,%d6                 # is (divisor <= lo(dividend))
 174        bls.b           ld32bitdivide           # yes, so use 32 bit divide
 175
 176        exg             %d5,%d6                 # q = 0, r = dividend
 177        bra.w           ldivfinish              # can't divide, we're done.
 178
 179ld32bitdivide:
 180        tdivu.l         %d7, %d5:%d6            # it's only a 32/32 bit div!
 181
 182        bra.b           ldivfinish              # remainder in %d5, quotient in %d6
 183
 184ldnormaldivide:
    # General case: reject quotients that cannot fit in 32 bits, run the
    # divide on the unsigned operands, then (signed divide only) put the
    # signs back on remainder and quotient and range-check the quotient.
 185# last special case:
 186#       - is hi(dividend) >= divisor ? if yes, then overflow
 187        cmp.l           %d7,%d5                 # is (divisor <= hi(dividend))
 188        bls.b           lddovf                  # answer won't fit in 32 bits
 189
 190# perform the divide algorithm:
 191        bsr.l           ldclassical             # do int divide
 192
 193# separate into signed and unsigned finishes.
 194ldivfinish:
 195        tst.b           POSNEG(%a6)             # do divs, divu separately
 196        beq.b           lddone                  # divu has no processing!!!
 197
 198# it was a divs.l, so ccode setting is a little more complicated...
 199        tst.b           NDIVIDEND(%a6)          # remainder has same sign
 200        beq.b           ldcc                    # as dividend.
 201        neg.l           %d5                     # sgn(rem) = sgn(dividend)
 202ldcc:
 203        mov.b           NDIVISOR(%a6), %d0      # %d0 is used as scratch here
 204        eor.b           %d0, NDIVIDEND(%a6)     # chk if quotient is negative
 205        beq.b           ldqpos                  # branch to quot positive
 206
 207# 0x80000000 is the largest number representable as a 32-bit negative
 208# number. the negative of 0x80000000 is 0x80000000.
 209        cmpi.l          %d6, &0x80000000        # will (-quot) fit in 32 bits?
 210        bhi.b           lddovf                  # magnitude above 0x80000000: overflow
 211
 212        neg.l           %d6                     # make (-quot) 2's comp
 213
 214        bra.b           lddone
 215
 216ldqpos:
 217        btst            &0x1f, %d6              # will (+quot) fit in 32 bits?
 218        bne.b           lddovf                  # bit 31 set: too big for a positive result
 219
 220lddone:
    # Normal exit: rebuild the condition codes from the saved 'X' bit plus
    # the state of the quotient, store the result and return.
 221# if the register numbers are the same, only the quotient gets saved.
 222# so, if we always save the quotient second, we save ourselves a cmp&beq
 223        andi.w          &0x10,DIV64_CC(%a6)     # keep only the caller's 'X' bit
 224        mov.w           DIV64_CC(%a6),%cc
 225        tst.l           %d6                     # may set 'N' ccode bit
 226
 227# here, the remainder is in d5 and the quotient is in d6. both longwords
 228# are stored at the location addressed by the pointer at 0x14(%a6).
 229# use movm here to not disturb the condition codes.
 230ldexit:
 231        movm.l          &0x0060,([0x14,%a6])    # save result
 232
 233# EPILOGUE BEGIN ########################################################
 234#       fmovm.l         (%sp)+,&0x0             # restore no fpregs
 235        movm.l          (%sp)+,&0x00fc          # restore d2-d7
 236        unlk            %a6
 237# EPILOGUE END ##########################################################
 238
 239        rts
 240
 241# the result should be the unchanged dividend
 242lddovf:
    # Overflow exit: reload the caller's original dividend so it is what
    # gets stored by ldexit, and hand back the saved condition codes with
    # 'V' set and 'C' cleared.
 243        mov.l           0xc(%a6), %d5           # get dividend hi
 244        mov.l           0x10(%a6), %d6          # get dividend lo
 245
 246        andi.w          &0x1c,DIV64_CC(%a6)     # keep the saved 'X', 'N' and 'Z' bits
 247        ori.w           &0x02,DIV64_CC(%a6)     # set 'V' ccode bit
 248        mov.w           DIV64_CC(%a6),%cc
 249
 250        bra.b           ldexit                  # store %d5/%d6 and return
 251
 252ldiv64eq0:
    # Divide-by-zero exit: copy the unmodified dividend to the result
    # location, restore the caller's condition codes and registers, then
    # execute a real divide by zero so the exception is raised.
 253        mov.l           0xc(%a6),([0x14,%a6])           # result[0] = hi(dividend)
 254        mov.l           0x10(%a6),([0x14,%a6],0x4)      # result[1] = lo(dividend)
 255
 256        mov.w           DIV64_CC(%a6),%cc       # restore incoming ccodes
 257
 258# EPILOGUE BEGIN ########################################################
 259#       fmovm.l         (%sp)+,&0x0             # restore no fpregs
 260        movm.l          (%sp)+,&0x00fc          # restore d2-d7
 261        unlk            %a6
 262# EPILOGUE END ##########################################################
 263
 264        divu.w          &0x0,%d0                # force a divbyzero exception
 265        rts
 266
 267###########################################################################
 268#########################################################################
 269# This routine uses the 'classical' Algorithm D from Donald Knuth's     #
 270# Art of Computer Programming, vol II, Seminumerical Algorithms.        #
 271# For this implementation b=2**16, and the target is U1U2U3U4/V1V2,     #
 272# where U,V are words of the quadword dividend and longword divisor,    #
 273# and U1, V1 are the most significant words.                            #
 274#                                                                       #
 275# The most sig. longword of the 64 bit dividend must be in %d5, least   #
 276# in %d6. The divisor must be in %d7 (already made unsigned by the      #
 277# caller for signed divides; this routine works on magnitudes only).    #
 278# The quotient is returned in %d6, remainder in %d5, unless the         #
 279# v (overflow) bit is set in the saved %ccr. If overflow, the dividend  #
 280# is unchanged.                                                         #
 281#########################################################################
 282ldclassical:
    # Unsigned divide of %d5:%d6 by %d7. Returns remainder in %d5 and
    # quotient in %d6. Divisors that fit in 16 bits are handled here with
    # two divu.w steps; larger divisors go to lddknuth.
 283# if the divisor msw is 0, use simpler algorithm than the full blown
 284# one at lddknuth:
 285
 286        cmpi.l          %d7, &0xffff
 287        bhi.b           lddknuth                # go use D. Knuth algorithm
 288
 289# Since the divisor is only a word (and larger than the mslw of the dividend),
 290# a simpler algorithm may be used :
 291# In the general case, four quotient words would be created by
 292# dividing the divisor word into each dividend word. In this case,
 293# the first two quotient words must be zero, or overflow would occur.
 294# Since we already checked this case above, we can treat the most significant
 295# longword of the dividend as (0) remainder (see Knuth) and merely complete
 296# the last two divisions to get a quotient longword and word remainder:
 297
 298        clr.l           %d1                     # %d1 collects the two quotient words
 299        swap            %d5                     # same as r*b if previous step rqd
 300        swap            %d6                     # get u3 to lsw position
 301        mov.w           %d6, %d5                # rb + u3
 302
 303        divu.w          %d7, %d5                # %d5 = remainder:quotient (msw:lsw)
 304
 305        mov.w           %d5, %d1                # first quotient word
 306        swap            %d6                     # get u4
 307        mov.w           %d6, %d5                # rb + u4
 308
 309        divu.w          %d7, %d5                # %d5 = remainder:quotient (msw:lsw)
 310
 311        swap            %d1                     # first quotient word becomes the msw
 312        mov.w           %d5, %d1                # 2nd quotient 'digit'
 313        clr.w           %d5
 314        swap            %d5                     # now remainder
 315        mov.l           %d1, %d6                # and quotient
 316
 317        rts
 318
 319lddknuth:
    # Entry for divisors wider than 16 bits. Clears the per-divide state,
    # then shifts divisor and 64-bit dividend left together until bit 31
    # of the divisor is set, counting the shifts in DDNORMAL.
 320# In this algorithm, the divisor is treated as a 2 digit (word) number
 321# which is divided into a 3 digit (word) dividend to get one quotient
 322# digit (word). After subtraction, the dividend is shifted and the
 323# process repeated. Before beginning, the divisor and quotient are
 324# 'normalized' so that the process of estimating the quotient digit
 325# will yield verifiably correct results..
 326
 327        clr.l           DDNORMAL(%a6)           # count of shifts for normalization
 328        clr.b           DDSECOND(%a6)           # clear flag for quotient digits
 329        clr.l           %d1                     # %d1 will hold trial quotient
 330lddnchk:
 331        btst            &31, %d7                # must we normalize? first word of
 332        bne.b           lddnormalized           # divisor (V1) must be >= 65536/2
 333        addq.l          &0x1, DDNORMAL(%a6)     # count normalization shifts
 334        lsl.l           &0x1, %d7               # shift the divisor
 335        lsl.l           &0x1, %d6               # shift u4,u3 with overflow to u2
 336        roxl.l          &0x1, %d5               # shift u1,u2
 337        bra.w           lddnchk                 # re-test bit 31 of the divisor
 338lddnormalized:
    # Estimate one quotient word in %d1 (run once per quotient word) and
    # lower it while V2*q exceeds the partial remainder extended with U3.
    # The low dividend longword is parked on the stack across this step.
 339
 340# Now calculate an estimate of the quotient words (msw first, then lsw).
 341# The comments use subscripts for the first quotient digit determination.
 342        mov.l           %d7, %d3                # divisor
 343        mov.l           %d5, %d2                # dividend mslw
 344        swap            %d2                     # U1 now in lsw of %d2
 345        swap            %d3                     # V1 now in lsw of %d3
 346        cmp.w           %d2, %d3                # V1 = U1 ?
 347        bne.b           lddqcalc1
 348        mov.w           &0xffff, %d1            # use max trial quotient word
 349        bra.b           lddadj0
 350lddqcalc1:
 351        mov.l           %d5, %d1
 352
 353        divu.w          %d3, %d1                # use quotient of mslw/msw
 354
 355        andi.l          &0x0000ffff, %d1        # zero any remainder
 356lddadj0:
 357
 358# now test the trial quotient and adjust. This step plus the
 359# normalization assures (according to Knuth) that the trial
 360# quotient will be at worst 1 too large.
 361        mov.l           %d6, -(%sp)             # save dividend lslw (popped again after ldmm2)
 362        clr.w           %d6                     # word u3 left
 363        swap            %d6                     # in lsw position
 364lddadj1: mov.l          %d7, %d3
 365        mov.l           %d1, %d2
 366        mulu.w          %d7, %d2                # V2q
 367        swap            %d3
 368        mulu.w          %d1, %d3                # V1q
 369        mov.l           %d5, %d4                # U1U2
 370        sub.l           %d3, %d4                # U1U2 - V1q
 371
 372        swap            %d4
 373
 374        mov.w           %d4,%d0                 # %d0 = upper word of (U1U2 - V1q)
 375        mov.w           %d6,%d4                 # insert lower word (U3)
 376
 377        tst.w           %d0                     # is upper word set?
 378        bne.w           lddadjd1                # yes: skip the V2q comparison
 379
 380#       add.l           %d6, %d4                # (U1U2 - V1q) + U3
 381
 382        cmp.l           %d2, %d4
 383        bls.b           lddadjd1                # is V2q > (U1U2-V1q) + U3 ?
 384        subq.l          &0x1, %d1               # yes, decrement and recheck
 385        bra.b           lddadj1
 386lddadjd1:
    # Multiply the trial quotient word by the divisor and subtract the
    # product from the dividend. If the subtraction borrows, the trial
    # word was one too large: decrement it and add the divisor back.
 387# now test the word by multiplying it by the divisor (V1V2) and comparing
 388# the 3 digit (word) result with the current dividend words
 389        mov.l           %d5, -(%sp)             # save %d5 (%d6 already saved)
 390        mov.l           %d1, %d6
 391        swap            %d6                     # shift answer to ms 3 words
 392        mov.l           %d7, %d5
 393        bsr.l           ldmm2                   # %d5:%d6 = divisor * (trial word << 16)
 394        mov.l           %d5, %d2                # now %d2,%d3 are trial*divisor
 395        mov.l           %d6, %d3
 396        mov.l           (%sp)+, %d5             # restore dividend
 397        mov.l           (%sp)+, %d6
 398        sub.l           %d3, %d6
 399        subx.l          %d2, %d5                # subtract double precision
 400        bcc             ldd2nd                  # no carry, do next quotient digit
 401        subq.l          &0x1, %d1               # q is one too large
 402# need to add back divisor longword to current ms 3 digits of dividend
 403# - according to Knuth, this is done only 2 out of 65536 times for random
 404# divisor, dividend selection.
 405        clr.l           %d2                     # zero operand for the addx below
 406        mov.l           %d7, %d3
 407        swap            %d3
 408        clr.w           %d3                     # %d3 now ls word of divisor
 409        add.l           %d3, %d6                # aligned with 3rd word of dividend
 410        addx.l          %d2, %d5                # carry from the low longword into the high one
 411        mov.l           %d7, %d3
 412        clr.w           %d3                     # %d3 now ms word of divisor
 413        swap            %d3                     # aligned with 2nd word of dividend
 414        add.l           %d3, %d5
 415ldd2nd:
    # A quotient word is now final. After the first one: store it, shift
    # the dividend up one word and loop for the second. After the second:
    # store it, undo the normalization shifts on the remainder and return
    # with remainder in %d5 and quotient in %d6.
 416        tst.b           DDSECOND(%a6)   # both q words done?
 417        bne.b           lddremain
 418# first quotient digit now correct. store digit and shift the
 419# (subtracted) dividend
 420        mov.w           %d1, DDQUOTIENT(%a6)    # upper word of the quotient
 421        clr.l           %d1
 422        swap            %d5
 423        swap            %d6
 424        mov.w           %d6, %d5
 425        clr.w           %d6
 426        st              DDSECOND(%a6)           # second digit
 427        bra.w           lddnormalized
 428lddremain:
 429# add 2nd word to quotient, get the remainder.
 430        mov.w           %d1, DDQUOTIENT+2(%a6)  # lower word of the quotient
 431# shift down one word/digit to renormalize remainder.
 432        mov.w           %d5, %d6
 433        swap            %d6
 434        swap            %d5
 435        mov.l           DDNORMAL(%a6), %d7      # get norm shift count
 436        beq.b           lddrn                   # no shifts were applied, nothing to undo
 437        subq.l          &0x1, %d7               # set for loop count
 438lddnlp:
 439        lsr.l           &0x1, %d5               # shift into %d6
 440        roxr.l          &0x1, %d6
 441        dbf             %d7, lddnlp
 442lddrn:
 443        mov.l           %d6, %d5                # remainder
 444        mov.l           DDQUOTIENT(%a6), %d6    # quotient
 445
 446        rts
 447ldmm2:
 448# factors for the 32X32->64 multiplication are in %d5 and %d6.
 449# returns 64 bit result in %d5 (hi) %d6(lo).
 450# destroys %d2,%d3,%d4.
    # The product is assembled from four 16x16->32 unsigned multiplies.
    # The two carries out of the middle word are added to the msw*msw
    # product with long-sized addx: that product is a full longword, so a
    # word-sized add would drop a carry out of its bit 15. This matches
    # the unsigned multiply routine (mulu64_alg) in this file.
 451
 452# multiply hi,lo words of each factor to get 4 intermediate products
 453        mov.l           %d6, %d2
 454        mov.l           %d6, %d3
 455        mov.l           %d5, %d4
 456        swap            %d3
 457        swap            %d4
 458        mulu.w          %d5, %d6                # %d6 <- lsw*lsw
 459        mulu.w          %d3, %d5                # %d5 <- msw-dest*lsw-source
 460        mulu.w          %d4, %d2                # %d2 <- msw-source*lsw-dest
 461        mulu.w          %d4, %d3                # %d3 <- msw*msw
 462# now use swap and addx to consolidate to two longwords
 463        clr.l           %d4                     # zero operand so addx adds only the carry
 464        swap            %d6
 465        add.w           %d5, %d6                # add msw of l*l to lsw of m*l product
 466        addx.l          %d4, %d3                # add any carry to m*m product
 467        add.w           %d2, %d6                # add in lsw of other m*l product
 468        addx.l          %d4, %d3                # add any carry to m*m product
 469        swap            %d6                     # %d6 is low 32 bits of final product
 470        clr.w           %d5
 471        clr.w           %d2                     # lsw of two mixed products used,
 472        swap            %d5                     # now use msws of longwords
 473        swap            %d2
 474        add.l           %d2, %d5
 475        add.l           %d3, %d5        # %d5 now ms 32 bits of final product
 476        rts
 477
 478#########################################################################
 479# XDEF **************************************************************** #
 480#       _060LSP__imulu64_(): Emulate 64-bit unsigned mul instruction    #
 481#       _060LSP__imuls64_(): Emulate 64-bit signed mul instruction.     #
 482#                                                                       #
 483#       This is the library version which is accessed as a subroutine   #
 484#       and therefore does not work exactly like the 680X0 mul{s,u}.l   #
 485#       64-bit multiply instruction.                                    #
 486#                                                                       #
 487# XREF **************************************************************** #
 488#       None                                                            #
 489#                                                                       #
 490# INPUT *************************************************************** #
 491#       0x4(sp) = multiplier                                            #
 492#       0x8(sp) = multiplicand                                          #
 493#       0xc(sp) = pointer to location to place 64-bit result            #
 494#                                                                       #
 495# OUTPUT ************************************************************** #
 496#       0xc(sp) = points to location of 64-bit result                   #
 497#                                                                       #
 498# ALGORITHM *********************************************************** #
 499#       Perform the multiply in pieces using 16x16->32 unsigned         #
 500# multiplies and "add" instructions.                                    #
 501#       Set the condition codes as appropriate before performing an     #
 502# "rts".                                                                #
 503#                                                                       #
 504#########################################################################
 505
 506set MUL64_CC, -4                        # word at -4(%a6): condition codes saved at entry
 507
 508        global          _060LSP__imulu64_
    # Unsigned multiply entry point: builds the frame, saves the caller's
    # condition codes and loads both operands. A zero operand branches to
    # mulu64_zero; otherwise execution falls through to mulu64_alg.
 509_060LSP__imulu64_:
 510
 511# PROLOGUE BEGIN ########################################################
 512        link.w          %a6,&-4
 513        movm.l          &0x3800,-(%sp)          # save d2-d4
 514#       fmovm.l         &0x0,-(%sp)             # save no fpregs
 515# PROLOGUE END ##########################################################
 516
 517        mov.w           %cc,MUL64_CC(%a6)       # save incoming ccodes
 518
 519        mov.l           0x8(%a6),%d0            # store multiplier in d0
 520        beq.w           mulu64_zero             # handle zero separately
 521
 522        mov.l           0xc(%a6),%d1            # get multiplicand in d1
 523        beq.w           mulu64_zero             # handle zero separately
 524
 525#########################################################################
 526#       63                         32                           0       #
 527#       ----------------------------                                    #
 528#       | hi(mplier) * hi(mplicand)|                                    #
 529#       ----------------------------                                    #
 530#                    -----------------------------                      #
 531#                    | hi(mplier) * lo(mplicand) |                      #
 532#                    -----------------------------                      #
 533#                    -----------------------------                      #
 534#                    | lo(mplier) * hi(mplicand) |                      #
 535#                    -----------------------------                      #
 536#         |                        -----------------------------        #
 537#       --|--                      | lo(mplier) * lo(mplicand) |        #
 538#         |                        -----------------------------        #
 539#       ========================================================        #
 540#       --------------------------------------------------------        #
 541#       |       hi(result)         |        lo(result)         |        #
 542#       --------------------------------------------------------        #
 543#########################################################################
 544mulu64_alg:
    # Unsigned 32x32->64 multiply of %d0 (multiplier) by %d1 (multiplicand)
    # built from four 16x16->32 multiplies. Sets the condition codes from
    # the result, stores it through the pointer at 0x10(%a6) and returns.
 545# load temp registers with operands
 546        mov.l           %d0,%d2                 # mr in d2
 547        mov.l           %d0,%d3                 # mr in d3
 548        mov.l           %d1,%d4                 # md in d4
 549        swap            %d3                     # hi(mr) in lo d3
 550        swap            %d4                     # hi(md) in lo d4
 551
 552# complete necessary multiplies:
 553        mulu.w          %d1,%d0                 # [1] lo(mr) * lo(md)
 554        mulu.w          %d3,%d1                 # [2] hi(mr) * lo(md)
 555        mulu.w          %d4,%d2                 # [3] lo(mr) * hi(md)
 556        mulu.w          %d4,%d3                 # [4] hi(mr) * hi(md)
 557
 558# add lo portions of [2],[3] to hi portion of [1].
 559# add carries produced from these adds to [4].
 560# lo([1]) is the final lo 16 bits of the result.
 561        clr.l           %d4                     # load d4 w/ zero value
 562        swap            %d0                     # hi([1]) <==> lo([1])
 563        add.w           %d1,%d0                 # hi([1]) + lo([2])
 564        addx.l          %d4,%d3                 #    [4]  + carry
 565        add.w           %d2,%d0                 # hi([1]) + lo([3])
 566        addx.l          %d4,%d3                 #    [4]  + carry
 567        swap            %d0                     # lo([1]) <==> hi([1])
 568
 569# lo portions of [2],[3] have been added in to final result.
 570# now, clear lo, put hi in lo reg, and add to [4]
 571        clr.w           %d1                     # clear lo([2])
 572        clr.w           %d2                     # clear lo([3])
 573        swap            %d1                     # hi([2]) in lo d1
 574        swap            %d2                     # hi([3]) in lo d2
 575        add.l           %d2,%d1                 # hi([2]) + hi([3])
 576        add.l           %d3,%d1                 #    [4]  + hi([2]) + hi([3])
 577
 578# now, grab the condition codes. only one that can be set is 'N'.
 579# 'N' CAN be set if the operation is unsigned if bit 63 is set.
 580        mov.w           MUL64_CC(%a6),%d4
 581        andi.b          &0x10,%d4               # keep old 'X' bit
 582        tst.l           %d1                     # may set 'N' bit
 583        bpl.b           mulu64_ddone
 584        ori.b           &0x8,%d4                # set 'N' bit
 585mulu64_ddone:
 586        mov.w           %d4,%cc
 587
 588# here, the result is in d1 (hi) and d0 (lo). after the exg below, the hi
 589# longword is stored first and the lo longword second at the location
 590# addressed by the pointer at 0x10(%a6).
    # exg and movm are used here to not disturb the condition codes.
 591mulu64_end:
 592        exg             %d1,%d0
 593        movm.l          &0x0003,([0x10,%a6])            # save result
 594
 595# EPILOGUE BEGIN ########################################################
 596#       fmovm.l         (%sp)+,&0x0             # restore no fpregs
 597        movm.l          (%sp)+,&0x001c          # restore d2-d4
 598        unlk            %a6
 599# EPILOGUE END ##########################################################
 600
 601        rts
 602
 603# one or both of the operands is zero so the result is also zero.
 604# save the zero result to the register file and set the 'Z' ccode bit.
 605mulu64_zero:
    # Zero-operand shortcut used by both multiply entry points: the
    # product is zero, so clear both result registers, keep the caller's
    # 'X' bit, set 'Z' and leave through the common store/return code.
 606        clr.l           %d0
 607        clr.l           %d1
 608
 609        mov.w           MUL64_CC(%a6),%d4
 610        andi.b          &0x10,%d4               # keep old 'X' bit
 611        ori.b           &0x4,%d4
 612        mov.w           %d4,%cc                 # set 'Z' ccode bit
 613
 614        bra.b           mulu64_end              # store the zero result and return
 615
 616##########
 617# muls.l #
 618##########
    #
    # _060LSP__imuls64_(): signed 32 x 32 -> 64 bit multiply, callable as a
    # subroutine.
    #
    # Stack arguments as read below (offsets are relative to a6 after link):
    #   0x08(%a6) = multiplier
    #   0x0c(%a6) = multiplicand
    #   0x10(%a6) = pointer through which the two result longwords are stored
    #
    # Method: record the sign of the product in d5, make both operands
    # positive, build the unsigned 64-bit product from four 16 x 16 partial
    # products, then negate the product if the operand signs differed.
    #
    # Condition codes on return: 'X' is whatever it was on entry, 'N' follows
    # the sign of hi(result), 'Z' is set only on the zero-operand path, and
    # 'V'/'C' are cleared.
    #
    # d2-d5 are saved and restored; d0/d1 carry the result and are not
    # preserved. MUL64_CC is defined outside this section of the file; it is
    # used here as the frame offset where the incoming ccodes are parked.
    #
 619        global          _060LSP__imuls64_
 620_060LSP__imuls64_:
 621
 622# PROLOGUE BEGIN ########################################################
 623        link.w          %a6,&-4
 624        movm.l          &0x3c00,-(%sp)          # save d2-d5
 625#       fmovm.l         &0x0,-(%sp)             # save no fpregs
 626# PROLOGUE END ##########################################################
 627
 628        mov.w           %cc,MUL64_CC(%a6)       # save incoming ccodes
 629
    # NOTE(review): both zero checks below branch to mulu64_zero, the zero
    # handler of the unsigned routine above, rather than to muls64_zero at
    # the end of this routine. That path restores only d2-d4; d5 has not
    # been written yet at this point, so nothing is lost.
 630        mov.l           0x8(%a6),%d0            # store multiplier in d0
 631        beq.b           mulu64_zero             # handle zero separately
 632
 633        mov.l           0xc(%a6),%d1            # get multiplicand in d1
 634        beq.b           mulu64_zero             # handle zero separately
 635
    # d5.b is the sign tag: 1 means the final product must be negated.
 636        clr.b           %d5                     # clear sign tag
 637        tst.l           %d0                     # is multiplier negative?
 638        bge.b           muls64_chk_md_sgn       # no
 639        neg.l           %d0                     # make multiplier positive
 640
 641        ori.b           &0x1,%d5                # save multiplier sgn
 642
 643# the result sign is the exclusive or of the operand sign bits.
 644muls64_chk_md_sgn:
 645        tst.l           %d1                     # is multiplicand negative?
 646        bge.b           muls64_alg              # no
 647        neg.l           %d1                     # make multiplicand positive
 648
 649        eori.b          &0x1,%d5                # calculate correct sign
 650
 651#########################################################################
 652#       63                         32                           0       #
 653#       ----------------------------                                    #
 654#       | hi(mplier) * hi(mplicand)|                                    #
 655#       ----------------------------                                    #
 656#                    -----------------------------                      #
 657#                    | hi(mplier) * lo(mplicand) |                      #
 658#                    -----------------------------                      #
 659#                    -----------------------------                      #
 660#                    | lo(mplier) * hi(mplicand) |                      #
 661#                    -----------------------------                      #
 662#         |                        -----------------------------        #
 663#       --|--                      | lo(mplier) * lo(mplicand) |        #
 664#         |                        -----------------------------        #
 665#       ========================================================        #
 666#       --------------------------------------------------------        #
 667#       |       hi(result)         |        lo(result)         |        #
 668#       --------------------------------------------------------        #
 669#########################################################################
 670muls64_alg:
 671# load temp registers with operands
 672        mov.l           %d0,%d2                 # mr in d2
 673        mov.l           %d0,%d3                 # mr in d3
 674        mov.l           %d1,%d4                 # md in d4
 675        swap            %d3                     # hi(mr) in lo d3
 676        swap            %d4                     # hi(md) in lo d4
 677
 678# complete necessary multiplies:
 679        mulu.w          %d1,%d0                 # [1] lo(mr) * lo(md)
 680        mulu.w          %d3,%d1                 # [2] hi(mr) * lo(md)
 681        mulu.w          %d4,%d2                 # [3] lo(mr) * hi(md)
 682        mulu.w          %d4,%d3                 # [4] hi(mr) * hi(md)
 683
 684# add lo portions of [2],[3] to hi portion of [1].
 685# add carries produced from these adds to [4].
 686# lo([1]) is the final lo 16 bits of the result.
    # d4 is zeroed here and not written again until muls64_done, so every
    # addx that uses d4 as its source adds only the carry held in 'X'.
 687        clr.l           %d4                     # load d4 w/ zero value
 688        swap            %d0                     # hi([1]) <==> lo([1])
 689        add.w           %d1,%d0                 # hi([1]) + lo([2])
 690        addx.l          %d4,%d3                 #    [4]  + carry
 691        add.w           %d2,%d0                 # hi([1]) + lo([3])
 692        addx.l          %d4,%d3                 #    [4]  + carry
 693        swap            %d0                     # lo([1]) <==> hi([1])
 694
 695# lo portions of [2],[3] have been added in to final result.
 696# now, clear lo, put hi in lo reg, and add to [4]
 697        clr.w           %d1                     # clear lo([2])
 698        clr.w           %d2                     # clear lo([3])
 699        swap            %d1                     # hi([2]) in lo d1
 700        swap            %d2                     # hi([3]) in lo d2
 701        add.l           %d2,%d1                 # hi([2]) + hi([3])
 702        add.l           %d3,%d1                 #  ... + [4] = hi(result)
 703
 704        tst.b           %d5                     # should result be negative?
 705        beq.b           muls64_done             # no
 706
 707# result should be a signed negative number.
 708# compute 2's complement of the unsigned number:
 709#   -negate all bits and add 1
 710muls64_neg:
 711        not.l           %d0                     # negate lo(result) bits
 712        not.l           %d1                     # negate hi(result) bits
 713        addq.l          &1,%d0                  # add 1 to lo(result)
 714        addx.l          %d4,%d1                 # add carry to hi(result)
 715
    # rebuild the ccodes: start from the saved entry value, keep only 'X',
    # and set 'N' when hi(result) is negative.
 716muls64_done:
 717        mov.w           MUL64_CC(%a6),%d4
 718        andi.b          &0x10,%d4               # keep old 'X' bit
 719        tst.l           %d1                     # may set 'N' bit
 720        bpl.b           muls64_ddone
 721        ori.b           &0x8,%d4                # set 'N' bit
 722muls64_ddone:
 723        mov.w           %d4,%cc
 724
 725# here, hi(result) is in d1 and lo(result) is in d0. exchange them and
 726# store the pair through the pointer held at 0x10(%a6).
 727# use movm here to not disturb the condition codes.
 728muls64_end:
 729        exg             %d1,%d0
 730        movm.l          &0x0003,([0x10,%a6])    # save d0/d1 via result ptr
 731
 732# EPILOGUE BEGIN ########################################################
 733#       fmovm.l         (%sp)+,&0x0             # restore no fpregs
 734        movm.l          (%sp)+,&0x003c          # restore d2-d5
 735        unlk            %a6
 736# EPILOGUE END ##########################################################
 737
 738        rts
 739
 740# one or both of the operands is zero so the result is also zero.
 741# store the zero result through the result pointer and set the 'Z' ccode bit.
    # NOTE(review): no branch to muls64_zero is visible in this section; the
    # zero checks at the top of the routine go to mulu64_zero instead.
 742muls64_zero:
 743        clr.l           %d0
 744        clr.l           %d1
 745
 746        mov.w           MUL64_CC(%a6),%d4
 747        andi.b          &0x10,%d4
 748        ori.b           &0x4,%d4
 749        mov.w           %d4,%cc                 # set 'Z' ccode bit
 750
 751        bra.b           muls64_end
 752
 753#########################################################################
 754# XDEF **************************************************************** #
 755#       _060LSP__cmp2_Ab_(): Emulate "cmp2.b An,<ea>".                  #
 756#       _060LSP__cmp2_Aw_(): Emulate "cmp2.w An,<ea>".                  #
 757#       _060LSP__cmp2_Al_(): Emulate "cmp2.l An,<ea>".                  #
 758#       _060LSP__cmp2_Db_(): Emulate "cmp2.b Dn,<ea>".                  #
 759#       _060LSP__cmp2_Dw_(): Emulate "cmp2.w Dn,<ea>".                  #
 760#       _060LSP__cmp2_Dl_(): Emulate "cmp2.l Dn,<ea>".                  #
 761#                                                                       #
 762#       This is the library version which is accessed as a subroutine   #
 763#       and therefore does not work exactly like the 680X0 "cmp2"       #
 764#       instruction.                                                    #
 765#                                                                       #
 766# XREF **************************************************************** #
 767#       None                                                            #
 768#                                                                       #
 769# INPUT *************************************************************** #
 770#       0x4(sp) = Rn                                                    #
 771#       0x8(sp) = pointer to boundary pair                              #
 772#                                                                       #
 773# OUTPUT ************************************************************** #
 774#       cc = condition codes are set correctly                          #
 775#                                                                       #
 776# ALGORITHM *********************************************************** #
 777#       In the interest of simplicity, all operands are converted to    #
 778# longword size whether the operation is byte, word, or long. The       #
 779# bounds are sign extended accordingly. If Rn is a data register, Rn is #
 780# also sign extended. If Rn is an address register, it need not be sign #
 781# extended since the full register is always used.                      #
 782#       The condition codes are set correctly before the final "rts".   #
 783#                                                                       #
 784#########################################################################
 785
 786set     CMP2_CC,        -4              # offset from a6 where ccodes are saved
 787
 788        global          _060LSP__cmp2_Ab_
    #
    # _060LSP__cmp2_Ab_(): byte-sized bounds, address register operand.
    # Reads the register value from 0x8(%a6) and the pointer to the bounds
    # from 0xc(%a6), sign extends the two byte bounds (lo at offset 0, hi at
    # offset 1) to longwords, and branches to l_cmp2_cmp with
    # d0 = lo, d1 = hi, d2 = Rn. The register value is used as a full
    # longword with no extension. d0/d1 are overwritten and not restored.
    #
 789_060LSP__cmp2_Ab_:
 790
 791# PROLOGUE BEGIN ########################################################
 792        link.w          %a6,&-4
 793        movm.l          &0x3800,-(%sp)          # save d2-d4
 794#       fmovm.l         &0x0,-(%sp)             # save no fpregs
 795# PROLOGUE END ##########################################################
 796
 797        mov.w           %cc,CMP2_CC(%a6)        # save incoming ccodes
 798        mov.l           0x8(%a6), %d2           # get regval
 799
 800        mov.b           ([0xc,%a6],0x0),%d0     # lo bnd: 1st byte of pair
 801        mov.b           ([0xc,%a6],0x1),%d1     # hi bnd: 2nd byte of pair
 802
 803        extb.l          %d0                     # sign extend lo bnd
 804        extb.l          %d1                     # sign extend hi bnd
 805        bra.w           l_cmp2_cmp              # go do the compare emulation
 806
 807        global          _060LSP__cmp2_Aw_
    #
    # _060LSP__cmp2_Aw_(): word-sized bounds, address register operand.
    # Reads the register value from 0x8(%a6) and the pointer to the bounds
    # from 0xc(%a6), sign extends the two word bounds (lo at offset 0, hi at
    # offset 2) to longwords, and branches to l_cmp2_cmp with
    # d0 = lo, d1 = hi, d2 = Rn. The register value is used as a full
    # longword with no extension. d0/d1 are overwritten and not restored.
    #
 808_060LSP__cmp2_Aw_:
 809
 810# PROLOGUE BEGIN ########################################################
 811        link.w          %a6,&-4
 812        movm.l          &0x3800,-(%sp)          # save d2-d4
 813#       fmovm.l         &0x0,-(%sp)             # save no fpregs
 814# PROLOGUE END ##########################################################
 815
 816        mov.w           %cc,CMP2_CC(%a6)        # save incoming ccodes
 817        mov.l           0x8(%a6), %d2           # get regval
 818
 819        mov.w           ([0xc,%a6],0x0),%d0     # lo bnd: 1st word of pair
 820        mov.w           ([0xc,%a6],0x2),%d1     # hi bnd: 2nd word of pair
 821
 822        ext.l           %d0                     # sign extend lo bnd
 823        ext.l           %d1                     # sign extend hi bnd
 824        bra.w           l_cmp2_cmp              # go do the compare emulation
 825
 826        global          _060LSP__cmp2_Al_
    #
    # _060LSP__cmp2_Al_(): longword-sized bounds, address register operand.
    # Reads the register value from 0x8(%a6) and the pointer to the bounds
    # from 0xc(%a6), loads the two longword bounds (lo at offset 0, hi at
    # offset 4), and branches to l_cmp2_cmp with d0 = lo, d1 = hi, d2 = Rn.
    # No sign extension is needed at this size. d0/d1 are overwritten and
    # not restored.
    #
 827_060LSP__cmp2_Al_:
 828
 829# PROLOGUE BEGIN ########################################################
 830        link.w          %a6,&-4
 831        movm.l          &0x3800,-(%sp)          # save d2-d4
 832#       fmovm.l         &0x0,-(%sp)             # save no fpregs
 833# PROLOGUE END ##########################################################
 834
 835        mov.w           %cc,CMP2_CC(%a6)        # save incoming ccodes
 836        mov.l           0x8(%a6), %d2           # get regval
 837
 838        mov.l           ([0xc,%a6],0x0),%d0     # lo bnd: 1st long of pair
 839        mov.l           ([0xc,%a6],0x4),%d1     # hi bnd: 2nd long of pair
 840        bra.w           l_cmp2_cmp              # go do the compare emulation
 841
 842        global          _060LSP__cmp2_Db_
    #
    # _060LSP__cmp2_Db_(): byte-sized bounds, data register operand.
    # Reads the register value from 0x8(%a6) and the pointer to the bounds
    # from 0xc(%a6). The two byte bounds (lo at offset 0, hi at offset 1)
    # and the low byte of the register value are all sign extended to
    # longwords, then control goes to l_cmp2_cmp with
    # d0 = lo, d1 = hi, d2 = Rn. d0/d1 are overwritten and not restored.
    #
 843_060LSP__cmp2_Db_:
 844
 845# PROLOGUE BEGIN ########################################################
 846        link.w          %a6,&-4
 847        movm.l          &0x3800,-(%sp)          # save d2-d4
 848#       fmovm.l         &0x0,-(%sp)             # save no fpregs
 849# PROLOGUE END ##########################################################
 850
 851        mov.w           %cc,CMP2_CC(%a6)        # save incoming ccodes
 852        mov.l           0x8(%a6), %d2           # get regval
 853
 854        mov.b           ([0xc,%a6],0x0),%d0     # lo bnd: 1st byte of pair
 855        mov.b           ([0xc,%a6],0x1),%d1     # hi bnd: 2nd byte of pair
 856
 857        extb.l          %d0                     # sign extend lo bnd
 858        extb.l          %d1                     # sign extend hi bnd
 859
 860# operation is a data register compare.
 861# sign extend byte to long so we can do simple longword compares.
 862        extb.l          %d2                     # sign extend data byte
 863        bra.w           l_cmp2_cmp              # go do the compare emulation
 864
 865        global          _060LSP__cmp2_Dw_
    #
    # _060LSP__cmp2_Dw_(): word-sized bounds, data register operand.
    # Reads the register value from 0x8(%a6) and the pointer to the bounds
    # from 0xc(%a6). The two word bounds (lo at offset 0, hi at offset 2)
    # and the low word of the register value are all sign extended to
    # longwords, then control goes to l_cmp2_cmp with
    # d0 = lo, d1 = hi, d2 = Rn. d0/d1 are overwritten and not restored.
    #
 866_060LSP__cmp2_Dw_:
 867
 868# PROLOGUE BEGIN ########################################################
 869        link.w          %a6,&-4
 870        movm.l          &0x3800,-(%sp)          # save d2-d4
 871#       fmovm.l         &0x0,-(%sp)             # save no fpregs
 872# PROLOGUE END ##########################################################
 873
 874        mov.w           %cc,CMP2_CC(%a6)        # save incoming ccodes
 875        mov.l           0x8(%a6), %d2           # get regval
 876
 877        mov.w           ([0xc,%a6],0x0),%d0     # lo bnd: 1st word of pair
 878        mov.w           ([0xc,%a6],0x2),%d1     # hi bnd: 2nd word of pair
 879
 880        ext.l           %d0                     # sign extend lo bnd
 881        ext.l           %d1                     # sign extend hi bnd
 882
 883# operation is a data register compare.
 884# sign extend word to long so we can do simple longword compares.
 885        ext.l           %d2                     # sign extend data word
 886        bra.w           l_cmp2_cmp              # go emulate compare
 887
 888        global          _060LSP__cmp2_Dl_
    #
    # _060LSP__cmp2_Dl_(): longword-sized bounds, data register operand.
    # Reads the register value from 0x8(%a6) and the pointer to the bounds
    # from 0xc(%a6), and loads the two longword bounds (lo at offset 0, hi
    # at offset 4). No sign extension is needed at this size.
    # d0/d1 are overwritten and not restored.
    #
 889_060LSP__cmp2_Dl_:
 890
 891# PROLOGUE BEGIN ########################################################
 892        link.w          %a6,&-4
 893        movm.l          &0x3800,-(%sp)          # save d2-d4
 894#       fmovm.l         &0x0,-(%sp)             # save no fpregs
 895# PROLOGUE END ##########################################################
 896
 897        mov.w           %cc,CMP2_CC(%a6)        # save incoming ccodes
 898        mov.l           0x8(%a6), %d2           # get regval
 899
 900        mov.l           ([0xc,%a6],0x0),%d0     # lo bnd: 1st long of pair
 901        mov.l           ([0xc,%a6],0x4),%d1     # hi bnd: 2nd long of pair
    # no branch here: execution falls through into l_cmp2_cmp, which follows
    # directly, with d0 = lo, d1 = hi, d2 = Rn.
 902
 903#
 904# To set the ccodes correctly:
 905#       (1) save 'Z' bit from (Rn - lo)
 906#       (2) save 'Z' and 'C' bits from ((hi - lo) - (Rn - lo))
 907#       (3) keep 'X', 'N', and 'V' from before instruction
 908#       (4) combine ccodes
 909#
    # Common tail for every _060LSP__cmp2_* entry point. On entry:
    #   d0 = lo bound, d1 = hi bound, d2 = Rn (all as longwords)
    #   CMP2_CC(%a6) = ccodes saved at routine entry
    # Each entry point has already done link.w and saved d2-d4, so this
    # tail also performs the shared epilogue and returns to the caller.
    # Resulting 'Z' is the OR of the 'Z' bits from steps (1) and (2);
    # resulting 'C' comes from step (2) alone.
    #
 910l_cmp2_cmp:
 911        sub.l           %d0, %d2                # (Rn - lo)
 912        mov.w           %cc, %d3                # fetch resulting ccodes
 913        andi.b          &0x4, %d3               # keep 'Z' bit
 914        sub.l           %d0, %d1                # (hi - lo)
 915        cmp.l           %d1,%d2                 # ((hi - lo) - (Rn - lo))
 916
 917        mov.w           %cc, %d4                # fetch resulting ccodes
 918        or.b            %d4, %d3                # combine w/ earlier ccodes
 919        andi.b          &0x5, %d3               # keep 'Z' and 'C'
 920
 921        mov.w           CMP2_CC(%a6), %d4       # fetch old ccodes
 922        andi.b          &0x1a, %d4              # keep 'X','N','V' bits
 923        or.b            %d3, %d4                # insert new ccodes
 924        mov.w           %d4,%cc                 # save new ccodes
 925
    # movm and unlk are the only instructions between the ccode write above
    # and the rts; the routine relies on them leaving the ccodes intact.
 926# EPILOGUE BEGIN ########################################################
 927#       fmovm.l         (%sp)+,&0x0             # restore no fpregs
 928        movm.l          (%sp)+,&0x001c          # restore d2-d4
 929        unlk            %a6
 930# EPILOGUE END ##########################################################
 931
 932        rts
 933