linux/arch/m68k/fpsp040/slogn.S
<<
>>
Prefs
   1|
   2|       slogn.sa 3.1 12/10/90
   3|
   4|       slogn computes the natural logarithm of an
   5|       input value. slognd does the same except the input value is a
   6|       denormalized number. slognp1 computes log(1+X), and slognp1d
   7|       computes log(1+X) for denormalized X.
   8|
   9|       Input: Double-extended value in memory location pointed to by address
  10|               register a0.
  11|
  12|       Output: log(X) or log(1+X) returned in floating-point register Fp0.
  13|
  14|       Accuracy and Monotonicity: The returned result is within 2 ulps in
  15|               64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
  16|               result is subsequently rounded to double precision. The
  17|               result is provably monotonic in double precision.
  18|
  19|       Speed: The program slogn takes approximately 190 cycles for input
  20|               argument X such that |X-1| >= 1/16, which is the usual
  21|               situation. For those arguments, slognp1 takes approximately
  22|                210 cycles. For the less common arguments, the program will
  23|                run no worse than 10% slower.
  24|
  25|       Algorithm:
  26|       LOGN:
  27|       Step 1. If |X-1| < 1/16, approximate log(X) by an odd polynomial in
  28|               u, where u = 2(X-1)/(X+1). Otherwise, move on to Step 2.
  29|
  30|       Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first seven
  31|               significant bits of Y plus 2**(-7), i.e. F = 1.xxxxxx1 in base
  32|               2 where the six "x" match those of Y. Note that |Y-F| <= 2**(-7).
  33|
  34|       Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a polynomial in u,
  35|               log(1+u) = poly.
  36|
  37|       Step 4. Reconstruct log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u)
  38|               by k*log(2) + (log(F) + poly). The values of log(F) are calculated
  39|               beforehand and stored in the program.
  40|
  41|       lognp1:
  42|       Step 1: If |X| < 1/16, approximate log(1+X) by an odd polynomial in
  43|               u where u = 2X/(2+X). Otherwise, move on to Step 2.
  44|
  45|       Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done in Step 2
  46|               of the algorithm for LOGN and compute log(1+X) as
  47|               k*log(2) + log(F) + poly where poly approximates log(1+u),
  48|               u = (Y-F)/F.
  49|
  50|       Implementation Notes:
  51|       Note 1. There are 64 different possible values for F, thus 64 log(F)'s
  52|               need to be tabulated. Moreover, the values of 1/F are also
  53|               tabulated so that the division in (Y-F)/F can be performed by a
  54|               multiplication.
  55|
  56|       Note 2. In Step 2 of lognp1, in order to preserved accuracy, the value
  57|               Y-F has to be calculated carefully when 1/2 <= X < 3/2.
  58|
  59|       Note 3. To fully exploit the pipeline, polynomials are usually separated
  60|               into two parts evaluated independently before being added up.
  61|
  62
  63|               Copyright (C) Motorola, Inc. 1990
  64|                       All Rights Reserved
  65|
  66|       For details on the license for this file, please see the
  67|       file, README, in this same directory.
  68
  69|slogn  idnt    2,1 | Motorola 040 Floating Point Software Package
  70
  71        |section        8
  72
  73#include "fpsp.h"
  74
  75BOUNDS1:  .long 0x3FFEF07D,0x3FFF8841
  76BOUNDS2:  .long 0x3FFE8000,0x3FFFC000
  77
  78LOGOF2: .long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
  79
  80one:    .long 0x3F800000
  81zero:   .long 0x00000000
  82infty:  .long 0x7F800000
  83negone: .long 0xBF800000
  84
  85LOGA6:  .long 0x3FC2499A,0xB5E4040B
  86LOGA5:  .long 0xBFC555B5,0x848CB7DB
  87
  88LOGA4:  .long 0x3FC99999,0x987D8730
  89LOGA3:  .long 0xBFCFFFFF,0xFF6F7E97
  90
  91LOGA2:  .long 0x3FD55555,0x555555a4
  92LOGA1:  .long 0xBFE00000,0x00000008
  93
  94LOGB5:  .long 0x3F175496,0xADD7DAD6
  95LOGB4:  .long 0x3F3C71C2,0xFE80C7E0
  96
  97LOGB3:  .long 0x3F624924,0x928BCCFF
  98LOGB2:  .long 0x3F899999,0x999995EC
  99
 100LOGB1:  .long 0x3FB55555,0x55555555
 101TWO:    .long 0x40000000,0x00000000
 102
 103LTHOLD: .long 0x3f990000,0x80000000,0x00000000,0x00000000
 104
 105LOGTBL:
 106        .long  0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000
 107        .long  0x3FF70000,0xFF015358,0x833C47E2,0x00000000
 108        .long  0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000
 109        .long  0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000
 110        .long  0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000
 111        .long  0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000
 112        .long  0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000
 113        .long  0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000
 114        .long  0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000
 115        .long  0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000
 116        .long  0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000
 117        .long  0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000
 118        .long  0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000
 119        .long  0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000
 120        .long  0x3FFE0000,0xE525982A,0xF70C880E,0x00000000
 121        .long  0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000
 122        .long  0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000
 123        .long  0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000
 124        .long  0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000
 125        .long  0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000
 126        .long  0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000
 127        .long  0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000
 128        .long  0x3FFE0000,0xD901B203,0x6406C80E,0x00000000
 129        .long  0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000
 130        .long  0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000
 131        .long  0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000
 132        .long  0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000
 133        .long  0x3FFC0000,0xC3FD0329,0x06488481,0x00000000
 134        .long  0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000
 135        .long  0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000
 136        .long  0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000
 137        .long  0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000
 138        .long  0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000
 139        .long  0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000
 140        .long  0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000
 141        .long  0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000
 142        .long  0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000
 143        .long  0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000
 144        .long  0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000
 145        .long  0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000
 146        .long  0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000
 147        .long  0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000
 148        .long  0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000
 149        .long  0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000
 150        .long  0x3FFE0000,0xBD691047,0x07661AA3,0x00000000
 151        .long  0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000
 152        .long  0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000
 153        .long  0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000
 154        .long  0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000
 155        .long  0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000
 156        .long  0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000
 157        .long  0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000
 158        .long  0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000
 159        .long  0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000
 160        .long  0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000
 161        .long  0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000
 162        .long  0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000
 163        .long  0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000
 164        .long  0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000
 165        .long  0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000
 166        .long  0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000
 167        .long  0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000
 168        .long  0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000
 169        .long  0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000
 170        .long  0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000
 171        .long  0x3FFD0000,0xD2420487,0x2DD85160,0x00000000
 172        .long  0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000
 173        .long  0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000
 174        .long  0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000
 175        .long  0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000
 176        .long  0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000
 177        .long  0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000
 178        .long  0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000
 179        .long  0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000
 180        .long  0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000
 181        .long  0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000
 182        .long  0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000
 183        .long  0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000
 184        .long  0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000
 185        .long  0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000
 186        .long  0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000
 187        .long  0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000
 188        .long  0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000
 189        .long  0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000
 190        .long  0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000
 191        .long  0x3FFE0000,0x825EFCED,0x49369330,0x00000000
 192        .long  0x3FFE0000,0x9868C809,0x868C8098,0x00000000
 193        .long  0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000
 194        .long  0x3FFE0000,0x97012E02,0x5C04B809,0x00000000
 195        .long  0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000
 196        .long  0x3FFE0000,0x95A02568,0x095A0257,0x00000000
 197        .long  0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000
 198        .long  0x3FFE0000,0x94458094,0x45809446,0x00000000
 199        .long  0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000
 200        .long  0x3FFE0000,0x92F11384,0x0497889C,0x00000000
 201        .long  0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000
 202        .long  0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000
 203        .long  0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000
 204        .long  0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000
 205        .long  0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000
 206        .long  0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000
 207        .long  0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000
 208        .long  0x3FFE0000,0x8DDA5202,0x37694809,0x00000000
 209        .long  0x3FFE0000,0x9723A1B7,0x20134203,0x00000000
 210        .long  0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000
 211        .long  0x3FFE0000,0x995899C8,0x90EB8990,0x00000000
 212        .long  0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000
 213        .long  0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000
 214        .long  0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000
 215        .long  0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000
 216        .long  0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000
 217        .long  0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000
 218        .long  0x3FFE0000,0x87F78087,0xF78087F8,0x00000000
 219        .long  0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000
 220        .long  0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000
 221        .long  0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000
 222        .long  0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000
 223        .long  0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000
 224        .long  0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000
 225        .long  0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000
 226        .long  0x3FFE0000,0x83993052,0x3FBE3368,0x00000000
 227        .long  0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000
 228        .long  0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000
 229        .long  0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000
 230        .long  0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000
 231        .long  0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000
 232        .long  0x3FFE0000,0x80808080,0x80808081,0x00000000
 233        .long  0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000
 234
 235        .set    ADJK,L_SCR1
 236
 237        .set    X,FP_SCR1
 238        .set    XDCARE,X+2
 239        .set    XFRAC,X+4
 240
 241        .set    F,FP_SCR2
 242        .set    FFRAC,F+4
 243
 244        .set    KLOG2,FP_SCR3
 245
 246        .set    SAVEU,FP_SCR4
 247
 248        | xref  t_frcinx
 249        |xref   t_extdnrm
 250        |xref   t_operr
 251        |xref   t_dz
 252
 253        .global slognd
 254slognd:
 255|--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
 256
 257        movel           #-100,ADJK(%a6) | ...INPUT = 2^(ADJK) * FP0
 258
 259|----normalize the input value by left shifting k bits (k to be determined
 260|----below), adjusting exponent and storing -k to  ADJK
 261|----the value TWOTO100 is no longer needed.
 262|----Note that this code assumes the denormalized input is NON-ZERO.
 263
 264     moveml     %d2-%d7,-(%a7)          | ...save some registers
 265     movel      #0x00000000,%d3         | ...D3 is exponent of smallest norm. #
 266     movel      4(%a0),%d4
 267     movel      8(%a0),%d5              | ...(D4,D5) is (Hi_X,Lo_X)
 268     clrl       %d2                     | ...D2 used for holding K
 269
 270     tstl       %d4
 271     bnes       HiX_not0
 272
 273HiX_0:
 274     movel      %d5,%d4
 275     clrl       %d5
 276     movel      #32,%d2
 277     clrl       %d6
 278     bfffo      %d4{#0:#32},%d6
 279     lsll      %d6,%d4
 280     addl       %d6,%d2                 | ...(D3,D4,D5) is normalized
 281
 282     movel      %d3,X(%a6)
 283     movel      %d4,XFRAC(%a6)
 284     movel      %d5,XFRAC+4(%a6)
 285     negl       %d2
 286     movel      %d2,ADJK(%a6)
 287     fmovex     X(%a6),%fp0
 288     moveml     (%a7)+,%d2-%d7          | ...restore registers
 289     lea        X(%a6),%a0
 290     bras       LOGBGN                  | ...begin regular log(X)
 291
 292
 293HiX_not0:
 294     clrl       %d6
 295     bfffo      %d4{#0:#32},%d6         | ...find first 1
 296     movel      %d6,%d2                 | ...get k
 297     lsll       %d6,%d4
 298     movel      %d5,%d7                 | ...a copy of D5
 299     lsll       %d6,%d5
 300     negl       %d6
 301     addil      #32,%d6
 302     lsrl       %d6,%d7
 303     orl        %d7,%d4                 | ...(D3,D4,D5) normalized
 304
 305     movel      %d3,X(%a6)
 306     movel      %d4,XFRAC(%a6)
 307     movel      %d5,XFRAC+4(%a6)
 308     negl       %d2
 309     movel      %d2,ADJK(%a6)
 310     fmovex     X(%a6),%fp0
 311     moveml     (%a7)+,%d2-%d7          | ...restore registers
 312     lea        X(%a6),%a0
 313     bras       LOGBGN                  | ...begin regular log(X)
 314
 315
 316        .global slogn
 317slogn:
 318|--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
 319
 320        fmovex          (%a0),%fp0      | ...LOAD INPUT
 321        movel           #0x00000000,ADJK(%a6)
 322
 323LOGBGN:
 324|--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
 325|--A FINITE, NON-ZERO, NORMALIZED NUMBER.
 326
 327        movel   (%a0),%d0
 328        movew   4(%a0),%d0
 329
 330        movel   (%a0),X(%a6)
 331        movel   4(%a0),X+4(%a6)
 332        movel   8(%a0),X+8(%a6)
 333
 334        cmpil   #0,%d0          | ...CHECK IF X IS NEGATIVE
 335        blt     LOGNEG          | ...LOG OF NEGATIVE ARGUMENT IS INVALID
 336        cmp2l   BOUNDS1,%d0     | ...X IS POSITIVE, CHECK IF X IS NEAR 1
 337        bcc     LOGNEAR1        | ...BOUNDS IS ROUGHLY [15/16, 17/16]
 338
 339LOGMAIN:
 340|--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1
 341
 342|--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
 343|--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
 344|--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
 345|--                      = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
 346|--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
 347|--LOG(1+U) CAN BE VERY EFFICIENT.
 348|--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
 349|--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.
 350
 351|--GET K, Y, F, AND ADDRESS OF 1/F.
 352        asrl    #8,%d0
 353        asrl    #8,%d0          | ...SHIFTED 16 BITS, BIASED EXPO. OF X
 354        subil   #0x3FFF,%d0     | ...THIS IS K
 355        addl    ADJK(%a6),%d0   | ...ADJUST K, ORIGINAL INPUT MAY BE  DENORM.
 356        lea     LOGTBL,%a0      | ...BASE ADDRESS OF 1/F AND LOG(F)
 357        fmovel  %d0,%fp1                | ...CONVERT K TO FLOATING-POINT FORMAT
 358
 359|--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
 360        movel   #0x3FFF0000,X(%a6)      | ...X IS NOW Y, I.E. 2^(-K)*X
 361        movel   XFRAC(%a6),FFRAC(%a6)
 362        andil   #0xFE000000,FFRAC(%a6) | ...FIRST 7 BITS OF Y
 363        oril    #0x01000000,FFRAC(%a6) | ...GET F: ATTACH A 1 AT THE EIGHTH BIT
 364        movel   FFRAC(%a6),%d0  | ...READY TO GET ADDRESS OF 1/F
 365        andil   #0x7E000000,%d0
 366        asrl    #8,%d0
 367        asrl    #8,%d0
 368        asrl    #4,%d0          | ...SHIFTED 20, D0 IS THE DISPLACEMENT
 369        addal   %d0,%a0         | ...A0 IS THE ADDRESS FOR 1/F
 370
 371        fmovex  X(%a6),%fp0
 372        movel   #0x3fff0000,F(%a6)
 373        clrl    F+8(%a6)
 374        fsubx   F(%a6),%fp0             | ...Y-F
 375        fmovemx %fp2-%fp2/%fp3,-(%sp)   | ...SAVE FP2 WHILE FP0 IS NOT READY
 376|--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
 377|--REGISTERS SAVED: FPCR, FP1, FP2
 378
 379LP1CONT1:
 380|--AN RE-ENTRY POINT FOR LOGNP1
 381        fmulx   (%a0),%fp0      | ...FP0 IS U = (Y-F)/F
 382        fmulx   LOGOF2,%fp1     | ...GET K*LOG2 WHILE FP0 IS NOT READY
 383        fmovex  %fp0,%fp2
 384        fmulx   %fp2,%fp2               | ...FP2 IS V=U*U
 385        fmovex  %fp1,KLOG2(%a6) | ...PUT K*LOG2 IN MEMORY, FREE FP1
 386
 387|--LOG(1+U) IS APPROXIMATED BY
 388|--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
 389|--[U + V*(A1+V*(A3+V*A5))]  +  [U*V*(A2+V*(A4+V*A6))]
 390
 391        fmovex  %fp2,%fp3
 392        fmovex  %fp2,%fp1
 393
 394        fmuld   LOGA6,%fp1      | ...V*A6
 395        fmuld   LOGA5,%fp2      | ...V*A5
 396
 397        faddd   LOGA4,%fp1      | ...A4+V*A6
 398        faddd   LOGA3,%fp2      | ...A3+V*A5
 399
 400        fmulx   %fp3,%fp1               | ...V*(A4+V*A6)
 401        fmulx   %fp3,%fp2               | ...V*(A3+V*A5)
 402
 403        faddd   LOGA2,%fp1      | ...A2+V*(A4+V*A6)
 404        faddd   LOGA1,%fp2      | ...A1+V*(A3+V*A5)
 405
 406        fmulx   %fp3,%fp1               | ...V*(A2+V*(A4+V*A6))
 407        addal   #16,%a0         | ...ADDRESS OF LOG(F)
 408        fmulx   %fp3,%fp2               | ...V*(A1+V*(A3+V*A5)), FP3 RELEASED
 409
 410        fmulx   %fp0,%fp1               | ...U*V*(A2+V*(A4+V*A6))
 411        faddx   %fp2,%fp0               | ...U+V*(A1+V*(A3+V*A5)), FP2 RELEASED
 412
 413        faddx   (%a0),%fp1      | ...LOG(F)+U*V*(A2+V*(A4+V*A6))
 414        fmovemx  (%sp)+,%fp2-%fp2/%fp3  | ...RESTORE FP2
 415        faddx   %fp1,%fp0               | ...FP0 IS LOG(F) + LOG(1+U)
 416
 417        fmovel  %d1,%fpcr
 418        faddx   KLOG2(%a6),%fp0 | ...FINAL ADD
 419        bra     t_frcinx
 420
 421
 422LOGNEAR1:
 423|--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
 424        fmovex  %fp0,%fp1
 425        fsubs   one,%fp1                | ...FP1 IS X-1
 426        fadds   one,%fp0                | ...FP0 IS X+1
 427        faddx   %fp1,%fp1               | ...FP1 IS 2(X-1)
 428|--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
 429|--IN U, U = 2(X-1)/(X+1) = FP1/FP0
 430
 431LP1CONT2:
 432|--THIS IS AN RE-ENTRY POINT FOR LOGNP1
 433        fdivx   %fp0,%fp1               | ...FP1 IS U
 434        fmovemx %fp2-%fp2/%fp3,-(%sp)    | ...SAVE FP2
 435|--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
 436|--LET V=U*U, W=V*V, CALCULATE
 437|--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
 438|--U + U*V*(  [B1 + W*(B3 + W*B5)]  +  [V*(B2 + W*B4)]  )
 439        fmovex  %fp1,%fp0
 440        fmulx   %fp0,%fp0       | ...FP0 IS V
 441        fmovex  %fp1,SAVEU(%a6) | ...STORE U IN MEMORY, FREE FP1
 442        fmovex  %fp0,%fp1
 443        fmulx   %fp1,%fp1       | ...FP1 IS W
 444
 445        fmoved  LOGB5,%fp3
 446        fmoved  LOGB4,%fp2
 447
 448        fmulx   %fp1,%fp3       | ...W*B5
 449        fmulx   %fp1,%fp2       | ...W*B4
 450
 451        faddd   LOGB3,%fp3 | ...B3+W*B5
 452        faddd   LOGB2,%fp2 | ...B2+W*B4
 453
 454        fmulx   %fp3,%fp1       | ...W*(B3+W*B5), FP3 RELEASED
 455
 456        fmulx   %fp0,%fp2       | ...V*(B2+W*B4)
 457
 458        faddd   LOGB1,%fp1 | ...B1+W*(B3+W*B5)
 459        fmulx   SAVEU(%a6),%fp0 | ...FP0 IS U*V
 460
 461        faddx   %fp2,%fp1       | ...B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
 462        fmovemx (%sp)+,%fp2-%fp2/%fp3 | ...FP2 RESTORED
 463
 464        fmulx   %fp1,%fp0       | ...U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )
 465
 466        fmovel  %d1,%fpcr
 467        faddx   SAVEU(%a6),%fp0
 468        bra     t_frcinx
 469        rts
 470
 471LOGNEG:
 472|--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID
 473        bra     t_operr
 474
 475        .global slognp1d
 476slognp1d:
 477|--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
 478| Simply return the denorm
 479
 480        bra     t_extdnrm
 481
 482        .global slognp1
 483slognp1:
 484|--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
 485
 486        fmovex  (%a0),%fp0      | ...LOAD INPUT
 487        fabsx   %fp0            |test magnitude
 488        fcmpx   LTHOLD,%fp0     |compare with min threshold
 489        fbgt    LP1REAL         |if greater, continue
 490        fmovel  #0,%fpsr                |clr N flag from compare
 491        fmovel  %d1,%fpcr
 492        fmovex  (%a0),%fp0      |return signed argument
 493        bra     t_frcinx
 494
 495LP1REAL:
 496        fmovex  (%a0),%fp0      | ...LOAD INPUT
 497        movel   #0x00000000,ADJK(%a6)
 498        fmovex  %fp0,%fp1       | ...FP1 IS INPUT Z
 499        fadds   one,%fp0        | ...X := ROUND(1+Z)
 500        fmovex  %fp0,X(%a6)
 501        movew   XFRAC(%a6),XDCARE(%a6)
 502        movel   X(%a6),%d0
 503        cmpil   #0,%d0
 504        ble     LP1NEG0 | ...LOG OF ZERO OR -VE
 505        cmp2l   BOUNDS2,%d0
 506        bcs     LOGMAIN | ...BOUNDS2 IS [1/2,3/2]
 507|--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
 508|--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
 509|--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).
 510
 511LP1NEAR1:
 512|--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
 513        cmp2l   BOUNDS1,%d0
 514        bcss    LP1CARE
 515
 516LP1ONE16:
 517|--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
 518|--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
 519        faddx   %fp1,%fp1       | ...FP1 IS 2Z
 520        fadds   one,%fp0        | ...FP0 IS 1+X
 521|--U = FP1/FP0
 522        bra     LP1CONT2
 523
 524LP1CARE:
 525|--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
 526|--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
 527|--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
 528|--THERE ARE ONLY TWO CASES.
 529|--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
 530|--CASE 2: 1+Z > 1, THEN K = 0  AND Y-F = (1-F) + Z
 531|--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
 532|--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
 533
 534        movel   XFRAC(%a6),FFRAC(%a6)
 535        andil   #0xFE000000,FFRAC(%a6)
 536        oril    #0x01000000,FFRAC(%a6)  | ...F OBTAINED
 537        cmpil   #0x3FFF8000,%d0 | ...SEE IF 1+Z > 1
 538        bges    KISZERO
 539
 540KISNEG1:
 541        fmoves  TWO,%fp0
 542        movel   #0x3fff0000,F(%a6)
 543        clrl    F+8(%a6)
 544        fsubx   F(%a6),%fp0     | ...2-F
 545        movel   FFRAC(%a6),%d0
 546        andil   #0x7E000000,%d0
 547        asrl    #8,%d0
 548        asrl    #8,%d0
 549        asrl    #4,%d0          | ...D0 CONTAINS DISPLACEMENT FOR 1/F
 550        faddx   %fp1,%fp1               | ...GET 2Z
 551        fmovemx %fp2-%fp2/%fp3,-(%sp)   | ...SAVE FP2
 552        faddx   %fp1,%fp0               | ...FP0 IS Y-F = (2-F)+2Z
 553        lea     LOGTBL,%a0      | ...A0 IS ADDRESS OF 1/F
 554        addal   %d0,%a0
 555        fmoves  negone,%fp1     | ...FP1 IS K = -1
 556        bra     LP1CONT1
 557
 558KISZERO:
 559        fmoves  one,%fp0
 560        movel   #0x3fff0000,F(%a6)
 561        clrl    F+8(%a6)
 562        fsubx   F(%a6),%fp0             | ...1-F
 563        movel   FFRAC(%a6),%d0
 564        andil   #0x7E000000,%d0
 565        asrl    #8,%d0
 566        asrl    #8,%d0
 567        asrl    #4,%d0
 568        faddx   %fp1,%fp0               | ...FP0 IS Y-F
 569        fmovemx %fp2-%fp2/%fp3,-(%sp)   | ...FP2 SAVED
 570        lea     LOGTBL,%a0
 571        addal   %d0,%a0         | ...A0 IS ADDRESS OF 1/F
 572        fmoves  zero,%fp1       | ...FP1 IS K = 0
 573        bra     LP1CONT1
 574
 575LP1NEG0:
 576|--FPCR SAVED. D0 IS X IN COMPACT FORM.
 577        cmpil   #0,%d0
 578        blts    LP1NEG
 579LP1ZERO:
 580        fmoves  negone,%fp0
 581
 582        fmovel  %d1,%fpcr
 583        bra t_dz
 584
 585LP1NEG:
 586        fmoves  zero,%fp0
 587
 588        fmovel  %d1,%fpcr
 589        bra     t_operr
 590
 591        |end
 592