root/arch/m68k/fpsp040/slogn.S

/* [previous][next][first][last][top][bottom][index][help] */
   1 |
   2 |       slogn.sa 3.1 12/10/90
   3 |
   4 |       slogn computes the natural logarithm of an
   5 |       input value. slognd does the same except the input value is a
   6 |       denormalized number. slognp1 computes log(1+X), and slognp1d
   7 |       computes log(1+X) for denormalized X.
   8 |
   9 |       Input: Double-extended value in memory location pointed to by address
  10 |               register a0.
  11 |
  12 |       Output: log(X) or log(1+X) returned in floating-point register Fp0.
  13 |
  14 |       Accuracy and Monotonicity: The returned result is within 2 ulps in
  15 |               64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
  16 |               result is subsequently rounded to double precision. The 
  17 |               result is provably monotonic in double precision.
  18 |
  19 |       Speed: The program slogn takes approximately 190 cycles for input 
  20 |               argument X such that |X-1| >= 1/16, which is the the usual 
  21 |               situation. For those arguments, slognp1 takes approximately
  22 |                210 cycles. For the less common arguments, the program will
  23 |                run no worse than 10% slower.
  24 |
  25 |       Algorithm:
  26 |       LOGN:
  27 |       Step 1. If |X-1| < 1/16, approximate log(X) by an odd polynomial in
  28 |               u, where u = 2(X-1)/(X+1). Otherwise, move on to Step 2.
  29 |
  30 |       Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first seven
  31 |               significant bits of Y plus 2**(-7), i.e. F = 1.xxxxxx1 in base
  32 |               2 where the six "x" match those of Y. Note that |Y-F| <= 2**(-7).
  33 |
  34 |       Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a polynomial in u,
  35 |               log(1+u) = poly.
  36 |
  37 |       Step 4. Reconstruct log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u)
  38 |               by k*log(2) + (log(F) + poly). The values of log(F) are calculated
  39 |               beforehand and stored in the program.
  40 |
  41 |       lognp1:
  42 |       Step 1: If |X| < 1/16, approximate log(1+X) by an odd polynomial in
  43 |               u where u = 2X/(2+X). Otherwise, move on to Step 2.
  44 |
  45 |       Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done in Step 2
  46 |               of the algorithm for LOGN and compute log(1+X) as
  47 |               k*log(2) + log(F) + poly where poly approximates log(1+u),
  48 |               u = (Y-F)/F. 
  49 |
  50 |       Implementation Notes:
  51 |       Note 1. There are 64 different possible values for F, thus 64 log(F)'s
  52 |               need to be tabulated. Moreover, the values of 1/F are also 
  53 |               tabulated so that the division in (Y-F)/F can be performed by a
  54 |               multiplication.
  55 |
  56 |       Note 2. In Step 2 of lognp1, in order to preserved accuracy, the value
  57 |               Y-F has to be calculated carefully when 1/2 <= X < 3/2. 
  58 |
  59 |       Note 3. To fully exploit the pipeline, polynomials are usually separated
  60 |               into two parts evaluated independently before being added up.
  61 |       
  62 
  63 |               Copyright (C) Motorola, Inc. 1990
  64 |                       All Rights Reserved
  65 |
  66 |       THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA 
  67 |       The copyright notice above does not evidence any  
  68 |       actual or intended publication of such source code.
  69 
  70 |slogn  idnt    2,1 | Motorola 040 Floating Point Software Package
  71 
  72         |section        8
  73 
  74         .include "fpsp.h"
  75 
  76 BOUNDS1:  .long 0x3FFEF07D,0x3FFF8841
  77 BOUNDS2:  .long 0x3FFE8000,0x3FFFC000
  78 
  79 LOGOF2: .long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
  80 
  81 one:    .long 0x3F800000
  82 zero:   .long 0x00000000
  83 infty:  .long 0x7F800000
  84 negone: .long 0xBF800000
  85 
  86 LOGA6:  .long 0x3FC2499A,0xB5E4040B
  87 LOGA5:  .long 0xBFC555B5,0x848CB7DB
  88 
  89 LOGA4:  .long 0x3FC99999,0x987D8730
  90 LOGA3:  .long 0xBFCFFFFF,0xFF6F7E97
  91 
  92 LOGA2:  .long 0x3FD55555,0x555555a4
  93 LOGA1:  .long 0xBFE00000,0x00000008
  94 
  95 LOGB5:  .long 0x3F175496,0xADD7DAD6
  96 LOGB4:  .long 0x3F3C71C2,0xFE80C7E0
  97 
  98 LOGB3:  .long 0x3F624924,0x928BCCFF
  99 LOGB2:  .long 0x3F899999,0x999995EC
 100 
 101 LOGB1:  .long 0x3FB55555,0x55555555
 102 TWO:    .long 0x40000000,0x00000000
 103 
 104 LTHOLD: .long 0x3f990000,0x80000000,0x00000000,0x00000000
 105 
 106 LOGTBL:
 107         .long  0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000
 108         .long  0x3FF70000,0xFF015358,0x833C47E2,0x00000000
 109         .long  0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000
 110         .long  0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000
 111         .long  0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000
 112         .long  0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000
 113         .long  0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000
 114         .long  0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000
 115         .long  0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000
 116         .long  0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000
 117         .long  0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000
 118         .long  0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000
 119         .long  0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000
 120         .long  0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000
 121         .long  0x3FFE0000,0xE525982A,0xF70C880E,0x00000000
 122         .long  0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000
 123         .long  0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000
 124         .long  0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000
 125         .long  0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000
 126         .long  0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000
 127         .long  0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000
 128         .long  0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000
 129         .long  0x3FFE0000,0xD901B203,0x6406C80E,0x00000000
 130         .long  0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000
 131         .long  0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000
 132         .long  0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000
 133         .long  0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000
 134         .long  0x3FFC0000,0xC3FD0329,0x06488481,0x00000000
 135         .long  0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000
 136         .long  0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000
 137         .long  0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000
 138         .long  0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000
 139         .long  0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000
 140         .long  0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000
 141         .long  0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000
 142         .long  0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000
 143         .long  0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000
 144         .long  0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000
 145         .long  0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000
 146         .long  0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000
 147         .long  0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000
 148         .long  0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000
 149         .long  0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000
 150         .long  0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000
 151         .long  0x3FFE0000,0xBD691047,0x07661AA3,0x00000000
 152         .long  0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000
 153         .long  0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000
 154         .long  0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000
 155         .long  0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000
 156         .long  0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000
 157         .long  0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000
 158         .long  0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000
 159         .long  0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000
 160         .long  0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000
 161         .long  0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000
 162         .long  0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000
 163         .long  0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000
 164         .long  0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000
 165         .long  0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000
 166         .long  0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000
 167         .long  0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000
 168         .long  0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000
 169         .long  0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000
 170         .long  0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000
 171         .long  0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000
 172         .long  0x3FFD0000,0xD2420487,0x2DD85160,0x00000000
 173         .long  0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000
 174         .long  0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000
 175         .long  0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000
 176         .long  0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000
 177         .long  0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000
 178         .long  0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000
 179         .long  0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000
 180         .long  0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000
 181         .long  0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000
 182         .long  0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000
 183         .long  0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000
 184         .long  0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000
 185         .long  0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000
 186         .long  0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000
 187         .long  0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000
 188         .long  0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000
 189         .long  0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000
 190         .long  0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000
 191         .long  0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000
 192         .long  0x3FFE0000,0x825EFCED,0x49369330,0x00000000
 193         .long  0x3FFE0000,0x9868C809,0x868C8098,0x00000000
 194         .long  0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000
 195         .long  0x3FFE0000,0x97012E02,0x5C04B809,0x00000000
 196         .long  0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000
 197         .long  0x3FFE0000,0x95A02568,0x095A0257,0x00000000
 198         .long  0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000
 199         .long  0x3FFE0000,0x94458094,0x45809446,0x00000000
 200         .long  0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000
 201         .long  0x3FFE0000,0x92F11384,0x0497889C,0x00000000
 202         .long  0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000
 203         .long  0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000
 204         .long  0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000
 205         .long  0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000
 206         .long  0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000
 207         .long  0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000
 208         .long  0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000
 209         .long  0x3FFE0000,0x8DDA5202,0x37694809,0x00000000
 210         .long  0x3FFE0000,0x9723A1B7,0x20134203,0x00000000
 211         .long  0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000
 212         .long  0x3FFE0000,0x995899C8,0x90EB8990,0x00000000
 213         .long  0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000
 214         .long  0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000
 215         .long  0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000
 216         .long  0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000
 217         .long  0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000
 218         .long  0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000
 219         .long  0x3FFE0000,0x87F78087,0xF78087F8,0x00000000
 220         .long  0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000
 221         .long  0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000
 222         .long  0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000
 223         .long  0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000
 224         .long  0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000
 225         .long  0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000
 226         .long  0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000
 227         .long  0x3FFE0000,0x83993052,0x3FBE3368,0x00000000
 228         .long  0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000
 229         .long  0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000
 230         .long  0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000
 231         .long  0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000
 232         .long  0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000
 233         .long  0x3FFE0000,0x80808080,0x80808081,0x00000000
 234         .long  0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000
 235 
 236         .set    ADJK,L_SCR1
 237 
 238         .set    X,FP_SCR1
 239         .set    XDCARE,X+2
 240         .set    XFRAC,X+4
 241 
 242         .set    F,FP_SCR2
 243         .set    FFRAC,F+4
 244 
 245         .set    KLOG2,FP_SCR3
 246 
 247         .set    SAVEU,FP_SCR4
 248 
 249         | xref  t_frcinx
 250         |xref   t_extdnrm
 251         |xref   t_operr
 252         |xref   t_dz
 253 
 254         .global slognd
 255 slognd:
 256 |--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
 257 
 258         movel           #-100,ADJK(%a6) | ...INPUT = 2^(ADJK) * FP0
 259 
 260 |----normalize the input value by left shifting k bits (k to be determined
 261 |----below), adjusting exponent and storing -k to  ADJK
 262 |----the value TWOTO100 is no longer needed.
 263 |----Note that this code assumes the denormalized input is NON-ZERO.
 264 
 265      moveml     %d2-%d7,-(%a7)          | ...save some registers 
 266      movel      #0x00000000,%d3         | ...D3 is exponent of smallest norm. #
 267      movel      4(%a0),%d4
 268      movel      8(%a0),%d5              | ...(D4,D5) is (Hi_X,Lo_X)
 269      clrl       %d2                     | ...D2 used for holding K
 270 
 271      tstl       %d4
 272      bnes       HiX_not0
 273 
 274 HiX_0:
 275      movel      %d5,%d4
 276      clrl       %d5
 277      movel      #32,%d2
 278      clrl       %d6
 279      bfffo      %d4{#0:#32},%d6
 280      lsll      %d6,%d4
 281      addl       %d6,%d2                 | ...(D3,D4,D5) is normalized
 282 
 283      movel      %d3,X(%a6)
 284      movel      %d4,XFRAC(%a6)
 285      movel      %d5,XFRAC+4(%a6)
 286      negl       %d2
 287      movel      %d2,ADJK(%a6)
 288      fmovex     X(%a6),%fp0
 289      moveml     (%a7)+,%d2-%d7          | ...restore registers
 290      lea        X(%a6),%a0
 291      bras       LOGBGN                  | ...begin regular log(X)
 292 
 293 
 294 HiX_not0:
 295      clrl       %d6
 296      bfffo      %d4{#0:#32},%d6         | ...find first 1
 297      movel      %d6,%d2                 | ...get k
 298      lsll       %d6,%d4
 299      movel      %d5,%d7                 | ...a copy of D5
 300      lsll       %d6,%d5
 301      negl       %d6
 302      addil      #32,%d6
 303      lsrl       %d6,%d7
 304      orl        %d7,%d4                 | ...(D3,D4,D5) normalized
 305 
 306      movel      %d3,X(%a6)
 307      movel      %d4,XFRAC(%a6)
 308      movel      %d5,XFRAC+4(%a6)
 309      negl       %d2
 310      movel      %d2,ADJK(%a6)
 311      fmovex     X(%a6),%fp0
 312      moveml     (%a7)+,%d2-%d7          | ...restore registers
 313      lea        X(%a6),%a0
 314      bras       LOGBGN                  | ...begin regular log(X)
 315 
 316 
 317         .global slogn
 318 slogn:
 319 |--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
 320 
 321         fmovex          (%a0),%fp0      | ...LOAD INPUT
 322         movel           #0x00000000,ADJK(%a6)
 323 
 324 LOGBGN:
 325 |--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
 326 |--A FINITE, NON-ZERO, NORMALIZED NUMBER.
 327 
 328         movel   (%a0),%d0
 329         movew   4(%a0),%d0
 330 
 331         movel   (%a0),X(%a6)
 332         movel   4(%a0),X+4(%a6)
 333         movel   8(%a0),X+8(%a6)
 334 
 335         cmpil   #0,%d0          | ...CHECK IF X IS NEGATIVE
 336         blt     LOGNEG          | ...LOG OF NEGATIVE ARGUMENT IS INVALID
 337         cmp2l   BOUNDS1,%d0     | ...X IS POSITIVE, CHECK IF X IS NEAR 1
 338         bcc     LOGNEAR1        | ...BOUNDS IS ROUGHLY [15/16, 17/16]
 339 
 340 LOGMAIN:
 341 |--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1
 342 
 343 |--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
 344 |--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
 345 |--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
 346 |--                      = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
 347 |--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
 348 |--LOG(1+U) CAN BE VERY EFFICIENT.
 349 |--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
 350 |--DIVISION IS NEEDED TO CALCULATE (Y-F)/F. 
 351 
 352 |--GET K, Y, F, AND ADDRESS OF 1/F.
 353         asrl    #8,%d0
 354         asrl    #8,%d0          | ...SHIFTED 16 BITS, BIASED EXPO. OF X
 355         subil   #0x3FFF,%d0     | ...THIS IS K
 356         addl    ADJK(%a6),%d0   | ...ADJUST K, ORIGINAL INPUT MAY BE  DENORM.
 357         lea     LOGTBL,%a0      | ...BASE ADDRESS OF 1/F AND LOG(F)
 358         fmovel  %d0,%fp1                | ...CONVERT K TO FLOATING-POINT FORMAT
 359 
 360 |--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
 361         movel   #0x3FFF0000,X(%a6)      | ...X IS NOW Y, I.E. 2^(-K)*X
 362         movel   XFRAC(%a6),FFRAC(%a6)
 363         andil   #0xFE000000,FFRAC(%a6) | ...FIRST 7 BITS OF Y
 364         oril    #0x01000000,FFRAC(%a6) | ...GET F: ATTACH A 1 AT THE EIGHTH BIT
 365         movel   FFRAC(%a6),%d0  | ...READY TO GET ADDRESS OF 1/F
 366         andil   #0x7E000000,%d0 
 367         asrl    #8,%d0
 368         asrl    #8,%d0
 369         asrl    #4,%d0          | ...SHIFTED 20, D0 IS THE DISPLACEMENT
 370         addal   %d0,%a0         | ...A0 IS THE ADDRESS FOR 1/F
 371 
 372         fmovex  X(%a6),%fp0
 373         movel   #0x3fff0000,F(%a6)
 374         clrl    F+8(%a6)
 375         fsubx   F(%a6),%fp0             | ...Y-F
 376         fmovemx %fp2-%fp2/%fp3,-(%sp)   | ...SAVE FP2 WHILE FP0 IS NOT READY
 377 |--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
 378 |--REGISTERS SAVED: FPCR, FP1, FP2
 379 
 380 LP1CONT1:
 381 |--AN RE-ENTRY POINT FOR LOGNP1
 382         fmulx   (%a0),%fp0      | ...FP0 IS U = (Y-F)/F
 383         fmulx   LOGOF2,%fp1     | ...GET K*LOG2 WHILE FP0 IS NOT READY
 384         fmovex  %fp0,%fp2
 385         fmulx   %fp2,%fp2               | ...FP2 IS V=U*U
 386         fmovex  %fp1,KLOG2(%a6) | ...PUT K*LOG2 IN MEMORY, FREE FP1
 387 
 388 |--LOG(1+U) IS APPROXIMATED BY
 389 |--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
 390 |--[U + V*(A1+V*(A3+V*A5))]  +  [U*V*(A2+V*(A4+V*A6))]
 391 
 392         fmovex  %fp2,%fp3
 393         fmovex  %fp2,%fp1       
 394 
 395         fmuld   LOGA6,%fp1      | ...V*A6
 396         fmuld   LOGA5,%fp2      | ...V*A5
 397 
 398         faddd   LOGA4,%fp1      | ...A4+V*A6
 399         faddd   LOGA3,%fp2      | ...A3+V*A5
 400 
 401         fmulx   %fp3,%fp1               | ...V*(A4+V*A6)
 402         fmulx   %fp3,%fp2               | ...V*(A3+V*A5)
 403 
 404         faddd   LOGA2,%fp1      | ...A2+V*(A4+V*A6)
 405         faddd   LOGA1,%fp2      | ...A1+V*(A3+V*A5)
 406 
 407         fmulx   %fp3,%fp1               | ...V*(A2+V*(A4+V*A6))
 408         addal   #16,%a0         | ...ADDRESS OF LOG(F)
 409         fmulx   %fp3,%fp2               | ...V*(A1+V*(A3+V*A5)), FP3 RELEASED
 410 
 411         fmulx   %fp0,%fp1               | ...U*V*(A2+V*(A4+V*A6))
 412         faddx   %fp2,%fp0               | ...U+V*(A1+V*(A3+V*A5)), FP2 RELEASED
 413 
 414         faddx   (%a0),%fp1      | ...LOG(F)+U*V*(A2+V*(A4+V*A6))
 415         fmovemx  (%sp)+,%fp2-%fp2/%fp3  | ...RESTORE FP2
 416         faddx   %fp1,%fp0               | ...FP0 IS LOG(F) + LOG(1+U)
 417 
 418         fmovel  %d1,%fpcr
 419         faddx   KLOG2(%a6),%fp0 | ...FINAL ADD
 420         bra     t_frcinx
 421 
 422 
 423 LOGNEAR1:
 424 |--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
 425         fmovex  %fp0,%fp1
 426         fsubs   one,%fp1                | ...FP1 IS X-1
 427         fadds   one,%fp0                | ...FP0 IS X+1
 428         faddx   %fp1,%fp1               | ...FP1 IS 2(X-1)
 429 |--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
 430 |--IN U, U = 2(X-1)/(X+1) = FP1/FP0
 431 
 432 LP1CONT2:
 433 |--THIS IS AN RE-ENTRY POINT FOR LOGNP1
 434         fdivx   %fp0,%fp1               | ...FP1 IS U
 435         fmovemx %fp2-%fp2/%fp3,-(%sp)    | ...SAVE FP2
 436 |--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
 437 |--LET V=U*U, W=V*V, CALCULATE
 438 |--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
 439 |--U + U*V*(  [B1 + W*(B3 + W*B5)]  +  [V*(B2 + W*B4)]  )
 440         fmovex  %fp1,%fp0
 441         fmulx   %fp0,%fp0       | ...FP0 IS V
 442         fmovex  %fp1,SAVEU(%a6) | ...STORE U IN MEMORY, FREE FP1
 443         fmovex  %fp0,%fp1       
 444         fmulx   %fp1,%fp1       | ...FP1 IS W
 445 
 446         fmoved  LOGB5,%fp3
 447         fmoved  LOGB4,%fp2
 448 
 449         fmulx   %fp1,%fp3       | ...W*B5
 450         fmulx   %fp1,%fp2       | ...W*B4
 451 
 452         faddd   LOGB3,%fp3 | ...B3+W*B5
 453         faddd   LOGB2,%fp2 | ...B2+W*B4
 454 
 455         fmulx   %fp3,%fp1       | ...W*(B3+W*B5), FP3 RELEASED
 456 
 457         fmulx   %fp0,%fp2       | ...V*(B2+W*B4)
 458 
 459         faddd   LOGB1,%fp1 | ...B1+W*(B3+W*B5)
 460         fmulx   SAVEU(%a6),%fp0 | ...FP0 IS U*V
 461 
 462         faddx   %fp2,%fp1       | ...B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
 463         fmovemx (%sp)+,%fp2-%fp2/%fp3 | ...FP2 RESTORED
 464 
 465         fmulx   %fp1,%fp0       | ...U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )
 466 
 467         fmovel  %d1,%fpcr
 468         faddx   SAVEU(%a6),%fp0         
 469         bra     t_frcinx
 470         rts
 471 
 472 LOGNEG:
 473 |--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID
 474         bra     t_operr
 475 
 476         .global slognp1d
 477 slognp1d:
 478 |--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
 479 | Simply return the denorm
 480 
 481         bra     t_extdnrm
 482 
 483         .global slognp1
 484 slognp1:
 485 |--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
 486 
 487         fmovex  (%a0),%fp0      | ...LOAD INPUT
 488         fabsx   %fp0            |test magnitude
 489         fcmpx   LTHOLD,%fp0     |compare with min threshold
 490         fbgt    LP1REAL         |if greater, continue
 491         fmovel  #0,%fpsr                |clr N flag from compare
 492         fmovel  %d1,%fpcr
 493         fmovex  (%a0),%fp0      |return signed argument
 494         bra     t_frcinx
 495 
 496 LP1REAL:
 497         fmovex  (%a0),%fp0      | ...LOAD INPUT
 498         movel   #0x00000000,ADJK(%a6)
 499         fmovex  %fp0,%fp1       | ...FP1 IS INPUT Z
 500         fadds   one,%fp0        | ...X := ROUND(1+Z)
 501         fmovex  %fp0,X(%a6)
 502         movew   XFRAC(%a6),XDCARE(%a6)
 503         movel   X(%a6),%d0
 504         cmpil   #0,%d0
 505         ble     LP1NEG0 | ...LOG OF ZERO OR -VE
 506         cmp2l   BOUNDS2,%d0
 507         bcs     LOGMAIN | ...BOUNDS2 IS [1/2,3/2]
 508 |--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
 509 |--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
 510 |--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).
 511 
 512 LP1NEAR1:
 513 |--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
 514         cmp2l   BOUNDS1,%d0
 515         bcss    LP1CARE
 516 
 517 LP1ONE16:
 518 |--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
 519 |--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
 520         faddx   %fp1,%fp1       | ...FP1 IS 2Z
 521         fadds   one,%fp0        | ...FP0 IS 1+X
 522 |--U = FP1/FP0
 523         bra     LP1CONT2
 524 
 525 LP1CARE:
 526 |--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
 527 |--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
 528 |--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
 529 |--THERE ARE ONLY TWO CASES.
 530 |--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
 531 |--CASE 2: 1+Z > 1, THEN K = 0  AND Y-F = (1-F) + Z
 532 |--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
 533 |--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
 534 
 535         movel   XFRAC(%a6),FFRAC(%a6)
 536         andil   #0xFE000000,FFRAC(%a6)
 537         oril    #0x01000000,FFRAC(%a6)  | ...F OBTAINED
 538         cmpil   #0x3FFF8000,%d0 | ...SEE IF 1+Z > 1
 539         bges    KISZERO
 540 
 541 KISNEG1:
 542         fmoves  TWO,%fp0
 543         movel   #0x3fff0000,F(%a6)
 544         clrl    F+8(%a6)
 545         fsubx   F(%a6),%fp0     | ...2-F
 546         movel   FFRAC(%a6),%d0
 547         andil   #0x7E000000,%d0
 548         asrl    #8,%d0
 549         asrl    #8,%d0
 550         asrl    #4,%d0          | ...D0 CONTAINS DISPLACEMENT FOR 1/F
 551         faddx   %fp1,%fp1               | ...GET 2Z
 552         fmovemx %fp2-%fp2/%fp3,-(%sp)   | ...SAVE FP2 
 553         faddx   %fp1,%fp0               | ...FP0 IS Y-F = (2-F)+2Z
 554         lea     LOGTBL,%a0      | ...A0 IS ADDRESS OF 1/F
 555         addal   %d0,%a0
 556         fmoves  negone,%fp1     | ...FP1 IS K = -1
 557         bra     LP1CONT1
 558 
 559 KISZERO:
 560         fmoves  one,%fp0
 561         movel   #0x3fff0000,F(%a6)
 562         clrl    F+8(%a6)
 563         fsubx   F(%a6),%fp0             | ...1-F
 564         movel   FFRAC(%a6),%d0
 565         andil   #0x7E000000,%d0
 566         asrl    #8,%d0
 567         asrl    #8,%d0
 568         asrl    #4,%d0
 569         faddx   %fp1,%fp0               | ...FP0 IS Y-F
 570         fmovemx %fp2-%fp2/%fp3,-(%sp)   | ...FP2 SAVED
 571         lea     LOGTBL,%a0
 572         addal   %d0,%a0         | ...A0 IS ADDRESS OF 1/F
 573         fmoves  zero,%fp1       | ...FP1 IS K = 0
 574         bra     LP1CONT1
 575 
 576 LP1NEG0:
 577 |--FPCR SAVED. D0 IS X IN COMPACT FORM.
 578         cmpil   #0,%d0
 579         blts    LP1NEG
 580 LP1ZERO:
 581         fmoves  negone,%fp0
 582 
 583         fmovel  %d1,%fpcr
 584         bra t_dz
 585 
 586 LP1NEG:
 587         fmoves  zero,%fp0
 588 
 589         fmovel  %d1,%fpcr
 590         bra     t_operr
 591 
 592         |end

/* [previous][next][first][last][top][bottom][index][help] */