1 | 2 | slogn.sa 3.1 12/10/90 3 | 4 | slogn computes the natural logarithm of an 5 | input value. slognd does the same except the input value is a 6 | denormalized number. slognp1 computes log(1+X), and slognp1d 7 | computes log(1+X) for denormalized X. 8 | 9 | Input: Double-extended value in memory location pointed to by address 10 | register a0. 11 | 12 | Output: log(X) or log(1+X) returned in floating-point register Fp0. 13 | 14 | Accuracy and Monotonicity: The returned result is within 2 ulps in 15 | 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the 16 | result is subsequently rounded to double precision. The 17 | result is provably monotonic in double precision. 18 | 19 | Speed: The program slogn takes approximately 190 cycles for input 20 | argument X such that |X-1| >= 1/16, which is the the usual 21 | situation. For those arguments, slognp1 takes approximately 22 | 210 cycles. For the less common arguments, the program will 23 | run no worse than 10% slower. 24 | 25 | Algorithm: 26 | LOGN: 27 | Step 1. If |X-1| < 1/16, approximate log(X) by an odd polynomial in 28 | u, where u = 2(X-1)/(X+1). Otherwise, move on to Step 2. 29 | 30 | Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first seven 31 | significant bits of Y plus 2**(-7), i.e. F = 1.xxxxxx1 in base 32 | 2 where the six "x" match those of Y. Note that |Y-F| <= 2**(-7). 33 | 34 | Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a polynomial in u, 35 | log(1+u) = poly. 36 | 37 | Step 4. Reconstruct log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u) 38 | by k*log(2) + (log(F) + poly). The values of log(F) are calculated 39 | beforehand and stored in the program. 40 | 41 | lognp1: 42 | Step 1: If |X| < 1/16, approximate log(1+X) by an odd polynomial in 43 | u where u = 2X/(2+X). Otherwise, move on to Step 2. 44 | 45 | Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done in Step 2 46 | of the algorithm for LOGN and compute log(1+X) as 47 | k*log(2) + log(F) + poly where poly approximates log(1+u), 48 | u = (Y-F)/F. 49 | 50 | Implementation Notes: 51 | Note 1. There are 64 different possible values for F, thus 64 log(F)'s 52 | need to be tabulated. Moreover, the values of 1/F are also 53 | tabulated so that the division in (Y-F)/F can be performed by a 54 | multiplication. 55 | 56 | Note 2. In Step 2 of lognp1, in order to preserved accuracy, the value 57 | Y-F has to be calculated carefully when 1/2 <= X < 3/2. 58 | 59 | Note 3. To fully exploit the pipeline, polynomials are usually separated 60 | into two parts evaluated independently before being added up. 61 | 62 63 | Copyright (C) Motorola, Inc. 1990 64 | All Rights Reserved 65 | 66 | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA 67 | The copyright notice above does not evidence any 68 | actual or intended publication of such source code. 69 70 |slogn idnt 2,1 | Motorola 040 Floating Point Software Package 71 72 |section 8 73 74 .include "fpsp.h" 75 76 BOUNDS1: .long 0x3FFEF07D,0x3FFF8841 77 BOUNDS2: .long 0x3FFE8000,0x3FFFC000 78 79 LOGOF2: .long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000 80 81 one: .long 0x3F800000 82 zero: .long 0x00000000 83 infty: .long 0x7F800000 84 negone: .long 0xBF800000 85 86 LOGA6: .long 0x3FC2499A,0xB5E4040B 87 LOGA5: .long 0xBFC555B5,0x848CB7DB 88 89 LOGA4: .long 0x3FC99999,0x987D8730 90 LOGA3: .long 0xBFCFFFFF,0xFF6F7E97 91 92 LOGA2: .long 0x3FD55555,0x555555a4 93 LOGA1: .long 0xBFE00000,0x00000008 94 95 LOGB5: .long 0x3F175496,0xADD7DAD6 96 LOGB4: .long 0x3F3C71C2,0xFE80C7E0 97 98 LOGB3: .long 0x3F624924,0x928BCCFF 99 LOGB2: .long 0x3F899999,0x999995EC 100 101 LOGB1: .long 0x3FB55555,0x55555555 102 TWO: .long 0x40000000,0x00000000 103 104 LTHOLD: .long 0x3f990000,0x80000000,0x00000000,0x00000000 105 106 LOGTBL: 107 .long 0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000 108 .long 0x3FF70000,0xFF015358,0x833C47E2,0x00000000 109 .long 0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000 110 .long 0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000 111 .long 0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000 112 .long 0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000 113 .long 0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000 114 .long 0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000 115 .long 0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000 116 .long 0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000 117 .long 0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000 118 .long 0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000 119 .long 0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000 120 .long 0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000 121 .long 0x3FFE0000,0xE525982A,0xF70C880E,0x00000000 122 .long 0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000 123 .long 0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000 124 .long 0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000 125 .long 0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000 126 .long 0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000 127 .long 0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000 128 .long 0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000 129 .long 0x3FFE0000,0xD901B203,0x6406C80E,0x00000000 130 .long 0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000 131 .long 0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000 132 .long 0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000 133 .long 0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000 134 .long 0x3FFC0000,0xC3FD0329,0x06488481,0x00000000 135 .long 0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000 136 .long 0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000 137 .long 0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000 138 .long 0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000 139 .long 0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000 140 .long 0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000 141 .long 0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000 142 .long 0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000 143 .long 0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000 144 .long 0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000 145 .long 0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000 146 .long 0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000 147 .long 0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000 148 .long 0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000 149 .long 0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000 150 .long 0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000 151 .long 0x3FFE0000,0xBD691047,0x07661AA3,0x00000000 152 .long 0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000 153 .long 0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000 154 .long 0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000 155 .long 0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000 156 .long 0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000 157 .long 0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000 158 .long 0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000 159 .long 0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000 160 .long 0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000 161 .long 0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000 162 .long 0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000 163 .long 0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000 164 .long 0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000 165 .long 0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000 166 .long 0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000 167 .long 0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000 168 .long 0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000 169 .long 0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000 170 .long 0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000 171 .long 0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000 172 .long 0x3FFD0000,0xD2420487,0x2DD85160,0x00000000 173 .long 0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000 174 .long 0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000 175 .long 0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000 176 .long 0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000 177 .long 0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000 178 .long 0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000 179 .long 0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000 180 .long 0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000 181 .long 0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000 182 .long 0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000 183 .long 0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000 184 .long 0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000 185 .long 0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000 186 .long 0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000 187 .long 0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000 188 .long 0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000 189 .long 0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000 190 .long 0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000 191 .long 0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000 192 .long 0x3FFE0000,0x825EFCED,0x49369330,0x00000000 193 .long 0x3FFE0000,0x9868C809,0x868C8098,0x00000000 194 .long 0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000 195 .long 0x3FFE0000,0x97012E02,0x5C04B809,0x00000000 196 .long 0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000 197 .long 0x3FFE0000,0x95A02568,0x095A0257,0x00000000 198 .long 0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000 199 .long 0x3FFE0000,0x94458094,0x45809446,0x00000000 200 .long 0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000 201 .long 0x3FFE0000,0x92F11384,0x0497889C,0x00000000 202 .long 0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000 203 .long 0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000 204 .long 0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000 205 .long 0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000 206 .long 0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000 207 .long 0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000 208 .long 0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000 209 .long 0x3FFE0000,0x8DDA5202,0x37694809,0x00000000 210 .long 0x3FFE0000,0x9723A1B7,0x20134203,0x00000000 211 .long 0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000 212 .long 0x3FFE0000,0x995899C8,0x90EB8990,0x00000000 213 .long 0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000 214 .long 0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000 215 .long 0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000 216 .long 0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000 217 .long 0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000 218 .long 0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000 219 .long 0x3FFE0000,0x87F78087,0xF78087F8,0x00000000 220 .long 0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000 221 .long 0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000 222 .long 0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000 223 .long 0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000 224 .long 0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000 225 .long 0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000 226 .long 0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000 227 .long 0x3FFE0000,0x83993052,0x3FBE3368,0x00000000 228 .long 0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000 229 .long 0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000 230 .long 0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000 231 .long 0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000 232 .long 0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000 233 .long 0x3FFE0000,0x80808080,0x80808081,0x00000000 234 .long 0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000 235 236 .set ADJK,L_SCR1 237 238 .set X,FP_SCR1 239 .set XDCARE,X+2 240 .set XFRAC,X+4 241 242 .set F,FP_SCR2 243 .set FFRAC,F+4 244 245 .set KLOG2,FP_SCR3 246 247 .set SAVEU,FP_SCR4 248 249 | xref t_frcinx 250 |xref t_extdnrm 251 |xref t_operr 252 |xref t_dz 253 254 .global slognd 255 slognd: 256 |--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT 257 258 movel #-100,ADJK(%a6) | ...INPUT = 2^(ADJK) * FP0 259 260 |----normalize the input value by left shifting k bits (k to be determined 261 |----below), adjusting exponent and storing -k to ADJK 262 |----the value TWOTO100 is no longer needed. 263 |----Note that this code assumes the denormalized input is NON-ZERO. 264 265 moveml %d2-%d7,-(%a7) | ...save some registers 266 movel #0x00000000,%d3 | ...D3 is exponent of smallest norm. # 267 movel 4(%a0),%d4 268 movel 8(%a0),%d5 | ...(D4,D5) is (Hi_X,Lo_X) 269 clrl %d2 | ...D2 used for holding K 270 271 tstl %d4 272 bnes HiX_not0 273 274 HiX_0: 275 movel %d5,%d4 276 clrl %d5 277 movel #32,%d2 278 clrl %d6 279 bfffo %d4{#0:#32},%d6 280 lsll %d6,%d4 281 addl %d6,%d2 | ...(D3,D4,D5) is normalized 282 283 movel %d3,X(%a6) 284 movel %d4,XFRAC(%a6) 285 movel %d5,XFRAC+4(%a6) 286 negl %d2 287 movel %d2,ADJK(%a6) 288 fmovex X(%a6),%fp0 289 moveml (%a7)+,%d2-%d7 | ...restore registers 290 lea X(%a6),%a0 291 bras LOGBGN | ...begin regular log(X) 292 293 294 HiX_not0: 295 clrl %d6 296 bfffo %d4{#0:#32},%d6 | ...find first 1 297 movel %d6,%d2 | ...get k 298 lsll %d6,%d4 299 movel %d5,%d7 | ...a copy of D5 300 lsll %d6,%d5 301 negl %d6 302 addil #32,%d6 303 lsrl %d6,%d7 304 orl %d7,%d4 | ...(D3,D4,D5) normalized 305 306 movel %d3,X(%a6) 307 movel %d4,XFRAC(%a6) 308 movel %d5,XFRAC+4(%a6) 309 negl %d2 310 movel %d2,ADJK(%a6) 311 fmovex X(%a6),%fp0 312 moveml (%a7)+,%d2-%d7 | ...restore registers 313 lea X(%a6),%a0 314 bras LOGBGN | ...begin regular log(X) 315 316 317 .global slogn 318 slogn: 319 |--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S 320 321 fmovex (%a0),%fp0 | ...LOAD INPUT 322 movel #0x00000000,ADJK(%a6) 323 324 LOGBGN: 325 |--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS 326 |--A FINITE, NON-ZERO, NORMALIZED NUMBER. 327 328 movel (%a0),%d0 329 movew 4(%a0),%d0 330 331 movel (%a0),X(%a6) 332 movel 4(%a0),X+4(%a6) 333 movel 8(%a0),X+8(%a6) 334 335 cmpil #0,%d0 | ...CHECK IF X IS NEGATIVE 336 blt LOGNEG | ...LOG OF NEGATIVE ARGUMENT IS INVALID 337 cmp2l BOUNDS1,%d0 | ...X IS POSITIVE, CHECK IF X IS NEAR 1 338 bcc LOGNEAR1 | ...BOUNDS IS ROUGHLY [15/16, 17/16] 339 340 LOGMAIN: 341 |--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1 342 343 |--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY. 344 |--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1. 345 |--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y) 346 |-- = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F). 347 |--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING 348 |--LOG(1+U) CAN BE VERY EFFICIENT. 349 |--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO 350 |--DIVISION IS NEEDED TO CALCULATE (Y-F)/F. 351 352 |--GET K, Y, F, AND ADDRESS OF 1/F. 353 asrl #8,%d0 354 asrl #8,%d0 | ...SHIFTED 16 BITS, BIASED EXPO. OF X 355 subil #0x3FFF,%d0 | ...THIS IS K 356 addl ADJK(%a6),%d0 | ...ADJUST K, ORIGINAL INPUT MAY BE DENORM. 357 lea LOGTBL,%a0 | ...BASE ADDRESS OF 1/F AND LOG(F) 358 fmovel %d0,%fp1 | ...CONVERT K TO FLOATING-POINT FORMAT 359 360 |--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F 361 movel #0x3FFF0000,X(%a6) | ...X IS NOW Y, I.E. 2^(-K)*X 362 movel XFRAC(%a6),FFRAC(%a6) 363 andil #0xFE000000,FFRAC(%a6) | ...FIRST 7 BITS OF Y 364 oril #0x01000000,FFRAC(%a6) | ...GET F: ATTACH A 1 AT THE EIGHTH BIT 365 movel FFRAC(%a6),%d0 | ...READY TO GET ADDRESS OF 1/F 366 andil #0x7E000000,%d0 367 asrl #8,%d0 368 asrl #8,%d0 369 asrl #4,%d0 | ...SHIFTED 20, D0 IS THE DISPLACEMENT 370 addal %d0,%a0 | ...A0 IS THE ADDRESS FOR 1/F 371 372 fmovex X(%a6),%fp0 373 movel #0x3fff0000,F(%a6) 374 clrl F+8(%a6) 375 fsubx F(%a6),%fp0 | ...Y-F 376 fmovemx %fp2-%fp2/%fp3,-(%sp) | ...SAVE FP2 WHILE FP0 IS NOT READY 377 |--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K 378 |--REGISTERS SAVED: FPCR, FP1, FP2 379 380 LP1CONT1: 381 |--AN RE-ENTRY POINT FOR LOGNP1 382 fmulx (%a0),%fp0 | ...FP0 IS U = (Y-F)/F 383 fmulx LOGOF2,%fp1 | ...GET K*LOG2 WHILE FP0 IS NOT READY 384 fmovex %fp0,%fp2 385 fmulx %fp2,%fp2 | ...FP2 IS V=U*U 386 fmovex %fp1,KLOG2(%a6) | ...PUT K*LOG2 IN MEMORY, FREE FP1 387 388 |--LOG(1+U) IS APPROXIMATED BY 389 |--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS 390 |--[U + V*(A1+V*(A3+V*A5))] + [U*V*(A2+V*(A4+V*A6))] 391 392 fmovex %fp2,%fp3 393 fmovex %fp2,%fp1 394 395 fmuld LOGA6,%fp1 | ...V*A6 396 fmuld LOGA5,%fp2 | ...V*A5 397 398 faddd LOGA4,%fp1 | ...A4+V*A6 399 faddd LOGA3,%fp2 | ...A3+V*A5 400 401 fmulx %fp3,%fp1 | ...V*(A4+V*A6) 402 fmulx %fp3,%fp2 | ...V*(A3+V*A5) 403 404 faddd LOGA2,%fp1 | ...A2+V*(A4+V*A6) 405 faddd LOGA1,%fp2 | ...A1+V*(A3+V*A5) 406 407 fmulx %fp3,%fp1 | ...V*(A2+V*(A4+V*A6)) 408 addal #16,%a0 | ...ADDRESS OF LOG(F) 409 fmulx %fp3,%fp2 | ...V*(A1+V*(A3+V*A5)), FP3 RELEASED 410 411 fmulx %fp0,%fp1 | ...U*V*(A2+V*(A4+V*A6)) 412 faddx %fp2,%fp0 | ...U+V*(A1+V*(A3+V*A5)), FP2 RELEASED 413 414 faddx (%a0),%fp1 | ...LOG(F)+U*V*(A2+V*(A4+V*A6)) 415 fmovemx (%sp)+,%fp2-%fp2/%fp3 | ...RESTORE FP2 416 faddx %fp1,%fp0 | ...FP0 IS LOG(F) + LOG(1+U) 417 418 fmovel %d1,%fpcr 419 faddx KLOG2(%a6),%fp0 | ...FINAL ADD 420 bra t_frcinx 421 422 423 LOGNEAR1: 424 |--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT. 425 fmovex %fp0,%fp1 426 fsubs one,%fp1 | ...FP1 IS X-1 427 fadds one,%fp0 | ...FP0 IS X+1 428 faddx %fp1,%fp1 | ...FP1 IS 2(X-1) 429 |--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL 430 |--IN U, U = 2(X-1)/(X+1) = FP1/FP0 431 432 LP1CONT2: 433 |--THIS IS AN RE-ENTRY POINT FOR LOGNP1 434 fdivx %fp0,%fp1 | ...FP1 IS U 435 fmovemx %fp2-%fp2/%fp3,-(%sp) | ...SAVE FP2 436 |--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3 437 |--LET V=U*U, W=V*V, CALCULATE 438 |--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY 439 |--U + U*V*( [B1 + W*(B3 + W*B5)] + [V*(B2 + W*B4)] ) 440 fmovex %fp1,%fp0 441 fmulx %fp0,%fp0 | ...FP0 IS V 442 fmovex %fp1,SAVEU(%a6) | ...STORE U IN MEMORY, FREE FP1 443 fmovex %fp0,%fp1 444 fmulx %fp1,%fp1 | ...FP1 IS W 445 446 fmoved LOGB5,%fp3 447 fmoved LOGB4,%fp2 448 449 fmulx %fp1,%fp3 | ...W*B5 450 fmulx %fp1,%fp2 | ...W*B4 451 452 faddd LOGB3,%fp3 | ...B3+W*B5 453 faddd LOGB2,%fp2 | ...B2+W*B4 454 455 fmulx %fp3,%fp1 | ...W*(B3+W*B5), FP3 RELEASED 456 457 fmulx %fp0,%fp2 | ...V*(B2+W*B4) 458 459 faddd LOGB1,%fp1 | ...B1+W*(B3+W*B5) 460 fmulx SAVEU(%a6),%fp0 | ...FP0 IS U*V 461 462 faddx %fp2,%fp1 | ...B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED 463 fmovemx (%sp)+,%fp2-%fp2/%fp3 | ...FP2 RESTORED 464 465 fmulx %fp1,%fp0 | ...U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] ) 466 467 fmovel %d1,%fpcr 468 faddx SAVEU(%a6),%fp0 469 bra t_frcinx 470 rts 471 472 LOGNEG: 473 |--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID 474 bra t_operr 475 476 .global slognp1d 477 slognp1d: 478 |--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT 479 | Simply return the denorm 480 481 bra t_extdnrm 482 483 .global slognp1 484 slognp1: 485 |--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S 486 487 fmovex (%a0),%fp0 | ...LOAD INPUT 488 fabsx %fp0 |test magnitude 489 fcmpx LTHOLD,%fp0 |compare with min threshold 490 fbgt LP1REAL |if greater, continue 491 fmovel #0,%fpsr |clr N flag from compare 492 fmovel %d1,%fpcr 493 fmovex (%a0),%fp0 |return signed argument 494 bra t_frcinx 495 496 LP1REAL: 497 fmovex (%a0),%fp0 | ...LOAD INPUT 498 movel #0x00000000,ADJK(%a6) 499 fmovex %fp0,%fp1 | ...FP1 IS INPUT Z 500 fadds one,%fp0 | ...X := ROUND(1+Z) 501 fmovex %fp0,X(%a6) 502 movew XFRAC(%a6),XDCARE(%a6) 503 movel X(%a6),%d0 504 cmpil #0,%d0 505 ble LP1NEG0 | ...LOG OF ZERO OR -VE 506 cmp2l BOUNDS2,%d0 507 bcs LOGMAIN | ...BOUNDS2 IS [1/2,3/2] 508 |--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z, 509 |--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE, 510 |--SIMPLY INVOKE LOG(X) FOR LOG(1+Z). 511 512 LP1NEAR1: 513 |--NEXT SEE IF EXP(-1/16) < X < EXP(1/16) 514 cmp2l BOUNDS1,%d0 515 bcss LP1CARE 516 517 LP1ONE16: 518 |--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2) 519 |--WHERE U = 2Z/(2+Z) = 2Z/(1+X). 520 faddx %fp1,%fp1 | ...FP1 IS 2Z 521 fadds one,%fp0 | ...FP0 IS 1+X 522 |--U = FP1/FP0 523 bra LP1CONT2 524 525 LP1CARE: 526 |--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE 527 |--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST 528 |--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2], 529 |--THERE ARE ONLY TWO CASES. 530 |--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z 531 |--CASE 2: 1+Z > 1, THEN K = 0 AND Y-F = (1-F) + Z 532 |--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF 533 |--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED. 534 535 movel XFRAC(%a6),FFRAC(%a6) 536 andil #0xFE000000,FFRAC(%a6) 537 oril #0x01000000,FFRAC(%a6) | ...F OBTAINED 538 cmpil #0x3FFF8000,%d0 | ...SEE IF 1+Z > 1 539 bges KISZERO 540 541 KISNEG1: 542 fmoves TWO,%fp0 543 movel #0x3fff0000,F(%a6) 544 clrl F+8(%a6) 545 fsubx F(%a6),%fp0 | ...2-F 546 movel FFRAC(%a6),%d0 547 andil #0x7E000000,%d0 548 asrl #8,%d0 549 asrl #8,%d0 550 asrl #4,%d0 | ...D0 CONTAINS DISPLACEMENT FOR 1/F 551 faddx %fp1,%fp1 | ...GET 2Z 552 fmovemx %fp2-%fp2/%fp3,-(%sp) | ...SAVE FP2 553 faddx %fp1,%fp0 | ...FP0 IS Y-F = (2-F)+2Z 554 lea LOGTBL,%a0 | ...A0 IS ADDRESS OF 1/F 555 addal %d0,%a0 556 fmoves negone,%fp1 | ...FP1 IS K = -1 557 bra LP1CONT1 558 559 KISZERO: 560 fmoves one,%fp0 561 movel #0x3fff0000,F(%a6) 562 clrl F+8(%a6) 563 fsubx F(%a6),%fp0 | ...1-F 564 movel FFRAC(%a6),%d0 565 andil #0x7E000000,%d0 566 asrl #8,%d0 567 asrl #8,%d0 568 asrl #4,%d0 569 faddx %fp1,%fp0 | ...FP0 IS Y-F 570 fmovemx %fp2-%fp2/%fp3,-(%sp) | ...FP2 SAVED 571 lea LOGTBL,%a0 572 addal %d0,%a0 | ...A0 IS ADDRESS OF 1/F 573 fmoves zero,%fp1 | ...FP1 IS K = 0 574 bra LP1CONT1 575 576 LP1NEG0: 577 |--FPCR SAVED. D0 IS X IN COMPACT FORM. 578 cmpil #0,%d0 579 blts LP1NEG 580 LP1ZERO: 581 fmoves negone,%fp0 582 583 fmovel %d1,%fpcr 584 bra t_dz 585 586 LP1NEG: 587 fmoves zero,%fp0 588 589 fmovel %d1,%fpcr 590 bra t_operr 591 592 |end