root/arch/m68k/fpsp040/stwotox.S

/* [previous][next][first][last][top][bottom][index][help] */
   1 |
   2 |       stwotox.sa 3.1 12/10/90
   3 |
   4 |       stwotox  --- 2**X
   5 |       stwotoxd --- 2**X for denormalized X
   6 |       stentox  --- 10**X
   7 |       stentoxd --- 10**X for denormalized X
   8 |
   9 |       Input: Double-extended number X in location pointed to
  10 |               by address register a0.
  11 |
  12 |       Output: The function values are returned in Fp0.
  13 |
  14 |       Accuracy and Monotonicity: The returned result is within 2 ulps in
  15 |               64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
  16 |               result is subsequently rounded to double precision. The
  17 |               result is provably monotonic in double precision.
  18 |
  19 |       Speed: The program stwotox takes approximately 190 cycles and the
  20 |               program stentox takes approximately 200 cycles.
  21 |
  22 |       Algorithm:
  23 |
  24 |       twotox
  25 |       1. If |X| > 16480, go to ExpBig.
  26 |
  27 |       2. If |X| < 2**(-70), go to ExpSm.
  28 |
  29 |       3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore
  30 |               decompose N as
  31 |                N = 64(M + M') + j,  j = 0,1,2,...,63.
  32 |
  33 |       4. Overwrite r := r * log2. Then
  34 |               2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).
  35 |               Go to expr to compute that expression.
  36 |
  37 |       tentox
  38 |       1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig.
  39 |
  40 |       2. If |X| < 2**(-70), go to ExpSm.
  41 |
  42 |       3. Set y := X*log_2(10)*64 (base 2 log of 10). Set
  43 |               N := round-to-int(y). Decompose N as
  44 |                N = 64(M + M') + j,  j = 0,1,2,...,63.
  45 |
  46 |       4. Define r as
  47 |               r := ((X - N*L1)-N*L2) * L10
  48 |               where L1, L2 are the leading and trailing parts of log_10(2)/64
  49 |               and L10 is the natural log of 10. Then
  50 |               10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).
  51 |               Go to expr to compute that expression.
  52 |
  53 |       expr
  54 |       1. Fetch 2**(j/64) from table as Fact1 and Fact2.
  55 |
  56 |       2. Overwrite Fact1 and Fact2 by
  57 |               Fact1 := 2**(M) * Fact1
  58 |               Fact2 := 2**(M) * Fact2
  59 |               Thus Fact1 + Fact2 = 2**(M) * 2**(j/64).
  60 |
  61 |       3. Calculate P where 1 + P approximates exp(r):
  62 |               P = r + r*r*(A1+r*(A2+...+r*A5)).
  63 |
  64 |       4. Let AdjFact := 2**(M'). Return
  65 |               AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ).
  66 |               Exit.
  67 |
  68 |       ExpBig
  69 |       1. Generate overflow by Huge * Huge if X > 0; otherwise, generate
  70 |               underflow by Tiny * Tiny.
  71 |
  72 |       ExpSm
  73 |       1. Return 1 + X.
  74 |
  75 
  76 |               Copyright (C) Motorola, Inc. 1990
  77 |                       All Rights Reserved
  78 |
  79 |       THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA 
  80 |       The copyright notice above does not evidence any  
  81 |       actual or intended publication of such source code.
  82 
  83 |STWOTOX        idnt    2,1 | Motorola 040 Floating Point Software Package
  84 
  85         |section        8
  86 
  87         .include "fpsp.h"
  88 
  89 BOUNDS1:        .long 0x3FB98000,0x400D80C0 | ... 2^(-70),16480
  90 BOUNDS2:        .long 0x3FB98000,0x400B9B07 | ... 2^(-70),16480 LOG2/LOG10
  91 
  92 L2TEN64:        .long 0x406A934F,0x0979A371 | ... 64LOG10/LOG2
  93 L10TWO1:        .long 0x3F734413,0x509F8000 | ... LOG2/64LOG10
  94 
  95 L10TWO2:        .long 0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000
  96 
  97 LOG10:  .long 0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000
  98 
  99 LOG2:   .long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
 100 
 101 EXPA5:  .long 0x3F56C16D,0x6F7BD0B2
 102 EXPA4:  .long 0x3F811112,0x302C712C
 103 EXPA3:  .long 0x3FA55555,0x55554CC1
 104 EXPA2:  .long 0x3FC55555,0x55554A54
 105 EXPA1:  .long 0x3FE00000,0x00000000,0x00000000,0x00000000
 106 
 107 HUGE:   .long 0x7FFE0000,0xFFFFFFFF,0xFFFFFFFF,0x00000000
 108 TINY:   .long 0x00010000,0xFFFFFFFF,0xFFFFFFFF,0x00000000
 109 
 110 EXPTBL:
 111         .long  0x3FFF0000,0x80000000,0x00000000,0x3F738000
 112         .long  0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA
 113         .long  0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9
 114         .long  0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9
 115         .long  0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA
 116         .long  0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C
 117         .long  0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1
 118         .long  0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA
 119         .long  0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373
 120         .long  0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670
 121         .long  0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700
 122         .long  0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0
 123         .long  0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D
 124         .long  0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319
 125         .long  0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B
 126         .long  0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5
 127         .long  0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A
 128         .long  0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B
 129         .long  0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF
 130         .long  0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA
 131         .long  0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD
 132         .long  0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E
 133         .long  0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B
 134         .long  0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB
 135         .long  0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB
 136         .long  0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274
 137         .long  0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C
 138         .long  0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00
 139         .long  0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301
 140         .long  0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367
 141         .long  0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F
 142         .long  0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C
 143         .long  0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB
 144         .long  0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB
 145         .long  0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C
 146         .long  0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA
 147         .long  0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD
 148         .long  0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51
 149         .long  0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A
 150         .long  0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2
 151         .long  0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB
 152         .long  0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17
 153         .long  0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C
 154         .long  0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8
 155         .long  0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53
 156         .long  0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE
 157         .long  0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124
 158         .long  0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243
 159         .long  0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A
 160         .long  0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61
 161         .long  0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610
 162         .long  0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1
 163         .long  0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12
 164         .long  0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE
 165         .long  0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4
 166         .long  0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F
 167         .long  0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A
 168         .long  0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A
 169         .long  0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC
 170         .long  0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F
 171         .long  0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A
 172         .long  0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795
 173         .long  0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B
 174         .long  0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581
 175 
 176         .set    N,L_SCR1
 177 
 178         .set    X,FP_SCR1
 179         .set    XDCARE,X+2
 180         .set    XFRAC,X+4
 181 
 182         .set    ADJFACT,FP_SCR2
 183 
 184         .set    FACT1,FP_SCR3
 185         .set    FACT1HI,FACT1+4
 186         .set    FACT1LOW,FACT1+8
 187 
 188         .set    FACT2,FP_SCR4
 189         .set    FACT2HI,FACT2+4
 190         .set    FACT2LOW,FACT2+8
 191 
 192         | xref  t_unfl
 193         |xref   t_ovfl
 194         |xref   t_frcinx
 195 
 196         .global stwotoxd
 197 stwotoxd:
 198 |--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT
 199 
 200         fmovel          %d1,%fpcr               | ...set user's rounding mode/precision
 201         fmoves          #0x3F800000,%fp0  | ...RETURN 1 + X
 202         movel           (%a0),%d0
 203         orl             #0x00800001,%d0
 204         fadds           %d0,%fp0
 205         bra             t_frcinx
 206 
 207         .global stwotox
 208 stwotox:
 209 |--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
 210         fmovemx (%a0),%fp0-%fp0 | ...LOAD INPUT, do not set cc's
 211 
 212         movel           (%a0),%d0
 213         movew           4(%a0),%d0
 214         fmovex          %fp0,X(%a6)
 215         andil           #0x7FFFFFFF,%d0
 216 
 217         cmpil           #0x3FB98000,%d0         | ...|X| >= 2**(-70)?
 218         bges            TWOOK1
 219         bra             EXPBORS
 220 
 221 TWOOK1:
 222         cmpil           #0x400D80C0,%d0         | ...|X| > 16480?
 223         bles            TWOMAIN
 224         bra             EXPBORS
 225         
 226 
 227 TWOMAIN:
 228 |--USUAL CASE, 2^(-70) <= |X| <= 16480
 229 
 230         fmovex          %fp0,%fp1
 231         fmuls           #0x42800000,%fp1  | ...64 * X
 232         
 233         fmovel          %fp1,N(%a6)             | ...N = ROUND-TO-INT(64 X)
 234         movel           %d2,-(%sp)
 235         lea             EXPTBL,%a1      | ...LOAD ADDRESS OF TABLE OF 2^(J/64)
 236         fmovel          N(%a6),%fp1             | ...N --> FLOATING FMT
 237         movel           N(%a6),%d0
 238         movel           %d0,%d2
 239         andil           #0x3F,%d0               | ...D0 IS J
 240         asll            #4,%d0          | ...DISPLACEMENT FOR 2^(J/64)
 241         addal           %d0,%a1         | ...ADDRESS FOR 2^(J/64)
 242         asrl            #6,%d2          | ...d2 IS L, N = 64L + J
 243         movel           %d2,%d0
 244         asrl            #1,%d0          | ...D0 IS M
 245         subl            %d0,%d2         | ...d2 IS M', N = 64(M+M') + J
 246         addil           #0x3FFF,%d2
 247         movew           %d2,ADJFACT(%a6)        | ...ADJFACT IS 2^(M')
 248         movel           (%sp)+,%d2
 249 |--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
 250 |--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
 251 |--ADJFACT = 2^(M').
 252 |--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
 253 
 254         fmuls           #0x3C800000,%fp1  | ...(1/64)*N
 255         movel           (%a1)+,FACT1(%a6)
 256         movel           (%a1)+,FACT1HI(%a6)
 257         movel           (%a1)+,FACT1LOW(%a6)
 258         movew           (%a1)+,FACT2(%a6)
 259         clrw            FACT2+2(%a6)
 260 
 261         fsubx           %fp1,%fp0               | ...X - (1/64)*INT(64 X)
 262 
 263         movew           (%a1)+,FACT2HI(%a6)
 264         clrw            FACT2HI+2(%a6)
 265         clrl            FACT2LOW(%a6)
 266         addw            %d0,FACT1(%a6)
 267         
 268         fmulx           LOG2,%fp0       | ...FP0 IS R
 269         addw            %d0,FACT2(%a6)
 270 
 271         bra             expr
 272 
 273 EXPBORS:
 274 |--FPCR, D0 SAVED
 275         cmpil           #0x3FFF8000,%d0
 276         bgts            EXPBIG
 277 
 278 EXPSM:
 279 |--|X| IS SMALL, RETURN 1 + X
 280 
 281         fmovel          %d1,%FPCR               |restore users exceptions
 282         fadds           #0x3F800000,%fp0  | ...RETURN 1 + X
 283 
 284         bra             t_frcinx
 285 
 286 EXPBIG:
 287 |--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW
 288 |--REGISTERS SAVE SO FAR ARE FPCR AND  D0
 289         movel           X(%a6),%d0
 290         cmpil           #0,%d0
 291         blts            EXPNEG
 292 
 293         bclrb           #7,(%a0)                |t_ovfl expects positive value
 294         bra             t_ovfl
 295 
 296 EXPNEG:
 297         bclrb           #7,(%a0)                |t_unfl expects positive value
 298         bra             t_unfl
 299 
 300         .global stentoxd
 301 stentoxd:
 302 |--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT
 303 
 304         fmovel          %d1,%fpcr               | ...set user's rounding mode/precision
 305         fmoves          #0x3F800000,%fp0  | ...RETURN 1 + X
 306         movel           (%a0),%d0
 307         orl             #0x00800001,%d0
 308         fadds           %d0,%fp0
 309         bra             t_frcinx
 310 
 311         .global stentox
 312 stentox:
 313 |--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
 314         fmovemx (%a0),%fp0-%fp0 | ...LOAD INPUT, do not set cc's
 315 
 316         movel           (%a0),%d0
 317         movew           4(%a0),%d0
 318         fmovex          %fp0,X(%a6)
 319         andil           #0x7FFFFFFF,%d0
 320 
 321         cmpil           #0x3FB98000,%d0         | ...|X| >= 2**(-70)?
 322         bges            TENOK1
 323         bra             EXPBORS
 324 
 325 TENOK1:
 326         cmpil           #0x400B9B07,%d0         | ...|X| <= 16480*log2/log10 ?
 327         bles            TENMAIN
 328         bra             EXPBORS
 329 
 330 TENMAIN:
 331 |--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10
 332 
 333         fmovex          %fp0,%fp1
 334         fmuld           L2TEN64,%fp1    | ...X*64*LOG10/LOG2
 335         
 336         fmovel          %fp1,N(%a6)             | ...N=INT(X*64*LOG10/LOG2)
 337         movel           %d2,-(%sp)
 338         lea             EXPTBL,%a1      | ...LOAD ADDRESS OF TABLE OF 2^(J/64)
 339         fmovel          N(%a6),%fp1             | ...N --> FLOATING FMT
 340         movel           N(%a6),%d0
 341         movel           %d0,%d2
 342         andil           #0x3F,%d0               | ...D0 IS J
 343         asll            #4,%d0          | ...DISPLACEMENT FOR 2^(J/64)
 344         addal           %d0,%a1         | ...ADDRESS FOR 2^(J/64)
 345         asrl            #6,%d2          | ...d2 IS L, N = 64L + J
 346         movel           %d2,%d0
 347         asrl            #1,%d0          | ...D0 IS M
 348         subl            %d0,%d2         | ...d2 IS M', N = 64(M+M') + J
 349         addil           #0x3FFF,%d2
 350         movew           %d2,ADJFACT(%a6)        | ...ADJFACT IS 2^(M')
 351         movel           (%sp)+,%d2
 352 
 353 |--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
 354 |--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
 355 |--ADJFACT = 2^(M').
 356 |--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
 357 
 358         fmovex          %fp1,%fp2
 359 
 360         fmuld           L10TWO1,%fp1    | ...N*(LOG2/64LOG10)_LEAD
 361         movel           (%a1)+,FACT1(%a6)
 362 
 363         fmulx           L10TWO2,%fp2    | ...N*(LOG2/64LOG10)_TRAIL
 364 
 365         movel           (%a1)+,FACT1HI(%a6)
 366         movel           (%a1)+,FACT1LOW(%a6)
 367         fsubx           %fp1,%fp0               | ...X - N L_LEAD
 368         movew           (%a1)+,FACT2(%a6)
 369 
 370         fsubx           %fp2,%fp0               | ...X - N L_TRAIL
 371 
 372         clrw            FACT2+2(%a6)
 373         movew           (%a1)+,FACT2HI(%a6)
 374         clrw            FACT2HI+2(%a6)
 375         clrl            FACT2LOW(%a6)
 376 
 377         fmulx           LOG10,%fp0      | ...FP0 IS R
 378         
 379         addw            %d0,FACT1(%a6)
 380         addw            %d0,FACT2(%a6)
 381 
 382 expr:
 383 |--FPCR, FP2, FP3 ARE SAVED IN ORDER AS SHOWN.
 384 |--ADJFACT CONTAINS 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64).
 385 |--FP0 IS R. THE FOLLOWING CODE COMPUTES
 386 |--     2**(M'+M) * 2**(J/64) * EXP(R)
 387 
 388         fmovex          %fp0,%fp1
 389         fmulx           %fp1,%fp1               | ...FP1 IS S = R*R
 390 
 391         fmoved          EXPA5,%fp2      | ...FP2 IS A5
 392         fmoved          EXPA4,%fp3      | ...FP3 IS A4
 393 
 394         fmulx           %fp1,%fp2               | ...FP2 IS S*A5
 395         fmulx           %fp1,%fp3               | ...FP3 IS S*A4
 396 
 397         faddd           EXPA3,%fp2      | ...FP2 IS A3+S*A5
 398         faddd           EXPA2,%fp3      | ...FP3 IS A2+S*A4
 399 
 400         fmulx           %fp1,%fp2               | ...FP2 IS S*(A3+S*A5)
 401         fmulx           %fp1,%fp3               | ...FP3 IS S*(A2+S*A4)
 402 
 403         faddd           EXPA1,%fp2      | ...FP2 IS A1+S*(A3+S*A5)
 404         fmulx           %fp0,%fp3               | ...FP3 IS R*S*(A2+S*A4)
 405 
 406         fmulx           %fp1,%fp2               | ...FP2 IS S*(A1+S*(A3+S*A5))
 407         faddx           %fp3,%fp0               | ...FP0 IS R+R*S*(A2+S*A4)
 408         
 409         faddx           %fp2,%fp0               | ...FP0 IS EXP(R) - 1
 410         
 411 
 412 |--FINAL RECONSTRUCTION PROCESS
 413 |--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1)  -  (1 OR 0)
 414 
 415         fmulx           FACT1(%a6),%fp0
 416         faddx           FACT2(%a6),%fp0
 417         faddx           FACT1(%a6),%fp0
 418 
 419         fmovel          %d1,%FPCR               |restore users exceptions
 420         clrw            ADJFACT+2(%a6)
 421         movel           #0x80000000,ADJFACT+4(%a6)
 422         clrl            ADJFACT+8(%a6)
 423         fmulx           ADJFACT(%a6),%fp0       | ...FINAL ADJUSTMENT
 424 
 425         bra             t_frcinx
 426 
 427         |end

/* [previous][next][first][last][top][bottom][index][help] */