kernel/FPU-emu/wm

/* */
   1         .file   "wm_sqrt.S"
   2 /*---------------------------------------------------------------------------+
   3  |  wm_sqrt.S                                                                |
   4  |                                                                           |
   5  | Fixed point arithmetic square root evaluation.                            |
   6  |                                                                           |
   7  | Copyright (C) 1992,1993                                                   |
   8  |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
   9  |                       Australia.  E-mail apm233m@vaxc.cc.monash.edu.au    |
  10  |                                                                           |
  11  | Call from C as:                                                           |
  12  |   void wm_sqrt(FPU_REG *n, unsigned int control_word)                     |
  13  |                                                                           |
  14  +---------------------------------------------------------------------------*/
  15 
  16 /*---------------------------------------------------------------------------+
  17  |  wm_sqrt(FPU_REG *n, unsigned int control_word)                           |
  18  |    returns the square root of n in n.                                     |
  19  |                                                                           |
  20  |  Use Newton's method to compute the square root of a number, which must   |
  21  |  be in the range  [1.0 .. 4.0),  to 64 bits accuracy.                     |
  22  |  Does not check the sign or tag of the argument.                          |
  23  |  Sets the exponent, but not the sign or tag of the result.                |
  24  |                                                                           |
  25  |  The guess is kept in %esi:%edi                                           |
  26  +---------------------------------------------------------------------------*/
  27 
  28 #include "exception.h"
  29 #include "fpu_asm.h"
  30 
  31 
  32 .data
  33 /*
  34         Local storage:
  35  */
  36         .align 4,0
  37 accum_3:
  38         .long   0               // ms word
  39 accum_2:
  40         .long   0
  41 accum_1:
  42         .long   0
  43 accum_0:
  44         .long   0
  45 
  46 // The de-normalised argument:
  47 //                  sq_2                  sq_1              sq_0
  48 //        b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
  49 //           ^ binary point here
  50 fsqrt_arg_2:
  51         .long   0               // ms word
  52 fsqrt_arg_1:
  53         .long   0
  54 fsqrt_arg_0:
  55         .long   0               // ls word, at most the ms bit is set
  56 
  57 .text
  58         .align 2,144
  59 
  60 .globl _wm_sqrt
  61 
  62 _wm_sqrt:
  63         pushl   %ebp
  64         movl    %esp,%ebp
  65         pushl   %esi
  66         pushl   %edi
  67         pushl   %ebx
  68 
  69         movl    PARAM1,%esi
  70 
  71         movl    SIGH(%esi),%eax
  72         movl    SIGL(%esi),%ecx
  73         xorl    %edx,%edx
  74 
  75 // We use a rough linear estimate for the first guess..
  76 
  77         cmpl    EXP_BIAS,EXP(%esi)
  78         jnz     sqrt_arg_ge_2
  79 
  80         shrl    $1,%eax                 /* arg is in the range  [1.0 .. 2.0) */
  81         rcrl    $1,%ecx
  82         rcrl    $1,%edx
  83 
  84 sqrt_arg_ge_2:
  85 // From here on, n is never accessed directly again until it is
  86 // replaced by the answer.
  87 
  88         movl    %eax,fsqrt_arg_2                // ms word of n
  89         movl    %ecx,fsqrt_arg_1
  90         movl    %edx,fsqrt_arg_0
  91 
  92 // Make a linear first estimate
  93         shrl    $1,%eax
  94         addl    $0x40000000,%eax
  95         movl    $0xaaaaaaaa,%ecx
  96         mull    %ecx
  97         shll    %edx                    /* max result was 7fff... */
  98         testl   $0x80000000,%edx        /* but min was 3fff... */
  99         jnz     sqrt_prelim_no_adjust
 100 
 101         movl    $0x80000000,%edx        /* round up */
 102 
 103 sqrt_prelim_no_adjust:
 104         movl    %edx,%esi       // Our first guess
 105 
 106 /* We have now computed (approx)   (2 + x) / 3, which forms the basis
 107    for a few iterations of Newton's method */
 108 
 109         movl    fsqrt_arg_2,%ecx        // ms word
 110 
 111 // From our initial estimate, three iterations are enough to get us
 112 // to 30 bits or so. This will then allow two iterations at better
 113 // precision to complete the process.
 114 
 115 // Compute  (g + n/g)/2  at each iteration (g is the guess).
 116         shrl    %ecx            // Doing this first will prevent a divide
 117                                 // overflow later.
 118 
 119         movl    %ecx,%edx       // msw of the arg / 2
 120         divl    %esi            // current estimate
 121         shrl    %esi            // divide by 2
 122         addl    %eax,%esi       // the new estimate
 123 
 124         movl    %ecx,%edx
 125         divl    %esi
 126         shrl    %esi
 127         addl    %eax,%esi
 128 
 129         movl    %ecx,%edx
 130         divl    %esi
 131         shrl    %esi
 132         addl    %eax,%esi
 133 
 134 // Now that an estimate accurate to about 30 bits has been obtained (in %esi),
 135 // we improve it to 60 bits or so.
 136 
 137 // The strategy from now on is to compute new estimates from
 138 //      guess := guess + (n - guess^2) / (2 * guess)
 139 
 140 // First, find the square of the guess
 141         movl    %esi,%eax
 142         mull    %esi
 143 // guess^2 now in %edx:%eax
 144 
 145         movl    fsqrt_arg_1,%ecx
 146         subl    %ecx,%eax
 147         movl    fsqrt_arg_2,%ecx        // ms word of normalized n
 148         sbbl    %ecx,%edx
 149         jnc     sqrt_stage_2_positive
 150 
 151 // subtraction gives a negative result
 152 // negate the result before division
 153         notl    %edx
 154         notl    %eax
 155         addl    $1,%eax
 156         adcl    $0,%edx
 157 
 158         divl    %esi
 159         movl    %eax,%ecx
 160 
 161         movl    %edx,%eax
 162         divl    %esi
 163         jmp     sqrt_stage_2_finish
 164 
 165 sqrt_stage_2_positive:
 166         divl    %esi
 167         movl    %eax,%ecx
 168 
 169         movl    %edx,%eax
 170         divl    %esi
 171 
 172         notl    %ecx
 173         notl    %eax
 174         addl    $1,%eax
 175         adcl    $0,%ecx
 176 
 177 sqrt_stage_2_finish:
 178         sarl    $1,%ecx         // divide by 2
 179         rcrl    $1,%eax
 180 
 181         // Form the new estimate in %esi:%edi
 182         movl    %eax,%edi
 183         addl    %ecx,%esi
 184 
 185         jnz     sqrt_stage_2_done       // result should be [1..2)
 186 
 187 #ifdef PARANOID
 188 // It should be possible to get here only if the arg is ffff....ffff
 189         cmp     $0xffffffff,fsqrt_arg_1
 190         jnz     sqrt_stage_2_error
 191 #endif PARANOID
 192 
 193 // The best rounded result.
 194         xorl    %eax,%eax
 195         decl    %eax
 196         movl    %eax,%edi
 197         movl    %eax,%esi
 198         movl    $0x7fffffff,%eax
 199         jmp     sqrt_round_result
 200 
 201 #ifdef PARANOID
 202 sqrt_stage_2_error:
 203         pushl   EX_INTERNAL|0x213
 204         call    EXCEPTION
 205 #endif PARANOID
 206 
 207 sqrt_stage_2_done:
 208 
 209 // Now the square root has been computed to better than 60 bits
 210 
 211 // Find the square of the guess
 212         movl    %edi,%eax               // ls word of guess
 213         mull    %edi
 214         movl    %edx,accum_1
 215 
 216         movl    %esi,%eax
 217         mull    %esi
 218         movl    %edx,accum_3
 219         movl    %eax,accum_2
 220 
 221         movl    %edi,%eax
 222         mull    %esi
 223         addl    %eax,accum_1
 224         adcl    %edx,accum_2
 225         adcl    $0,accum_3
 226 
 227 //      movl    %esi,%eax
 228 //      mull    %edi
 229         addl    %eax,accum_1
 230         adcl    %edx,accum_2
 231         adcl    $0,accum_3
 232 
 233 // guess^2 now in accum_3:accum_2:accum_1
 234 
 235         movl    fsqrt_arg_0,%eax                // get normalized n
 236         subl    %eax,accum_1
 237         movl    fsqrt_arg_1,%eax
 238         sbbl    %eax,accum_2
 239         movl    fsqrt_arg_2,%eax                // ms word of normalized n
 240         sbbl    %eax,accum_3
 241         jnc     sqrt_stage_3_positive
 242 
 243 // subtraction gives a negative result
 244 // negate the result before division
 245         notl    accum_1
 246         notl    accum_2
 247         notl    accum_3
 248         addl    $1,accum_1
 249         adcl    $0,accum_2
 250 
 251 #ifdef PARANOID
 252         adcl    $0,accum_3      // This must be zero
 253         jz      sqrt_stage_3_no_error
 254 
 255 sqrt_stage_3_error:
 256         pushl   EX_INTERNAL|0x207
 257         call    EXCEPTION
 258 
 259 sqrt_stage_3_no_error:
 260 #endif PARANOID
 261 
 262         movl    accum_2,%edx
 263         movl    accum_1,%eax
 264         divl    %esi
 265         movl    %eax,%ecx
 266 
 267         movl    %edx,%eax
 268         divl    %esi
 269 
 270         sarl    $1,%ecx         // divide by 2
 271         rcrl    $1,%eax
 272 
 273         // prepare to round the result
 274 
 275         addl    %ecx,%edi
 276         adcl    $0,%esi
 277 
 278         jmp     sqrt_stage_3_finished
 279 
 280 sqrt_stage_3_positive:
 281         movl    accum_2,%edx
 282         movl    accum_1,%eax
 283         divl    %esi
 284         movl    %eax,%ecx
 285 
 286         movl    %edx,%eax
 287         divl    %esi
 288 
 289         sarl    $1,%ecx         // divide by 2
 290         rcrl    $1,%eax
 291 
 292         // prepare to round the result
 293 
 294         notl    %eax            // Negate the correction term
 295         notl    %ecx
 296         addl    $1,%eax
 297         adcl    $0,%ecx         // carry here ==> correction == 0
 298         adcl    $0xffffffff,%esi
 299 
 300         addl    %ecx,%edi
 301         adcl    $0,%esi
 302 
 303 sqrt_stage_3_finished:
 304 
 305 // The result in %esi:%edi:%esi should be good to about 90 bits here,
 306 // and the rounding information here does not have sufficient accuracy
 307 // in a few rare cases.
 308         cmpl    $0xffffffe0,%eax
 309         ja      sqrt_near_exact_x
 310 
 311         cmpl    $0x00000020,%eax
 312         jb      sqrt_near_exact
 313 
 314         cmpl    $0x7fffffe0,%eax
 315         jb      sqrt_round_result
 316 
 317         cmpl    $0x80000020,%eax
 318         jb      sqrt_get_more_precision
 319 
 320 sqrt_round_result:
 321 // Set up for rounding operations
 322         movl    %eax,%edx
 323         movl    %esi,%eax
 324         movl    %edi,%ebx
 325         movl    PARAM1,%edi
 326         movl    EXP_BIAS,EXP(%edi)      // Result is in  [1.0 .. 2.0)
 327         movl    PARAM2,%ecx
 328         jmp     FPU_round_sqrt
 329 
 330 
 331 sqrt_near_exact_x:
 332 // First, the estimate must be rounded up.
 333         addl    $1,%edi
 334         adcl    $0,%esi
 335 
 336 sqrt_near_exact:
 337 // This is an easy case because x^1/2 is monotonic.
 338 // We need just find the square of our estimate, compare it
 339 // with the argument, and deduce whether our estimate is
 340 // above, below, or exact. We use the fact that the estimate
 341 // is known to be accurate to about 90 bits.
 342         movl    %edi,%eax               // ls word of guess
 343         mull    %edi
 344         movl    %edx,%ebx               // 2nd ls word of square
 345         movl    %eax,%ecx               // ls word of square
 346 
 347         movl    %edi,%eax
 348         mull    %esi
 349         addl    %eax,%ebx
 350         addl    %eax,%ebx
 351 
 352 #ifdef PARANOID
 353         cmp     $0xffffffb0,%ebx
 354         jb      sqrt_near_exact_ok
 355 
 356         cmp     $0x00000050,%ebx
 357         ja      sqrt_near_exact_ok
 358 
 359         pushl   EX_INTERNAL|0x214
 360         call    EXCEPTION
 361 
 362 sqrt_near_exact_ok:
 363 #endif PARANOID
 364 
 365         or      %ebx,%ebx
 366         js      sqrt_near_exact_small
 367 
 368         jnz     sqrt_near_exact_large
 369 
 370         or      %ebx,%edx
 371         jnz     sqrt_near_exact_large
 372 
 373 // Our estimate is exactly the right answer
 374         xorl    %eax,%eax
 375         jmp     sqrt_round_result
 376 
 377 sqrt_near_exact_small:
 378 // Our estimate is too small
 379         movl    $0x000000ff,%eax
 380         jmp     sqrt_round_result
 381         
 382 sqrt_near_exact_large:
 383 // Our estimate is too large, we need to decrement it
 384         subl    $1,%edi
 385         sbbl    $0,%esi
 386         movl    $0xffffff00,%eax
 387         jmp     sqrt_round_result
 388 
 389 
 390 sqrt_get_more_precision:
 391 // This case is almost the same as the above, except we start
 392 // with an extra bit of precision in the estimate.
 393         stc                     // The extra bit.
 394         rcll    $1,%edi         // Shift the estimate left one bit
 395         rcll    $1,%esi
 396 
 397         movl    %edi,%eax               // ls word of guess
 398         mull    %edi
 399         movl    %edx,%ebx               // 2nd ls word of square
 400         movl    %eax,%ecx               // ls word of square
 401 
 402         movl    %edi,%eax
 403         mull    %esi
 404         addl    %eax,%ebx
 405         addl    %eax,%ebx
 406 
 407 // Put our estimate back to its original value
 408         stc                     // The ms bit.
 409         rcrl    $1,%esi         // Shift the estimate left one bit
 410         rcrl    $1,%edi
 411 
 412 #ifdef PARANOID
 413         cmp     $0xffffff60,%ebx
 414         jb      sqrt_more_prec_ok
 415 
 416         cmp     $0x000000a0,%ebx
 417         ja      sqrt_more_prec_ok
 418 
 419         pushl   EX_INTERNAL|0x215
 420         call    EXCEPTION
 421 
 422 sqrt_more_prec_ok:
 423 #endif PARANOID
 424 
 425         or      %ebx,%ebx
 426         js      sqrt_more_prec_small
 427 
 428         jnz     sqrt_more_prec_large
 429 
 430         or      %ebx,%ecx
 431         jnz     sqrt_more_prec_large
 432 
 433 // Our estimate is exactly the right answer
 434         movl    $0x80000000,%eax
 435         jmp     sqrt_round_result
 436 
 437 sqrt_more_prec_small:
 438 // Our estimate is too small
 439         movl    $0x800000ff,%eax
 440         jmp     sqrt_round_result
 441         
 442 sqrt_more_prec_large:
 443 // Our estimate is too large
 444         movl    $0x7fffff00,%eax
 445         jmp     sqrt_round_result
/* */
root/kernel/FPU-emu/wm_sqrt.S