root/arch/i386/math-emu/wm_sqrt.S

/* [previous][next][first][last][top][bottom][index][help] */
   1         .file   "wm_sqrt.S"
   2 /*---------------------------------------------------------------------------+
   3  |  wm_sqrt.S                                                                |
   4  |                                                                           |
   5  | Fixed point arithmetic square root evaluation.                            |
   6  |                                                                           |
   7  | Copyright (C) 1992,1993                                                   |
   8  |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
   9  |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
  10  |                                                                           |
  11  | Call from C as:                                                           |
  12  |   void wm_sqrt(FPU_REG *n, unsigned int control_word)                     |
  13  |                                                                           |
  14  +---------------------------------------------------------------------------*/
  15 
  16 /*---------------------------------------------------------------------------+
  17  |  wm_sqrt(FPU_REG *n, unsigned int control_word)                           |
  18  |    returns the square root of n in n.                                     |
  19  |                                                                           |
  20  |  Use Newton's method to compute the square root of a number, which must   |
  21  |  be in the range  [1.0 .. 4.0),  to 64 bits accuracy.                     |
  22  |  Does not check the sign or tag of the argument.                          |
  23  |  Sets the exponent, but not the sign or tag of the result.                |
  24  |                                                                           |
  25  |  The guess is kept in %esi:%edi                                           |
  26  +---------------------------------------------------------------------------*/
  27 
  28 #include "exception.h"
  29 #include "fpu_asm.h"
  30 
  31 
  32 #ifndef NON_REENTRANT_FPU
  33 /*      Local storage on the stack: */
  34 #define FPU_accum_3     -4(%ebp)        /* ms word */
  35 #define FPU_accum_2     -8(%ebp)
  36 #define FPU_accum_1     -12(%ebp)
  37 #define FPU_accum_0     -16(%ebp)
  38 
  39 /*
  40  * The de-normalised argument:
  41  *                  sq_2                  sq_1              sq_0
  42  *        b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
  43  *           ^ binary point here
  44  */
  45 #define FPU_fsqrt_arg_2 -20(%ebp)       /* ms word */
  46 #define FPU_fsqrt_arg_1 -24(%ebp)
  47 #define FPU_fsqrt_arg_0 -28(%ebp)       /* ls word, at most the ms bit is set */
  48 
  49 #else
  50 /*      Local storage in a static area: */
  51 .data
  52         .align 4,0
  53 FPU_accum_3:
  54         .long   0               /* ms word */
  55 FPU_accum_2:
  56         .long   0
  57 FPU_accum_1:
  58         .long   0
  59 FPU_accum_0:
  60         .long   0
  61 
  62 /* The de-normalised argument:
  63                     sq_2                  sq_1              sq_0
  64           b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
  65              ^ binary point here
  66  */
  67 FPU_fsqrt_arg_2:
  68         .long   0               /* ms word */
  69 FPU_fsqrt_arg_1:
  70         .long   0
  71 FPU_fsqrt_arg_0:
  72         .long   0               /* ls word, at most the ms bit is set */
  73 #endif NON_REENTRANT_FPU
  74 
  75 
  76 .text
  77         .align 2,144
  78 
  79 .globl _wm_sqrt
  80 _wm_sqrt:
  81         pushl   %ebp
  82         movl    %esp,%ebp
  83 #ifndef NON_REENTRANT_FPU
  84         subl    $28,%esp
  85 #endif NON_REENTRANT_FPU
  86         pushl   %esi
  87         pushl   %edi
  88         pushl   %ebx
  89 
  90         movl    PARAM1,%esi
  91 
  92         movl    SIGH(%esi),%eax
  93         movl    SIGL(%esi),%ecx
  94         xorl    %edx,%edx
  95 
  96 /* We use a rough linear estimate for the first guess.. */
  97 
  98         cmpl    EXP_BIAS,EXP(%esi)
  99         jnz     sqrt_arg_ge_2
 100 
 101         shrl    $1,%eax                 /* arg is in the range  [1.0 .. 2.0) */
 102         rcrl    $1,%ecx
 103         rcrl    $1,%edx
 104 
 105 sqrt_arg_ge_2:
 106 /* From here on, n is never accessed directly again until it is
 107    replaced by the answer. */
 108 
 109         movl    %eax,FPU_fsqrt_arg_2            /* ms word of n */
 110         movl    %ecx,FPU_fsqrt_arg_1
 111         movl    %edx,FPU_fsqrt_arg_0
 112 
 113 /* Make a linear first estimate */
 114         shrl    $1,%eax
 115         addl    $0x40000000,%eax
 116         movl    $0xaaaaaaaa,%ecx
 117         mull    %ecx
 118         shll    %edx                    /* max result was 7fff... */
 119         testl   $0x80000000,%edx        /* but min was 3fff... */
 120         jnz     sqrt_prelim_no_adjust
 121 
 122         movl    $0x80000000,%edx        /* round up */
 123 
 124 sqrt_prelim_no_adjust:
 125         movl    %edx,%esi       /* Our first guess */
 126 
 127 /* We have now computed (approx)   (2 + x) / 3, which forms the basis
 128    for a few iterations of Newton's method */
 129 
 130         movl    FPU_fsqrt_arg_2,%ecx    /* ms word */
 131 
 132 /*
 133  * From our initial estimate, three iterations are enough to get us
 134  * to 30 bits or so. This will then allow two iterations at better
 135  * precision to complete the process.
 136  */
 137 
 138 /* Compute  (g + n/g)/2  at each iteration (g is the guess). */
 139         shrl    %ecx            /* Doing this first will prevent a divide */
 140                                 /* overflow later. */
 141 
 142         movl    %ecx,%edx       /* msw of the arg / 2 */
 143         divl    %esi            /* current estimate */
 144         shrl    %esi            /* divide by 2 */
 145         addl    %eax,%esi       /* the new estimate */
 146 
 147         movl    %ecx,%edx
 148         divl    %esi
 149         shrl    %esi
 150         addl    %eax,%esi
 151 
 152         movl    %ecx,%edx
 153         divl    %esi
 154         shrl    %esi
 155         addl    %eax,%esi
 156 
 157 /*
 158  * Now that an estimate accurate to about 30 bits has been obtained (in %esi),
 159  * we improve it to 60 bits or so.
 160  *
 161  * The strategy from now on is to compute new estimates from
 162  *      guess := guess + (n - guess^2) / (2 * guess)
 163  */
 164 
 165 /* First, find the square of the guess */
 166         movl    %esi,%eax
 167         mull    %esi
 168 /* guess^2 now in %edx:%eax */
 169 
 170         movl    FPU_fsqrt_arg_1,%ecx
 171         subl    %ecx,%eax
 172         movl    FPU_fsqrt_arg_2,%ecx    /* ms word of normalized n */
 173         sbbl    %ecx,%edx
 174         jnc     sqrt_stage_2_positive
 175 
 176 /* Subtraction gives a negative result,
 177    negate the result before division. */
 178         notl    %edx
 179         notl    %eax
 180         addl    $1,%eax
 181         adcl    $0,%edx
 182 
 183         divl    %esi
 184         movl    %eax,%ecx
 185 
 186         movl    %edx,%eax
 187         divl    %esi
 188         jmp     sqrt_stage_2_finish
 189 
 190 sqrt_stage_2_positive:
 191         divl    %esi
 192         movl    %eax,%ecx
 193 
 194         movl    %edx,%eax
 195         divl    %esi
 196 
 197         notl    %ecx
 198         notl    %eax
 199         addl    $1,%eax
 200         adcl    $0,%ecx
 201 
 202 sqrt_stage_2_finish:
 203         sarl    $1,%ecx         /* divide by 2 */
 204         rcrl    $1,%eax
 205 
 206         /* Form the new estimate in %esi:%edi */
 207         movl    %eax,%edi
 208         addl    %ecx,%esi
 209 
 210         jnz     sqrt_stage_2_done       /* result should be [1..2) */
 211 
 212 #ifdef PARANOID
 213 /* It should be possible to get here only if the arg is ffff....ffff */
 214         cmp     $0xffffffff,FPU_fsqrt_arg_1
 215         jnz     sqrt_stage_2_error
 216 #endif PARANOID
 217 
 218 /* The best rounded result. */
 219         xorl    %eax,%eax
 220         decl    %eax
 221         movl    %eax,%edi
 222         movl    %eax,%esi
 223         movl    $0x7fffffff,%eax
 224         jmp     sqrt_round_result
 225 
 226 #ifdef PARANOID
 227 sqrt_stage_2_error:
 228         pushl   EX_INTERNAL|0x213
 229         call    EXCEPTION
 230 #endif PARANOID
 231 
 232 sqrt_stage_2_done:
 233 
 234 /* Now the square root has been computed to better than 60 bits. */
 235 
 236 /* Find the square of the guess. */
 237         movl    %edi,%eax               /* ls word of guess */
 238         mull    %edi
 239         movl    %edx,FPU_accum_1
 240 
 241         movl    %esi,%eax
 242         mull    %esi
 243         movl    %edx,FPU_accum_3
 244         movl    %eax,FPU_accum_2
 245 
 246         movl    %edi,%eax
 247         mull    %esi
 248         addl    %eax,FPU_accum_1
 249         adcl    %edx,FPU_accum_2
 250         adcl    $0,FPU_accum_3
 251 
 252 /*      movl    %esi,%eax */
 253 /*      mull    %edi */
 254         addl    %eax,FPU_accum_1
 255         adcl    %edx,FPU_accum_2
 256         adcl    $0,FPU_accum_3
 257 
 258 /* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */
 259 
 260         movl    FPU_fsqrt_arg_0,%eax            /* get normalized n */
 261         subl    %eax,FPU_accum_1
 262         movl    FPU_fsqrt_arg_1,%eax
 263         sbbl    %eax,FPU_accum_2
 264         movl    FPU_fsqrt_arg_2,%eax            /* ms word of normalized n */
 265         sbbl    %eax,FPU_accum_3
 266         jnc     sqrt_stage_3_positive
 267 
 268 /* Subtraction gives a negative result,
 269    negate the result before division */
 270         notl    FPU_accum_1
 271         notl    FPU_accum_2
 272         notl    FPU_accum_3
 273         addl    $1,FPU_accum_1
 274         adcl    $0,FPU_accum_2
 275 
 276 #ifdef PARANOID
 277         adcl    $0,FPU_accum_3  /* This must be zero */
 278         jz      sqrt_stage_3_no_error
 279 
 280 sqrt_stage_3_error:
 281         pushl   EX_INTERNAL|0x207
 282         call    EXCEPTION
 283 
 284 sqrt_stage_3_no_error:
 285 #endif PARANOID
 286 
 287         movl    FPU_accum_2,%edx
 288         movl    FPU_accum_1,%eax
 289         divl    %esi
 290         movl    %eax,%ecx
 291 
 292         movl    %edx,%eax
 293         divl    %esi
 294 
 295         sarl    $1,%ecx         /* divide by 2 */
 296         rcrl    $1,%eax
 297 
 298         /* prepare to round the result */
 299 
 300         addl    %ecx,%edi
 301         adcl    $0,%esi
 302 
 303         jmp     sqrt_stage_3_finished
 304 
 305 sqrt_stage_3_positive:
 306         movl    FPU_accum_2,%edx
 307         movl    FPU_accum_1,%eax
 308         divl    %esi
 309         movl    %eax,%ecx
 310 
 311         movl    %edx,%eax
 312         divl    %esi
 313 
 314         sarl    $1,%ecx         /* divide by 2 */
 315         rcrl    $1,%eax
 316 
 317         /* prepare to round the result */
 318 
 319         notl    %eax            /* Negate the correction term */
 320         notl    %ecx
 321         addl    $1,%eax
 322         adcl    $0,%ecx         /* carry here ==> correction == 0 */
 323         adcl    $0xffffffff,%esi
 324 
 325         addl    %ecx,%edi
 326         adcl    $0,%esi
 327 
 328 sqrt_stage_3_finished:
 329 
 330 /*
 331  * The result in %esi:%edi:%esi should be good to about 90 bits here,
 332  * and the rounding information here does not have sufficient accuracy
 333  * in a few rare cases.
 334  */
 335         cmpl    $0xffffffe0,%eax
 336         ja      sqrt_near_exact_x
 337 
 338         cmpl    $0x00000020,%eax
 339         jb      sqrt_near_exact
 340 
 341         cmpl    $0x7fffffe0,%eax
 342         jb      sqrt_round_result
 343 
 344         cmpl    $0x80000020,%eax
 345         jb      sqrt_get_more_precision
 346 
 347 sqrt_round_result:
 348 /* Set up for rounding operations */
 349         movl    %eax,%edx
 350         movl    %esi,%eax
 351         movl    %edi,%ebx
 352         movl    PARAM1,%edi
 353         movl    EXP_BIAS,EXP(%edi)      /* Result is in  [1.0 .. 2.0) */
 354         movl    PARAM2,%ecx
 355         jmp     fpu_reg_round_sqrt
 356 
 357 
 358 sqrt_near_exact_x:
 359 /* First, the estimate must be rounded up. */
 360         addl    $1,%edi
 361         adcl    $0,%esi
 362 
 363 sqrt_near_exact:
 364 /*
 365  * This is an easy case because x^1/2 is monotonic.
 366  * We need just find the square of our estimate, compare it
 367  * with the argument, and deduce whether our estimate is
 368  * above, below, or exact. We use the fact that the estimate
 369  * is known to be accurate to about 90 bits.
 370  */
 371         movl    %edi,%eax               /* ls word of guess */
 372         mull    %edi
 373         movl    %edx,%ebx               /* 2nd ls word of square */
 374         movl    %eax,%ecx               /* ls word of square */
 375 
 376         movl    %edi,%eax
 377         mull    %esi
 378         addl    %eax,%ebx
 379         addl    %eax,%ebx
 380 
 381 #ifdef PARANOID
 382         cmp     $0xffffffb0,%ebx
 383         jb      sqrt_near_exact_ok
 384 
 385         cmp     $0x00000050,%ebx
 386         ja      sqrt_near_exact_ok
 387 
 388         pushl   EX_INTERNAL|0x214
 389         call    EXCEPTION
 390 
 391 sqrt_near_exact_ok:
 392 #endif PARANOID
 393 
 394         or      %ebx,%ebx
 395         js      sqrt_near_exact_small
 396 
 397         jnz     sqrt_near_exact_large
 398 
 399         or      %ebx,%edx
 400         jnz     sqrt_near_exact_large
 401 
 402 /* Our estimate is exactly the right answer */
 403         xorl    %eax,%eax
 404         jmp     sqrt_round_result
 405 
 406 sqrt_near_exact_small:
 407 /* Our estimate is too small */
 408         movl    $0x000000ff,%eax
 409         jmp     sqrt_round_result
 410         
 411 sqrt_near_exact_large:
 412 /* Our estimate is too large, we need to decrement it */
 413         subl    $1,%edi
 414         sbbl    $0,%esi
 415         movl    $0xffffff00,%eax
 416         jmp     sqrt_round_result
 417 
 418 
 419 sqrt_get_more_precision:
 420 /* This case is almost the same as the above, except we start
 421    with an extra bit of precision in the estimate. */
 422         stc                     /* The extra bit. */
 423         rcll    $1,%edi         /* Shift the estimate left one bit */
 424         rcll    $1,%esi
 425 
 426         movl    %edi,%eax               /* ls word of guess */
 427         mull    %edi
 428         movl    %edx,%ebx               /* 2nd ls word of square */
 429         movl    %eax,%ecx               /* ls word of square */
 430 
 431         movl    %edi,%eax
 432         mull    %esi
 433         addl    %eax,%ebx
 434         addl    %eax,%ebx
 435 
 436 /* Put our estimate back to its original value */
 437         stc                     /* The ms bit. */
 438         rcrl    $1,%esi         /* Shift the estimate left one bit */
 439         rcrl    $1,%edi
 440 
 441 #ifdef PARANOID
 442         cmp     $0xffffff60,%ebx
 443         jb      sqrt_more_prec_ok
 444 
 445         cmp     $0x000000a0,%ebx
 446         ja      sqrt_more_prec_ok
 447 
 448         pushl   EX_INTERNAL|0x215
 449         call    EXCEPTION
 450 
 451 sqrt_more_prec_ok:
 452 #endif PARANOID
 453 
 454         or      %ebx,%ebx
 455         js      sqrt_more_prec_small
 456 
 457         jnz     sqrt_more_prec_large
 458 
 459         or      %ebx,%ecx
 460         jnz     sqrt_more_prec_large
 461 
 462 /* Our estimate is exactly the right answer */
 463         movl    $0x80000000,%eax
 464         jmp     sqrt_round_result
 465 
 466 sqrt_more_prec_small:
 467 /* Our estimate is too small */
 468         movl    $0x800000ff,%eax
 469         jmp     sqrt_round_result
 470         
 471 sqrt_more_prec_large:
 472 /* Our estimate is too large */
 473         movl    $0x7fffff00,%eax
 474         jmp     sqrt_round_result

/* [previous][next][first][last][top][bottom][index][help] */