root/kernel/FPU-emu/wm_sqrt.S

/* [previous][next][first][last][top][bottom][index][help] */
   1         .file   "wm_sqrt.S"
   2 /*---------------------------------------------------------------------------+
   3  |  wm_sqrt.S                                                                |
   4  |                                                                           |
   5  | Fixed point arithmetic square root evaluation.                            |
   6  |                                                                           |
   7  | Copyright (C) 1992    W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
   8  |                       Australia.  E-mail apm233m@vaxc.cc.monash.edu.au    |
   9  |                                                                           |
  10  | Call from C as:                                                           |
  11  |   void wm_sqrt(FPU_REG *n)                                                |
  12  |                                                                           |
  13  +---------------------------------------------------------------------------*/
  14 
  15 /*---------------------------------------------------------------------------+
  16  |  wm_sqrt(FPU_REG *n)                                                      |
  17  |    returns the square root of n in n.                                     |
  18  |                                                                           |
  19  |  Use Newton's method to compute the square root of a number, which must   |
  20  |  be in the range  [1.0 .. 4.0),  to 64 bits accuracy.                     |
  21  |  Does not check the sign or tag of the argument.                          |
  22  |  Sets the exponent, but not the sign or tag of the result.                |
  23  |                                                                           |
  24  |  The guess is kept in %esi:%edi                                           |
  25  +---------------------------------------------------------------------------*/
  26 
  27 #include "exception.h"
  28 #include "fpu_asm.h"
  29 
  30 
  31 .data
  32 /*
  33         Local storage:
  34  */
  35         .align 4,0
  36 accum_2:
  37         .long   0               // ms word
  38 accum_1:
  39         .long   0
  40 accum_0:
  41         .long   0
  42 
  43 sq_2:
  44         .long   0               // ms word
  45 sq_1:
  46         .long   0
  47 sq_0:
  48         .long   0
  49 
  50 .text
  51         .align 2,144
  52 
  53 .globl _wm_sqrt
  54 
  55 _wm_sqrt:
  56         pushl   %ebp
  57         movl    %esp,%ebp
  58         pushl   %esi
  59         pushl   %edi
  60 //      pushl   %ebx
  61 
  62         movl    PARAM1,%esi
  63 
  64         movl    SIGH(%esi),%eax
  65         movl    SIGL(%esi),%ecx
  66         xorl    %edx,%edx
  67 
  68 // We use a rough linear estimate for the first guess..
  69 
  70         cmpl    EXP_BIAS,EXP(%esi)
  71         jnz     L10
  72 
  73         shrl    $1,%eax                 /* arg is in the range  [1.0 .. 2.0) */
  74         rcrl    $1,%ecx
  75         rcrl    $1,%edx
  76 
  77 L10:
  78 // From here on, n is never accessed directly again until it is
  79 // replaced by the answer.
  80 
  81         movl    %eax,sq_2               // ms word of n
  82         movl    %ecx,sq_1
  83         movl    %edx,sq_0
  84 
  85         shrl    $1,%eax
  86         addl    $0x40000000,%eax
  87         movl    $0xaaaaaaaa,%ecx
  88         mull    %ecx
  89         shll    %edx                    /* max result was 7fff... */
  90         testl   $0x80000000,%edx        /* but min was 3fff... */
  91         jnz     no_adjust
  92 
  93         movl    $0x80000000,%edx        /* round up */
  94 
  95 no_adjust:
  96         movl    %edx,%esi       // Our first guess
  97 
  98 /* We have now computed (approx)   (2 + x) / 3, which forms the basis
  99    for a few iterations of Newton's method */
 100 
 101         movl    sq_2,%ecx       // ms word
 102 
 103 // From our initial estimate, three iterations are enough to get us
 104 // to 30 bits or so. This will then allow two iterations at better
 105 // precision to complete the process.
 106 
 107 // Compute  (g + n/g)/2  at each iteration (g is the guess).
 108         shrl    %ecx            // Doing this first will prevent a divide
 109                                 // overflow later.
 110 
 111         movl    %ecx,%edx
 112         divl    %esi
 113         shrl    %esi
 114         addl    %eax,%esi
 115 
 116         movl    %ecx,%edx
 117         divl    %esi
 118         shrl    %esi
 119         addl    %eax,%esi
 120 
 121         movl    %ecx,%edx
 122         divl    %esi
 123         shrl    %esi
 124         addl    %eax,%esi
 125 
 126 // Now that an estimate accurate to about 30 bits has been obtained,
 127 // we improve it to 60 bits or so.
 128 
 129 // The strategy from now on is to compute new estimates from
 130 //      guess := guess + (n - guess^2) / (2 * guess)
 131 
 132 // First, find the square of the guess
 133         movl    %esi,%eax
 134         mull    %esi
 135 // guess^2 now in %edx:%eax
 136 
 137         movl    sq_1,%ecx
 138         subl    %ecx,%eax
 139         movl    sq_2,%ecx       // ms word of normalized n
 140         sbbl    %ecx,%edx
 141         jnc     l40
 142 
 143 // subtraction gives a negative result
 144 // negate the reult before division
 145         notl    %edx
 146         notl    %eax
 147         addl    $1,%eax
 148         adcl    $0,%edx
 149 
 150         divl    %esi
 151         movl    %eax,%ecx
 152 
 153         movl    %edx,%eax
 154         divl    %esi
 155         jmp     l45
 156 
 157 l40:
 158         divl    %esi
 159         movl    %eax,%ecx
 160 
 161         movl    %edx,%eax
 162         divl    %esi
 163 
 164         notl    %ecx
 165         notl    %eax
 166         addl    $1,%eax
 167         adcl    $0,%ecx
 168 
 169 l45:
 170         sarl    $1,%ecx         // divide by 2
 171         rcrl    $1,%eax
 172 
 173         movl    %eax,%edi
 174         addl    %ecx,%esi
 175 
 176 // Now the square root has been computed to better than 60 bits
 177 
 178 // Find the square of the guess
 179         movl    %edi,%eax               // ls word of guess
 180         mull    %edi
 181         movl    %edx,accum_0
 182 
 183         movl    %esi,%eax
 184         mull    %esi
 185         movl    %edx,accum_2
 186         movl    %eax,accum_1
 187 
 188         movl    %edi,%eax
 189         mull    %esi
 190         addl    %eax,accum_0
 191         adcl    %edx,accum_1
 192         adcl    $0,accum_2
 193 
 194         movl    %esi,%eax
 195         mull    %edi
 196         addl    %eax,accum_0
 197         adcl    %edx,accum_1
 198         adcl    $0,accum_2
 199 
 200 // guess^2 now in accum_2:accum_1:accum_0
 201 
 202         movl    sq_1,%eax               // get normalized n
 203         subl    %eax,accum_1
 204         movl    sq_2,%eax               // ms word of normalized n
 205         sbbl    %eax,accum_2
 206         jnc     l60
 207 
 208 // subtraction gives a negative result
 209 // negate the reult before division
 210         notl    accum_0
 211         notl    accum_1
 212         notl    accum_2
 213         addl    $1,accum_0
 214         adcl    $0,accum_1
 215 
 216 #ifdef PARANOID
 217         adcl    $0,accum_2      // This must be zero
 218         jz      l51
 219 
 220         pushl   EX_INTERNAL|0x207
 221         call    EXCEPTION
 222 
 223 l51:
 224 #endif PARANOID
 225 
 226         movl    accum_1,%edx
 227         movl    accum_0,%eax
 228         divl    %esi
 229         movl    %eax,%ecx
 230 
 231         movl    %edx,%eax
 232         divl    %esi
 233 
 234         sarl    $1,%ecx         // divide by 2
 235         rcrl    $1,%eax
 236 
 237         // round the result
 238         addl    $0x80000000,%eax
 239         adcl    $0,%ecx
 240 
 241         addl    %ecx,%edi
 242         adcl    $0,%esi
 243 
 244         jmp     l65
 245 
 246 l60:
 247         movl    accum_1,%edx
 248         movl    accum_0,%eax
 249         divl    %esi
 250         movl    %eax,%ecx
 251 
 252         movl    %edx,%eax
 253         divl    %esi
 254 
 255         sarl    $1,%ecx         // divide by 2
 256         rcrl    $1,%eax
 257 
 258         // round the result
 259         addl    $0x80000000,%eax
 260         adcl    $0,%ecx
 261 
 262         subl    %ecx,%edi
 263         sbbl    $0,%esi
 264 
 265 l65:
 266         movl    PARAM1,%ecx
 267 
 268         movl    %edi,SIGL(%ecx)
 269         movl    %esi,SIGH(%ecx)
 270 
 271         movl    EXP_BIAS,EXP(%ecx)      /* Result is in  [1.0 .. 2.0) */
 272 
 273 //      popl    %ebx
 274         popl    %edi
 275         popl    %esi
 276         leave
 277         ret

/* [previous][next][first][last][top][bottom][index][help] */