1 .file "wm_sqrt.S"
2 /*---------------------------------------------------------------------------+ 3 | wm_sqrt.S | 4 | | 5 | Fixed point arithmetic square root evaluation. | 6 | | 7 | Copyright (C) 1992,1993 | 8 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | 9 | Australia. E-mail apm233m@vaxc.cc.monash.edu.au | 10 | | 11 | Call from C as: | 12 | void wm_sqrt(FPU_REG *n, unsigned int control_word) | 13 | | 14 +---------------------------------------------------------------------------*/ 15
16 /*---------------------------------------------------------------------------+ 17 | wm_sqrt(FPU_REG *n, unsigned int control_word) | 18 | returns the square root of n in n. | 19 | | 20 | Use Newton's method to compute the square root of a number, which must | 21 | be in the range [1.0 .. 4.0), to 64 bits accuracy. | 22 | Does not check the sign or tag of the argument. | 23 | Sets the exponent, but not the sign or tag of the result. | 24 | | 25 | The guess is kept in %esi:%edi | 26 +---------------------------------------------------------------------------*/ 27
28 #include "exception.h"
29 #include "fpu_asm.h"
30
31
32 .data
33 /* 34 Local storage: 35 */ 36 .align 4,0
37 accum_3:
38 .long 0 // ms word 39 accum_2:
40 .long 0
41 accum_1:
42 .long 0
43 accum_0:
44 .long 0
45
46 // The de-normalised argument: 47 // sq_2 sq_1 sq_0 48 // b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0 49 // ^ binary point here 50 fsqrt_arg_2:
51 .long 0 // ms word 52 fsqrt_arg_1:
53 .long 0
54 fsqrt_arg_0:
55 .long 0 // ls word, at most the ms bit is set 56
57 .text
58 .align 2,144
59
60 .globl _wm_sqrt
61
62 _wm_sqrt:
63 pushl %ebp
64 movl %esp,%ebp
65 pushl %esi
66 pushl %edi
67 pushl %ebx
68
69 movl PARAM1,%esi
70
71 movl SIGH(%esi),%eax
72 movl SIGL(%esi),%ecx
73 xorl %edx,%edx
74
75 // We use a rough linear estimate for the first guess.. 76
77 cmpl EXP_BIAS,EXP(%esi)
78 jnz sqrt_arg_ge_2
79
80 shrl $1,%eax /* arg is in the range [1.0 .. 2.0) */ 81 rcrl $1,%ecx
82 rcrl $1,%edx
83
84 sqrt_arg_ge_2:
85 // From here on, n is never accessed directly again until it is 86 // replaced by the answer. 87
88 movl %eax,fsqrt_arg_2 // ms word of n 89 movl %ecx,fsqrt_arg_1
90 movl %edx,fsqrt_arg_0
91
92 // Make a linear first estimate 93 shrl $1,%eax
94 addl $0x40000000,%eax
95 movl $0xaaaaaaaa,%ecx
96 mull %ecx
97 shll %edx /* max result was 7fff... */ 98 testl $0x80000000,%edx /* but min was 3fff... */ 99 jnz sqrt_prelim_no_adjust
100
101 movl $0x80000000,%edx /* round up */ 102
103 sqrt_prelim_no_adjust:
104 movl %edx,%esi // Our first guess 105
106 /* We have now computed (approx) (2 + x) / 3, which forms the basis 107 for a few iterations of Newton's method */ 108
109 movl fsqrt_arg_2,%ecx // ms word 110
111 // From our initial estimate, three iterations are enough to get us 112 // to 30 bits or so. This will then allow two iterations at better 113 // precision to complete the process. 114
115 // Compute (g + n/g)/2 at each iteration (g is the guess). 116 shrl %ecx // Doing this first will prevent a divide 117 // overflow later. 118
119 movl %ecx,%edx // msw of the arg / 2 120 divl %esi // current estimate 121 shrl %esi // divide by 2 122 addl %eax,%esi // the new estimate 123
124 movl %ecx,%edx
125 divl %esi
126 shrl %esi
127 addl %eax,%esi
128
129 movl %ecx,%edx
130 divl %esi
131 shrl %esi
132 addl %eax,%esi
133
134 // Now that an estimate accurate to about 30 bits has been obtained (in %esi), 135 // we improve it to 60 bits or so. 136
137 // The strategy from now on is to compute new estimates from 138 // guess := guess + (n - guess^2) / (2 * guess) 139
140 // First, find the square of the guess 141 movl %esi,%eax
142 mull %esi
143 // guess^2 now in %edx:%eax 144
145 movl fsqrt_arg_1,%ecx
146 subl %ecx,%eax
147 movl fsqrt_arg_2,%ecx // ms word of normalized n 148 sbbl %ecx,%edx
149 jnc sqrt_stage_2_positive
150
151 // subtraction gives a negative result 152 // negate the result before division 153 notl %edx
154 notl %eax
155 addl $1,%eax
156 adcl $0,%edx
157
158 divl %esi
159 movl %eax,%ecx
160
161 movl %edx,%eax
162 divl %esi
163 jmp sqrt_stage_2_finish
164
165 sqrt_stage_2_positive:
166 divl %esi
167 movl %eax,%ecx
168
169 movl %edx,%eax
170 divl %esi
171
172 notl %ecx
173 notl %eax
174 addl $1,%eax
175 adcl $0,%ecx
176
177 sqrt_stage_2_finish:
178 sarl $1,%ecx // divide by 2 179 rcrl $1,%eax
180
181 // Form the new estimate in %esi:%edi 182 movl %eax,%edi
183 addl %ecx,%esi
184
185 jnz sqrt_stage_2_done // result should be [1..2) 186
187 #ifdefPARANOID 188 // It should be possible to get here only if the arg is ffff....ffff 189 cmp $0xffffffff,fsqrt_arg_1
190 jnz sqrt_stage_2_error
191 #endif PARANOID
192
193 // The best rounded result. 194 xorl %eax,%eax
195 decl %eax
196 movl %eax,%edi
197 movl %eax,%esi
198 movl $0x7fffffff,%eax
199 jmp sqrt_round_result
200
201 #ifdefPARANOID 202 sqrt_stage_2_error:
203 pushl EX_INTERNAL|0x213
204 call EXCEPTION
205 #endif PARANOID
206
207 sqrt_stage_2_done:
208
209 // Now the square root has been computed to better than 60 bits 210
211 // Find the square of the guess 212 movl %edi,%eax // ls word of guess 213 mull %edi
214 movl %edx,accum_1
215
216 movl %esi,%eax
217 mull %esi
218 movl %edx,accum_3
219 movl %eax,accum_2
220
221 movl %edi,%eax
222 mull %esi
223 addl %eax,accum_1
224 adcl %edx,accum_2
225 adcl $0,accum_3
226
227 // movl %esi,%eax 228 // mull %edi 229 addl %eax,accum_1
230 adcl %edx,accum_2
231 adcl $0,accum_3
232
233 // guess^2 now in accum_3:accum_2:accum_1 234
235 movl fsqrt_arg_0,%eax // get normalized n 236 subl %eax,accum_1
237 movl fsqrt_arg_1,%eax
238 sbbl %eax,accum_2
239 movl fsqrt_arg_2,%eax // ms word of normalized n 240 sbbl %eax,accum_3
241 jnc sqrt_stage_3_positive
242
243 // subtraction gives a negative result 244 // negate the result before division 245 notl accum_1
246 notl accum_2
247 notl accum_3
248 addl $1,accum_1
249 adcl $0,accum_2
250
251 #ifdefPARANOID 252 adcl $0,accum_3 // This must be zero 253 jz sqrt_stage_3_no_error
254
255 sqrt_stage_3_error:
256 pushl EX_INTERNAL|0x207
257 call EXCEPTION
258
259 sqrt_stage_3_no_error:
260 #endif PARANOID
261
262 movl accum_2,%edx
263 movl accum_1,%eax
264 divl %esi
265 movl %eax,%ecx
266
267 movl %edx,%eax
268 divl %esi
269
270 sarl $1,%ecx // divide by 2 271 rcrl $1,%eax
272
273 // prepare to round the result 274
275 addl %ecx,%edi
276 adcl $0,%esi
277
278 jmp sqrt_stage_3_finished
279
280 sqrt_stage_3_positive:
281 movl accum_2,%edx
282 movl accum_1,%eax
283 divl %esi
284 movl %eax,%ecx
285
286 movl %edx,%eax
287 divl %esi
288
289 sarl $1,%ecx // divide by 2 290 rcrl $1,%eax
291
292 // prepare to round the result 293
294 notl %eax // Negate the correction term 295 notl %ecx
296 addl $1,%eax
297 adcl $0,%ecx // carry here ==> correction == 0 298 adcl $0xffffffff,%esi
299
300 addl %ecx,%edi
301 adcl $0,%esi
302
303 sqrt_stage_3_finished:
304
305 // The result in %esi:%edi:%esi should be good to about 90 bits here, 306 // and the rounding information here does not have sufficient accuracy 307 // in a few rare cases. 308 cmpl $0xffffffe0,%eax
309 ja sqrt_near_exact_x
310
311 cmpl $0x00000020,%eax
312 jb sqrt_near_exact
313
314 cmpl $0x7fffffe0,%eax
315 jb sqrt_round_result
316
317 cmpl $0x80000020,%eax
318 jb sqrt_get_more_precision
319
320 sqrt_round_result:
321 // Set up for rounding operations 322 movl %eax,%edx
323 movl %esi,%eax
324 movl %edi,%ebx
325 movl PARAM1,%edi
326 movl EXP_BIAS,EXP(%edi) // Result is in [1.0 .. 2.0) 327 movl PARAM2,%ecx
328 jmp FPU_round_sqrt
329
330
331 sqrt_near_exact_x:
332 // First, the estimate must be rounded up. 333 addl $1,%edi
334 adcl $0,%esi
335
336 sqrt_near_exact:
337 // This is an easy case because x^1/2 is monotonic. 338 // We need just find the square of our estimate, compare it 339 // with the argument, and deduce whether our estimate is 340 // above, below, or exact. We use the fact that the estimate 341 // is known to be accurate to about 90 bits. 342 movl %edi,%eax // ls word of guess 343 mull %edi
344 movl %edx,%ebx // 2nd ls word of square 345 movl %eax,%ecx // ls word of square 346
347 movl %edi,%eax
348 mull %esi
349 addl %eax,%ebx
350 addl %eax,%ebx
351
352 #ifdefPARANOID 353 cmp $0xffffffb0,%ebx
354 jb sqrt_near_exact_ok
355
356 cmp $0x00000050,%ebx
357 ja sqrt_near_exact_ok
358
359 pushl EX_INTERNAL|0x214
360 call EXCEPTION
361
362 sqrt_near_exact_ok:
363 #endif PARANOID
364
365 or %ebx,%ebx
366 js sqrt_near_exact_small
367
368 jnz sqrt_near_exact_large
369
370 or %ebx,%edx
371 jnz sqrt_near_exact_large
372
373 // Our estimate is exactly the right answer 374 xorl %eax,%eax
375 jmp sqrt_round_result
376
377 sqrt_near_exact_small:
378 // Our estimate is too small 379 movl $0x000000ff,%eax
380 jmp sqrt_round_result
381
382 sqrt_near_exact_large:
383 // Our estimate is too large, we need to decrement it 384 subl $1,%edi
385 sbbl $0,%esi
386 movl $0xffffff00,%eax
387 jmp sqrt_round_result
388
389
390 sqrt_get_more_precision:
391 // This case is almost the same as the above, except we start 392 // with an extra bit of precision in the estimate. 393 stc // The extra bit. 394 rcll $1,%edi // Shift the estimate left one bit 395 rcll $1,%esi
396
397 movl %edi,%eax // ls word of guess 398 mull %edi
399 movl %edx,%ebx // 2nd ls word of square 400 movl %eax,%ecx // ls word of square 401
402 movl %edi,%eax
403 mull %esi
404 addl %eax,%ebx
405 addl %eax,%ebx
406
407 // Put our estimate back to its original value 408 stc // The ms bit. 409 rcrl $1,%esi // Shift the estimate left one bit 410 rcrl $1,%edi
411
412 #ifdefPARANOID 413 cmp $0xffffff60,%ebx
414 jb sqrt_more_prec_ok
415
416 cmp $0x000000a0,%ebx
417 ja sqrt_more_prec_ok
418
419 pushl EX_INTERNAL|0x215
420 call EXCEPTION
421
422 sqrt_more_prec_ok:
423 #endif PARANOID
424
425 or %ebx,%ebx
426 js sqrt_more_prec_small
427
428 jnz sqrt_more_prec_large
429
430 or %ebx,%ecx
431 jnz sqrt_more_prec_large
432
433 // Our estimate is exactly the right answer 434 movl $0x80000000,%eax
435 jmp sqrt_round_result
436
437 sqrt_more_prec_small:
438 // Our estimate is too small 439 movl $0x800000ff,%eax
440 jmp sqrt_round_result
441
442 sqrt_more_prec_large:
443 // Our estimate is too large 444 movl $0x7fffff00,%eax
445 jmp sqrt_round_result