1 .file "wm_sqrt.S"
2 /*---------------------------------------------------------------------------+ 3 | wm_sqrt.S | 4 | | 5 | Fixed point arithmetic square root evaluation. | 6 | | 7 | Copyright (C) 1992,1993 | 8 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | 9 | Australia. E-mail billm@vaxc.cc.monash.edu.au | 10 | | 11 | Call from C as: | 12 | void wm_sqrt(FPU_REG *n, unsigned int control_word) | 13 | | 14 +---------------------------------------------------------------------------*/ 15
16 /*---------------------------------------------------------------------------+ 17 | wm_sqrt(FPU_REG *n, unsigned int control_word) | 18 | returns the square root of n in n. | 19 | | 20 | Use Newton's method to compute the square root of a number, which must | 21 | be in the range [1.0 .. 4.0), to 64 bits accuracy. | 22 | Does not check the sign or tag of the argument. | 23 | Sets the exponent, but not the sign or tag of the result. | 24 | | 25 | The guess is kept in %esi:%edi | 26 +---------------------------------------------------------------------------*/ 27
28 #include "exception.h"
29 #include "fpu_asm.h"
30
31
32 #ifndefNON_REENTRANT_FPU 33 /* Local storage on the stack: */ 34 #define FPU_accum_3 -4(%ebp) /* ms word */ 35 #define FPU_accum_2 -8(%ebp)
36 #define FPU_accum_1 -12(%ebp)
37 #define FPU_accum_0 -16(%ebp)
38
39 /* 40 * The de-normalised argument: 41 * sq_2 sq_1 sq_0 42 * b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0 43 * ^ binary point here 44 */ 45 #define FPU_fsqrt_arg_2 -20(%ebp) /* ms word */ 46 #define FPU_fsqrt_arg_1 -24(%ebp)
47 #define FPU_fsqrt_arg_0 -28(%ebp) /* ls word, at most the ms bit is set */ 48
49 #else 50 /* Local storage in a static area: */ 51 .data
52 .align 4,0
53 FPU_accum_3:
54 .long 0 /* ms word */ 55 FPU_accum_2:
56 .long 0
57 FPU_accum_1:
58 .long 0
59 FPU_accum_0:
60 .long 0
61
62 /* The de-normalised argument: 63 sq_2 sq_1 sq_0 64 b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0 65 ^ binary point here 66 */ 67 FPU_fsqrt_arg_2:
68 .long 0 /* ms word */ 69 FPU_fsqrt_arg_1:
70 .long 0
71 FPU_fsqrt_arg_0:
72 .long 0 /* ls word, at most the ms bit is set */ 73 #endif NON_REENTRANT_FPU
74
75
76 .text
77 .align 2,144
78
79 .globl _wm_sqrt
80 _wm_sqrt:
81 pushl %ebp
82 movl %esp,%ebp
83 #ifndefNON_REENTRANT_FPU 84 subl $28,%esp
85 #endif NON_REENTRANT_FPU
86 pushl %esi
87 pushl %edi
88 pushl %ebx
89
90 movl PARAM1,%esi
91
92 movl SIGH(%esi),%eax
93 movl SIGL(%esi),%ecx
94 xorl %edx,%edx
95
96 /* We use a rough linear estimate for the first guess.. */ 97
98 cmpl EXP_BIAS,EXP(%esi)
99 jnz sqrt_arg_ge_2
100
101 shrl $1,%eax /* arg is in the range [1.0 .. 2.0) */ 102 rcrl $1,%ecx
103 rcrl $1,%edx
104
105 sqrt_arg_ge_2:
106 /* From here on, n is never accessed directly again until it is 107 replaced by the answer. */ 108
109 movl %eax,FPU_fsqrt_arg_2 /* ms word of n */ 110 movl %ecx,FPU_fsqrt_arg_1
111 movl %edx,FPU_fsqrt_arg_0
112
113 /* Make a linear first estimate */ 114 shrl $1,%eax
115 addl $0x40000000,%eax
116 movl $0xaaaaaaaa,%ecx
117 mull %ecx
118 shll %edx /* max result was 7fff... */ 119 testl $0x80000000,%edx /* but min was 3fff... */ 120 jnz sqrt_prelim_no_adjust
121
122 movl $0x80000000,%edx /* round up */ 123
124 sqrt_prelim_no_adjust:
125 movl %edx,%esi /* Our first guess */ 126
127 /* We have now computed (approx) (2 + x) / 3, which forms the basis 128 for a few iterations of Newton's method */ 129
130 movl FPU_fsqrt_arg_2,%ecx /* ms word */ 131
132 /* 133 * From our initial estimate, three iterations are enough to get us 134 * to 30 bits or so. This will then allow two iterations at better 135 * precision to complete the process. 136 */ 137
138 /* Compute (g + n/g)/2 at each iteration (g is the guess). */ 139 shrl %ecx /* Doing this first will prevent a divide */ 140 /* overflow later. */ 141
142 movl %ecx,%edx /* msw of the arg / 2 */ 143 divl %esi /* current estimate */ 144 shrl %esi /* divide by 2 */ 145 addl %eax,%esi /* the new estimate */ 146
147 movl %ecx,%edx
148 divl %esi
149 shrl %esi
150 addl %eax,%esi
151
152 movl %ecx,%edx
153 divl %esi
154 shrl %esi
155 addl %eax,%esi
156
157 /* 158 * Now that an estimate accurate to about 30 bits has been obtained (in %esi), 159 * we improve it to 60 bits or so. 160 * 161 * The strategy from now on is to compute new estimates from 162 * guess := guess + (n - guess^2) / (2 * guess) 163 */ 164
165 /* First, find the square of the guess */ 166 movl %esi,%eax
167 mull %esi
168 /* guess^2 now in %edx:%eax */ 169
170 movl FPU_fsqrt_arg_1,%ecx
171 subl %ecx,%eax
172 movl FPU_fsqrt_arg_2,%ecx /* ms word of normalized n */ 173 sbbl %ecx,%edx
174 jnc sqrt_stage_2_positive
175
176 /* Subtraction gives a negative result, 177 negate the result before division. */ 178 notl %edx
179 notl %eax
180 addl $1,%eax
181 adcl $0,%edx
182
183 divl %esi
184 movl %eax,%ecx
185
186 movl %edx,%eax
187 divl %esi
188 jmp sqrt_stage_2_finish
189
190 sqrt_stage_2_positive:
191 divl %esi
192 movl %eax,%ecx
193
194 movl %edx,%eax
195 divl %esi
196
197 notl %ecx
198 notl %eax
199 addl $1,%eax
200 adcl $0,%ecx
201
202 sqrt_stage_2_finish:
203 sarl $1,%ecx /* divide by 2 */ 204 rcrl $1,%eax
205
206 /* Form the new estimate in %esi:%edi */ 207 movl %eax,%edi
208 addl %ecx,%esi
209
210 jnz sqrt_stage_2_done /* result should be [1..2) */ 211
212 #ifdefPARANOID 213 /* It should be possible to get here only if the arg is ffff....ffff */ 214 cmp $0xffffffff,FPU_fsqrt_arg_1
215 jnz sqrt_stage_2_error
216 #endif PARANOID
217
218 /* The best rounded result. */ 219 xorl %eax,%eax
220 decl %eax
221 movl %eax,%edi
222 movl %eax,%esi
223 movl $0x7fffffff,%eax
224 jmp sqrt_round_result
225
226 #ifdefPARANOID 227 sqrt_stage_2_error:
228 pushl EX_INTERNAL|0x213
229 call EXCEPTION
230 #endif PARANOID
231
232 sqrt_stage_2_done:
233
234 /* Now the square root has been computed to better than 60 bits. */ 235
236 /* Find the square of the guess. */ 237 movl %edi,%eax /* ls word of guess */ 238 mull %edi
239 movl %edx,FPU_accum_1
240
241 movl %esi,%eax
242 mull %esi
243 movl %edx,FPU_accum_3
244 movl %eax,FPU_accum_2
245
246 movl %edi,%eax
247 mull %esi
248 addl %eax,FPU_accum_1
249 adcl %edx,FPU_accum_2
250 adcl $0,FPU_accum_3
251
252 /* movl %esi,%eax */ 253 /* mull %edi */ 254 addl %eax,FPU_accum_1
255 adcl %edx,FPU_accum_2
256 adcl $0,FPU_accum_3
257
258 /* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */ 259
260 movl FPU_fsqrt_arg_0,%eax /* get normalized n */ 261 subl %eax,FPU_accum_1
262 movl FPU_fsqrt_arg_1,%eax
263 sbbl %eax,FPU_accum_2
264 movl FPU_fsqrt_arg_2,%eax /* ms word of normalized n */ 265 sbbl %eax,FPU_accum_3
266 jnc sqrt_stage_3_positive
267
268 /* Subtraction gives a negative result, 269 negate the result before division */ 270 notl FPU_accum_1
271 notl FPU_accum_2
272 notl FPU_accum_3
273 addl $1,FPU_accum_1
274 adcl $0,FPU_accum_2
275
276 #ifdefPARANOID 277 adcl $0,FPU_accum_3 /* This must be zero */ 278 jz sqrt_stage_3_no_error
279
280 sqrt_stage_3_error:
281 pushl EX_INTERNAL|0x207
282 call EXCEPTION
283
284 sqrt_stage_3_no_error:
285 #endif PARANOID
286
287 movl FPU_accum_2,%edx
288 movl FPU_accum_1,%eax
289 divl %esi
290 movl %eax,%ecx
291
292 movl %edx,%eax
293 divl %esi
294
295 sarl $1,%ecx /* divide by 2 */ 296 rcrl $1,%eax
297
298 /* prepare to round the result */ 299
300 addl %ecx,%edi
301 adcl $0,%esi
302
303 jmp sqrt_stage_3_finished
304
305 sqrt_stage_3_positive:
306 movl FPU_accum_2,%edx
307 movl FPU_accum_1,%eax
308 divl %esi
309 movl %eax,%ecx
310
311 movl %edx,%eax
312 divl %esi
313
314 sarl $1,%ecx /* divide by 2 */ 315 rcrl $1,%eax
316
317 /* prepare to round the result */ 318
319 notl %eax /* Negate the correction term */ 320 notl %ecx
321 addl $1,%eax
322 adcl $0,%ecx /* carry here ==> correction == 0 */ 323 adcl $0xffffffff,%esi
324
325 addl %ecx,%edi
326 adcl $0,%esi
327
328 sqrt_stage_3_finished:
329
330 /* 331 * The result in %esi:%edi:%esi should be good to about 90 bits here, 332 * and the rounding information here does not have sufficient accuracy 333 * in a few rare cases. 334 */ 335 cmpl $0xffffffe0,%eax
336 ja sqrt_near_exact_x
337
338 cmpl $0x00000020,%eax
339 jb sqrt_near_exact
340
341 cmpl $0x7fffffe0,%eax
342 jb sqrt_round_result
343
344 cmpl $0x80000020,%eax
345 jb sqrt_get_more_precision
346
347 sqrt_round_result:
348 /* Set up for rounding operations */ 349 movl %eax,%edx
350 movl %esi,%eax
351 movl %edi,%ebx
352 movl PARAM1,%edi
353 movl EXP_BIAS,EXP(%edi) /* Result is in [1.0 .. 2.0) */ 354 movl PARAM2,%ecx
355 jmp fpu_reg_round_sqrt
356
357
358 sqrt_near_exact_x:
359 /* First, the estimate must be rounded up. */ 360 addl $1,%edi
361 adcl $0,%esi
362
363 sqrt_near_exact:
364 /* 365 * This is an easy case because x^1/2 is monotonic. 366 * We need just find the square of our estimate, compare it 367 * with the argument, and deduce whether our estimate is 368 * above, below, or exact. We use the fact that the estimate 369 * is known to be accurate to about 90 bits. 370 */ 371 movl %edi,%eax /* ls word of guess */ 372 mull %edi
373 movl %edx,%ebx /* 2nd ls word of square */ 374 movl %eax,%ecx /* ls word of square */ 375
376 movl %edi,%eax
377 mull %esi
378 addl %eax,%ebx
379 addl %eax,%ebx
380
381 #ifdefPARANOID 382 cmp $0xffffffb0,%ebx
383 jb sqrt_near_exact_ok
384
385 cmp $0x00000050,%ebx
386 ja sqrt_near_exact_ok
387
388 pushl EX_INTERNAL|0x214
389 call EXCEPTION
390
391 sqrt_near_exact_ok:
392 #endif PARANOID
393
394 or %ebx,%ebx
395 js sqrt_near_exact_small
396
397 jnz sqrt_near_exact_large
398
399 or %ebx,%edx
400 jnz sqrt_near_exact_large
401
402 /* Our estimate is exactly the right answer */ 403 xorl %eax,%eax
404 jmp sqrt_round_result
405
406 sqrt_near_exact_small:
407 /* Our estimate is too small */ 408 movl $0x000000ff,%eax
409 jmp sqrt_round_result
410
411 sqrt_near_exact_large:
412 /* Our estimate is too large, we need to decrement it */ 413 subl $1,%edi
414 sbbl $0,%esi
415 movl $0xffffff00,%eax
416 jmp sqrt_round_result
417
418
419 sqrt_get_more_precision:
420 /* This case is almost the same as the above, except we start 421 with an extra bit of precision in the estimate. */ 422 stc /* The extra bit. */ 423 rcll $1,%edi /* Shift the estimate left one bit */ 424 rcll $1,%esi
425
426 movl %edi,%eax /* ls word of guess */ 427 mull %edi
428 movl %edx,%ebx /* 2nd ls word of square */ 429 movl %eax,%ecx /* ls word of square */ 430
431 movl %edi,%eax
432 mull %esi
433 addl %eax,%ebx
434 addl %eax,%ebx
435
436 /* Put our estimate back to its original value */ 437 stc /* The ms bit. */ 438 rcrl $1,%esi /* Shift the estimate left one bit */ 439 rcrl $1,%edi
440
441 #ifdefPARANOID 442 cmp $0xffffff60,%ebx
443 jb sqrt_more_prec_ok
444
445 cmp $0x000000a0,%ebx
446 ja sqrt_more_prec_ok
447
448 pushl EX_INTERNAL|0x215
449 call EXCEPTION
450
451 sqrt_more_prec_ok:
452 #endif PARANOID
453
454 or %ebx,%ebx
455 js sqrt_more_prec_small
456
457 jnz sqrt_more_prec_large
458
459 or %ebx,%ecx
460 jnz sqrt_more_prec_large
461
462 /* Our estimate is exactly the right answer */ 463 movl $0x80000000,%eax
464 jmp sqrt_round_result
465
466 sqrt_more_prec_small:
467 /* Our estimate is too small */ 468 movl $0x800000ff,%eax
469 jmp sqrt_round_result
470
471 sqrt_more_prec_large:
472 /* Our estimate is too large */ 473 movl $0x7fffff00,%eax
474 jmp sqrt_round_result