1 .file "wm_sqrt.S"
2 /*---------------------------------------------------------------------------+ 3 | wm_sqrt.S | 4 | | 5 | Fixed point arithmetic square root evaluation. | 6 | | 7 | Copyright (C) 1992,1993,1995 | 8 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | 9 | Australia. E-mail billm@jacobi.maths.monash.edu.au | 10 | | 11 | Call from C as: | 12 | void wm_sqrt(FPU_REG *n, unsigned int control_word) | 13 | | 14 +---------------------------------------------------------------------------*/ 15
16 /*---------------------------------------------------------------------------+ 17 | wm_sqrt(FPU_REG *n, unsigned int control_word) | 18 | returns the square root of n in n. | 19 | | 20 | Use Newton's method to compute the square root of a number, which must | 21 | be in the range [1.0 .. 4.0), to 64 bits accuracy. | 22 | Does not check the sign or tag of the argument. | 23 | Sets the exponent, but not the sign or tag of the result. | 24 | | 25 | The guess is kept in %esi:%edi | 26 +---------------------------------------------------------------------------*/ 27
28 #include "exception.h"
29 #include "fpu_emu.h"
30
31
32 #ifndefNON_REENTRANT_FPU 33 /* Local storage on the stack: */ 34 #define FPU_accum_3 -4(%ebp) /* ms word */ 35 #define FPU_accum_2 -8(%ebp)
36 #define FPU_accum_1 -12(%ebp)
37 #define FPU_accum_0 -16(%ebp)
38
39 /* 40 * The de-normalised argument: 41 * sq_2 sq_1 sq_0 42 * b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0 43 * ^ binary point here 44 */ 45 #define FPU_fsqrt_arg_2 -20(%ebp) /* ms word */ 46 #define FPU_fsqrt_arg_1 -24(%ebp)
47 #define FPU_fsqrt_arg_0 -28(%ebp) /* ls word, at most the ms bit is set */ 48
49 #else 50 /* Local storage in a static area: */ 51 .data
52 .align 4,0
53 FPU_accum_3:
54 .long 0 /* ms word */ 55 FPU_accum_2:
56 .long 0
57 FPU_accum_1:
58 .long 0
59 FPU_accum_0:
60 .long 0
61
62 /* The de-normalised argument: 63 sq_2 sq_1 sq_0 64 b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0 65 ^ binary point here 66 */ 67 FPU_fsqrt_arg_2:
68 .long 0 /* ms word */ 69 FPU_fsqrt_arg_1:
70 .long 0
71 FPU_fsqrt_arg_0:
72 .long 0 /* ls word, at most the ms bit is set */ 73 #endif NON_REENTRANT_FPU
74
75
76 .text
77 ENTRY(wm_sqrt)
78 pushl %ebp
79 movl %esp,%ebp
80 #ifndefNON_REENTRANT_FPU 81 subl $28,%esp
82 #endif NON_REENTRANT_FPU
83 pushl %esi
84 pushl %edi
85 pushl %ebx
86
87 movl PARAM1,%esi
88
89 movl SIGH(%esi),%eax
90 movl SIGL(%esi),%ecx
91 xorl %edx,%edx
92
93 /* We use a rough linear estimate for the first guess.. */ 94
95 cmpl EXP_BIAS,EXP(%esi)
96 jnz sqrt_arg_ge_2
97
98 shrl $1,%eax /* arg is in the range [1.0 .. 2.0) */ 99 rcrl $1,%ecx
100 rcrl $1,%edx
101
102 sqrt_arg_ge_2:
103 /* From here on, n is never accessed directly again until it is 104 replaced by the answer. */ 105
106 movl %eax,FPU_fsqrt_arg_2 /* ms word of n */ 107 movl %ecx,FPU_fsqrt_arg_1
108 movl %edx,FPU_fsqrt_arg_0
109
110 /* Make a linear first estimate */ 111 shrl $1,%eax
112 addl $0x40000000,%eax
113 movl $0xaaaaaaaa,%ecx
114 mull %ecx
115 shll %edx /* max result was 7fff... */ 116 testl $0x80000000,%edx /* but min was 3fff... */ 117 jnz sqrt_prelim_no_adjust
118
119 movl $0x80000000,%edx /* round up */ 120
121 sqrt_prelim_no_adjust:
122 movl %edx,%esi /* Our first guess */ 123
124 /* We have now computed (approx) (2 + x) / 3, which forms the basis 125 for a few iterations of Newton's method */ 126
127 movl FPU_fsqrt_arg_2,%ecx /* ms word */ 128
129 /* 130 * From our initial estimate, three iterations are enough to get us 131 * to 30 bits or so. This will then allow two iterations at better 132 * precision to complete the process. 133 */ 134
135 /* Compute (g + n/g)/2 at each iteration (g is the guess). */ 136 shrl %ecx /* Doing this first will prevent a divide */ 137 /* overflow later. */ 138
139 movl %ecx,%edx /* msw of the arg / 2 */ 140 divl %esi /* current estimate */ 141 shrl %esi /* divide by 2 */ 142 addl %eax,%esi /* the new estimate */ 143
144 movl %ecx,%edx
145 divl %esi
146 shrl %esi
147 addl %eax,%esi
148
149 movl %ecx,%edx
150 divl %esi
151 shrl %esi
152 addl %eax,%esi
153
154 /* 155 * Now that an estimate accurate to about 30 bits has been obtained (in %esi), 156 * we improve it to 60 bits or so. 157 * 158 * The strategy from now on is to compute new estimates from 159 * guess := guess + (n - guess^2) / (2 * guess) 160 */ 161
162 /* First, find the square of the guess */ 163 movl %esi,%eax
164 mull %esi
165 /* guess^2 now in %edx:%eax */ 166
167 movl FPU_fsqrt_arg_1,%ecx
168 subl %ecx,%eax
169 movl FPU_fsqrt_arg_2,%ecx /* ms word of normalized n */ 170 sbbl %ecx,%edx
171 jnc sqrt_stage_2_positive
172
173 /* Subtraction gives a negative result, 174 negate the result before division. */ 175 notl %edx
176 notl %eax
177 addl $1,%eax
178 adcl $0,%edx
179
180 divl %esi
181 movl %eax,%ecx
182
183 movl %edx,%eax
184 divl %esi
185 jmp sqrt_stage_2_finish
186
187 sqrt_stage_2_positive:
188 divl %esi
189 movl %eax,%ecx
190
191 movl %edx,%eax
192 divl %esi
193
194 notl %ecx
195 notl %eax
196 addl $1,%eax
197 adcl $0,%ecx
198
199 sqrt_stage_2_finish:
200 sarl $1,%ecx /* divide by 2 */ 201 rcrl $1,%eax
202
203 /* Form the new estimate in %esi:%edi */ 204 movl %eax,%edi
205 addl %ecx,%esi
206
207 jnz sqrt_stage_2_done /* result should be [1..2) */ 208
209 #ifdefPARANOID 210 /* It should be possible to get here only if the arg is ffff....ffff */ 211 cmp $0xffffffff,FPU_fsqrt_arg_1
212 jnz sqrt_stage_2_error
213 #endif PARANOID
214
215 /* The best rounded result. */ 216 xorl %eax,%eax
217 decl %eax
218 movl %eax,%edi
219 movl %eax,%esi
220 movl $0x7fffffff,%eax
221 jmp sqrt_round_result
222
223 #ifdefPARANOID 224 sqrt_stage_2_error:
225 pushl EX_INTERNAL|0x213
226 call EXCEPTION
227 #endif PARANOID
228
229 sqrt_stage_2_done:
230
231 /* Now the square root has been computed to better than 60 bits. */ 232
233 /* Find the square of the guess. */ 234 movl %edi,%eax /* ls word of guess */ 235 mull %edi
236 movl %edx,FPU_accum_1
237
238 movl %esi,%eax
239 mull %esi
240 movl %edx,FPU_accum_3
241 movl %eax,FPU_accum_2
242
243 movl %edi,%eax
244 mull %esi
245 addl %eax,FPU_accum_1
246 adcl %edx,FPU_accum_2
247 adcl $0,FPU_accum_3
248
249 /* movl %esi,%eax */ 250 /* mull %edi */ 251 addl %eax,FPU_accum_1
252 adcl %edx,FPU_accum_2
253 adcl $0,FPU_accum_3
254
255 /* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */ 256
257 movl FPU_fsqrt_arg_0,%eax /* get normalized n */ 258 subl %eax,FPU_accum_1
259 movl FPU_fsqrt_arg_1,%eax
260 sbbl %eax,FPU_accum_2
261 movl FPU_fsqrt_arg_2,%eax /* ms word of normalized n */ 262 sbbl %eax,FPU_accum_3
263 jnc sqrt_stage_3_positive
264
265 /* Subtraction gives a negative result, 266 negate the result before division */ 267 notl FPU_accum_1
268 notl FPU_accum_2
269 notl FPU_accum_3
270 addl $1,FPU_accum_1
271 adcl $0,FPU_accum_2
272
273 #ifdefPARANOID 274 adcl $0,FPU_accum_3 /* This must be zero */ 275 jz sqrt_stage_3_no_error
276
277 sqrt_stage_3_error:
278 pushl EX_INTERNAL|0x207
279 call EXCEPTION
280
281 sqrt_stage_3_no_error:
282 #endif PARANOID
283
284 movl FPU_accum_2,%edx
285 movl FPU_accum_1,%eax
286 divl %esi
287 movl %eax,%ecx
288
289 movl %edx,%eax
290 divl %esi
291
292 sarl $1,%ecx /* divide by 2 */ 293 rcrl $1,%eax
294
295 /* prepare to round the result */ 296
297 addl %ecx,%edi
298 adcl $0,%esi
299
300 jmp sqrt_stage_3_finished
301
302 sqrt_stage_3_positive:
303 movl FPU_accum_2,%edx
304 movl FPU_accum_1,%eax
305 divl %esi
306 movl %eax,%ecx
307
308 movl %edx,%eax
309 divl %esi
310
311 sarl $1,%ecx /* divide by 2 */ 312 rcrl $1,%eax
313
314 /* prepare to round the result */ 315
316 notl %eax /* Negate the correction term */ 317 notl %ecx
318 addl $1,%eax
319 adcl $0,%ecx /* carry here ==> correction == 0 */ 320 adcl $0xffffffff,%esi
321
322 addl %ecx,%edi
323 adcl $0,%esi
324
325 sqrt_stage_3_finished:
326
327 /* 328 * The result in %esi:%edi:%esi should be good to about 90 bits here, 329 * and the rounding information here does not have sufficient accuracy 330 * in a few rare cases. 331 */ 332 cmpl $0xffffffe0,%eax
333 ja sqrt_near_exact_x
334
335 cmpl $0x00000020,%eax
336 jb sqrt_near_exact
337
338 cmpl $0x7fffffe0,%eax
339 jb sqrt_round_result
340
341 cmpl $0x80000020,%eax
342 jb sqrt_get_more_precision
343
344 sqrt_round_result:
345 /* Set up for rounding operations */ 346 movl %eax,%edx
347 movl %esi,%eax
348 movl %edi,%ebx
349 movl PARAM1,%edi
350 movl EXP_BIAS,EXP(%edi) /* Result is in [1.0 .. 2.0) */ 351 movl PARAM2,%ecx
352 jmp fpu_reg_round_sqrt
353
354
355 sqrt_near_exact_x:
356 /* First, the estimate must be rounded up. */ 357 addl $1,%edi
358 adcl $0,%esi
359
360 sqrt_near_exact:
361 /* 362 * This is an easy case because x^1/2 is monotonic. 363 * We need just find the square of our estimate, compare it 364 * with the argument, and deduce whether our estimate is 365 * above, below, or exact. We use the fact that the estimate 366 * is known to be accurate to about 90 bits. 367 */ 368 movl %edi,%eax /* ls word of guess */ 369 mull %edi
370 movl %edx,%ebx /* 2nd ls word of square */ 371 movl %eax,%ecx /* ls word of square */ 372
373 movl %edi,%eax
374 mull %esi
375 addl %eax,%ebx
376 addl %eax,%ebx
377
378 #ifdefPARANOID 379 cmp $0xffffffb0,%ebx
380 jb sqrt_near_exact_ok
381
382 cmp $0x00000050,%ebx
383 ja sqrt_near_exact_ok
384
385 pushl EX_INTERNAL|0x214
386 call EXCEPTION
387
388 sqrt_near_exact_ok:
389 #endif PARANOID
390
391 or %ebx,%ebx
392 js sqrt_near_exact_small
393
394 jnz sqrt_near_exact_large
395
396 or %ebx,%edx
397 jnz sqrt_near_exact_large
398
399 /* Our estimate is exactly the right answer */ 400 xorl %eax,%eax
401 jmp sqrt_round_result
402
403 sqrt_near_exact_small:
404 /* Our estimate is too small */ 405 movl $0x000000ff,%eax
406 jmp sqrt_round_result
407
408 sqrt_near_exact_large:
409 /* Our estimate is too large, we need to decrement it */ 410 subl $1,%edi
411 sbbl $0,%esi
412 movl $0xffffff00,%eax
413 jmp sqrt_round_result
414
415
416 sqrt_get_more_precision:
417 /* This case is almost the same as the above, except we start 418 with an extra bit of precision in the estimate. */ 419 stc /* The extra bit. */ 420 rcll $1,%edi /* Shift the estimate left one bit */ 421 rcll $1,%esi
422
423 movl %edi,%eax /* ls word of guess */ 424 mull %edi
425 movl %edx,%ebx /* 2nd ls word of square */ 426 movl %eax,%ecx /* ls word of square */ 427
428 movl %edi,%eax
429 mull %esi
430 addl %eax,%ebx
431 addl %eax,%ebx
432
433 /* Put our estimate back to its original value */ 434 stc /* The ms bit. */ 435 rcrl $1,%esi /* Shift the estimate left one bit */ 436 rcrl $1,%edi
437
438 #ifdefPARANOID 439 cmp $0xffffff60,%ebx
440 jb sqrt_more_prec_ok
441
442 cmp $0x000000a0,%ebx
443 ja sqrt_more_prec_ok
444
445 pushl EX_INTERNAL|0x215
446 call EXCEPTION
447
448 sqrt_more_prec_ok:
449 #endif PARANOID
450
451 or %ebx,%ebx
452 js sqrt_more_prec_small
453
454 jnz sqrt_more_prec_large
455
456 or %ebx,%ecx
457 jnz sqrt_more_prec_large
458
459 /* Our estimate is exactly the right answer */ 460 movl $0x80000000,%eax
461 jmp sqrt_round_result
462
463 sqrt_more_prec_small:
464 /* Our estimate is too small */ 465 movl $0x800000ff,%eax
466 jmp sqrt_round_result
467
468 sqrt_more_prec_large:
469 /* Our estimate is too large */ 470 movl $0x7fffff00,%eax
471 jmp sqrt_round_result