1 .file "reg_round.S"
/*---------------------------------------------------------------------------+
 |  reg_round.S                                                              |
 |                                                                           |
 | Rounding/truncation/etc for FPU basic arithmetic functions.               |
 |                                                                           |
 | Copyright (C) 1993                                                        |
 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
 |                       Australia.  E-mail apm233m@vaxc.cc.monash.edu.au    |
 |                                                                           |
 | This code has four possible entry points.                                 |
 | The following must be entered by a jmp instruction:                       |
 |   FPU_round, FPU_round_sqrt, and FPU_Arith_exit.                          |
 |                                                                           |
 | The _round_reg entry point is intended to be used by C code.              |
 | From C, call as:                                                          |
 | void round_reg(FPU_REG *arg, unsigned int extent, unsigned int control_w) |
 |                                                                           |
 | For correct "up" and "down" rounding, the argument must have the correct  |
 | sign.                                                                     |
 |                                                                           |
 +---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------+
 | Four entry points.                                                        |
 |                                                                           |
 | Needed by both the FPU_round and FPU_round_sqrt entry points:             |
 |  %eax:%ebx  64 bit significand                                            |
 |  %edx       32 bit extension of the significand                           |
 |  %edi       pointer to an FPU_REG for the result to be stored             |
 |  stack      calling function must have set up a C stack frame and         |
 |             pushed %esi, %edi, and %ebx                                   |
 |                                                                           |
 | Needed just for the FPU_round_sqrt entry point:                           |
 |  %cx  A control word in the same format as the FPU control word.          |
 | Otherwise, PARAM4 must give such a value.                                 |
 |                                                                           |
 |                                                                           |
 | The significand and its extension are assumed to be exact in the          |
 | following sense:                                                          |
 |   If the significand by itself is the exact result then the significand   |
 |   extension (%edx) must contain 0, otherwise the significand extension    |
 |   must be non-zero.                                                       |
 |   If the significand extension is non-zero then the significand is        |
 |   smaller than the magnitude of the correct exact result by an amount     |
 |   greater than zero and less than one ls bit of the significand.          |
 |   The significand extension is only required to have three possible       |
 |   non-zero values:                                                        |
 |       less than 0x80000000  <=> the significand is less than 1/2 an ls    |
 |                                 bit smaller than the magnitude of the     |
 |                                 true exact result.                        |
 |         exactly 0x80000000  <=> the significand is exactly 1/2 an ls bit  |
 |                                 smaller than the magnitude of the true    |
 |                                 exact result.                             |
 |    greater than 0x80000000  <=> the significand is more than 1/2 an ls    |
 |                                 bit smaller than the magnitude of the     |
 |                                 true exact result.                        |
 |                                                                           |
 +---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------+
 |  The code in this module has become quite complex, but it should handle   |
 |  all of the FPU flags which are set at this stage of the basic arithmetic |
 |  computations.                                                            |
 |  There are a few rare cases where the results are not set identically to  |
 |  a real FPU. These require a bit more thought because at this stage the   |
 |  results of the code here appear to be more consistent...                 |
 |  This may be changed in a future version.                                 |
 +---------------------------------------------------------------------------*/

71
72 #include "fpu_asm.h"
73 #include "exception.h"
74 #include "control_w.h"
75
/*
 * Rounding outcome codes kept in FPU_bits_lost.  Zero (written at
 * xDenorm_done) means nothing has been discarded so far; the rounding
 * paths store LOST_DOWN after truncating and LOST_UP after incrementing.
 * The '$' is part of each macro so the name can be used directly as an
 * immediate operand.
 */
#define LOST_DOWN		$1
#define LOST_UP			$2

/*
 * Underflow handling codes kept in FPU_denormal.  Zero means the exponent
 * was above EXP_UNDER on entry; DENORMAL means the significand was shifted
 * right to form a denormal (underflow masked); UNMASKED_UNDERFLOW means
 * the underflow mask bit was clear, so no denormalising shift was done.
 */
#define DENORMAL		$1
#define UNMASKED_UNDERFLOW	$2
83
/*
 * Scratch bytes used while rounding.  They are statically allocated, so a
 * single copy is shared by every pass through this code.
 */
	.data
	.align	2,0
FPU_bits_lost:			/* 0, or one of LOST_DOWN / LOST_UP */
	.byte	0
FPU_denormal:			/* 0, or one of DENORMAL / UNMASKED_UNDERFLOW */
	.byte	0
90
/*
 * Code section.  Alignment padding is filled with byte 144 (0x90, the
 * x86 NOP opcode).  All four entry points are exported.
 */
	.text
	.align	2,144
	.globl	_round_reg
	.globl	FPU_round
	.globl	FPU_round_sqrt
	.globl	FPU_Arith_exit
97
/*
 * _round_reg -- entry point for C callers.
 *
 * Builds a C stack frame and saves %esi, %edi and %ebx (they are popped
 * again at FPU_Arith_exit), then loads the registers which the
 * FPU_round_sqrt entry point expects before jumping to it:
 *	%ecx		PARAM3, the control word
 *	%edx		PARAM2, the significand extension
 *	%edi		PARAM1, pointer to the register to be rounded
 *	%eax:%ebx	the significand read from SIGH:SIGL of that register
 */
_round_reg:
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%esi
	pushl	%edi
	pushl	%ebx

	movl	PARAM3,%ecx		/* control word */
	movl	PARAM2,%edx		/* significand extension */
	movl	PARAM1,%edi		/* register to be rounded */
	movl	SIGL(%edi),%ebx		/* ls half of the significand */
	movl	SIGH(%edi),%eax		/* ms half of the significand */
	jmp	FPU_round_sqrt
112
/*
 * FPU_round / FPU_round_sqrt -- entry points reached by jmp.
 *
 * On entry:
 *	%eax:%ebx	64 bit significand
 *	%edx		32 bit extension of the significand
 *	%edi		pointer to the FPU_REG which receives the result
 *	%ecx		control word (FPU_round_sqrt only; FPU_round loads
 *			it from PARAM4)
 *
 * A number whose exponent is not above EXP_UNDER is first handed to
 * xMake_denorm, which comes back at xDenorm_done.  From there on %esi
 * keeps the complete control word while %ecx is used as scratch, and the
 * code dispatches on the precision control field.
 */
FPU_round:			/* Normal entry point */
	movl	PARAM4,%ecx

FPU_round_sqrt:			/* Entry point from wm_sqrt.S */

#ifdef PARANOID
/* Cannot use this here yet */
/*	orl	%eax,%eax */
/*	jns	L_entry_bugged */
#endif /* PARANOID */

	cmpl	EXP_UNDER,EXP(%edi)
	jle	xMake_denorm		/* The number is a de-normal */

	movb	$0,FPU_denormal		/* 0 -> not a de-normal */

xDenorm_done:
	movb	$0,FPU_bits_lost	/* No bits yet lost in rounding */

	movl	%ecx,%esi		/* keep the whole control word */
	andl	CW_PC,%ecx		/* precision control field only */
	cmpl	PR_64_BITS,%ecx
	je	LRound_To_64

	cmpl	PR_53_BITS,%ecx
	je	LRound_To_53

	cmpl	PR_24_BITS,%ecx
	je	LRound_To_24

#ifdef PARANOID
	jmp	L_bugged	/* There is no bug, just a bad control word */
#endif /* PARANOID */
	/*
	 * Without PARANOID an unrecognised precision setting falls through
	 * into the code which follows (the 24 bit rounding code).
	 */
146
147
/*
 * Round etc to 24 bit precision.
 *
 * The result keeps the top 24 bits of %eax.  The low 8 bits of %eax, all
 * of %ebx and the extension in %edx form the part which is discarded.
 * %esi holds the control word; its rounding control field selects one of
 * the paths below.  Every path ends at LRe_normalise, either directly or
 * through LCheck_Round_Overflow after an increment.
 */
LRound_To_24:
	movl	%esi,%ecx
	andl	CW_RC,%ecx
	cmpl	RC_RND,%ecx
	je	LRound_nearest_24

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_24

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_24

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_24

#ifdef PARANOID
	jmp	L_bugged
#endif /* PARANOID */

LUp_24:
	cmpb	SIGN_POS,SIGN(%edi)
	jne	LCheck_truncate_24	/* If negative then  up==truncate */

	jmp	LCheck_24_round_up

LDown_24:
	cmpb	SIGN_POS,SIGN(%edi)
	je	LCheck_truncate_24	/* If positive then  down==truncate */

LCheck_24_round_up:
	/* Directed rounding away from zero: increment if anything is lost */
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	orl	%ebx,%ecx
	orl	%edx,%ecx
	jnz	LDo_24_round_up
	jmp	LRe_normalise

LRound_nearest_24:
	/* Do rounding of the 24th bit if needed (nearest or even) */
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	cmpl	$0x00000080,%ecx
	jc	LCheck_truncate_24	/* less than half, no increment needed */

	jne	LGreater_Half_24	/* greater than half, increment needed */

	/* Possibly half, we need to check the ls bits */
	orl	%ebx,%ebx
	jnz	LGreater_Half_24	/* greater than half, increment needed */

	orl	%edx,%edx
	jnz	LGreater_Half_24	/* greater than half, increment needed */

	/* Exactly half, increment only if 24th bit is 1 (round to even) */
	testl	$0x00000100,%eax
	jz	LDo_truncate_24

LGreater_Half_24:			/* Rounding: increment at the 24th bit */
LDo_24_round_up:
	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
	xorl	%ebx,%ebx
	movb	LOST_UP,FPU_bits_lost
	addl	$0x00000100,%eax	/* last: its carry is tested at the target */
	jmp	LCheck_Round_Overflow

LCheck_truncate_24:
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	orl	%ebx,%ecx
	orl	%edx,%ecx
	jz	LRe_normalise		/* No truncation needed */

LDo_truncate_24:
	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
	xorl	%ebx,%ebx
	movb	LOST_DOWN,FPU_bits_lost
	jmp	LRe_normalise
226
227
/*
 * Round etc to 53 bit precision.
 *
 * The result keeps %eax and the top 21 bits of %ebx.  The low 11 bits of
 * %ebx together with the extension in %edx form the part which is
 * discarded.  %esi holds the control word; its rounding control field
 * selects one of the paths below.  Every path ends at LRe_normalise,
 * either directly or through LCheck_Round_Overflow after an increment.
 */
LRound_To_53:
	movl	%esi,%ecx
	andl	CW_RC,%ecx
	cmpl	RC_RND,%ecx
	je	LRound_nearest_53

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_53

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_53

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_53

#ifdef PARANOID
	jmp	L_bugged
#endif /* PARANOID */

LUp_53:
	cmpb	SIGN_POS,SIGN(%edi)
	jne	LCheck_truncate_53	/* If negative then  up==truncate */

	jmp	LCheck_53_round_up

LDown_53:
	cmpb	SIGN_POS,SIGN(%edi)
	je	LCheck_truncate_53	/* If positive then  down==truncate */

LCheck_53_round_up:
	/* Directed rounding away from zero: increment if anything is lost */
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	orl	%edx,%ecx
	jnz	LDo_53_round_up
	jmp	LRe_normalise

LRound_nearest_53:
	/* Do rounding of the 53rd bit if needed (nearest or even) */
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	cmpl	$0x00000400,%ecx
	jc	LCheck_truncate_53	/* less than half, no increment needed */

	jnz	LGreater_Half_53	/* greater than half, increment needed */

	/* Possibly half, we need to check the ls bits */
	orl	%edx,%edx
	jnz	LGreater_Half_53	/* greater than half, increment needed */

	/* Exactly half, increment only if 53rd bit is 1 (round to even) */
	testl	$0x00000800,%ebx
	jz	LTruncate_53

LGreater_Half_53:			/* Rounding: increment at the 53rd bit */
LDo_53_round_up:
	movb	LOST_UP,FPU_bits_lost
	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
	addl	$0x00000800,%ebx
	adcl	$0,%eax			/* last: its carry is tested at the target */
	jmp	LCheck_Round_Overflow

LCheck_truncate_53:
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	orl	%edx,%ecx
	jz	LRe_normalise		/* No truncation needed */

LTruncate_53:
	movb	LOST_DOWN,FPU_bits_lost
	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
	jmp	LRe_normalise
300
301
/*
 * Round etc to 64 bit precision.
 *
 * The whole of %eax:%ebx is kept; only the extension in %edx is
 * discarded.  %esi holds the control word; its rounding control field
 * selects one of the paths below.
 *
 * LCheck_Round_Overflow is shared with the 24 and 53 bit code: it must be
 * reached with the carry flag still holding the carry out of the
 * significand increment.
 */
LRound_To_64:
	movl	%esi,%ecx
	andl	CW_RC,%ecx
	cmpl	RC_RND,%ecx
	je	LRound_nearest_64

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_64

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_64

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_64

#ifdef PARANOID
	jmp	L_bugged
#endif /* PARANOID */

LUp_64:
	cmpb	SIGN_POS,SIGN(%edi)
	jne	LCheck_truncate_64	/* If negative then  up==truncate */

	orl	%edx,%edx
	jnz	LDo_64_round_up
	jmp	LRe_normalise

LDown_64:
	cmpb	SIGN_POS,SIGN(%edi)
	je	LCheck_truncate_64	/* If positive then  down==truncate */

	orl	%edx,%edx
	jnz	LDo_64_round_up
	jmp	LRe_normalise

LRound_nearest_64:
	cmpl	$0x80000000,%edx
	jc	LCheck_truncate_64	/* less than half, no increment needed */

	jne	LDo_64_round_up		/* greater than half, increment needed */

	/*
	 * Exactly half: now test for round-to-even.  The ls bit of the
	 * significand lives in %bl; a byte-sized test must name the byte
	 * register, not %ebx.
	 */
	testb	$1,%bl
	jz	LCheck_truncate_64

LDo_64_round_up:
	movb	LOST_UP,FPU_bits_lost
	addl	$1,%ebx
	adcl	$0,%eax

LCheck_Round_Overflow:
	jnc	LRe_normalise

	/* Overflow, adjust the result (significand to 1.0) */
	rcrl	$1,%eax			/* the carry becomes the new ms bit */
	rcrl	$1,%ebx
	incl	EXP(%edi)
	jmp	LRe_normalise

LCheck_truncate_64:
	orl	%edx,%edx
	jz	LRe_normalise		/* No truncation needed */

LTruncate_64:
	movb	LOST_DOWN,FPU_bits_lost
	/* Execution continues with the code which follows (LRe_normalise). */
368
/*
 * Common tail of every rounding path.
 *
 * A non-zero FPU_denormal sends control to xNormalise_result first.
 * Otherwise any precision loss recorded in FPU_bits_lost is reported, the
 * result is tagged and stored through %edi, and the exponent is checked
 * against EXP_OVER.  %eax is zero when control falls into FPU_Arith_exit
 * from here.
 *
 * FPU_Arith_exit is also an entry point in its own right: it pops the
 * three registers saved by the caller's prologue, tears down the frame
 * and returns.
 */
LRe_normalise:
	cmpb	$0,FPU_denormal
	jne	xNormalise_result	/* denormal or unmasked underflow */

xL_Normalised:
	cmpb	LOST_UP,FPU_bits_lost
	je	xL_precision_lost_up

	cmpb	LOST_DOWN,FPU_bits_lost
	je	xL_precision_lost_down

xL_no_precision_loss:
	movb	TW_Valid,TAG(%edi)	/* tag the stored result as valid */

xL_Store_significand:
	movl	%ebx,SIGL(%edi)
	movl	%eax,SIGH(%edi)

	xorl	%eax,%eax		/* No errors detected. */

	cmpl	EXP_OVER,EXP(%edi)
	jge	L_overflow

FPU_Arith_exit:
	popl	%ebx
	popl	%edi
	popl	%esi
	leave
	ret
399
400
/*
 * Precision-loss reporting stubs, reached from xL_Normalised.
 *
 * Each one calls the external routine which sets the FPU status flags for
 * its kind of loss (round-up or truncation), preserving %eax around the
 * call, and then rejoins the store path at xL_no_precision_loss.
 */
xL_precision_lost_up:
	pushl	%eax
	call	_set_precision_flag_up
	popl	%eax
	jmp	xL_no_precision_loss

xL_precision_lost_down:
	pushl	%eax
	call	_set_precision_flag_down
	popl	%eax
	jmp	xL_no_precision_loss
416
417
/*
 * xMake_denorm -- the number is a denormal (which might get rounded up to
 * a normal).  Reached from the entry code with the control word in %ecx.
 *
 * If the underflow exception is unmasked nothing is shifted; the fact is
 * merely recorded in FPU_denormal.  Otherwise %eax:%ebx:%edx is shifted
 * right by (EXP_UNDER+1 - exponent) bits and the exponent is set to
 * EXP_UNDER+1; the shift is undone later at xNormalise_result.  Whenever
 * non-zero bits are shifted out past %edx, a low bit is set in the new
 * extension so that it remains non-zero.
 *
 * %ecx (the control word) is saved on the stack around the shifting and
 * restored before every jump back to xDenorm_done.
 */
xMake_denorm:
	testb	CW_Underflow,%cl	/* Underflow mask. */
	jz	xUnmasked_underflow	/* Do not make a denormal. */

	movb	DENORMAL,FPU_denormal

	pushl	%ecx			/* Save the control word */
	movl	EXP_UNDER+1,%ecx
	subl	EXP(%edi),%ecx		/* %ecx = size of the right shift */

	cmpl	$64,%ecx		/* 64 or more: nothing survives in %eax:%ebx */
	jae	xDenorm_shift_more_than_63

	cmpl	$32,%ecx		/* shrd only works for 0..31 bits */
	jae	xDenorm_shift_more_than_32

	/*
	 * We got here without jumps by assuming that the most common
	 * requirement is for a small de-normalising shift.
	 * Shift by [1..31] bits
	 */
	addl	%ecx,EXP(%edi)		/* exponent becomes EXP_UNDER+1 */
	testl	%edx,%edx		/* old extension */
	setne	%ch			/* remember whether it was non-zero */
	xorl	%edx,%edx
	shrdl	%cl,%ebx,%edx
	shrdl	%cl,%eax,%ebx
	shrl	%cl,%eax
	orb	%ch,%dl			/* keep the extension non-zero if bits were lost */
	popl	%ecx
	jmp	xDenorm_done

	/* Shift by [32..63] bits: do (count-32) here, then move down a word */
xDenorm_shift_more_than_32:
	addl	%ecx,EXP(%edi)
	subb	$32,%cl
	testl	%edx,%edx
	setne	%ch			/* old extension non-zero? */
	orb	%ch,%bl
	xorl	%edx,%edx
	shrdl	%cl,%ebx,%edx
	shrdl	%cl,%eax,%ebx
	shrl	%cl,%eax
	testl	%edx,%edx		/* test these 32 bits */
	setne	%cl
	orb	%ch,%bl
	orb	%cl,%bl
	movl	%ebx,%edx		/* shift right one whole word */
	movl	%eax,%ebx
	xorl	%eax,%eax
	popl	%ecx
	jmp	xDenorm_done

	/* Shift by [64..) bits */
xDenorm_shift_more_than_63:
	cmpl	$64,%ecx
	jne	xDenorm_shift_more_than_64

	/* Exactly 64 bit shift: %eax becomes the extension */
	addl	%ecx,EXP(%edi)
	xorl	%ecx,%ecx
	testl	%edx,%edx
	setne	%cl
	testl	%ebx,%ebx
	setne	%ch
	orb	%ch,%cl			/* anything lost from %ebx or %edx? */
	orb	%cl,%al
	movl	%eax,%edx
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	popl	%ecx
	jmp	xDenorm_done

xDenorm_shift_more_than_64:
	movl	EXP_UNDER+1,EXP(%edi)
	/* This is easy, %eax must be non-zero, so.. */
	movl	$1,%edx
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	popl	%ecx
	jmp	xDenorm_done


xUnmasked_underflow:
	movb	UNMASKED_UNDERFLOW,FPU_denormal
	jmp	xDenorm_done
507
508
/*
 * xNormalise_result -- undo the de-normalisation.
 *
 * Reached from LRe_normalise when FPU_denormal is non-zero.  The unmasked
 * underflow case is passed on to xSignal_underflow.  Otherwise the rounded
 * significand in %eax:%ebx is shifted left until its ms bit is set, with
 * the exponent reduced to match.  If any bits were lost in rounding, a
 * (masked) underflow exception is raised before rejoining xL_Normalised.
 */
xNormalise_result:
	cmpb	UNMASKED_UNDERFLOW,FPU_denormal
	je	xSignal_underflow

/* The number must be a denormal if we got here. */
#ifdef PARANOID
	/* But check it... just in case. */
	cmpl	EXP_UNDER+1,EXP(%edi)
	jne	L_norm_bugged
#endif /* PARANOID */

#ifdef PECULIAR_486
	/*
	 * This implements a special feature of 80486 behaviour.
	 * Underflow will be signalled even if the number is
	 * not a denormal after rounding.
	 * This difference occurs only for masked underflow, and not
	 * in the unmasked case.
	 * Actual 80486 behaviour differs from this in some circumstances.
	 */
	orl	%eax,%eax		/* ms bits */
	js	LNormalise_shift_done	/* Will be masked underflow */
#endif /* PECULIAR_486 */

	orl	%eax,%eax		/* ms bits */
	js	xL_Normalised		/* No longer a denormal */

	jnz	LNormalise_shift_up_to_31	/* Shift left 0 - 31 bits */

	orl	%ebx,%ebx
	jz	L_underflow_to_zero	/* The contents are zero */

	/* Shift left 32 - 63 bits: move up one whole word first */
	movl	%ebx,%eax
	xorl	%ebx,%ebx
	subl	$32,EXP(%edi)

LNormalise_shift_up_to_31:
	bsrl	%eax,%ecx	/* index of the ms set bit of %eax */
	subl	$31,%ecx
	negl	%ecx		/* required shift = 31 - index */
	shld	%cl,%ebx,%eax
	shl	%cl,%ebx
	subl	%ecx,EXP(%edi)

LNormalise_shift_done:
	testb	$0xff,FPU_bits_lost	/* bits lost == underflow */
	jz	xL_Normalised

	/* There must be a masked underflow */
	push	%eax			/* preserve the ms significand word */
	pushl	EX_Underflow
	call	_exception
	popl	%eax			/* discard the argument */
	popl	%eax			/* restore the ms significand word */
	jmp	xL_Normalised
564
565
/*
 * L_underflow_to_zero -- the operations resulted in a number too small to
 * represent; masked response.
 *
 * Reached from xNormalise_result when both %eax and %ebx are zero.  The
 * precision-lost (down) flags are set, an underflow exception is raised,
 * and the result is stored with exponent EXP_UNDER and tag TW_Zero.
 * %eax is preserved around both calls.
 */
L_underflow_to_zero:
	pushl	%eax
	call	_set_precision_flag_down
	popl	%eax

	pushl	%eax			/* preserve %eax */
	pushl	EX_Underflow
	call	_exception
	popl	%eax			/* discard the argument */
	popl	%eax			/* restore %eax */

	movb	TW_Zero,TAG(%edi)
	movl	EXP_UNDER,EXP(%edi)	/* Reduce the exponent to EXP_UNDER */
	jmp	xL_Store_significand
583
584
/*
 * L_overflow -- the operations resulted in a number too large to
 * represent.  Passes the result register pointer (%edi) to
 * _arith_overflow and leaves through FPU_Arith_exit without touching
 * %eax after the call.
 */
L_overflow:
	pushl	%edi
	call	_arith_overflow
	popl	%edi
	jmp	FPU_Arith_exit
591
592
/*
 * xSignal_underflow -- unmasked underflow was noted on entry.
 *
 * The number may have been changed to a non-denormal by the rounding
 * operations, in which case there is nothing to signal.  Otherwise the
 * exponent is increased by the magic number 3*(1<<13) and an underflow
 * exception is raised, with %eax preserved around the call.
 */
xSignal_underflow:
	cmpl	EXP_UNDER,EXP(%edi)
	jg	xL_Normalised		/* rounding produced a normal number */

xDo_unmasked_underflow:
	/* Increase the exponent by the magic number */
	addl	$(3*(1<<13)),EXP(%edi)
	pushl	%eax			/* preserve %eax */
	pushl	EX_Underflow
	call	EXCEPTION
	popl	%eax			/* discard the argument */
	popl	%eax			/* restore %eax */
	jmp	xL_Normalised
610
611
/*
 * Internal consistency failures; only assembled when PARANOID is defined.
 *
 * Each handler raises EX_INTERNAL with its own identifying code.  The
 * argument is discarded by popping it into %ebx, which is harmless
 * because FPU_Arith_exit reloads %ebx from the stack.  All handlers leave
 * through FPU_Arith_exit with %eax = 1.
 */
#ifdef PARANOID
/* If we ever get here then we have problems! */
L_bugged:
	pushl	EX_INTERNAL|0x201
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

L_norm_bugged:
	pushl	EX_INTERNAL|0x216
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

L_entry_bugged:
	pushl	EX_INTERNAL|0x217
	call	EXCEPTION
	popl	%ebx
L_exception_exit:
	mov	$1,%eax
	jmp	FPU_Arith_exit
#endif /* PARANOID */