1 .file "reg_round.S"
/*---------------------------------------------------------------------------+
 |  reg_round.S                                                              |
 |                                                                           |
 | Rounding/truncation/etc for FPU basic arithmetic functions.               |
 |                                                                           |
 | Copyright (C) 1993                                                        |
 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
 |                       Australia. E-mail apm233m@vaxc.cc.monash.edu.au     |
 |                                                                           |
 | This code has four possible entry points.                                 |
 | The following must be entered by a jmp instruction:                       |
 |        FPU_round, FPU_round_sqrt, and FPU_Arith_exit.                     |
 |                                                                           |
 | The _round_reg entry point is intended to be used by C code.              |
 | From C, call as:                                                          |
 | void round_reg(FPU_REG *arg, unsigned int extent, unsigned int control_w) |
 |                                                                           |
 +---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------+
 | Four entry points.                                                        |
 |                                                                           |
 | Needed by both the FPU_round and FPU_round_sqrt entry points:             |
 | %eax:%ebx  64 bit significand                                             |
 | %edx       32 bit extension of the significand                            |
 | %edi       pointer to an FPU_REG for the result to be stored              |
 | stack      calling function must have set up a C stack frame and          |
 |            pushed %esi, %edi, and %ebx                                    |
 |                                                                           |
 | Needed just for the FPU_round_sqrt entry point:                           |
 | %cx  A control word in the same format as the FPU control word.           |
 | Otherwise, PARAM4 must give such a value.                                 |
 |                                                                           |
 |                                                                           |
 | The significand and its extension are assumed to be exact in the          |
 | following sense:                                                          |
 |   If the significand by itself is the exact result then the significand   |
 |   extension (%edx) must contain 0, otherwise the significand extension    |
 |   must be non-zero.                                                       |
 |   If the significand extension is non-zero then the significand is        |
 |   smaller than the magnitude of the correct exact result by an amount     |
 |   greater than zero and less than one ls bit of the significand.          |
 |   The significand extension is only required to have three possible       |
 |   non-zero values:                                                        |
 |     less than 0x80000000    <=> the significand is less than 1/2 an ls    |
 |                                 bit smaller than the magnitude of the     |
 |                                 true exact result.                        |
 |     exactly 0x80000000      <=> the significand is exactly 1/2 an ls bit  |
 |                                 smaller than the magnitude of the true    |
 |                                 exact result.                             |
 |     greater than 0x80000000 <=> the significand is more than 1/2 an ls    |
 |                                 bit smaller than the magnitude of the     |
 |                                 true exact result.                        |
 |                                                                           |
 +---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------+
 | The code in this module has become quite complex, but it should handle    |
 | all of the FPU flags which are set at this stage of the basic arithmetic  |
 | computations.                                                             |
 | There are a few rare cases where the results are not set identically to   |
 |   a real FPU. These require a bit more thought because at this stage the  |
 |   results of the code here appear to be more consistent...                |
 | This may be changed in a future version.                                  |
 +---------------------------------------------------------------------------*/

68
69 #include "fpu_asm.h"
70 #include "exception.h"
71 #include "control_w.h"
72
/* Values stored in FPU_bits_lost to record how rounding discarded bits. */
#define	LOST_DOWN	$1	/* the discarded bits were truncated */
#define	LOST_UP		$2	/* the significand was incremented */

/* Values stored in FPU_denormal to record how a tiny result is handled. */
#define	DENORMAL	$1	/* masked underflow: significand was shifted right */
#define	UNMASKED_UNDERFLOW $2	/* unmasked underflow: exponent was adjusted */

/*
 * Scratch state shared by all of the rounding paths below.
 * NOTE(review): these are single static bytes, so the routine is not
 * re-entrant; confirm that callers never round concurrently.
 */
	.data
	.align 2,0
FPU_bits_lost:			/* 0, LOST_DOWN or LOST_UP */
	.byte	0
FPU_denormal:			/* 0, DENORMAL or UNMASKED_UNDERFLOW */
	.byte	0

/* Code section; the four entry points described above are exported. */
	.text
	.align 2,144		/* pad with 0x90 bytes */
	.globl	FPU_round
	.globl	FPU_round_sqrt
	.globl	FPU_Arith_exit
	.globl	_round_reg

/*
 * Entry point when called from C:
 *	void round_reg(FPU_REG *arg, unsigned int extent, unsigned int control_w)
 *
 * Builds the stack frame that FPU_Arith_exit later tears down (%ebp, then
 * %esi, %edi, %ebx), loads the register interface expected by the common
 * rounding code and joins it at FPU_round_sqrt.
 */
_round_reg:
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%esi
	pushl	%edi
	pushl	%ebx

	movl	PARAM1,%edi		/* arg: the FPU_REG, rounded in place */
	movl	SIGH(%edi),%eax		/* %eax:%ebx = 64 bit significand */
	movl	SIGL(%edi),%ebx
	movl	PARAM2,%edx		/* extent: significand extension */
	movl	PARAM3,%ecx		/* control_w */
	jmp	FPU_round_sqrt

/*
 * Normal entry point (reached by jmp): the control word comes from PARAM4.
 * Falls through into FPU_round_sqrt.
 */
FPU_round:
	movl	PARAM4,%ecx

/*
 * Entry point from wm_sqrt.S (reached by jmp): the control word is
 * already in %ecx.
 */
FPU_round_sqrt:

#ifdef PARANOID
/* Cannot use this here yet */
/*	orl	%eax,%eax */
/*	jns	L_entry_bugged */
#endif /* PARANOID */

	cmpl	EXP_UNDER,EXP(%edi)
	jle	xMake_denorm		/* The number is a de-normal */

	movb	$0,FPU_denormal		/* 0 -> not a de-normal */

/* The de-normalising paths rejoin here with FPU_denormal already set. */
xDenorm_done:
	movb	$0,FPU_bits_lost	/* No bits yet lost in rounding */

	movl	%ecx,%esi		/* keep the whole control word for the RC tests */
	andl	CW_PC,%ecx		/* isolate the precision control field */
	cmpl	PR_64_BITS,%ecx
	je	LRound_To_64

	cmpl	PR_53_BITS,%ecx
	je	LRound_To_53

	cmpl	PR_24_BITS,%ecx
	je	LRound_To_24

#ifdef PARANOID
	jmp	L_bugged	/* There is no bug, just a bad control word */
#endif /* PARANOID */
	/* Without PARANOID an unrecognised PC value falls into LRound_To_24. */


/*
 * Round etc to 24 bit precision.
 * In:   %eax:%ebx significand, %edx extension, %esi control word,
 *       %edi result FPU_REG.
 * The 24 bit result occupies the top 24 bits of %eax; the low 8 bits of
 * %eax, all of %ebx and the extension %edx are the bits being discarded.
 * Leaves via LRe_normalise, or via LCheck_Round_Overflow with the carry
 * flag from the increment still live.
 */
LRound_To_24:
	movl	%esi,%ecx
	andl	CW_RC,%ecx		/* isolate the rounding control field */
	cmpl	RC_RND,%ecx
	je	LRound_nearest_24

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_24

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_24

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_24

#ifdef PARANOID
	jmp	L_bugged
#endif /* PARANOID */

LUp_24:
	cmpb	SIGN_POS,SIGN(%edi)
	jne	LCheck_truncate_24	/* If negative then up==truncate */

	jmp	LCheck_24_round_up

LDown_24:
	cmpb	SIGN_POS,SIGN(%edi)
	je	LCheck_truncate_24	/* If positive then down==truncate */

/* Directed rounding away from zero: increment if any discarded bit is set. */
LCheck_24_round_up:
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	orl	%ebx,%ecx
	orl	%edx,%ecx
	jnz	LDo_24_round_up
	jmp	LRe_normalise		/* already exact */

LRound_nearest_24:
	/* Do rounding of the 24th bit if needed (nearest or even) */
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	cmpl	$0x00000080,%ecx
	jc	LCheck_truncate_24	/* less than half, no increment needed */

	jne	LGreater_Half_24	/* greater than half, increment needed */

	/* Possibly half, we need to check the ls bits */
	orl	%ebx,%ebx
	jnz	LGreater_Half_24	/* greater than half, increment needed */

	orl	%edx,%edx
	jnz	LGreater_Half_24	/* greater than half, increment needed */

	/* Exactly half, increment only if 24th bit is 1 (round to even) */
	testl	$0x00000100,%eax
	jz	LDo_truncate_24

LGreater_Half_24:			/* Rounding: increment at the 24th bit */
LDo_24_round_up:
	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
	xorl	%ebx,%ebx
	movb	LOST_UP,FPU_bits_lost
	addl	$0x00000100,%eax	/* carry out is tested at LCheck_Round_Overflow */
	jmp	LCheck_Round_Overflow

LCheck_truncate_24:
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	orl	%ebx,%ecx
	orl	%edx,%ecx
	jz	LRe_normalise		/* No truncation needed */

LDo_truncate_24:
	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
	xorl	%ebx,%ebx
	movb	LOST_DOWN,FPU_bits_lost
	jmp	LRe_normalise


/*
 * Round etc to 53 bit precision.
 * In:   %eax:%ebx significand, %edx extension, %esi control word,
 *       %edi result FPU_REG.
 * The 53 bit result is %eax plus the top 21 bits of %ebx; the low 11 bits
 * of %ebx and the extension %edx are the bits being discarded.
 * Leaves via LRe_normalise, or via LCheck_Round_Overflow with the carry
 * flag from the increment still live.
 */
LRound_To_53:
	movl	%esi,%ecx
	andl	CW_RC,%ecx		/* isolate the rounding control field */
	cmpl	RC_RND,%ecx
	je	LRound_nearest_53

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_53

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_53

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_53

#ifdef PARANOID
	jmp	L_bugged
#endif /* PARANOID */

LUp_53:
	cmpb	SIGN_POS,SIGN(%edi)
	jne	LCheck_truncate_53	/* If negative then up==truncate */

	jmp	LCheck_53_round_up

LDown_53:
	cmpb	SIGN_POS,SIGN(%edi)
	je	LCheck_truncate_53	/* If positive then down==truncate */

/* Directed rounding away from zero: increment if any discarded bit is set. */
LCheck_53_round_up:
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	orl	%edx,%ecx
	jnz	LDo_53_round_up
	jmp	LRe_normalise		/* already exact */

LRound_nearest_53:
	/* Do rounding of the 53rd bit if needed (nearest or even) */
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	cmpl	$0x00000400,%ecx
	jc	LCheck_truncate_53	/* less than half, no increment needed */

	jne	LGreater_Half_53	/* greater than half, increment needed */

	/* Possibly half, we need to check the ls bits */
	orl	%edx,%edx
	jnz	LGreater_Half_53	/* greater than half, increment needed */

	/* Exactly half, increment only if 53rd bit is 1 (round to even) */
	testl	$0x00000800,%ebx
	jz	LTruncate_53

LGreater_Half_53:			/* Rounding: increment at the 53rd bit */
LDo_53_round_up:
	movb	LOST_UP,FPU_bits_lost
	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
	addl	$0x00000800,%ebx
	adcl	$0,%eax			/* carry out is tested at LCheck_Round_Overflow */
	jmp	LCheck_Round_Overflow

LCheck_truncate_53:
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	orl	%edx,%ecx
	jz	LRe_normalise		/* No truncation needed */

LTruncate_53:
	movb	LOST_DOWN,FPU_bits_lost
	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
	jmp	LRe_normalise


/*
 * Round etc to 64 bit precision.
 * In:   %eax:%ebx significand, %edx extension, %esi control word,
 *       %edi result FPU_REG.
 * Only the extension %edx is discarded here.
 * This block also holds LCheck_Round_Overflow, the shared tail used by the
 * 24 and 53 bit paths after an increment, and it ends by falling through
 * into LRe_normalise.
 */
LRound_To_64:
	movl	%esi,%ecx
	andl	CW_RC,%ecx		/* isolate the rounding control field */
	cmpl	RC_RND,%ecx
	je	LRound_nearest_64

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_64

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_64

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_64

#ifdef PARANOID
	jmp	L_bugged
#endif /* PARANOID */

LUp_64:
	cmpb	SIGN_POS,SIGN(%edi)
	jne	LCheck_truncate_64	/* If negative then up==truncate */

	orl	%edx,%edx
	jnz	LDo_64_round_up
	jmp	LRe_normalise		/* already exact */

LDown_64:
	cmpb	SIGN_POS,SIGN(%edi)
	je	LCheck_truncate_64	/* If positive then down==truncate */

	orl	%edx,%edx
	jnz	LDo_64_round_up
	jmp	LRe_normalise		/* already exact */

LRound_nearest_64:
	cmpl	$0x80000000,%edx
	jc	LCheck_truncate_64	/* less than half, no increment needed */

	jne	LDo_64_round_up		/* greater than half, increment needed */

	/* Now test for round-to-even */
	/* Byte-sized test needs the byte register %bl, not %ebx. */
	testb	$1,%bl
	jz	LCheck_truncate_64

LDo_64_round_up:
	movb	LOST_UP,FPU_bits_lost
	addl	$1,%ebx
	adcl	$0,%eax

/*
 * Shared by all three precisions.  The carry flag must still hold the
 * carry out of the increment of the significand.
 */
LCheck_Round_Overflow:
	jnc	LRe_normalise		/* Rounding done, no overflow */

	/* Overflow, adjust the result (to 1.0) */
	rcrl	$1,%eax			/* rotate the carry back in as the ms bit */
	rcrl	$1,%ebx
	incl	EXP(%edi)
	jmp	LRe_normalise

LCheck_truncate_64:
	orl	%edx,%edx
	jz	LRe_normalise		/* No truncation needed */

LTruncate_64:
	movb	LOST_DOWN,FPU_bits_lost
	/* falls through into LRe_normalise */

/*
 * Common tail of every rounding path.
 * Undoes any de-normalisation, reports precision loss, checks for overflow,
 * stores the result through %edi and returns from the caller's frame.
 */
LRe_normalise:
	testb	$0xff,FPU_denormal
	jnz	xNormalise_result	/* DENORMAL or UNMASKED_UNDERFLOW */

xL_Normalised:
	cmpb	LOST_UP,FPU_bits_lost
	je	xL_precision_lost_up

	cmpb	LOST_DOWN,FPU_bits_lost
	je	xL_precision_lost_down

/* The precision handlers jump back to here. */
xL_no_precision_loss:
	cmpl	EXP_OVER,EXP(%edi)
	jge	L_overflow

	/* store the result */
	movb	TW_Valid,TAG(%edi)

xL_Store_significand:
	movl	%eax,SIGH(%edi)
	movl	%ebx,SIGL(%edi)

/*
 * Shared exit, also used directly by other code via jmp.  Restores the
 * registers the calling function pushed, drops its stack frame and
 * returns on its behalf.
 */
FPU_Arith_exit:
	popl	%ebx
	popl	%edi
	popl	%esi
	leave
	ret


/*
 * Set the FPU status flags to represent precision loss due to
 * round-up.  %eax (ms word of the significand) is saved around the call.
 */
xL_precision_lost_up:
	pushl	%eax
	call	_set_precision_flag_up
	popl	%eax
	jmp	xL_no_precision_loss

/*
 * Set the FPU status flags to represent precision loss due to
 * truncation.  %eax (ms word of the significand) is saved around the call.
 */
xL_precision_lost_down:
	pushl	%eax
	call	_set_precision_flag_down
	popl	%eax
	jmp	xL_no_precision_loss


/*
 * The number is a denormal (which might get rounded up to a normal)
 * Shift the number right the required number of bits, which will
 * have to be undone later...
 *
 * In:   %eax:%ebx significand, %edx extension, %ecx control word.
 * Out:  significand and extension shifted right, EXP(%edi) raised to
 *       EXP_UNDER+1, %ecx restored; continues at xDenorm_done.
 * Any non-zero bits shifted out are kept as a sticky bit in the extension.
 */
xMake_denorm:
	/* The action to be taken depends upon whether the underflow */
	/* exception is masked */
	testb	CW_Underflow,%cl	/* Underflow mask. */
	jz	xUnmasked_underflow	/* Do not make a denormal. */

	movb	DENORMAL,FPU_denormal

	pushl	%ecx			/* Save the control word */
	movl	EXP(%edi),%ecx
	subl	EXP_UNDER+1,%ecx
	negl	%ecx			/* %ecx = number of bits to shift right */

	cmpl	$64,%ecx		/* 64 or more: nothing but sticky bits remain */
	jnc	xDenorm_shift_more_than_63

	cmpl	$32,%ecx		/* shrd only works for 0..31 bits */
	jnc	xDenorm_shift_more_than_32

/*
 * We got here without jumps by assuming that the most common requirement
 * is for a small de-normalising shift.
 * Shift by [1..31] bits
 */
	addl	%ecx,EXP(%edi)
	orl	%edx,%edx		/* extension */
	setne	%ch			/* sticky: old extension was non-zero */
	xorl	%edx,%edx
	shrd	%cl,%ebx,%edx
	shrd	%cl,%eax,%ebx
	shr	%cl,%eax
	orb	%ch,%dl
	popl	%ecx
	jmp	xDenorm_done

/*
 * Shift by [32..63] bits
 * Entered from xMake_denorm with the shift count in %ecx and the saved
 * control word on the stack.  The significand moves down by one whole
 * word plus (count - 32) bits; everything shifted out is folded into the
 * ls bit of the new extension.
 */
xDenorm_shift_more_than_32:
	addl	%ecx,EXP(%edi)
	subb	$32,%cl			/* remaining shift, 0..31 */
	orl	%edx,%edx
	setne	%ch			/* sticky: old extension was non-zero */
	orb	%ch,%bl
	xorl	%edx,%edx
	shrd	%cl,%ebx,%edx
	shrd	%cl,%eax,%ebx
	shr	%cl,%eax
	orl	%edx,%edx		/* test these 32 bits */
	setne	%cl			/* sticky: bits shifted out of %ebx */
	orb	%ch,%bl
	orb	%cl,%bl
	movl	%ebx,%edx		/* old middle word becomes the extension */
	movl	%eax,%ebx
	xorl	%eax,%eax
	popl	%ecx
	jmp	xDenorm_done

/*
 * Shift by [64..) bits
 * Entered from xMake_denorm with the shift count in %ecx and the saved
 * control word on the stack.  The significand becomes zero; only the
 * extension can remain non-zero.
 */
xDenorm_shift_more_than_63:
	cmpl	$64,%ecx
	jne	xDenorm_shift_more_than_64

/* Exactly 64 bit shift */
	addl	%ecx,EXP(%edi)
	xorl	%ecx,%ecx
	orl	%edx,%edx
	setne	%cl			/* sticky: old extension was non-zero */
	orl	%ebx,%ebx
	setne	%ch			/* sticky: old ls word was non-zero */
	orb	%ch,%cl
	orb	%cl,%al
	movl	%eax,%edx		/* old ms word becomes the extension */
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	popl	%ecx
	jmp	xDenorm_done

xDenorm_shift_more_than_64:
	movl	EXP_UNDER+1,EXP(%edi)
	/* This is easy, %eax must be non-zero, so.. */
	movl	$1,%edx			/* extension: non-zero but less than half */
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	popl	%ecx
	jmp	xDenorm_done


/*
 * Underflow with the exception unmasked: the significand is left alone.
 * The exponent is adjusted instead and the case is flagged so that
 * xNormalise_result raises the exception after rounding.
 */
xUnmasked_underflow:
	/* Increase the exponent by the magic number */
	addl	$(3*(1<<13)),EXP(%edi)
	movb	UNMASKED_UNDERFLOW,FPU_denormal
	jmp	xDenorm_done


/*
 * Undo the de-normalisation.
 * Reached from LRe_normalise when FPU_denormal is non-zero.  Shifts the
 * rounded significand in %eax:%ebx left until its ms bit is set, lowering
 * EXP(%edi) by the same amount, then continues at xL_Normalised.
 */
xNormalise_result:
	cmpb	UNMASKED_UNDERFLOW,FPU_denormal
	je	xSignal_underflow

/* The number must be a denormal if we got here. */
#ifdef PARANOID
	/* But check it... just in case. */
	cmpl	EXP_UNDER+1,EXP(%edi)
	jne	L_norm_bugged
#endif /* PARANOID */

	orl	%eax,%eax		/* ms bits */
	jnz	LNormalise_shift_up_to_31	/* Shift left 0 - 31 bits */

	orl	%ebx,%ebx
	jz	L_underflow_to_zero	/* The contents are zero */

/* Shift left 32 - 63 bits */
	movl	%ebx,%eax
	xorl	%ebx,%ebx
	subl	$32,EXP(%edi)

LNormalise_shift_up_to_31:
	bsrl	%eax,%ecx	/* get the required shift in %ecx */
	subl	$31,%ecx
	negl	%ecx		/* %ecx = 31 - index of the ms set bit */
	shld	%cl,%ebx,%eax
	shl	%cl,%ebx
	subl	%ecx,EXP(%edi)

LNormalise_shift_done:
	testb	$0xff,FPU_bits_lost	/* bits lost == underflow */
	jz	xL_Normalised

	/* There must be a masked underflow */
	pushl	%eax			/* save the ms word around the call */
	pushl	EX_Underflow
	call	_exception
	popl	%eax			/* discard the argument */
	popl	%eax			/* restore the ms word */
	jmp	xL_Normalised


/*
 * The operations resulted in a number too small to represent.
 * Masked response.
 * Flags precision loss and underflow, tags the result as zero and stores
 * the (all zero) significand.
 */
L_underflow_to_zero:
	pushl	%eax
	call	_set_precision_flag_down
	popl	%eax

	pushl	%eax			/* save the ms word around the call */
	pushl	EX_Underflow
	call	_exception
	popl	%eax			/* discard the argument */
	popl	%eax			/* restore the ms word */

	movb	TW_Zero,TAG(%edi)
	jmp	xL_Store_significand


/*
 * The operations resulted in a number too large to represent.
 * Passes the result FPU_REG pointer to _arith_overflow and exits without
 * storing the significand here.
 */
L_overflow:
	pushl	%edi
	call	_arith_overflow
	popl	%edi
	jmp	FPU_Arith_exit


/*
 * Unmasked underflow: reached from xNormalise_result when FPU_denormal is
 * UNMASKED_UNDERFLOW.  Raises EX_Underflow and carries on with the result.
 */
xSignal_underflow:
	pushl	%eax			/* save the ms word around the call */
	pushl	EX_Underflow
	call	EXCEPTION
	popl	%eax			/* discard the argument */
	popl	%eax			/* restore the ms word */
	jmp	xL_Normalised


#ifdef PARANOID
/*
 * If we ever get here then we have problems!
 * Each handler reports an internal error code through EXCEPTION, discards
 * the pushed argument and leaves through the normal exit.
 */
L_bugged:
	pushl	EX_INTERNAL|0x201
	call	EXCEPTION
	popl	%ebx			/* discard the argument */
	jmp	FPU_Arith_exit

L_norm_bugged:
	pushl	EX_INTERNAL|0x216
	call	EXCEPTION
	popl	%ebx			/* discard the argument */
	jmp	FPU_Arith_exit

L_entry_bugged:
	pushl	EX_INTERNAL|0x217
	call	EXCEPTION
	popl	%ebx			/* discard the argument */
	jmp	FPU_Arith_exit
#endif /* PARANOID */