root/kernel/FPU-emu/reg_round.S

/* [previous][next][first][last][top][bottom][index][help] */
   1         .file "reg_round.S"
   2 /*---------------------------------------------------------------------------+
   3  |  reg_round.S                                                              |
   4  |                                                                           |
   5  | Rounding/truncation/etc for FPU basic arithmetic functions.               |
   6  |                                                                           |
   7  | Copyright (C) 1993                                                        |
   8  |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
   9  |                       Australia.  E-mail apm233m@vaxc.cc.monash.edu.au    |
  10  |                                                                           |
  11  | This code has four possible entry points.                                 |
  12  | The following must be entered by a jmp instruction:                       |
  13  |   FPU_round, FPU_round_sqrt, and FPU_Arith_exit.                          |
  14  |                                                                           |
  15  | The _round_reg entry point is intended to be used by C code.              |
  16  | From C, call as:                                                          |
  17  | void round_reg(FPU_REG *arg, unsigned int extent, unsigned int control_w) |
  18  |                                                                           |
  19  | For correct "up" and "down" rounding, the argument must have the correct  |
  20  | sign.                                                                     |
  21  |                                                                           |
  22  +---------------------------------------------------------------------------*/
  23 
  24 /*---------------------------------------------------------------------------+
  25  | Four entry points.                                                        |
  26  |                                                                           |
  27  | Needed by both the FPU_round and FPU_round_sqrt entry points:             |
  28  |  %eax:%ebx  64 bit significand                                            |
  29  |  %edx       32 bit extension of the significand                           |
  30  |  %edi       pointer to an FPU_REG for the result to be stored             |
  31  |  stack      calling function must have set up a C stack frame and         |
  32  |             pushed %esi, %edi, and %ebx                                   |
  33  |                                                                           |
  34  | Needed just for the FPU_round_sqrt entry point:                           |
  35  |  %cx  A control word in the same format as the FPU control word.          |
  36  | Otherwise, PARAM4 must give such a value.                                 |
  37  |                                                                           |
  38  |                                                                           |
  39  | The significand and its extension are assumed to be exact in the          |
  40  | following sense:                                                          |
  41  |   If the significand by itself is the exact result then the significand   |
  42  |   extension (%edx) must contain 0, otherwise the significand extension    |
  43  |   must be non-zero.                                                       |
  44  |   If the significand extension is non-zero then the significand is        |
  45  |   smaller than the magnitude of the correct exact result by an amount     |
  46  |   greater than zero and less than one ls bit of the significand.          |
  47  |   The significand extension is only required to have three possible       |
  48  |   non-zero values:                                                        |
  49  |       less than 0x80000000  <=> the significand is less than 1/2 an ls    |
  50  |                                 bit smaller than the magnitude of the     |
  51  |                                 true exact result.                        |
  52  |         exactly 0x80000000  <=> the significand is exactly 1/2 an ls bit  |
  53  |                                 smaller than the magnitude of the true    |
  54  |                                 exact result.                             |
  55  |    greater than 0x80000000  <=> the significand is more than 1/2 an ls    |
  56  |                                 bit smaller than the magnitude of the     |
  57  |                                 true exact result.                        |
  58  |                                                                           |
  59  +---------------------------------------------------------------------------*/
  60 
  61 /*---------------------------------------------------------------------------+
  62  |  The code in this module has become quite complex, but it should handle   |
  63  |  all of the FPU flags which are set at this stage of the basic arithmetic |
  64  |  computations.                                                            |
  65  |  There are a few rare cases where the results are not set identically to  |
  66  |  a real FPU. These require a bit more thought because at this stage the   |
  67  |  results of the code here appear to be more consistent...                 |
  68  |  This may be changed in a future version.                                 |
  69  +---------------------------------------------------------------------------*/
  70 
  71 
  72 #include "fpu_asm.h"
  73 #include "exception.h"
  74 #include "control_w.h"
  75 
  76 /* Flags for FPU_bits_lost: direction in which rounding changed the result */
  77 #define LOST_DOWN       $1
  78 #define LOST_UP         $2
  79 
  80 /* Flags for FPU_denormal: how an exponent at or below EXP_UNDER is being handled */
  81 #define DENORMAL        $1
  82 #define UNMASKED_UNDERFLOW $2
  83 
  84 .data
  85         .align 2,0
  86 FPU_bits_lost:          // Cleared at xDenorm_done, then set to LOST_UP or LOST_DOWN by the rounding code
  87         .byte   0
  88 FPU_denormal:           // 0 for a normal exponent, else DENORMAL or UNMASKED_UNDERFLOW (set on entry)
  89         .byte   0
  90 
  91 .text
  92         .align 2,144
  93 .globl FPU_round
  94 .globl FPU_round_sqrt
  95 .globl FPU_Arith_exit
  96 .globl _round_reg
  97 
  98 // Entry point when called from C: void round_reg(FPU_REG *arg, unsigned int extent, unsigned int control_w)
  99 _round_reg:
  100         pushl   %ebp                    // Build the C stack frame which the shared exit code unwinds
  101         movl    %esp,%ebp
  102         pushl   %esi                    // Pushed here, popped in reverse order at FPU_Arith_exit
  103         pushl   %edi
  104         pushl   %ebx
  105         // Load the registers in the layout expected by the FPU_round_sqrt entry point
  106         movl    PARAM1,%edi             // %edi = arg, which also receives the rounded result
  107         movl    SIGH(%edi),%eax         // %eax:%ebx = 64 bit significand of arg
  108         movl    SIGL(%edi),%ebx
  109         movl    PARAM2,%edx             // %edx = extent, the 32 bit significand extension
  110         movl    PARAM3,%ecx             // %ecx = control_w, in FPU control word format
  111         jmp     FPU_round_sqrt          // Control word is already in %ecx, so skip the PARAM4 load
 112 
 113 FPU_round:              // Normal entry point
 114         movl    PARAM4,%ecx
 115 
 116 FPU_round_sqrt:         // Entry point from wm_sqrt.S
 117 
 118 #ifdef PARANOID
 119 // Cannot use this here yet
 120 //      orl     %eax,%eax
 121 //      jns     L_entry_bugged
 122 #endif PARANOID
 123 
 124         cmpl    EXP_UNDER,EXP(%edi)
 125         jle     xMake_denorm                    // The number is a de-normal
 126 
 127         movb    $0,FPU_denormal                 // 0 -> not a de-normal
 128 
 129 xDenorm_done:
 130         movb    $0,FPU_bits_lost                // No bits yet lost in rounding
 131 
 132         movl    %ecx,%esi
 133         andl    CW_PC,%ecx
 134         cmpl    PR_64_BITS,%ecx
 135         je      LRound_To_64
 136 
 137         cmpl    PR_53_BITS,%ecx
 138         je      LRound_To_53
 139 
 140         cmpl    PR_24_BITS,%ecx
 141         je      LRound_To_24
 142 
 143 #ifdef PARANOID
 144         jmp     L_bugged        // There is no bug, just a bad control word
 145 #endif PARANOID
 146 
 147 
 148 // Round etc to 24 bit precision
 149 LRound_To_24:
 150         movl    %esi,%ecx
 151         andl    CW_RC,%ecx
 152         cmpl    RC_RND,%ecx
 153         je      LRound_nearest_24
 154 
 155         cmpl    RC_CHOP,%ecx
 156         je      LCheck_truncate_24
 157 
 158         cmpl    RC_UP,%ecx              // Towards +infinity
 159         je      LUp_24
 160 
 161         cmpl    RC_DOWN,%ecx            // Towards -infinity
 162         je      LDown_24
 163 
 164 #ifdef PARANOID
 165         jmp     L_bugged
 166 #endif PARANOID
 167 
  168 LUp_24:                                 // Rounding towards +infinity
  169         cmpb    SIGN_POS,SIGN(%edi)
  170         jne     LCheck_truncate_24      // If negative then  up==truncate
  171 
  172         jmp     LCheck_24_round_up      // Positive: magnitude must grow if any bits are discarded
  173 
  174 LDown_24:                               // Rounding towards -infinity
  175         cmpb    SIGN_POS,SIGN(%edi)
  176         je      LCheck_truncate_24      // If positive then  down==truncate
  177         // Negative: fall through, magnitude must grow if any bits are discarded
  178 LCheck_24_round_up:
  179         movl    %eax,%ecx
  180         andl    $0x000000ff,%ecx        // Low 8 bits of %eax lie below the 24th bit
  181         orl     %ebx,%ecx               // ...as do all of %ebx
  182         orl     %edx,%ecx               // ...and the extension
  183         jnz     LDo_24_round_up         // Some discarded bit is set, so increment
  184         jmp     LRe_normalise           // Already exact at 24 bits, nothing is lost
 185 
  186 LRound_nearest_24:
  187         // Do rounding of the 24th bit if needed (nearest or even)
  188         movl    %eax,%ecx
  189         andl    $0x000000ff,%ecx        // The 8 bits of %eax below the 24th bit
  190         cmpl    $0x00000080,%ecx        // Compare them with one half of the 24th bit
  191         jc      LCheck_truncate_24      // less than half, no increment needed
  192 
  193         jne     LGreater_Half_24        // greater than half, increment needed
  194 
  195         // Possibly half, we need to check the ls bits
  196         orl     %ebx,%ebx
  197         jnz     LGreater_Half_24        // greater than half, increment needed
  198 
  199         orl     %edx,%edx
  200         jnz     LGreater_Half_24        // greater than half, increment needed
  201 
  202         // Exactly half, increment only if 24th bit is 1 (round to even)
  203         testl   $0x00000100,%eax
  204         jz      LDo_truncate_24
  205 
  206 LGreater_Half_24:                       // Rounding: increment at the 24th bit
  207 LDo_24_round_up:
  208         andl    $0xffffff00,%eax        // Truncate to 24 bits
  209         xorl    %ebx,%ebx
  210         movb    LOST_UP,FPU_bits_lost
  211         addl    $0x00000100,%eax        // Carry out of %eax is tested at LCheck_Round_Overflow
  212         jmp     LCheck_Round_Overflow
  213 
  214 LCheck_truncate_24:
  215         movl    %eax,%ecx
  216         andl    $0x000000ff,%ecx        // Gather every bit below the 24th bit
  217         orl     %ebx,%ecx
  218         orl     %edx,%ecx
  219         jz      LRe_normalise                   // No truncation needed
  220 
  221 LDo_truncate_24:
  222         andl    $0xffffff00,%eax        // Truncate to 24 bits
  223         xorl    %ebx,%ebx
  224         movb    LOST_DOWN,FPU_bits_lost
  225         jmp     LRe_normalise
 226 
 227 
 228 // Round etc to 53 bit precision
 229 LRound_To_53:
 230         movl    %esi,%ecx
 231         andl    CW_RC,%ecx
 232         cmpl    RC_RND,%ecx
 233         je      LRound_nearest_53
 234 
 235         cmpl    RC_CHOP,%ecx
 236         je      LCheck_truncate_53
 237 
 238         cmpl    RC_UP,%ecx              // Towards +infinity
 239         je      LUp_53
 240 
 241         cmpl    RC_DOWN,%ecx            // Towards -infinity
 242         je      LDown_53
 243 
 244 #ifdef PARANOID
 245         jmp     L_bugged
 246 #endif PARANOID
 247 
  248 LUp_53:                                 // Rounding towards +infinity
  249         cmpb    SIGN_POS,SIGN(%edi)
  250         jne     LCheck_truncate_53      // If negative then  up==truncate
  251 
  252         jmp     LCheck_53_round_up      // Positive: magnitude must grow if any bits are discarded
  253 
  254 LDown_53:                               // Rounding towards -infinity
  255         cmpb    SIGN_POS,SIGN(%edi)
  256         je      LCheck_truncate_53      // If positive then  down==truncate
  257         // Negative: fall through, magnitude must grow if any bits are discarded
  258 LCheck_53_round_up:
  259         movl    %ebx,%ecx
  260         andl    $0x000007ff,%ecx        // Low 11 bits of %ebx lie below the 53rd bit
  261         orl     %edx,%ecx               // ...as does the extension
  262         jnz     LDo_53_round_up         // Some discarded bit is set, so increment
  263         jmp     LRe_normalise           // Already exact at 53 bits, nothing is lost
 264 
  265 LRound_nearest_53:
  266         // Do rounding of the 53rd bit if needed (nearest or even)
  267         movl    %ebx,%ecx
  268         andl    $0x000007ff,%ecx        // The 11 bits of %ebx below the 53rd bit
  269         cmpl    $0x00000400,%ecx        // Compare them with one half of the 53rd bit
  270         jc      LCheck_truncate_53      // less than half, no increment needed
  271 
  272         jnz     LGreater_Half_53        // greater than half, increment needed
  273 
  274         // Possibly half, we need to check the ls bits
  275         orl     %edx,%edx
  276         jnz     LGreater_Half_53        // greater than half, increment needed
  277 
  278         // Exactly half, increment only if 53rd bit is 1 (round to even)
  279         testl   $0x00000800,%ebx
  280         jz      LTruncate_53
  281 
  282 LGreater_Half_53:                       // Rounding: increment at the 53rd bit
  283 LDo_53_round_up:
  284         movb    LOST_UP,FPU_bits_lost
  285         andl    $0xfffff800,%ebx        // Truncate to 53 bits
  286         addl    $0x00000800,%ebx
  287         adcl    $0,%eax                 // Carry out of %eax is tested at LCheck_Round_Overflow
  288         jmp     LCheck_Round_Overflow
  289 
  290 LCheck_truncate_53:
  291         movl    %ebx,%ecx
  292         andl    $0x000007ff,%ecx        // Gather every bit below the 53rd bit
  293         orl     %edx,%ecx
  294         jz      LRe_normalise           // No truncation needed
  295 
  296 LTruncate_53:
  297         movb    LOST_DOWN,FPU_bits_lost
  298         andl    $0xfffff800,%ebx        // Truncate to 53 bits
  299         jmp     LRe_normalise
 300 
 301 
 302 // Round etc to 64 bit precision
 303 LRound_To_64:
 304         movl    %esi,%ecx
 305         andl    CW_RC,%ecx
 306         cmpl    RC_RND,%ecx
 307         je      LRound_nearest_64
 308 
 309         cmpl    RC_CHOP,%ecx
 310         je      LCheck_truncate_64
 311 
 312         cmpl    RC_UP,%ecx              // Towards +infinity
 313         je      LUp_64
 314 
 315         cmpl    RC_DOWN,%ecx            // Towards -infinity
 316         je      LDown_64
 317 
 318 #ifdef PARANOID
 319         jmp     L_bugged
 320 #endif PARANOID
 321 
  322 LUp_64:                                 // Rounding towards +infinity
  323         cmpb    SIGN_POS,SIGN(%edi)
  324         jne     LCheck_truncate_64      // If negative then  up==truncate
  325         // Positive: only the extension lies below the 64th bit
  326         orl     %edx,%edx
  327         jnz     LDo_64_round_up         // Inexact, so increment the significand
  328         jmp     LRe_normalise           // Exact, nothing is lost
  329 
  330 LDown_64:                               // Rounding towards -infinity
  331         cmpb    SIGN_POS,SIGN(%edi)
  332         je      LCheck_truncate_64      // If positive then  down==truncate
  333         // Negative: only the extension lies below the 64th bit
  334         orl     %edx,%edx
  335         jnz     LDo_64_round_up         // Inexact, so increment the magnitude
  336         jmp     LRe_normalise           // Exact, nothing is lost
 337 
 338 LRound_nearest_64:
 339         cmpl    $0x80000000,%edx
 340         jc      LCheck_truncate_64
 341 
 342         jne     LDo_64_round_up
 343 
 344         /* Now test for round-to-even */
 345         testb   $1,%ebx
 346         jz      LCheck_truncate_64
 347 
 348 LDo_64_round_up:
 349         movb    LOST_UP,FPU_bits_lost
 350         addl    $1,%ebx
 351         adcl    $0,%eax
 352 
 353 LCheck_Round_Overflow:
 354         jnc     LRe_normalise
 355 
 356         /* Overflow, adjust the result (significand to 1.0) */
 357         rcrl    $1,%eax
 358         rcrl    $1,%ebx
 359         incl    EXP(%edi)
 360         jmp     LRe_normalise
 361 
 362 LCheck_truncate_64:
 363         orl     %edx,%edx
 364         jz      LRe_normalise
 365 
 366 LTruncate_64:
 367         movb    LOST_DOWN,FPU_bits_lost
 368 
  369 LRe_normalise:
  370         testb   $0xff,FPU_denormal      // Non-zero if the entry code saw an exponent <= EXP_UNDER
  371         jnz     xNormalise_result
  372 
  373 xL_Normalised:
  374         cmpb    LOST_UP,FPU_bits_lost
  375         je      xL_precision_lost_up
  376 
  377         cmpb    LOST_DOWN,FPU_bits_lost
  378         je      xL_precision_lost_down
  379 
  380 xL_no_precision_loss:
  381         /* store the result */
  382         movb    TW_Valid,TAG(%edi)
  383 
  384 xL_Store_significand:
  385         movl    %eax,SIGH(%edi)
  386         movl    %ebx,SIGL(%edi)
  387 
  388         xorl    %eax,%eax       // No errors detected.
  389 
  390         cmpl    EXP_OVER,EXP(%edi)
  391         jge     L_overflow
  392         // Common exit: undo the frame described in the header (pushed %esi, %edi, %ebx)
  393 FPU_Arith_exit:
  394         popl    %ebx
  395         popl    %edi
  396         popl    %esi
  397         leave
  398         ret
 399 
 400 
  401 // Set the FPU status flags to represent precision loss due to
  402 // round-up.
  403 xL_precision_lost_up:
  404         push    %eax                    // Preserve the ms significand word across the call
  405         call    _set_precision_flag_up
  406         popl    %eax
  407         jmp     xL_no_precision_loss    // Continue with tagging and storing the result
  408 
  409 // Set the FPU status flags to represent precision loss due to
  410 // truncation.
  411 xL_precision_lost_down:
  412         push    %eax                    // Preserve the ms significand word across the call
  413         call    _set_precision_flag_down
  414         popl    %eax
  415         jmp     xL_no_precision_loss    // Continue with tagging and storing the result
 416 
 417 
  418 // The number is a denormal (which might get rounded up to a normal)
  419 // Shift the number right the required number of bits, which will
  420 // have to be undone later...
  421 xMake_denorm:
  422         // The action to be taken depends upon whether the underflow
  423         // exception is masked
  424         testb   CW_Underflow,%cl                // Underflow mask.
  425         jz      xUnmasked_underflow             // Do not make a denormal.
  426 
  427         movb    DENORMAL,FPU_denormal
  428 
  429         pushl   %ecx            // Save the control word, %ecx becomes the shift count
  430         movl    EXP_UNDER+1,%ecx
  431         subl    EXP(%edi),%ecx  // Right shift needed to bring the exponent up to EXP_UNDER+1
  432 
  433         cmpl    $64,%ecx        /* 64 or more: the whole significand is shifted out */
  434         jnc     xDenorm_shift_more_than_63
  435 
  436         cmpl    $32,%ecx        /* shrd only works for 0..31 bits */
  437         jnc     xDenorm_shift_more_than_32
  438 
  439 // We got here without jumps by assuming that the most common requirement
  440 //   is for a small de-normalising shift.
  441 // Shift by [1..31] bits
  442         addl    %ecx,EXP(%edi)  // Exponent becomes EXP_UNDER+1
  443         orl     %edx,%edx       // extension
  444         setne   %ch             // Save whether %edx is non-zero
  445         xorl    %edx,%edx
  446         shrd    %cl,%ebx,%edx   // New extension = the bits shifted out of %ebx
  447         shrd    %cl,%eax,%ebx
  448         shr     %cl,%eax
  449         orb     %ch,%dl         // Fold the old extension in as a sticky bit
  450         popl    %ecx            // Restore the control word
  451         jmp     xDenorm_done
 452 
  453 // Shift by [32..63] bits (control word is on the stack, %ecx holds the shift count)
  454 xDenorm_shift_more_than_32:
  455         addl    %ecx,EXP(%edi)          // Exponent becomes EXP_UNDER+1
  456         subb    $32,%cl                 // Shift by count-32 here, the moves below supply the other 32
  457         orl     %edx,%edx
  458         setne   %ch                     // Remember whether the old extension was non-zero
  459         orb     %ch,%bl
  460         xorl    %edx,%edx
  461         shrd    %cl,%ebx,%edx           // %edx = the bits shifted out of %ebx
  462         shrd    %cl,%eax,%ebx
  463         shr     %cl,%eax
  464         orl     %edx,%edx               // test these 32 bits
  465         setne   %cl
  466         orb     %ch,%bl                 // Sticky bit for the old extension
  467         orb     %cl,%bl                 // Sticky bit for the bits shifted out above
  468         movl    %ebx,%edx               // Move everything down one word: the remaining 32 bit shift
  469         movl    %eax,%ebx
  470         xorl    %eax,%eax
  471         popl    %ecx                    // Restore the control word
  472         jmp     xDenorm_done
 473 
  474 // Shift by 64 or more bits (control word is on the stack, %ecx holds the shift count)
  475 xDenorm_shift_more_than_63:
  476         cmpl    $64,%ecx
  477         jne     xDenorm_shift_more_than_64
  478 
  479 // Exactly 64 bit shift
  480         addl    %ecx,EXP(%edi)          // Exponent becomes EXP_UNDER+1
  481         xorl    %ecx,%ecx
  482         orl     %edx,%edx
  483         setne   %cl                     // Old extension non-zero?
  484         orl     %ebx,%ebx
  485         setne   %ch                     // Old ls word non-zero?
  486         orb     %ch,%cl
  487         orb     %cl,%al                 // Fold both into the ls bit of %eax as a sticky bit
  488         movl    %eax,%edx               // Old ms word becomes the extension
  489         xorl    %eax,%eax
  490         xorl    %ebx,%ebx
  491         popl    %ecx                    // Restore the control word
  492         jmp     xDenorm_done
  493 
  494 xDenorm_shift_more_than_64:
  495         movl    EXP_UNDER+1,EXP(%edi)
  496 // This is easy, %eax must be non-zero, so..
  497         movl    $1,%edx                 // Non-zero extension below one half of the ls bit
  498         xorl    %eax,%eax
  499         xorl    %ebx,%ebx
  500         popl    %ecx                    // Restore the control word
  501         jmp     xDenorm_done
 502 
 503 
  504 xUnmasked_underflow:                    // From xMake_denorm when the underflow mask bit is clear
  505         movb    UNMASKED_UNDERFLOW,FPU_denormal // Round without de-normalising, see xSignal_underflow
  506         jmp     xDenorm_done
 507 
 508 
 509 // Undo the de-normalisation.
 510 xNormalise_result:
 511         cmpb    UNMASKED_UNDERFLOW,FPU_denormal
 512         je      xSignal_underflow
 513 
 514 // The number must be a denormal if we got here.
 515 #ifdef PARANOID
 516         // But check it... just in case.
 517         cmpl    EXP_UNDER+1,EXP(%edi)
 518         jne     L_norm_bugged
 519 #endif PARANOID
 520 
 521 #ifdef PECULIAR_486
 522         // This implements a special feature of 80486 behaviour.
 523         // Underflow will be signalled even if the number is
 524         // not a denormal after rounding.
 525         // This difference occurs only for masked underflow, and not
 526         // in the unmasked case.
 527         // Actual 80486 behaviour differs from this in some circumstances.
 528         orl     %eax,%eax               // ms bits
 529         js      LNormalise_shift_done   // Will be masked underflow
 530 #endif PECULIAR_486
 531 
  532         orl     %eax,%eax               // ms bits
  533         js      xL_Normalised           // No longer a denormal
  534 
  535         jnz     LNormalise_shift_up_to_31       // Shift left 0 - 31 bits
  536 
  537         orl     %ebx,%ebx
  538         jz      L_underflow_to_zero     // The contents are zero
  539 
  540 // Shift left 32 - 63 bits
  541         movl    %ebx,%eax               // Move the ls word up, which is a 32 bit left shift
  542         xorl    %ebx,%ebx
  543         subl    $32,EXP(%edi)
  544 
  545 LNormalise_shift_up_to_31:
  546         bsrl    %eax,%ecx       /* get the required shift in %ecx */
  547         subl    $31,%ecx
  548         negl    %ecx                    // %ecx = 31 - index of the highest set bit of %eax
  549         shld    %cl,%ebx,%eax
  550         shl     %cl,%ebx
  551         subl    %ecx,EXP(%edi)          // Exponent drops by the size of the left shift
  552 
  553 LNormalise_shift_done:
  554         testb   $0xff,FPU_bits_lost     // bits lost == underflow
  555         jz      xL_Normalised
  556 
  557         // There must be a masked underflow
  558         push    %eax                    // Preserve the ms significand word across the call
  559         pushl   EX_Underflow            // Argument for _exception
  560         call    _exception
  561         popl    %eax                    // Discard the argument
  562         popl    %eax                    // Restore the ms significand word
  563         jmp     xL_Normalised
 564 
 565 
  566 // The operations resulted in a number too small to represent.
  567 // Masked response.
  568 L_underflow_to_zero:
  569         push    %eax                    // Preserve %eax (zero on this path) across the call
  570         call    _set_precision_flag_down
  571         popl    %eax
  572 
  573         push    %eax                    // Preserve %eax across the call
  574         pushl   EX_Underflow            // Argument for _exception
  575         call    _exception
  576         popl    %eax                    // Discard the argument
  577         popl    %eax                    // Restore %eax
  578 
  579 // Reduce the exponent to EXP_UNDER
  580         movl    EXP_UNDER,EXP(%edi)
  581         movb    TW_Zero,TAG(%edi)
  582         jmp     xL_Store_significand    // Skips the TW_Valid tag store; significand words are zero here
 583 
 584 
  585 // The operations resulted in a number too large to represent.
  586 L_overflow:
  587         push    %edi                    // Argument: pointer to the result FPU_REG
  588         call    _arith_overflow
  589         pop     %edi                    // Remove the argument from the stack
  590         jmp     FPU_Arith_exit          // NOTE(review): %eax is whatever _arith_overflow left, confirm
 591 
 592 
  593 xSignal_underflow:
  594         // The number may have been changed to a non-denormal
  595         // by the rounding operations.
  596         cmpl    EXP_UNDER,EXP(%edi)
  597         jle     xDo_unmasked_underflow
  598 
  599         jmp     xL_Normalised           // Exponent is now above EXP_UNDER, no underflow to report
  600 
  601 xDo_unmasked_underflow:
  602         // Increase the exponent by the magic number
  603         addl    $(3*(1<<13)),EXP(%edi)
  604         push    %eax                    // Preserve the ms significand word across the call
  605         pushl   EX_Underflow            // Argument for EXCEPTION
  606         call    EXCEPTION
  607         popl    %eax                    // Discard the argument
  608         popl    %eax                    // Restore the ms significand word
  609         jmp     xL_Normalised
 610 
 611 
 612 #ifdef PARANOID
 613 /* If we ever get here then we have problems! */
 614 L_bugged:
 615         pushl   EX_INTERNAL|0x201
 616         call    EXCEPTION
 617         popl    %ebx
 618         jmp     L_exception_exit
 619 
 620 L_norm_bugged:
 621         pushl   EX_INTERNAL|0x216
 622         call    EXCEPTION
 623         popl    %ebx
 624         jmp     L_exception_exit
 625 
 626 L_entry_bugged:
 627         pushl   EX_INTERNAL|0x217
 628         call    EXCEPTION
 629         popl    %ebx
 630 L_exception_exit:
 631         mov     $1,%eax
 632         jmp     FPU_Arith_exit
 633 #endif PARANOID

/* [previous][next][first][last][top][bottom][index][help] */