kernel/FPU-emu/reg

/* */
   1         .file "reg_round.S"
   2 /*---------------------------------------------------------------------------+
   3  |  reg_round.S                                                              |
   4  |                                                                           |
   5  | Rounding/truncation/etc for FPU basic arithmetic functions.               |
   6  |                                                                           |
   7  | Copyright (C) 1993                                                        |
   8  |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
   9  |                       Australia.  E-mail apm233m@vaxc.cc.monash.edu.au    |
  10  |                                                                           |
  11  | This code has four possible entry points.                                 |
  12  | The following must be entered by a jmp instruction:                       |
  13  |   FPU_round, FPU_round_sqrt, and FPU_Arith_exit.                          |
  14  |                                                                           |
  15  | The _round_reg entry point is intended to be used by C code.              |
  16  | From C, call as:                                                          |
  17  | void round_reg(FPU_REG *arg, unsigned int extent, unsigned int control_w) |
  18  |                                                                           |
  19  +---------------------------------------------------------------------------*/
  20 
  21 /*---------------------------------------------------------------------------+
  22  | Four entry points.                                                        |
  23  |                                                                           |
  24  | Needed by both the FPU_round and FPU_round_sqrt entry points:             |
  25  |  %eax:%ebx  64 bit significand                                            |
  26  |  %edx       32 bit extension of the significand                           |
  27  |  %edi       pointer to an FPU_REG for the result to be stored             |
  28  |  stack      calling function must have set up a C stack frame and         |
  29  |             pushed %esi, %edi, and %ebx                                   |
  30  |                                                                           |
  31  | Needed just for the FPU_round_sqrt entry point:                           |
  32  |  %cx  A control word in the same format as the FPU control word.          |
  33  | Otherwise, PARAM4 must give such a value.                                 |
  34  |                                                                           |
  35  |                                                                           |
  36  | The significand and its extension are assumed to be exact in the          |
  37  | following sense:                                                          |
  38  |   If the significand by itself is the exact result then the significand   |
  39  |   extension (%edx) must contain 0, otherwise the significand extension    |
  40  |   must be non-zero.                                                       |
  41  |   If the significand extension is non-zero then the significand is        |
  42  |   smaller than the magnitude of the correct exact result by an amount     |
  43  |   greater than zero and less than one ls bit of the significand.          |
  44  |   The significand extension is only required to have three possible       |
  45  |   non-zero values:                                                        |
  46  |       less than 0x80000000  <=> the significand is less than 1/2 an ls    |
  47  |                                 bit smaller than the magnitude of the     |
  48  |                                 true exact result.                        |
  49  |         exactly 0x80000000  <=> the significand is exactly 1/2 an ls bit  |
  50  |                                 smaller than the magnitude of the true    |
  51  |                                 exact result.                             |
  52  |    greater than 0x80000000  <=> the significand is more than 1/2 an ls    |
  53  |                                 bit smaller than the magnitude of the     |
  54  |                                 true exact result.                        |
  55  |                                                                           |
  56  +---------------------------------------------------------------------------*/
  57 
  58 /*---------------------------------------------------------------------------+
  59  |  The code in this module has become quite complex, but it should handle   |
  60  |  all of the FPU flags which are set at this stage of the basic arithmetic |
  61  |  computations.                                                            |
  62  |  There are a few rare cases where the results are not set identically to  |
  63  |  a real FPU. These require a bit more thought because at this stage the   |
  64  |  results of the code here appear to be more consistent...                 |
  65  |  This may be changed in a future version.                                 |
  66  +---------------------------------------------------------------------------*/
  67 
  68 
  69 #include "fpu_asm.h"
  70 #include "exception.h"
  71 #include "control_w.h"
  72 
  73 #define LOST_DOWN       $1
  74 #define LOST_UP         $2
  75 #define DENORMAL        $1
  76 #define UNMASKED_UNDERFLOW $2
  77 
  78 .data
  79         .align 2,0
     // Scratch state shared by the rounding paths below.  Both bytes are
     // rewritten on every pass through FPU_round_sqrt.
     // NOTE(review): static storage, so presumably this code must not be
     // re-entered while a rounding operation is in progress - confirm.
     //
     // FPU_bits_lost: 0 = nothing discarded, LOST_DOWN = low bits were
     // truncated, LOST_UP = the significand was incremented.
  80 FPU_bits_lost:
  81         .byte   0
     // FPU_denormal: 0 = ordinary exponent, DENORMAL = significand was
     // shifted right for a masked underflow, UNMASKED_UNDERFLOW = the
     // exponent was biased upwards instead.
  82 FPU_denormal:
  83         .byte   0
  84 
  85 .text
  86         .align 2,144            // fill value 144 = 0x90, the nop opcode
  87 .globl FPU_round
  88 .globl FPU_round_sqrt
  89 .globl FPU_Arith_exit
  90 .globl _round_reg
  91 
  92 // Entry point when called from C
     // Per the file header the C prototype is
     //   void round_reg(FPU_REG *arg, unsigned int extent,
     //                  unsigned int control_w)
     // Builds the frame the assembler entry points require (%ebp frame
     // plus saved %esi, %edi, %ebx), loads the working registers and joins
     // the common code at FPU_round_sqrt.  FPU_Arith_exit undoes the frame.
     // PARAM1..PARAM3 are defined outside this file; presumably they are
     // the three C arguments in order - confirm against fpu_asm.h.
  93 _round_reg:
  94         pushl   %ebp
  95         movl    %esp,%ebp
  96         pushl   %esi
  97         pushl   %edi
  98         pushl   %ebx
  99 
 100         movl    PARAM1,%edi             // %edi -> register to be rounded
 101         movl    SIGH(%edi),%eax         // %eax:%ebx = 64 bit significand
 102         movl    SIGL(%edi),%ebx
 103         movl    PARAM2,%edx             // %edx = significand extension
 104         movl    PARAM3,%ecx             // %ecx = control word
 105         jmp     FPU_round_sqrt
 106 
 107 FPU_round:              // Normal entry point
     // Entered by jmp with %eax:%ebx = significand, %edx = extension and
     // %edi -> result register; the control word is fetched from PARAM4.
 108         movl    PARAM4,%ecx
 109 
 110 FPU_round_sqrt:         // Entry point from wm_sqrt.S
     // As FPU_round, but the caller supplies the control word in %ecx.
 111 
 112 #ifdef PARANOID
 113 // Cannot use this here yet
 114 //      orl     %eax,%eax
 115 //      jns     L_entry_bugged
 116 #endif /* PARANOID */
 117 
 118         cmpl    EXP_UNDER,EXP(%edi)
 119         jle     xMake_denorm                    // The number is a de-normal
 120 
 121         movb    $0,FPU_denormal                 // 0 -> not a de-normal
 122 
     // xMake_denorm and xUnmasked_underflow rejoin here with FPU_denormal
     // already set and %ecx still holding the control word.
 123 xDenorm_done:
 124         movb    $0,FPU_bits_lost                // No bits yet lost in rounding
 125 
 126         movl    %ecx,%esi                       // keep the whole control word
 127         andl    CW_PC,%ecx                      // precision-control field only
 128         cmpl    PR_64_BITS,%ecx
 129         je      LRound_To_64
 130 
 131         cmpl    PR_53_BITS,%ecx
 132         je      LRound_To_53
 133 
 134         cmpl    PR_24_BITS,%ecx
 135         je      LRound_To_24
 136 
     // Without PARANOID any other precision value falls through into the
     // 24 bit code that follows.
 137 #ifdef PARANOID
 138         jmp     L_bugged        // There is no bug, just a bad control word
 139 #endif /* PARANOID */
 140 
 141 
 142 // Round etc to 24 bit precision
     // Dispatch on the rounding-control field of the control word saved
     // in %esi.  %ecx is scratch.
 143 LRound_To_24:
 144         movl    %esi,%ecx
 145         andl    CW_RC,%ecx
 146         cmpl    RC_RND,%ecx
 147         je      LRound_nearest_24
 148 
 149         cmpl    RC_CHOP,%ecx
 150         je      LCheck_truncate_24
 151 
 152         cmpl    RC_UP,%ecx              // Towards +infinity
 153         je      LUp_24
 154 
 155         cmpl    RC_DOWN,%ecx            // Towards -infinity
 156         je      LDown_24
 157 
     // Without PARANOID an unmatched value falls through into LUp_24.
 158 #ifdef PARANOID
 159         jmp     L_bugged
 160 #endif /* PARANOID */
 161 
 162 LUp_24:
     // Directed rounding at 24 bits.  The significand is a magnitude, so
     // the sign of the destination decides between increment and truncate.
 163         cmpb    SIGN_POS,SIGN(%edi)
 164         jne     LCheck_truncate_24      // If negative then  up==truncate
 165 
 166         jmp     LCheck_24_round_up
 167 
 168 LDown_24:
 169         cmpb    SIGN_POS,SIGN(%edi)
 170         je      LCheck_truncate_24      // If positive then  down==truncate
 171 
     // Increment only if something lies below the 24th bit: the low 8 bits
     // of %eax, any of %ebx, or the extension in %edx.
 172 LCheck_24_round_up:
 173         movl    %eax,%ecx
 174         andl    $0x000000ff,%ecx
 175         orl     %ebx,%ecx
 176         orl     %edx,%ecx
 177         jnz     LDo_24_round_up
 178         jmp     LRe_normalise           // already exact at 24 bits
 179 
 180 LRound_nearest_24:
 181         // Do rounding of the 24th bit if needed (nearest or even)
     // The discarded part starts with the low 8 bits of %eax; 0x80 there
     // is the half-way point, refined by %ebx and %edx.
 182         movl    %eax,%ecx
 183         andl    $0x000000ff,%ecx
 184         cmpl    $0x00000080,%ecx
 185         jc      LCheck_truncate_24      // less than half, no increment needed
 186 
 187         jne     LGreater_Half_24        // greater than half, increment needed
 188 
 189         // Possibly half, we need to check the ls bits
 190         orl     %ebx,%ebx
 191         jnz     LGreater_Half_24        // greater than half, increment needed
 192 
 193         orl     %edx,%edx
 194         jnz     LGreater_Half_24        // greater than half, increment needed
 195 
 196         // Exactly half, increment only if 24th bit is 1 (round to even)
 197         testl   $0x00000100,%eax
 198         jz      LDo_truncate_24         // bits are lost, so skip the exactness check
 199 
 200 LGreater_Half_24:                       // Rounding: increment at the 24th bit
 201 LDo_24_round_up:
 202         andl    $0xffffff00,%eax        // Truncate to 24 bits
 203         xorl    %ebx,%ebx
 204         movb    LOST_UP,FPU_bits_lost
 205         addl    $0x00000100,%eax        // carry out is examined at LCheck_Round_Overflow
 206         jmp     LCheck_Round_Overflow
 207 
     // Truncation path: record lost precision only if discarded bits exist.
 208 LCheck_truncate_24:
 209         movl    %eax,%ecx
 210         andl    $0x000000ff,%ecx
 211         orl     %ebx,%ecx
 212         orl     %edx,%ecx
 213         jz      LRe_normalise                   // No truncation needed
 214 
 215 LDo_truncate_24:
 216         andl    $0xffffff00,%eax        // Truncate to 24 bits
 217         xorl    %ebx,%ebx
 218         movb    LOST_DOWN,FPU_bits_lost
 219         jmp     LRe_normalise
 220 
 221 
 222 // Round etc to 53 bit precision
     // Dispatch on the rounding-control field of the control word saved
     // in %esi.  %ecx is scratch.
 223 LRound_To_53:
 224         movl    %esi,%ecx
 225         andl    CW_RC,%ecx
 226         cmpl    RC_RND,%ecx
 227         je      LRound_nearest_53
 228 
 229         cmpl    RC_CHOP,%ecx
 230         je      LCheck_truncate_53
 231 
 232         cmpl    RC_UP,%ecx              // Towards +infinity
 233         je      LUp_53
 234 
 235         cmpl    RC_DOWN,%ecx            // Towards -infinity
 236         je      LDown_53
 237 
     // Without PARANOID an unmatched value falls through into LUp_53.
 238 #ifdef PARANOID
 239         jmp     L_bugged
 240 #endif /* PARANOID */
 241 
 242 LUp_53:
     // Directed rounding at 53 bits.  The significand is a magnitude, so
     // the sign of the destination decides between increment and truncate.
 243         cmpb    SIGN_POS,SIGN(%edi)
 244         jne     LCheck_truncate_53      // If negative then  up==truncate
 245 
 246         jmp     LCheck_53_round_up
 247 
 248 LDown_53:
 249         cmpb    SIGN_POS,SIGN(%edi)
 250         je      LCheck_truncate_53      // If positive then  down==truncate
 251 
     // Increment only if something lies below the 53rd bit: the low 11
     // bits of %ebx or the extension in %edx.
 252 LCheck_53_round_up:
 253         movl    %ebx,%ecx
 254         andl    $0x000007ff,%ecx
 255         orl     %edx,%ecx
 256         jnz     LDo_53_round_up
 257         jmp     LRe_normalise           // already exact at 53 bits
 258 
 259 LRound_nearest_53:
 260         // Do rounding of the 53rd bit if needed (nearest or even)
     // The discarded part starts with the low 11 bits of %ebx; 0x400
     // there is the half-way point, refined by %edx.
 261         movl    %ebx,%ecx
 262         andl    $0x000007ff,%ecx
 263         cmpl    $0x00000400,%ecx
 264         jc      LCheck_truncate_53      // less than half, no increment needed
 265 
 266         jnz     LGreater_Half_53        // greater than half, increment needed
 267 
 268         // Possibly half, we need to check the ls bits
 269         orl     %edx,%edx
 270         jnz     LGreater_Half_53        // greater than half, increment needed
 271 
 272         // Exactly half, increment only if 53rd bit is 1 (round to even)
 273         testl   $0x00000800,%ebx
 274         jz      LTruncate_53            // bits are lost, so skip the exactness check
 275 
 276 LGreater_Half_53:                       // Rounding: increment at the 53rd bit
 277 LDo_53_round_up:
 278         movb    LOST_UP,FPU_bits_lost
 279         andl    $0xfffff800,%ebx        // Truncate to 53 bits
 280         addl    $0x00000800,%ebx
 281         adcl    $0,%eax                 // carry out is examined at LCheck_Round_Overflow
 282         jmp     LCheck_Round_Overflow
 283 
     // Truncation path: record lost precision only if discarded bits exist.
 284 LCheck_truncate_53:
 285         movl    %ebx,%ecx
 286         andl    $0x000007ff,%ecx
 287         orl     %edx,%ecx
 288         jz      LRe_normalise           // nothing to discard
 289 
 290 LTruncate_53:
 291         movb    LOST_DOWN,FPU_bits_lost
 292         andl    $0xfffff800,%ebx        // Truncate to 53 bits
 293         jmp     LRe_normalise
 294 
 295 
 296 // Round etc to 64 bit precision
     // Dispatch on the rounding-control field of the control word saved
     // in %esi.  %ecx is scratch.  At this precision only the extension
     // in %edx can be discarded.
 297 LRound_To_64:
 298         movl    %esi,%ecx
 299         andl    CW_RC,%ecx
 300         cmpl    RC_RND,%ecx
 301         je      LRound_nearest_64
 302 
 303         cmpl    RC_CHOP,%ecx
 304         je      LCheck_truncate_64
 305 
 306         cmpl    RC_UP,%ecx              // Towards +infinity
 307         je      LUp_64
 308 
 309         cmpl    RC_DOWN,%ecx            // Towards -infinity
 310         je      LDown_64
 311 
     // Without PARANOID an unmatched value falls through into LUp_64.
 312 #ifdef PARANOID
 313         jmp     L_bugged
 314 #endif /* PARANOID */
 315 
 316 LUp_64:
     // Round towards +infinity: a negative value is simply truncated,
     // a positive one is incremented when the extension is non-zero.
 317         cmpb    SIGN_POS,SIGN(%edi)
 318         jne     LCheck_truncate_64      // negative: up == truncate
 319 
 320         testl   %edx,%edx               // anything beyond 64 bits?
 321         jz      LRe_normalise           // no, result is exact
 322         jmp     LDo_64_round_up
 323 
 324 LDown_64:
     // Round towards -infinity: mirror image of LUp_64.
 325         cmpb    SIGN_POS,SIGN(%edi)
 326         je      LCheck_truncate_64      // positive: down == truncate
 327 
 328         testl   %edx,%edx               // anything beyond 64 bits?
 329         jz      LRe_normalise           // no, result is exact
 330         jmp     LDo_64_round_up
 331 
 332 LRound_nearest_64:
     // Round to nearest (even) at 64 bits.  The extension in %edx is
     // compared with 0x80000000, i.e. half of one ls bit of %eax:%ebx.
     // Falls through into LDo_64_round_up when an increment is required.
 333         cmpl    $0x80000000,%edx
 334         jc      LCheck_truncate_64      // below half: round down
 335 
 336         jne     LDo_64_round_up         // above half: round up
 337 
 338         /* Now test for round-to-even */
 339         testb   $1,%bl                  // byte test needs the byte register
 340         jz      LCheck_truncate_64      // ls bit already even: round down
 341 
 342 LDo_64_round_up:
     // Increment the 64 bit significand by one ls bit and note the loss.
 343         movb    LOST_UP,FPU_bits_lost
 344         addl    $1,%ebx
 345         adcl    $0,%eax
 346 
     // Shared by the 24, 53 and 64 bit increment paths.  Must be reached
     // with the carry flag still holding the carry out of %eax from the
     // increment just performed.
 347 LCheck_Round_Overflow:
 348         jnc     LRe_normalise           /* Rounding done, no overflow */
 349 
 350         /* Overflow, adjust the result (to 1.0) */
 351         rcrl    $1,%eax                 // carry becomes the new top bit
 352         rcrl    $1,%ebx
 353         incl    EXP(%edi)               // compensate for the right shift
 354         jmp     LRe_normalise
 355 
     // Truncation at 64 bits changes no significand bits; it only records
     // that a non-zero extension was dropped.
 356 LCheck_truncate_64:
 357         orl     %edx,%edx
 358         jz      LRe_normalise           // extension is zero: exact
 359 
     // Falls through into LRe_normalise.
 360 LTruncate_64:
 361         movb    LOST_DOWN,FPU_bits_lost
 362 
 363 LRe_normalise:
     // Common tail of every rounding path.  If the value was treated as a
     // denormal (or hit an unmasked underflow) on entry, that is dealt
     // with first at xNormalise_result.
 364         testb   $0xff,FPU_denormal
 365         jnz     xNormalise_result
 366 
     // Report precision loss, if any, through the out-of-line helpers,
     // which both continue at xL_no_precision_loss.
 367 xL_Normalised:
 368         cmpb    LOST_UP,FPU_bits_lost
 369         je      xL_precision_lost_up
 370 
 371         cmpb    LOST_DOWN,FPU_bits_lost
 372         je      xL_precision_lost_down
 373 
 374 xL_no_precision_loss:
 375         cmpl    EXP_OVER,EXP(%edi)
 376         jge     L_overflow              // exponent too large to represent
 377 
 378         /* store the result */
 379         movb    TW_Valid,TAG(%edi)
 380 
     // Also entered from L_underflow_to_zero, which sets its own tag.
 381 xL_Store_significand:
 382         movl    %eax,SIGH(%edi)
 383         movl    %ebx,SIGL(%edi)
 384 
     // Common exit: unwinds the %ebx/%edi/%esi pushes and the %ebp frame
     // that every entry point requires, then returns to the C caller.
 385 FPU_Arith_exit:
 386         popl    %ebx
 387         popl    %edi
 388         popl    %esi
 389         leave
 390         ret
 391 
 392 
 393 // Precision was lost and the significand was incremented: let
 394 // _set_precision_flag_up record it, keeping %eax intact across the call.
 395 xL_precision_lost_up:
 396         pushl   %eax
 397         call    _set_precision_flag_up
 398         popl    %eax
 399         jmp     xL_no_precision_loss
 400 
 401 // Precision was lost by discarding bits: let _set_precision_flag_down
 402 // record it, keeping %eax intact across the call.
 403 xL_precision_lost_down:
 404         pushl   %eax
 405         call    _set_precision_flag_down
 406         popl    %eax
 407         jmp     xL_no_precision_loss
 408 
 409 
 410 // The number is a denormal (which might get rounded up to a normal)
 411 // Shift the number right the required number of bits, which will
 412 // have to be undone later...
     // On entry %ecx holds the control word.  It is saved on the stack
     // while %ecx serves as the shift count and restored before the jump
     // back to xDenorm_done.
 413 xMake_denorm:
 414         // The action to be taken depends upon whether the underflow
 415         // exception is masked
 416         testb   CW_Underflow,%cl                // Underflow mask.
 417         jz      xUnmasked_underflow             // Do not make a denormal.
 418 
 419         movb    DENORMAL,FPU_denormal
 420 
 421         pushl   %ecx            // Save
 422         movl    EXP(%edi),%ecx
 423         subl    EXP_UNDER+1,%ecx
 424         negl    %ecx            // %ecx = (EXP_UNDER+1) - exponent = shift count
 425 
 426         cmpl    $64,%ecx        /* 64 or more is handled separately */
 427         jnc     xDenorm_shift_more_than_63
 428 
 429         cmpl    $32,%ecx        /* shrd only works for 0..31 bits */
 430         jnc     xDenorm_shift_more_than_32
 431 
 432 // We got here without jumps by assuming that the most common requirement
 433 //   is for a small de-normalising shift.
 434 // Shift by [1..31] bits
 435         addl    %ecx,EXP(%edi)  // exponent becomes EXP_UNDER+1
 436         orl     %edx,%edx       // extension
 437         setne   %ch             // remember whether the old extension was non-zero
 438         xorl    %edx,%edx
 439         shrd    %cl,%ebx,%edx   // bits shifted out of %ebx form the new extension
 440         shrd    %cl,%eax,%ebx
 441         shr     %cl,%eax
 442         orb     %ch,%dl         // fold the old extension in as a sticky bit
 443         popl    %ecx            // restore the control word
 444         jmp     xDenorm_done
 445 
 446 // Shift by [32..63] bits
     // Done as a shift by (count - 32) followed by moving every register
     // down one word.  Everything shifted out is collapsed into a sticky
     // bit in the new extension.  The control word saved by xMake_denorm
     // is popped back into %ecx at the end.
 447 xDenorm_shift_more_than_32:
 448         addl    %ecx,EXP(%edi)  // exponent becomes EXP_UNDER+1
 449         subb    $32,%cl
 450         orl     %edx,%edx
 451         setne   %ch             // old extension non-zero?
 452         orb     %ch,%bl
 453         xorl    %edx,%edx
 454         shrd    %cl,%ebx,%edx
 455         shrd    %cl,%eax,%ebx
 456         shr     %cl,%eax
 457         orl     %edx,%edx               // test these 32 bits
 458         setne   %cl
 459         orb     %ch,%bl
 460         orb     %cl,%bl
 461         movl    %ebx,%edx       // move down one word: %ebx -> extension
 462         movl    %eax,%ebx
 463         xorl    %eax,%eax
 464         popl    %ecx            // restore the control word
 465         jmp     xDenorm_done
 466 
 467 // Shift by [64..) bits
     // The control word saved by xMake_denorm is popped back into %ecx on
     // both paths below.
 468 xDenorm_shift_more_than_63:
 469         cmpl    $64,%ecx
 470         jne     xDenorm_shift_more_than_64
 471 
 472 // Exactly 64 bit shift
     // The old %eax becomes the extension, with a sticky bit for the old
     // %ebx and %edx; the significand itself becomes zero.
 473         addl    %ecx,EXP(%edi)
 474         xorl    %ecx,%ecx
 475         orl     %edx,%edx
 476         setne   %cl
 477         orl     %ebx,%ebx
 478         setne   %ch
 479         orb     %ch,%cl
 480         orb     %cl,%al
 481         movl    %eax,%edx
 482         xorl    %eax,%eax
 483         xorl    %ebx,%ebx
 484         popl    %ecx
 485         jmp     xDenorm_done
 486 
     // More than 64 bits: nothing of the significand survives.  The
     // result is a zero significand with a small non-zero extension.
 487 xDenorm_shift_more_than_64:
 488         movl    EXP_UNDER+1,EXP(%edi)
 489 // This is easy, %eax must be non-zero, so..
 490         movl    $1,%edx
 491         xorl    %eax,%eax
 492         xorl    %ebx,%ebx
 493         popl    %ecx
 494         jmp     xDenorm_done
 495 
 496 
 497 xUnmasked_underflow:
     // Reached from xMake_denorm before %ecx was pushed, so there is
     // nothing to pop here.  The significand is left untouched.
 498         // Increase the exponent by the magic number
 499         addl    $(3*(1<<13)),EXP(%edi)          // 3 * 8192 = 24576
 500         movb    UNMASKED_UNDERFLOW,FPU_denormal // xNormalise_result will signal it
 501         jmp     xDenorm_done
 502 
 503 
 504 // Undo the de-normalisation.
     // Entered from LRe_normalise when FPU_denormal is non-zero, with the
     // rounded significand in %eax:%ebx.
 505 xNormalise_result:
 506         cmpb    UNMASKED_UNDERFLOW,FPU_denormal
 507         je      xSignal_underflow
 508 
 509 // The number must be a denormal if we got here.
 510 #ifdef PARANOID
 511         // But check it... just in case.
 512         cmpl    EXP_UNDER+1,EXP(%edi)
 513         jne     L_norm_bugged
 514 #endif /* PARANOID */
 515 
 516         orl     %eax,%eax       // ms bits
 517         jnz     LNormalise_shift_up_to_31       // Shift left 0 - 31 bits
 518 
 519         orl     %ebx,%ebx
 520         jz      L_underflow_to_zero     // The contents are zero
 521 
 522 // Shift left 32 - 63 bits
     // Move the low word up first, then finish with the 0..31 bit shift.
 523         movl    %ebx,%eax
 524         xorl    %ebx,%ebx
 525         subl    $32,EXP(%edi)
 526 
 527 LNormalise_shift_up_to_31:
 528         bsrl    %eax,%ecx       /* get the required shift in %ecx */
 529         subl    $31,%ecx
 530         negl    %ecx            // %ecx = 31 - index of the top set bit
 531         shld    %cl,%ebx,%eax
 532         shl     %cl,%ebx
 533         subl    %ecx,EXP(%edi)  // exponent follows the left shift
 534 
 535 LNormalise_shift_done:
 536         testb   $0xff,FPU_bits_lost     // bits lost == underflow
 537         jz      xL_Normalised
 538 
 539         // There must be a masked underflow
 540         push    %eax            // preserve the ms significand word
 541         pushl   EX_Underflow    // argument for _exception
 542         call    _exception
 543         popl    %eax            // discard the argument
 544         popl    %eax            // restore the ms significand word
 545         jmp     xL_Normalised
 546 
 547 
 548 // The operations resulted in a number too small to represent.
 549 // Masked response.
     // Reached from xNormalise_result when %eax and %ebx are both zero.
     // Flags precision loss (downwards) and underflow, tags the register
     // as zero and stores the zero significand.
 550 L_underflow_to_zero:
 551         push    %eax
 552         call    _set_precision_flag_down
 553         popl    %eax
 554 
 555         push    %eax
 556         pushl   EX_Underflow    // argument for _exception
 557         call    _exception
 558         popl    %eax            // discard the argument
 559         popl    %eax            // restore %eax
 560 
 561         movb    TW_Zero,TAG(%edi)
 562         jmp     xL_Store_significand
 563 
 564 
 565 // The operations resulted in a number too large to represent.
     // Hands the destination register to _arith_overflow and leaves
     // without storing %eax:%ebx or setting the tag here.
 566 L_overflow:
 567         push    %edi            // argument: the result register
 568         call    _arith_overflow
 569         pop     %edi            // drop the argument again
 570         jmp     FPU_Arith_exit
 571 
 572 
 573 xSignal_underflow:
     // Unmasked underflow: reached from xNormalise_result when
     // FPU_denormal is UNMASKED_UNDERFLOW.  Raises the underflow exception
     // and carries on with the normal store path.
 574         push    %eax            // preserve the ms significand word
 575         pushl   EX_Underflow    // argument for EXCEPTION
 576         call    EXCEPTION
 577         popl    %eax            // discard the argument
 578         popl    %eax            // restore the ms significand word
 579         jmp     xL_Normalised
 580 
 581 
 582 #ifdef PARANOID
 583 /* If we ever get here then we have problems! */
     // Each handler reports an internal error code through EXCEPTION and
     // leaves via FPU_Arith_exit without storing a result.  The popl only
     // removes the pushed argument; FPU_Arith_exit reloads the saved %ebx
     // straight afterwards.
 584 L_bugged:                       // unrecognised control-word field
 585         pushl   EX_INTERNAL|0x201
 586         call    EXCEPTION
 587         popl    %ebx
 588         jmp     FPU_Arith_exit
 589 
 590 L_norm_bugged:                  // exponent check in xNormalise_result failed
 591         pushl   EX_INTERNAL|0x216
 592         call    EXCEPTION
 593         popl    %ebx
 594         jmp     FPU_Arith_exit
 595 
 596 L_entry_bugged:                 // only referenced from commented-out code
 597         pushl   EX_INTERNAL|0x217
 598         call    EXCEPTION
 599         popl    %ebx
 600         jmp     FPU_Arith_exit
 601 #endif /* PARANOID */
/* */
root/kernel/FPU-emu/reg_round.S