arch/i386/math-emu/div

/* */
   1         .file   "div_Xsig.S"
   2 /*---------------------------------------------------------------------------+
   3  |  div_Xsig.S                                                               |
   4  |                                                                           |
   5  | Division subroutine for 96 bit quantities                                 |
   6  |                                                                           |
   7  | Copyright (C) 1994                                                        |
   8  |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
   9  |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
  10  |                                                                           |
  11  |                                                                           |
  12  +---------------------------------------------------------------------------*/
  13 
  14 /*---------------------------------------------------------------------------+
  15  | Divide the 96 bit quantity pointed to by a, by that pointed to by b, and  |
  16  | put the 96 bit result at the location d.                                  |
  17  |                                                                           |
  18  | The result may not be accurate to 96 bits. It is intended for use where   |
  19  | a result better than 64 bits is required. The result should usually be    |
  20  | good to at least 94 bits.                                                 |
  21  | The returned result is actually divided by one half. This is done to      |
  22  | prevent overflow.                                                         |
  23  |                                                                           |
  24  |  .aaaaaaaaaaaaaa / .bbbbbbbbbbbbb  ->  .dddddddddddd                      |
  25  |                                                                           |
  26  |  void div_Xsig(Xsig *a, Xsig *b, Xsig *dest)                              |
  27  |                                                                           |
  28  +---------------------------------------------------------------------------*/
  29 
  30 #include "exception.h"
  31 #include "fpu_asm.h"
  32 
  33 
  34 #define XsigLL(x)       (x)
  35 #define XsigL(x)        4(x)
  36 #define XsigH(x)        8(x)
  37 
  38 
  39 #ifndef NON_REENTRANT_FPU
  40 /*
  41         Local storage on the stack:
  42         Accumulator:    FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
  43  */
  44 #define FPU_accum_3     -4(%ebp)
  45 #define FPU_accum_2     -8(%ebp)
  46 #define FPU_accum_1     -12(%ebp)
  47 #define FPU_accum_0     -16(%ebp)
  48 #define FPU_result_3    -20(%ebp)
  49 #define FPU_result_2    -24(%ebp)
  50 #define FPU_result_1    -28(%ebp)
  51 
  52 #else
  53 .data
  54 /*
  55         Local storage in a static area:
  56         Accumulator:    FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
  57  */
  58         .align 2,0
  59 FPU_accum_3:
  60         .long   0
  61 FPU_accum_2:
  62         .long   0
  63 FPU_accum_1:
  64         .long   0
  65 FPU_accum_0:
  66         .long   0
  67 FPU_result_3:
  68         .long   0
  69 FPU_result_2:
  70         .long   0
  71 FPU_result_1:
  72         .long   0
  73 #endif NON_REENTRANT_FPU
  74 
  75 
  76 .text
  77         .align 2,144
  78 
  79 .globl _div_Xsig
  80 
  81 _div_Xsig:
  82         pushl   %ebp
  83         movl    %esp,%ebp
  84 #ifndef NON_REENTRANT_FPU
  85         subl    $28,%esp
  86 #endif NON_REENTRANT_FPU
  87 
  88         pushl   %esi
  89         pushl   %edi
  90         pushl   %ebx
  91 
  92         movl    PARAM1,%esi     /* pointer to num */
  93         movl    PARAM2,%ebx     /* pointer to denom */
  94 
  95 #ifdef PARANOID
  96         testl   $0x80000000, XsigH(%ebx)        /* Divisor */
  97         je      L_bugged
  98 #endif PARANOID
  99 
 100 
 101 /*---------------------------------------------------------------------------+
 102  |  Divide:   Return  arg1/arg2 to arg3.                                     |
 103  |                                                                           |
 104  |  The maximum returned value is (ignoring exponents)                       |
 105  |               .ffffffff ffffffff                                          |
 106  |               ------------------  =  1.ffffffff fffffffe                  |
 107  |               .80000000 00000000                                          |
 108  | and the minimum is                                                        |
 109  |               .80000000 00000000                                          |
 110  |               ------------------  =  .80000000 00000001   (rounded)       |
 111  |               .ffffffff ffffffff                                          |
 112  |                                                                           |
 113  +---------------------------------------------------------------------------*/
 114 
 115         /* Save extended dividend in local register */
 116 
 117         /* Divide by 2 to prevent overflow */
 118         clc
 119         movl    XsigH(%esi),%eax
 120         rcrl    %eax
 121         movl    %eax,FPU_accum_3
 122         movl    XsigL(%esi),%eax
 123         rcrl    %eax
 124         movl    %eax,FPU_accum_2
 125         movl    XsigLL(%esi),%eax
 126         rcrl    %eax
 127         movl    %eax,FPU_accum_1
 128         movl    $0,%eax
 129         rcrl    %eax
 130         movl    %eax,FPU_accum_0
 131 
 132         movl    FPU_accum_2,%eax        /* Get the current num */
 133         movl    FPU_accum_3,%edx
 134 
 135 /*----------------------------------------------------------------------*/
 136 /* Initialization done.
 137    Do the first 32 bits. */
 138 
 139         /* We will divide by a number which is too large */
 140         movl    XsigH(%ebx),%ecx
 141         addl    $1,%ecx
 142         jnc     LFirst_div_not_1
 143 
 144         /* here we need to divide by 100000000h,
 145            i.e., no division at all.. */
 146         mov     %edx,%eax
 147         jmp     LFirst_div_done
 148 
 149 LFirst_div_not_1:
 150         divl    %ecx            /* Divide the numerator by the augmented
 151                                    denom ms dw */
 152 
 153 LFirst_div_done:
 154         movl    %eax,FPU_result_3       /* Put the result in the answer */
 155 
 156         mull    XsigH(%ebx)     /* mul by the ms dw of the denom */
 157 
 158         subl    %eax,FPU_accum_2        /* Subtract from the num local reg */
 159         sbbl    %edx,FPU_accum_3
 160 
 161         movl    FPU_result_3,%eax       /* Get the result back */
 162         mull    XsigL(%ebx)     /* now mul the ls dw of the denom */
 163 
 164         subl    %eax,FPU_accum_1        /* Subtract from the num local reg */
 165         sbbl    %edx,FPU_accum_2
 166         sbbl    $0,FPU_accum_3
 167         je      LDo_2nd_32_bits         /* Must check for non-zero result here */
 168 
 169 #ifdef PARANOID
 170         jb      L_bugged_1
 171 #endif PARANOID
 172 
 173         /* need to subtract another once of the denom */
 174         incl    FPU_result_3    /* Correct the answer */
 175 
 176         movl    XsigL(%ebx),%eax
 177         movl    XsigH(%ebx),%edx
 178         subl    %eax,FPU_accum_1        /* Subtract from the num local reg */
 179         sbbl    %edx,FPU_accum_2
 180 
 181 #ifdef PARANOID
 182         sbbl    $0,FPU_accum_3
 183         jne     L_bugged_1      /* Must check for non-zero result here */
 184 #endif PARANOID
 185 
 186 /*----------------------------------------------------------------------*/
 187 /* Half of the main problem is done, there is just a reduced numerator
 188    to handle now.
 189    Work with the second 32 bits, FPU_accum_0 not used from now on */
 190 LDo_2nd_32_bits:
 191         movl    FPU_accum_2,%edx        /* get the reduced num */
 192         movl    FPU_accum_1,%eax
 193 
 194         /* need to check for possible subsequent overflow */
 195         cmpl    XsigH(%ebx),%edx
 196         jb      LDo_2nd_div
 197         ja      LPrevent_2nd_overflow
 198 
 199         cmpl    XsigL(%ebx),%eax
 200         jb      LDo_2nd_div
 201 
 202 LPrevent_2nd_overflow:
 203 /* The numerator is greater or equal, would cause overflow */
 204         /* prevent overflow */
 205         subl    XsigL(%ebx),%eax
 206         sbbl    XsigH(%ebx),%edx
 207         movl    %edx,FPU_accum_2
 208         movl    %eax,FPU_accum_1
 209 
 210         incl    FPU_result_3    /* Reflect the subtraction in the answer */
 211 
 212 #ifdef PARANOID
 213         je      L_bugged_2      /* Can't bump the result to 1.0 */
 214 #endif PARANOID
 215 
 216 LDo_2nd_div:
 217         cmpl    $0,%ecx         /* augmented denom msw */
 218         jnz     LSecond_div_not_1
 219 
 220         /* %ecx == 0, we are dividing by 1.0 */
 221         mov     %edx,%eax
 222         jmp     LSecond_div_done
 223 
 224 LSecond_div_not_1:
 225         divl    %ecx            /* Divide the numerator by the denom ms dw */
 226 
 227 LSecond_div_done:
 228         movl    %eax,FPU_result_2       /* Put the result in the answer */
 229 
 230         mull    XsigH(%ebx)     /* mul by the ms dw of the denom */
 231 
 232         subl    %eax,FPU_accum_1        /* Subtract from the num local reg */
 233         sbbl    %edx,FPU_accum_2
 234 
 235 #ifdef PARANOID
 236         jc      L_bugged_2
 237 #endif PARANOID
 238 
 239         movl    FPU_result_2,%eax       /* Get the result back */
 240         mull    XsigL(%ebx)     /* now mul the ls dw of the denom */
 241 
 242         subl    %eax,FPU_accum_0        /* Subtract from the num local reg */
 243         sbbl    %edx,FPU_accum_1        /* Subtract from the num local reg */
 244         sbbl    $0,FPU_accum_2
 245 
 246 #ifdef PARANOID
 247         jc      L_bugged_2
 248 #endif PARANOID
 249 
 250         jz      LDo_3rd_32_bits
 251 
 252 #ifdef PARANOID
 253         cmpl    $1,FPU_accum_2
 254         jne     L_bugged_2
 255 #endif PARANOID
 256 
 257         /* need to subtract another once of the denom */
 258         movl    XsigL(%ebx),%eax
 259         movl    XsigH(%ebx),%edx
 260         subl    %eax,FPU_accum_0        /* Subtract from the num local reg */
 261         sbbl    %edx,FPU_accum_1
 262         sbbl    $0,FPU_accum_2
 263 
 264 #ifdef PARANOID
 265         jc      L_bugged_2
 266         jne     L_bugged_2
 267 #endif PARANOID
 268 
 269         addl    $1,FPU_result_2 /* Correct the answer */
 270         adcl    $0,FPU_result_3
 271 
 272 #ifdef PARANOID
 273         jc      L_bugged_2      /* Must check for non-zero result here */
 274 #endif PARANOID
 275 
 276 /*----------------------------------------------------------------------*/
 277 /* The division is essentially finished here, we just need to perform
 278    tidying operations.
 279    Deal with the 3rd 32 bits */
 280 LDo_3rd_32_bits:
 281         /* We use an approximation for the third 32 bits.
 282         To take account of the 3rd 32 bits of the divisor
 283         (call them del), we subtract  del * (a/b) */
 284 
 285         movl    FPU_result_3,%eax       /* a/b */
 286         mull    XsigLL(%ebx)            /* del */
 287 
 288         subl    %edx,FPU_accum_1
 289 
 290         /* A borrow indicates that the result is negative */
 291         jnb     LTest_over
 292 
 293         movl    XsigH(%ebx),%edx
 294         addl    %edx,FPU_accum_1
 295 
 296         subl    $1,FPU_result_2         /* Adjust the answer */
 297         sbbl    $0,FPU_result_3
 298 
 299         /* The above addition might not have been enough, check again. */
 300         movl    FPU_accum_1,%edx        /* get the reduced num */
 301         cmpl    XsigH(%ebx),%edx        /* denom */
 302         jb      LDo_3rd_div
 303 
 304         movl    XsigH(%ebx),%edx
 305         addl    %edx,FPU_accum_1
 306 
 307         subl    $1,FPU_result_2         /* Adjust the answer */
 308         sbbl    $0,FPU_result_3
 309         jmp     LDo_3rd_div
 310 
 311 LTest_over:
 312         movl    FPU_accum_1,%edx        /* get the reduced num */
 313 
 314         /* need to check for possible subsequent overflow */
 315         cmpl    XsigH(%ebx),%edx        /* denom */
 316         jb      LDo_3rd_div
 317 
 318         /* prevent overflow */
 319         subl    XsigH(%ebx),%edx
 320         movl    %edx,FPU_accum_1
 321 
 322         addl    $1,FPU_result_2 /* Reflect the subtraction in the answer */
 323         adcl    $0,FPU_result_3
 324 
 325 LDo_3rd_div:
 326         movl    FPU_accum_0,%eax
 327         movl    FPU_accum_1,%edx
 328         divl    XsigH(%ebx)
 329 
 330         movl    %eax,FPU_result_1       /* Rough estimate of third word */
 331 
 332         movl    PARAM3,%esi             /* pointer to answer */
 333 
 334         movl    FPU_result_1,%eax
 335         movl    %eax,XsigLL(%esi)
 336         movl    FPU_result_2,%eax
 337         movl    %eax,XsigL(%esi)
 338         movl    FPU_result_3,%eax
 339         movl    %eax,XsigH(%esi)
 340 
 341 L_exit:
 342         popl    %ebx
 343         popl    %edi
 344         popl    %esi
 345 
 346         leave
 347         ret
 348 
 349 
 350 #ifdef PARANOID
 351 /* The logic is wrong if we got here */
 352 L_bugged:
 353         pushl   EX_INTERNAL|0x240
 354         call    EXCEPTION
 355         pop     %ebx
 356         jmp     L_exit
 357 
 358 L_bugged_1:
 359         pushl   EX_INTERNAL|0x241
 360         call    EXCEPTION
 361         pop     %ebx
 362         jmp     L_exit
 363 
 364 L_bugged_2:
 365         pushl   EX_INTERNAL|0x242
 366         call    EXCEPTION
 367         pop     %ebx
 368         jmp     L_exit
 369 #endif PARANOID
/* */
root/arch/i386/math-emu/div_Xsig.S