1 .file "reg_u_div.S"
/*---------------------------------------------------------------------------+
 |  reg_u_div.S                                                              |
 |                                                                           |
 | Core division routines                                                    |
 |                                                                           |
 | Copyright (C) 1992,1993                                                   |
 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
 |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
 |                                                                           |
 |                                                                           |
 +---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------+
 |  Kernel for the division routines.                                        |
 |                                                                           |
 |  void reg_u_div(FPU_REG *a, FPU_REG *b,                                   |
 |                 FPU_REG *dest, unsigned int control_word)                 |
 |                                                                           |
 |  a is the numerator, b the denominator, dest receives the answer.         |
 |  Does not compute the destination exponent, but does adjust it.           |
 +---------------------------------------------------------------------------*/

23 #include "exception.h"
24 #include "fpu_asm.h"
25 #include "control_w.h"
26
27
28 /* #define dSIGL(x) (x) */ 29 /* #define dSIGH(x) 4(x) */ 30
31
#ifndef NON_REENTRANT_FPU
/*
	Local storage on the stack (the re-entrant build).  _reg_u_div
	reserves 28 bytes below %ebp for these slots.

	Working dividend / remainder (most significant word first):
			FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
	Quotient words:	FPU_result_2 (more significant), FPU_result_1
	Overflow flag:	FPU_ovfl_flag (only ever accessed as a byte)
 */
#define FPU_accum_3	-4(%ebp)
#define FPU_accum_2	-8(%ebp)
#define FPU_accum_1	-12(%ebp)
#define FPU_accum_0	-16(%ebp)
#define FPU_result_1	-20(%ebp)
#define FPU_result_2	-24(%ebp)
#define FPU_ovfl_flag	-28(%ebp)

#else
.data
/*
	Local storage in a static area (the non-re-entrant build).

	Working dividend / remainder (most significant word first):
			FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
	Quotient words:	FPU_result_2 (more significant), FPU_result_1
	Overflow flag:	FPU_ovfl_flag (only ever accessed as a byte)
 */
	.align 2,0
FPU_accum_3:
	.long	0
FPU_accum_2:
	.long	0
FPU_accum_1:
	.long	0
FPU_accum_0:
	.long	0
FPU_result_1:
	.long	0
FPU_result_2:
	.long	0
FPU_ovfl_flag:
	.byte	0
#endif /* NON_REENTRANT_FPU */


.text
	.align 2,144

.globl _reg_u_div

.globl _divide_kernel

/*
 * _reg_u_div: entry point taking its arguments on the stack.
 *
 * Builds an %ebp frame (reserving 28 bytes of locals in the re-entrant
 * build), saves %esi, %edi and %ebx, and then loads
 *	%esi = PARAM1	pointer to the numerator
 *	%ebx = PARAM2	pointer to the denominator
 *	%edi = PARAM3	pointer to the answer
 * PARAM1..PARAM3, EXP and EXP_UNDER are macros that are not defined in
 * this file (presumably they come from fpu_asm.h -- confirm).
 *
 * With DENORM_OPERAND defined, each operand whose EXP field is not
 * greater (signed compare) than EXP_UNDER causes a call to
 * _denormal_operand; a non-zero return value in %eax abandons the
 * operation through fpu_Arith_exit.
 *
 * Execution then falls straight through into _divide_kernel.
 */
_reg_u_div:
	pushl	%ebp
	movl	%esp,%ebp
#ifndef NON_REENTRANT_FPU
	subl	$28,%esp		/* room for FPU_accum_*, FPU_result_*, FPU_ovfl_flag */
#endif /* NON_REENTRANT_FPU */

	pushl	%esi
	pushl	%edi
	pushl	%ebx

	movl	PARAM1,%esi	/* pointer to num */
	movl	PARAM2,%ebx	/* pointer to denom */
	movl	PARAM3,%edi	/* pointer to answer */

#ifdef DENORM_OPERAND
	movl	EXP(%esi),%eax
	cmpl	EXP_UNDER,%eax
	jg	xOp1_not_denorm

	call	_denormal_operand
	orl	%eax,%eax		/* non-zero return => give up */
	jnz	fpu_Arith_exit

xOp1_not_denorm:
	movl	EXP(%ebx),%eax
	cmpl	EXP_UNDER,%eax
	jg	xOp2_not_denorm

	call	_denormal_operand
	orl	%eax,%eax		/* non-zero return => give up */
	jnz	fpu_Arith_exit

xOp2_not_denorm:
#endif /* DENORM_OPERAND */

/*
 * _divide_kernel: second global entry point; also reached by falling
 * through from the code above.
 *
 * Expects:	%esi -> numerator, %ebx -> denominator, %edi -> answer,
 *		and a frame in which the FPU_* local storage is valid.
 *
 * When the low significand word of the denominator is zero, the quotient
 * is produced here with three 32-bit divl steps:
 *	FPU_result_2	first quotient word
 *	FPU_result_1	second quotient word
 *	%eax		third quotient word, handed on as the extension
 * Otherwise control goes to L_Full_Division.
 */
_divide_kernel:
#ifdef PARANOID
/*	testl	$0x80000000, SIGH(%esi)	// Dividend */
/*	je	L_bugged */
	testl	$0x80000000, SIGH(%ebx)	/* Divisor: top bit must be set */
	je	L_bugged
#endif /* PARANOID */

/* Check if the divisor can be treated as having just 32 bits */
	cmpl	$0,SIGL(%ebx)
	jnz	L_Full_Division	/* Can't do a quick divide */

/* We should be able to zip through the division here */
	movl	SIGH(%ebx),%ecx	/* The divisor */
	movl	SIGH(%esi),%edx	/* Dividend */
	movl	SIGL(%esi),%eax	/* Dividend */

	cmpl	%ecx,%edx
	setaeb	FPU_ovfl_flag	/* Keep a record: 1 if dividend ms word >= divisor */
	jb	L_no_adjust

	subl	%ecx,%edx	/* Prevent the overflow (divl would fault) */

L_no_adjust:
	/* Divide the 64 bit number by the 32 bit denominator */
	divl	%ecx
	movl	%eax,FPU_result_2

	/* Work on the remainder of the first division */
	xorl	%eax,%eax
	divl	%ecx
	movl	%eax,FPU_result_1

	/* Work on the remainder of the 64 bit division */
	xorl	%eax,%eax
	divl	%ecx
	/* NOTE(review): the remainder which this last divl leaves in %edx is
	   not looked at again (LRound_precision overwrites %edx), so only
	   %eax describes the discarded part of the quotient.  Confirm that
	   this is precise enough for fpu_reg_round. */

	testb	$255,FPU_ovfl_flag	/* was the num > denom ? */
	je	L_no_overflow

	/* Do the shifting here */
	/* increase the exponent */
	incl	EXP(%edi)

	/* shift the mantissa right one bit */
	stc			/* To set the ms bit */
	rcrl	FPU_result_2
	rcrl	FPU_result_1
	rcrl	%eax

L_no_overflow:
	jmp	LRound_precision	/* Do the rounding as required */

/*---------------------------------------------------------------------------+
 |  Divide:   Return  arg1/arg2 to arg3.                                     |
 |                                                                           |
 |  This routine does not use the exponents of arg1 and arg2, but does       |
 |  adjust the exponent of arg3.                                             |
 |                                                                           |
 |  The maximum returned value is (ignoring exponents)                       |
 |               .ffffffff ffffffff                                          |
 |               ------------------  =  1.ffffffff fffffffe                  |
 |               .80000000 00000000                                          |
 | and the minimum is                                                        |
 |               .80000000 00000000                                          |
 |               ------------------  =  .80000000 00000001   (rounded)       |
 |               .ffffffff ffffffff                                          |
 |                                                                           |
 +---------------------------------------------------------------------------*/

/*
 * Full 64-bit by 64-bit significand division.
 *
 * On entry:	%esi -> numerator, %ebx -> denominator, %edi -> answer.
 *
 * Each quotient word is first estimated by dividing by (SIGH(denom) + 1),
 * which can only under-estimate; the estimate times the full denominator
 * is subtracted from the working dividend in FPU_accum_*, and the
 * estimate is corrected by one where the checks below require it.
 *
 * On exit (jmp fpu_reg_round):
 *	%eax = FPU_result_2	more significant quotient word
 *	%ebx = FPU_result_1	less significant quotient word
 *				(the denominator pointer is gone by then)
 *	%edx = extension describing the remainder:
 *		0		remainder is zero
 *		0x70000000	2*remainder <  denominator
 *		0x80000000	2*remainder == denominator
 *		0xff000000	2*remainder >  denominator
 *	       rotated right by one bit, together with the quotient, when
 *	       FPU_ovfl_flag is set
 *	EXP(%edi) is decremented once, after being incremented once if
 *	FPU_ovfl_flag is set.
 */
L_Full_Division:
	/* Save extended dividend in local register */
	movl	SIGL(%esi),%eax
	movl	%eax,FPU_accum_2
	movl	SIGH(%esi),%eax
	movl	%eax,FPU_accum_3
	xorl	%eax,%eax
	movl	%eax,FPU_accum_1	/* zero the extension */
	movl	%eax,FPU_accum_0	/* zero the extension */

	movl	SIGL(%esi),%eax	/* Get the current num */
	movl	SIGH(%esi),%edx

/*----------------------------------------------------------------------*/
/* Initialization done.
   Do the first 32 bits. */

	movb	$0,FPU_ovfl_flag
	cmpl	SIGH(%ebx),%edx	/* Test for imminent overflow */
	jb	LLess_than_1
	ja	LGreater_than_1

	cmpl	SIGL(%ebx),%eax
	jb	LLess_than_1

LGreater_than_1:
/* The dividend is greater or equal, would cause overflow */
	/* CF is clear on both ways in here (ja taken, or jb not taken),
	   so this always stores 1. */
	setaeb	FPU_ovfl_flag		/* Keep a record */

	subl	SIGL(%ebx),%eax
	sbbl	SIGH(%ebx),%edx	/* Prevent the overflow */
	movl	%eax,FPU_accum_2
	movl	%edx,FPU_accum_3

LLess_than_1:
/* At this point, we have a dividend < divisor, with a record of
   adjustment in FPU_ovfl_flag */

	/* We will divide by a number which is too large */
	movl	SIGH(%ebx),%ecx
	addl	$1,%ecx
	jnc	LFirst_div_not_1

	/* here we need to divide by 100000000h,
	   i.e., no division at all.. */
	movl	%edx,%eax
	jmp	LFirst_div_done

LFirst_div_not_1:
	divl	%ecx		/* Divide the numerator by the augmented
				   denom ms dw */

LFirst_div_done:
	movl	%eax,FPU_result_2	/* Put the result in the answer */

	mull	SIGH(%ebx)	/* mul by the ms dw of the denom */

	subl	%eax,FPU_accum_2	/* Subtract from the num local reg */
	sbbl	%edx,FPU_accum_3

	movl	FPU_result_2,%eax	/* Get the result back */
	mull	SIGL(%ebx)	/* now mul the ls dw of the denom */

	subl	%eax,FPU_accum_1	/* Subtract from the num local reg */
	sbbl	%edx,FPU_accum_2
	sbbl	$0,FPU_accum_3
	je	LDo_2nd_32_bits		/* Must check for non-zero result here */

#ifdef PARANOID
	jb	L_bugged_1
#endif /* PARANOID */

	/* need to subtract another once of the denom */
	incl	FPU_result_2	/* Correct the answer */

	movl	SIGL(%ebx),%eax
	movl	SIGH(%ebx),%edx
	subl	%eax,FPU_accum_1	/* Subtract from the num local reg */
	sbbl	%edx,FPU_accum_2

#ifdef PARANOID
	sbbl	$0,FPU_accum_3
	jne	L_bugged_1	/* Must check for non-zero result here */
#endif /* PARANOID */

/*----------------------------------------------------------------------*/
/* Half of the main problem is done, there is just a reduced numerator
   to handle now.
   Work with the second 32 bits, FPU_accum_0 not used from now on */
LDo_2nd_32_bits:
	movl	FPU_accum_2,%edx	/* get the reduced num */
	movl	FPU_accum_1,%eax

	/* need to check for possible subsequent overflow */
	cmpl	SIGH(%ebx),%edx
	jb	LDo_2nd_div
	ja	LPrevent_2nd_overflow

	cmpl	SIGL(%ebx),%eax
	jb	LDo_2nd_div

LPrevent_2nd_overflow:
/* The numerator is greater or equal, would cause overflow */
	/* prevent overflow */
	subl	SIGL(%ebx),%eax
	sbbl	SIGH(%ebx),%edx
	movl	%edx,FPU_accum_2
	movl	%eax,FPU_accum_1

	incl	FPU_result_2	/* Reflect the subtraction in the answer */

#ifdef PARANOID
	je	L_bugged_2	/* Can't bump the result to 1.0 */
#endif /* PARANOID */

LDo_2nd_div:
	/* %ecx still holds SIGH(denom)+1 from the first step; the mull
	   instructions in between only write %eax and %edx. */
	cmpl	$0,%ecx		/* augmented denom msw */
	jnz	LSecond_div_not_1

	/* %ecx == 0, we are dividing by 1.0 */
	movl	%edx,%eax
	jmp	LSecond_div_done

LSecond_div_not_1:
	divl	%ecx		/* Divide the numerator by the denom ms dw */

LSecond_div_done:
	movl	%eax,FPU_result_1	/* Put the result in the answer */

	mull	SIGH(%ebx)	/* mul by the ms dw of the denom */

	subl	%eax,FPU_accum_1	/* Subtract from the num local reg */
	sbbl	%edx,FPU_accum_2

#ifdef PARANOID
	jc	L_bugged_2
#endif /* PARANOID */

	movl	FPU_result_1,%eax	/* Get the result back */
	mull	SIGL(%ebx)	/* now mul the ls dw of the denom */

	subl	%eax,FPU_accum_0	/* Subtract from the num local reg */
	sbbl	%edx,FPU_accum_1	/* Subtract from the num local reg */
	sbbl	$0,FPU_accum_2

#ifdef PARANOID
	jc	L_bugged_2
#endif /* PARANOID */

	/* ZF still comes from the sbbl on FPU_accum_2 above; a conditional
	   jump does not change the flags. */
	jz	LDo_3rd_32_bits

#ifdef PARANOID
	cmpl	$1,FPU_accum_2
	jne	L_bugged_2
#endif /* PARANOID */

	/* need to subtract another once of the denom */
	movl	SIGL(%ebx),%eax
	movl	SIGH(%ebx),%edx
	subl	%eax,FPU_accum_0	/* Subtract from the num local reg */
	sbbl	%edx,FPU_accum_1
	sbbl	$0,FPU_accum_2

#ifdef PARANOID
	jc	L_bugged_2
	jne	L_bugged_2
#endif /* PARANOID */

	addl	$1,FPU_result_1	/* Correct the answer */
	adcl	$0,FPU_result_2

#ifdef PARANOID
	jc	L_bugged_2	/* Must check for non-zero result here */
#endif /* PARANOID */

/*----------------------------------------------------------------------*/
/* The division is essentially finished here, we just need to perform
   tidying operations.
   Deal with the 3rd 32 bits */
LDo_3rd_32_bits:
	movl	FPU_accum_1,%edx		/* get the reduced num */
	movl	FPU_accum_0,%eax

	/* need to check for possible subsequent overflow */
	cmpl	SIGH(%ebx),%edx	/* denom */
	jb	LRound_prep
	ja	LPrevent_3rd_overflow

	cmpl	SIGL(%ebx),%eax	/* denom */
	jb	LRound_prep

LPrevent_3rd_overflow:
	/* prevent overflow */
	subl	SIGL(%ebx),%eax
	sbbl	SIGH(%ebx),%edx
	movl	%edx,FPU_accum_1
	movl	%eax,FPU_accum_0

	addl	$1,FPU_result_1	/* Reflect the subtraction in the answer */
	adcl	$0,FPU_result_2
	jne	LRound_prep
	jnc	LRound_prep

	/* This is a tricky spot, there is an overflow of the answer */
	movb	$255,FPU_ovfl_flag		/* Overflow -> 1.000 */

LRound_prep:
/*
 * Prepare for rounding.
 * To test for rounding, we just need to compare 2*accum with the
 * denom.
 */
	movl	FPU_accum_0,%ecx
	movl	FPU_accum_1,%edx
	movl	%ecx,%eax
	orl	%edx,%eax
	jz	LRound_ovfl		/* The accumulator contains zero
					   (and so does %eax). */

	/* Multiply by 2 */
	clc
	rcll	$1,%ecx
	rcll	$1,%edx
	jc	LRound_large		/* No need to compare, denom smaller */

	subl	SIGL(%ebx),%ecx
	sbbl	SIGH(%ebx),%edx
	jnc	LRound_not_small

	movl	$0x70000000,%eax	/* Denom was larger */
	jmp	LRound_ovfl

LRound_not_small:
	/* %edx:%ecx = 2*accum - denom, and it did not borrow.  The ZF left
	   by the sbbl describes the high dword only, so fold in the low
	   dword before deciding whether the difference is exactly zero.
	   Without this, a difference of 0:<non-zero> would be reported as
	   "exactly 1/2".  %edx is dead after this point (it is reloaded
	   at LRound_precision). */
	orl	%ecx,%edx
	jnz	LRound_large

	movl	$0x80000000,%eax	/* Remainder was exactly 1/2 denom */
	jmp	LRound_ovfl

LRound_large:
	movl	$0xff000000,%eax	/* Denom was smaller */

LRound_ovfl:
/* We are now ready to deal with rounding, but first we must get
   the bits properly aligned */
	testb	$255,FPU_ovfl_flag	/* was the num > denom ? */
	je	LRound_precision

	incl	EXP(%edi)

	/* shift the mantissa right one bit */
	stc			/* Will set the ms bit */
	rcrl	FPU_result_2
	rcrl	FPU_result_1
	rcrl	%eax

/* Round the result as required */
LRound_precision:
	decl	EXP(%edi)	/* binary point between 1st & 2nd bits */

	movl	%eax,%edx		/* extension */
	movl	FPU_result_1,%ebx	/* ls quotient word (denom pointer no longer needed) */
	movl	FPU_result_2,%eax	/* ms quotient word */
	jmp	fpu_reg_round


#ifdef PARANOID
/*
 * Internal-error exits, assembled only when PARANOID is defined.
 * The logic is wrong if we got here.
 *
 * Each handler pushes EX_INTERNAL or'ed with its own code, calls
 * EXCEPTION, removes the argument from the stack again and leaves
 * through L_exit, which restores %ebx, %edi and %esi, drops the frame
 * and returns to the caller.
 */
L_bugged:
	pushl	EX_INTERNAL|0x202
	call	EXCEPTION
	popl	%ebx		/* discard the argument; %ebx is reloaded at L_exit */
	jmp	L_exit

L_bugged_1:
	pushl	EX_INTERNAL|0x203
	call	EXCEPTION
	popl	%ebx		/* discard the argument; %ebx is reloaded at L_exit */
	jmp	L_exit

L_bugged_2:
	pushl	EX_INTERNAL|0x204
	call	EXCEPTION
	popl	%ebx		/* discard the argument; %ebx is reloaded at L_exit */
	jmp	L_exit

L_exit:
	popl	%ebx
	popl	%edi
	popl	%esi

	leave
	ret
#endif /* PARANOID */