1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
!-----------------------------------------------------------------------
! .rem -- signed 32-bit integer remainder (SPARC leaf routine).
!
! In:	%o0 = dividend
!	%o1 = divisor
! Out:	%o0 = dividend rem divisor; the result carries the sign of the
!	      dividend (truncating division, as for the C `%' operator).
!	      If the divisor is zero, trap ST_DIV0 is taken; should the
!	      trap handler return, the routine returns 0.
! Clobb:	%o1-%o5, %g1, %g6, %g7, integer condition codes.
!
! Method: both operands are made nonnegative, an unsigned non-restoring
! division producing 4 quotient bits per main-loop iteration is run,
! and the sign is re-applied to the remainder at the end.
!
! Register roles inside the routine:
!	%o3 = running remainder (R), may go negative between steps
!	%o5 = divisor scaled up to line up with the dividend (V)
!	%o2 = quotient accumulator (maintained, but not returned here)
!	%o4 = number of 4-bit main-loop iterations left (ITER)
!	%g7 = number of single-bit steps (SC) for very large dividends
!	%g1 = scratch: 1 << (32 - 4 - 1) threshold / high-order bit
!	%g6 = copy of the original dividend; only its sign bit is used
!
! Instructions indented by one extra space sit in a branch delay slot.
!-----------------------------------------------------------------------
	.globl	.rem
.rem:
	! compute sign of result; if neither is negative, no problem
	orcc	%o1, %o0, %g0		! either negative?
	bge	2f			! no, go do the divide
	 mov	%o0, %g6		! sign of remainder matches dividend;
					! saved in any case (delay slot)
	tst	%o1
	bge	1f
	 tst	%o0
	! %o1 is definitely negative; %o0 might also be negative
	bge	2f			! if %o0 not negative...
	 sub	%g0, %o1, %o1		! in any case, make %o1 nonneg
1:	! %o0 is negative, %o1 is nonnegative
	sub	%g0, %o0, %o0		! make %o0 nonnegative
2:

	! Ready to divide.  Compute size of quotient; scale comparand.
	orcc	%o1, %g0, %o5		! V = divisor; also tests it for zero
	bne	1f
	 mov	%o0, %o3		! R = dividend

	! Divide by zero trap.  If it returns, return 0 (about as
	! wrong as possible, but that is what SunOS does...).
	ta	ST_DIV0
	retl
	 clr	%o0

1:
	cmp	%o3, %o5		! if %o1 exceeds %o0, done
	blu	Lgot_result		! (and algorithm fails otherwise)
	 clr	%o2
	sethi	%hi(1 << (32 - 4 - 1)), %g1
	cmp	%o3, %g1
	blu	Lnot_really_big
	 clr	%o4

	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
	! as our usual N-at-a-shot divide step will cause overflow and havoc.
	! The number of bits in the result here is N*ITER+SC, where SC <= N.
	! Compute ITER in an unorthodox manner: know we need to shift V into
	! the top decade: so do not even bother to compare to R.
1:
	cmp	%o5, %g1
	bgeu	3f
	 mov	1, %g7
	sll	%o5, 4, %o5
	b	1b
	 add	%o4, 1, %o4

	! Now compute %g7.
2:	addcc	%o5, %o5, %o5		! V <<= 1, carry out = lost top bit
	bcc	Lnot_too_big
	 add	%g7, 1, %g7

	! We get here if the scaled divisor (%o5) overflowed while shifting.
	! This means that %o3 has the high-order bit set.
	! Restore %o5 and subtract from %o3.
	sll	%g1, 4, %g1		! high order bit
	srl	%o5, 1, %o5		! rest of %o5
	add	%o5, %g1, %o5
	b	Ldo_single_div
	 sub	%g7, 1, %g7

Lnot_too_big:
3:	cmp	%o5, %o3
	blu	2b
	 nop
	be	Ldo_single_div
	 nop


	! %o5 > %o3: went too far: back up 1 step
	!	srl	%o5, 1, %o5
	!	dec	%g7
	! do single-bit divide steps
	!
	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
	! first divide step without thinking.  BUT, the others are conditional,
	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
	! order bit set in the first step, just falling into the regular
	! division loop will mess up the first time around.
	! So we unroll slightly...
Ldo_single_div:
	subcc	%g7, 1, %g7
	bl	Lend_regular_divide
	 nop
	sub	%o3, %o5, %o3		! unconditional first step
	mov	1, %o2
	b	Lend_single_divloop
	 nop
Lsingle_divloop:
	sll	%o2, 1, %o2
	bl	1f			! tests the sign of %o3 (set by tst below)
	 srl	%o5, 1, %o5
	! %o3 >= 0
	sub	%o3, %o5, %o3
	b	2f
	 add	%o2, 1, %o2
1:	! %o3 < 0
	add	%o3, %o5, %o3
	sub	%o2, 1, %o2
2:
Lend_single_divloop:
	subcc	%g7, 1, %g7
	bge	Lsingle_divloop
	 tst	%o3
	b,a	Lend_regular_divide

Lnot_really_big:
1:
	sll	%o5, 4, %o5		! scale V up 4 bits at a time ...
	cmp	%o5, %o3
	bleu	1b			! ... while V <= R
	 addcc	%o4, 1, %o4		! count the iterations needed
	be	Lgot_result
	 sub	%o4, 1, %o4

	tst	%o3			! set up for initial iteration
Ldivloop:
	! One pass develops 4 quotient bits as a binary decision tree.
	! Each level branches on the sign of the running remainder (%o3):
	! negative -> add V back, nonnegative -> subtract V.  The leaf
	! adds the signed 4-bit digit selected by the path to %o2.
	sll	%o2, 4, %o2
	! depth 1, accumulated bits 0
	bl	L.1.16
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 2, accumulated bits 1
	bl	L.2.17
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 3, accumulated bits 3
	bl	L.3.19
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits 7
	bl	L.4.23
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (7*2+1), %o2

L.4.23:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (7*2-1), %o2


L.3.19:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits 5
	bl	L.4.21
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (5*2+1), %o2

L.4.21:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (5*2-1), %o2



L.2.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 3, accumulated bits 1
	bl	L.3.17
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits 3
	bl	L.4.19
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (3*2+1), %o2

L.4.19:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (3*2-1), %o2


L.3.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits 1
	bl	L.4.17
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (1*2+1), %o2

L.4.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (1*2-1), %o2




L.1.16:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 2, accumulated bits -1
	bl	L.2.15
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 3, accumulated bits -1
	bl	L.3.15
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits -1
	bl	L.4.15
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-1*2+1), %o2

L.4.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-1*2-1), %o2


L.3.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits -3
	bl	L.4.13
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-3*2+1), %o2

L.4.13:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-3*2-1), %o2



L.2.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 3, accumulated bits -3
	bl	L.3.13
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits -5
	bl	L.4.11
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-5*2+1), %o2

L.4.11:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-5*2-1), %o2


L.3.13:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits -7
	bl	L.4.9
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-7*2+1), %o2

L.4.9:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-7*2-1), %o2




9:
Lend_regular_divide:
	subcc	%o4, 1, %o4
	bge	Ldivloop
	 tst	%o3
	bl,a	Lgot_result
	! non-restoring fixup here (one instruction only!)
	 add	%o3, %o1, %o3		! R < 0: add |divisor| back once
					! (annulled when R >= 0)


Lgot_result:
	! %o3 now holds |dividend| rem |divisor|.
	! Check to see if the answer should be < 0: the remainder takes
	! the sign of the original dividend, which was saved in %g6.
	tst	%g6
	bl,a	1f
	 sub	%g0, %o3, %o3		! negate; annulled if dividend >= 0
1:
	retl
	 mov	%o3, %o0