1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
!
! .rem -- signed 32-bit integer remainder (SPARC, no hardware divide).
!
!   In:   %o0 = dividend, %o1 = divisor (both interpreted as signed)
!   Out:  %o0 = remainder; it carries the sign of the dividend, so that
!               (-7) rem 2 == -1 and 7 rem (-2) == 1.
!               If the divisor is zero, trap ST_DIV0 is raised and, should
!               the trap handler return, 0 is returned.
!   Uses: %o1-%o5, %g1, %g6, %g7 and the integer condition codes.
!         Leaf routine: no register window, no stack.
!
! Register roles inside the routine:
!   %o3  running remainder (R)          %o5  scaled divisor (V)
!   %o2  quotient digits (accumulated by the shared divide steps, not returned)
!   %o4  number of 4-bit divide iterations still to run (ITER)
!   %g7  number of single-bit divide steps (big-dividend path only)
!   %g1  the constant 1 << (32 - 4 - 1)
!   %g6  copy of the original dividend, kept only for its sign
!
! Method: both operands are made non-negative, an unsigned non-restoring
! division is run 4 bits per iteration (N = 4), and the sign of the
! dividend is re-applied to the remainder at Lgot_result.
!
! NOTE(review): %g6 and %g7 are overwritten here; confirm that the
! environment this is linked into does not reserve them.
! NOTE(review): ST_DIV0 is not defined in this part of the file; it is
! presumably supplied by a trap-number header.
!
	.globl	.rem
.rem:
	! compute sign of result; if neither is negative, no problem
	orcc	%o1, %o0, %g0		! either negative?
	bge	2f			! no, go do the divide
	 mov	%o0, %g6		! sign of remainder matches the dividend
	tst	%o1
	bge	1f
	 tst	%o0
	! %o1 is definitely negative; %o0 might also be negative
	bge	2f			! if %o0 not negative...
	 sub	%g0, %o1, %o1		! in any case, make %o1 nonneg
1:	! %o0 is negative, %o1 is nonnegative
	sub	%g0, %o0, %o0		! make %o0 nonnegative
2:

	! Ready to divide.  Compute size of quotient; scale comparand.
	orcc	%o1, %g0, %o5
	bne	1f
	 mov	%o0, %o3

	! Divide by zero trap.  If it returns, return 0 (about as
	! wrong as possible, but that is what SunOS does...).
	ta	ST_DIV0
	retl
	 clr	%o0

1:
	cmp	%o3, %o5		! if %o1 exceeds %o0, done
	blu	Lgot_result		! (and algorithm fails otherwise)
	 clr	%o2
	sethi	%hi(1 << (32 - 4 - 1)), %g1
	cmp	%o3, %g1
	blu	Lnot_really_big
	 clr	%o4

	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
	! as our usual N-at-a-shot divide step will cause overflow and havoc.
	! The number of bits in the result here is N*ITER+SC, where SC <= N.
	! Compute ITER in an unorthodox manner: know we need to shift V into
	! the top decade: so do not even bother to compare to R.
1:
	cmp	%o5, %g1
	bgeu	3f
	 mov	1, %g7
	sll	%o5, 4, %o5
	b	1b
	 add	%o4, 1, %o4

	! Now compute %g7.
2:	addcc	%o5, %o5, %o5
	bcc	Lnot_too_big
	 add	%g7, 1, %g7

	! We get here if the %o1 overflowed while shifting.
	! This means that %o3 has the high-order bit set.
	! Restore %o5 and subtract from %o3.
	sll	%g1, 4, %g1		! high order bit
	srl	%o5, 1, %o5		! rest of %o5
	add	%o5, %g1, %o5
	b	Ldo_single_div
	 sub	%g7, 1, %g7

Lnot_too_big:
3:	cmp	%o5, %o3
	blu	2b
	 nop
	be	Ldo_single_div
	 nop

	! %o5 > %o3: went too far: back up 1 step
	!	srl	%o5, 1, %o5
	!	dec	%g7
	! do single-bit divide steps
	!
	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
	! first divide step without thinking.  BUT, the others are conditional,
	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
	! order bit set in the first step, just falling into the regular
	! division loop will mess up the first time around.
	! So we unroll slightly...
Ldo_single_div:
	subcc	%g7, 1, %g7
	bl	Lend_regular_divide
	 nop
	sub	%o3, %o5, %o3
	mov	1, %o2
	b	Lend_single_divloop
	 nop
Lsingle_divloop:
	sll	%o2, 1, %o2
	bl	1f
	 srl	%o5, 1, %o5
	! %o3 >= 0
	sub	%o3, %o5, %o3
	b	2f
	 add	%o2, 1, %o2
1:	! %o3 < 0
	add	%o3, %o5, %o3
	sub	%o2, 1, %o2
2:
Lend_single_divloop:
	subcc	%g7, 1, %g7
	bge	Lsingle_divloop
	 tst	%o3
	b,a	Lend_regular_divide

Lnot_really_big:
1:
	sll	%o5, 4, %o5
	cmp	%o5, %o3
	bleu	1b
	 addcc	%o4, 1, %o4
	be	Lgot_result
	 sub	%o4, 1, %o4

	tst	%o3			! set up for initial iteration
Ldivloop:
	sll	%o2, 4, %o2
	! depth 1, accumulated bits 0
	bl	L.1.16
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 2, accumulated bits 1
	bl	L.2.17
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 3, accumulated bits 3
	bl	L.3.19
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits 7
	bl	L.4.23
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (7*2+1), %o2

L.4.23:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (7*2-1), %o2


L.3.19:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits 5
	bl	L.4.21
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (5*2+1), %o2

L.4.21:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (5*2-1), %o2



L.2.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 3, accumulated bits 1
	bl	L.3.17
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits 3
	bl	L.4.19
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (3*2+1), %o2

L.4.19:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (3*2-1), %o2


L.3.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits 1
	bl	L.4.17
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (1*2+1), %o2

L.4.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (1*2-1), %o2




L.1.16:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 2, accumulated bits -1
	bl	L.2.15
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 3, accumulated bits -1
	bl	L.3.15
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits -1
	bl	L.4.15
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-1*2+1), %o2

L.4.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-1*2-1), %o2


L.3.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits -3
	bl	L.4.13
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-3*2+1), %o2

L.4.13:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-3*2-1), %o2



L.2.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 3, accumulated bits -3
	bl	L.3.13
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits -5
	bl	L.4.11
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-5*2+1), %o2

L.4.11:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-5*2-1), %o2


L.3.13:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits -7
	bl	L.4.9
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-7*2+1), %o2

L.4.9:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-7*2-1), %o2




9:
Lend_regular_divide:
	subcc	%o4, 1, %o4
	bge	Ldivloop
	 tst	%o3
	bl,a	Lgot_result
	! non-restoring fixup here (one instruction only!)
	! The delay slot is annulled unless the remainder ended up negative,
	! in which case one |divisor| is added back.
	 add	%o3, %o1, %o3


Lgot_result:
	! %o3 holds the remainder of |dividend| / |divisor|.  Give it the
	! sign of the original dividend saved in %g6; the annulled delay
	! slot only negates when %g6 is negative.
	tst	%g6
	bl,a	1f
	 sub	%g0, %o3, %o3
1:
	retl
	 mov	%o3, %o0