1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
! .udiv -- unsigned 32-bit integer division (SPARC assembly).
! Non-restoring division; the main loop produces 4 quotient bits per pass.
!
! In:     %o0 = dividend, %o1 = divisor
! Out:    %o0 = quotient (0 if the divide-by-zero trap returns)
! Writes: %o2 (quotient), %o3 (running remainder), %o4 (count of 4-bit
!         passes), %o5 (scaled divisor), %g1 (scratch constant),
!         %g7 (count of single-bit steps, large-dividend path only),
!         and the integer condition codes.
!
! NOTE: every branch below has a delay slot: the instruction following a
! branch executes as part of it, except after "b,a" (never executed) and
! after "bl,a" when the branch is not taken.
44 .globl .udiv
45 .udiv:
46
47 ! Ready to divide. Compute size of quotient; scale comparand.
48 orcc %o1, %g0, %o5 ! %o5 = divisor; condition codes tell whether it is zero
49 bne 1f
50 mov %o0, %o3 ! delay slot: %o3 = dividend (becomes the running remainder)
51
52 ! Divide by zero trap. If it returns, return 0 (about as
53 ! wrong as possible, but that is what SunOS does...).
54 ta ST_DIV0
55 retl
56 clr %o0 ! delay slot: result = 0
57
58 1:
59 cmp %o3, %o5 ! if %o1 exceeds %o0, done
60 blu Lgot_result ! (and algorithm fails otherwise)
61 clr %o2 ! delay slot: quotient = 0 (executed on both paths)
62 sethi %hi(1 << (32 - 4 - 1)), %g1 ! %g1 = 1 << 27
63 cmp %o3, %g1
64 blu Lnot_really_big ! dividend below 1 << 27: take the regular path
65 clr %o4 ! delay slot: pass count = 0 (executed on both paths)
66
67 ! Here the dividend is >= 2**(31-N) or so. We must be careful here,
68 ! as our usual N-at-a-shot divide step will cause overflow and havoc.
69 ! The number of bits in the result here is N*ITER+SC, where SC <= N.
70 ! Compute ITER in an unorthodox manner: know we need to shift V into
71 ! the top decade: so do not even bother to compare to R.
! In this code N is 4, V is %o5, R is %o3, ITER is %o4 and SC is %g7.
! The loop shifts %o5 left 4 bits at a time until it is >= %g1,
! counting the shifts in %o4.
72 1:
73 cmp %o5, %g1
74 bgeu 3f
75 mov 1, %g7 ! delay slot: %g7 = 1 (re-executed on every pass of this loop)
76 sll %o5, 4, %o5
77 b 1b
78 add %o4, 1, %o4 ! delay slot: count one 4-bit shift
79
80 ! Now compute %g7.
81 2: addcc %o5, %o5, %o5 ! %o5 <<= 1; carry is the bit shifted out
82 bcc Lnot_too_big
83 add %g7, 1, %g7 ! delay slot: count the 1-bit shift (executed on both paths)
84
85 ! We get here if the %o1 overflowed while shifting.
86 ! This means that %o3 has the high-order bit set.
87 ! Restore %o5 and subtract from %o3.
88 sll %g1, 4, %g1 ! high order bit
89 srl %o5, 1, %o5 ! rest of %o5
90 add %o5, %g1, %o5
91 b Ldo_single_div
92 sub %g7, 1, %g7 ! delay slot: cancel the increment made for the undone shift
93
94 Lnot_too_big:
95 3: cmp %o5, %o3
96 blu 2b ! scaled divisor still below the remainder: shift one more bit
97 nop
98 be Ldo_single_div
99 nop
100
101
! NOTE: the back-up step below is commented out, so the %o5 > %o3 case
! falls through into Ldo_single_div exactly like the equal case.
102 ! %o5 > %o3: went too far: back up 1 step
103 ! srl %o5, 1, %o5
104 ! dec %g7
105 ! do single-bit divide steps
106 !
107 ! We have to be careful here. We know that %o3 >= %o5, so we can do the
108 ! first divide step without thinking. BUT, the others are conditional,
109 ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
110 ! order bit set in the first step, just falling into the regular
111 ! division loop will mess up the first time around.
112 ! So we unroll slightly...
113 Ldo_single_div:
114 subcc %g7, 1, %g7
115 bl Lend_regular_divide ! count went negative: no single-bit steps to do
116 nop
117 sub %o3, %o5, %o3 ! first step: subtract unconditionally
118 mov 1, %o2 ! ... and record a quotient bit of 1
119 b Lend_single_divloop
120 nop
121 Lsingle_divloop:
122 sll %o2, 1, %o2 ! make room for the next quotient bit
123 bl 1f ! tests the sign of %o3 set by "tst %o3" in the loop-end delay slot
124 srl %o5, 1, %o5 ! delay slot: halve the divisor for this step
125 ! %o3 >= 0
126 sub %o3, %o5, %o3
127 b 2f
128 add %o2, 1, %o2 ! delay slot: quotient digit +1
129 1: ! %o3 < 0
130 add %o3, %o5, %o3
131 sub %o2, 1, %o2 ! quotient digit -1
132 2:
133 Lend_single_divloop:
134 subcc %g7, 1, %g7
135 bge Lsingle_divloop
136 tst %o3 ! delay slot: sign of remainder for the next step
137 b,a Lend_regular_divide ! annulled: the instruction after this one is skipped
138
! Regular path: shift the divisor left 4 bits at a time until it exceeds
! the dividend, counting the shifts in %o4.
139 Lnot_really_big:
140 1:
141 sll %o5, 4, %o5
142 cmp %o5, %o3
143 bleu 1b
144 addcc %o4, 1, %o4 ! delay slot: count the shift; sets the codes tested by "be" below
145 be Lgot_result ! taken only if the addcc above produced zero
146 sub %o4, 1, %o4 ! delay slot: executed on both paths
147
148 tst %o3 ! set up for initial iteration
! Main loop: each pass shifts the quotient left 4 bits and walks a 4-level
! decision tree. At every level the divisor is halved (in the branch delay
! slot) and then subtracted from a non-negative remainder or added to a
! negative one. Each leaf adds an odd value in -15..15 to %o2.
! NOTE(review): the label suffix in L.d.k appears to be depth d and
! k = 16 + accumulated bits at the branch that targets it.
149 Ldivloop:
150 sll %o2, 4, %o2
151 ! depth 1, accumulated bits 0
152 bl L.1.16
153 srl %o5,1,%o5
154 ! remainder is positive
155 subcc %o3,%o5,%o3
156 ! depth 2, accumulated bits 1
157 bl L.2.17
158 srl %o5,1,%o5
159 ! remainder is positive
160 subcc %o3,%o5,%o3
161 ! depth 3, accumulated bits 3
162 bl L.3.19
163 srl %o5,1,%o5
164 ! remainder is positive
165 subcc %o3,%o5,%o3
166 ! depth 4, accumulated bits 7
167 bl L.4.23
168 srl %o5,1,%o5
169 ! remainder is positive
170 subcc %o3,%o5,%o3
171 b 9f
172 add %o2, (7*2+1), %o2
173
174 L.4.23:
175 ! remainder is negative
176 addcc %o3,%o5,%o3
177 b 9f
178 add %o2, (7*2-1), %o2
179
180
181 L.3.19:
182 ! remainder is negative
183 addcc %o3,%o5,%o3
184 ! depth 4, accumulated bits 5
185 bl L.4.21
186 srl %o5,1,%o5
187 ! remainder is positive
188 subcc %o3,%o5,%o3
189 b 9f
190 add %o2, (5*2+1), %o2
191
192 L.4.21:
193 ! remainder is negative
194 addcc %o3,%o5,%o3
195 b 9f
196 add %o2, (5*2-1), %o2
197
198
199
200 L.2.17:
201 ! remainder is negative
202 addcc %o3,%o5,%o3
203 ! depth 3, accumulated bits 1
204 bl L.3.17
205 srl %o5,1,%o5
206 ! remainder is positive
207 subcc %o3,%o5,%o3
208 ! depth 4, accumulated bits 3
209 bl L.4.19
210 srl %o5,1,%o5
211 ! remainder is positive
212 subcc %o3,%o5,%o3
213 b 9f
214 add %o2, (3*2+1), %o2
215
216 L.4.19:
217 ! remainder is negative
218 addcc %o3,%o5,%o3
219 b 9f
220 add %o2, (3*2-1), %o2
221
222
223 L.3.17:
224 ! remainder is negative
225 addcc %o3,%o5,%o3
226 ! depth 4, accumulated bits 1
227 bl L.4.17
228 srl %o5,1,%o5
229 ! remainder is positive
230 subcc %o3,%o5,%o3
231 b 9f
232 add %o2, (1*2+1), %o2
233
234 L.4.17:
235 ! remainder is negative
236 addcc %o3,%o5,%o3
237 b 9f
238 add %o2, (1*2-1), %o2
239
240
241
242
243 L.1.16:
244 ! remainder is negative
245 addcc %o3,%o5,%o3
246 ! depth 2, accumulated bits -1
247 bl L.2.15
248 srl %o5,1,%o5
249 ! remainder is positive
250 subcc %o3,%o5,%o3
251 ! depth 3, accumulated bits -1
252 bl L.3.15
253 srl %o5,1,%o5
254 ! remainder is positive
255 subcc %o3,%o5,%o3
256 ! depth 4, accumulated bits -1
257 bl L.4.15
258 srl %o5,1,%o5
259 ! remainder is positive
260 subcc %o3,%o5,%o3
261 b 9f
262 add %o2, (-1*2+1), %o2
263
264 L.4.15:
265 ! remainder is negative
266 addcc %o3,%o5,%o3
267 b 9f
268 add %o2, (-1*2-1), %o2
269
270
271 L.3.15:
272 ! remainder is negative
273 addcc %o3,%o5,%o3
274 ! depth 4, accumulated bits -3
275 bl L.4.13
276 srl %o5,1,%o5
277 ! remainder is positive
278 subcc %o3,%o5,%o3
279 b 9f
280 add %o2, (-3*2+1), %o2
281
282 L.4.13:
283 ! remainder is negative
284 addcc %o3,%o5,%o3
285 b 9f
286 add %o2, (-3*2-1), %o2
287
288
289
290 L.2.15:
291 ! remainder is negative
292 addcc %o3,%o5,%o3
293 ! depth 3, accumulated bits -3
294 bl L.3.13
295 srl %o5,1,%o5
296 ! remainder is positive
297 subcc %o3,%o5,%o3
298 ! depth 4, accumulated bits -5
299 bl L.4.11
300 srl %o5,1,%o5
301 ! remainder is positive
302 subcc %o3,%o5,%o3
303 b 9f
304 add %o2, (-5*2+1), %o2
305
306 L.4.11:
307 ! remainder is negative
308 addcc %o3,%o5,%o3
309 b 9f
310 add %o2, (-5*2-1), %o2
311
312
313 L.3.13:
314 ! remainder is negative
315 addcc %o3,%o5,%o3
316 ! depth 4, accumulated bits -7
317 bl L.4.9
318 srl %o5,1,%o5
319 ! remainder is positive
320 subcc %o3,%o5,%o3
321 b 9f
322 add %o2, (-7*2+1), %o2
323
324 L.4.9:
325 ! remainder is negative
326 addcc %o3,%o5,%o3
327 b 9f
328 add %o2, (-7*2-1), %o2
329
330
331
332
! Common loop tail: all sixteen leaves and both exits of the single-bit
! path arrive here.
333 9:
334 Lend_regular_divide:
335 subcc %o4, 1, %o4 ! one fewer 4-bit pass remaining
336 bge Ldivloop
337 tst %o3 ! delay slot: sign of remainder for the next pass or the final fixup
338 bl,a Lgot_result
339 ! non-restoring fixup here (one instruction only!)
340 sub %o2, 1, %o2 ! annulled delay slot: runs only when the final remainder is negative
341
342
343 Lgot_result:
344
345 retl
346 mov %o2, %o0 ! delay slot: return the quotient in %o0