1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
! -----------------------------------------------------------------------
! .udiv -- unsigned integer division by shift-and-add/subtract
!          (non-restoring), developing 4 quotient bits per main-loop pass.
!
! In:     %o0 = dividend, %o1 = divisor
! Out:    %o0 = quotient
!           - 0 when the dividend is (unsigned) smaller than the divisor
!           - 0 when the divisor is zero and the ST_DIV0 trap returns
! Writes: %o0, %o2, %o3, %o4, %o5, %g1, %g7 and the integer condition codes.
!         Returns with retl; no register window is allocated in this routine.
!
! Register roles inside the routine:
!   %o3  running remainder (starts as the dividend, may go negative)
!   %o5  divisor: first scaled up, then shifted right one bit per step
!   %o2  quotient being accumulated
!   %o4  number of 4-bit groups still to develop
!   %g7  number of single-bit steps (large-dividend path only)
!   %g1  threshold loaded from %hi(1 << (32 - 4 - 1))
!
! Reading note: the instruction that follows every branch / retl below is
! in its delay slot and executes before control transfers. Several branches
! here deliberately test condition codes that were set by a delay-slot
! instruction of an earlier branch, so statement order must not be changed.
! -----------------------------------------------------------------------
45 .globl .udiv
46 .udiv:
47
48 ! Ready to divide. Compute size of quotient; scale comparand.
! orcc copies the divisor into %o5 and sets Z when it is zero; the
! delay-slot mov copies the dividend into %o3 on both paths.
49 orcc %o1, %g0, %o5
50 bne 1f
51 mov %o0, %o3
52
53 ! Divide by zero trap. If it returns, return 0 (about as
54 ! wrong as possible, but that is what SunOS does...).
! NOTE(review): ST_DIV0 is not defined in the visible part of the file;
! presumably a trap-number constant from a header -- confirm.
55 ta ST_DIV0
56 retl
57 clr %o0
58
59 1:
! The delay-slot clr zeroes the quotient (%o2) whether or not the early
! exit is taken, so Lgot_result returns 0 for dividend < divisor.
60 cmp %o3, %o5 ! if %o1 exceeds %o0, done
61 blu Lgot_result ! (and algorithm fails otherwise)
62 clr %o2
! Dividends below the %g1 threshold take the 4-bits-per-pass path at
! Lnot_really_big. The group counter %o4 is cleared in the delay slot on
! both paths.
63 sethi %hi(1 << (32 - 4 - 1)), %g1
64 cmp %o3, %g1
65 blu Lnot_really_big
66 clr %o4
67
68 ! Here the dividend is >= 2**(31-N) or so. We must be careful here,
69 ! as our usual N-at-a-shot divide step will cause overflow and havoc.
70 ! The number of bits in the result here is N*ITER+SC, where SC <= N.
71 ! Compute ITER in an unorthodox manner: know we need to shift V into
72 ! the top decade: so do not even bother to compare to R.
! Loop: shift the divisor left 4 bits per pass until it is >= %g1
! (unsigned), counting the shifts in %o4. %g7 is set to 1 in the delay
! slot of the bgeu on every pass, including the exiting one.
73 1:
74 cmp %o5, %g1
75 bgeu 3f
76 mov 1, %g7
77 sll %o5, 4, %o5
78 b 1b
79 add %o4, 1, %o4
80
81 ! Now compute %g7.
! Double the divisor; addcc sets carry if a bit is shifted out of the
! register. %g7 is incremented in the delay slot on both outcomes.
82 2: addcc %o5, %o5, %o5
83 bcc Lnot_too_big
84 add %g7, 1, %g7
85
86 ! We get here if the %o1 overflowed while shifting.
87 ! This means that %o3 has the high-order bit set.
88 ! Restore %o5 and subtract from %o3.
! %g1 << 4 recreates the bit that was carried out; halving %o5 and adding
! that bit back restores the pre-doubling divisor. The delay-slot sub
! undoes the %g7 increment made for the failed doubling.
89 sll %g1, 4, %g1 ! high order bit
90 srl %o5, 1, %o5 ! rest of %o5
91 add %o5, %g1, %o5
92 b Ldo_single_div
93 sub %g7, 1, %g7
94
! Keep doubling (2b) while the divisor is still below the remainder
! (unsigned). If they are equal, go to the single-bit steps directly;
! if the divisor is larger, fall through into Ldo_single_div.
95 Lnot_too_big:
96 3: cmp %o5, %o3
97 blu 2b
98 nop
99 be Ldo_single_div
100 nop
101
102
103 ! %o5 > %o3: went too far: back up 1 step
104 ! srl %o5, 1, %o5
105 ! dec %g7
106 ! do single-bit divide steps
107 !
108 ! We have to be careful here. We know that %o3 >= %o5, so we can do the
109 ! first divide step without thinking. BUT, the others are conditional,
110 ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
111 ! order bit set in the first step, just falling into the regular
112 ! division loop will mess up the first time around.
113 ! So we unroll slightly...
! If the step count %g7 goes negative there are no single-bit steps to do
! and control goes straight to the 4-bit loop tail. Otherwise the first
! step is an unconditional subtract with quotient bit 1.
114 Ldo_single_div:
115 subcc %g7, 1, %g7
116 bl Lend_regular_divide
117 nop
118 sub %o3, %o5, %o3
119 mov 1, %o2
120 b Lend_single_divloop
121 nop
! One non-restoring step per pass. The bl below tests the condition codes
! left by the `tst %o3` in the delay slot of the bge at
! Lend_single_divloop; the divisor is halved in the delay slot on both arms.
122 Lsingle_divloop:
123 sll %o2, 1, %o2
124 bl 1f
125 srl %o5, 1, %o5
126 ! %o3 >= 0
127 sub %o3, %o5, %o3
128 b 2f
129 add %o2, 1, %o2
130 1: ! %o3 < 0
131 add %o3, %o5, %o3
132 sub %o2, 1, %o2
133 2:
134 Lend_single_divloop:
135 subcc %g7, 1, %g7
136 bge Lsingle_divloop
137 tst %o3
! Annulled unconditional branch: no delay-slot instruction is executed, so
! the sll at Lnot_really_big is not run on this path.
138 b,a Lend_regular_divide
139
! Small-dividend path: shift the divisor left 4 bits per pass while it is
! <= the remainder (unsigned). The delay-slot addcc counts every pass in
! %o4, including the final one that falls through.
! The be that follows tests Z as left by that addcc (i.e. whether the
! incremented %o4 is zero), not the result of the cmp. Its delay-slot sub
! leaves %o4 = passes - 1, matching the count-down at Lend_regular_divide.
140 Lnot_really_big:
141 1:
142 sll %o5, 4, %o5
143 cmp %o5, %o3
144 bleu 1b
145 addcc %o4, 1, %o4
146 be Lgot_result
147 sub %o4, 1, %o4
148
149 tst %o3 ! set up for initial iteration
! Main loop: each pass makes room for 4 quotient bits (sll by 4) and then
! walks a fully unrolled 4-level decision tree. At every level the divisor
! is halved (in the bl delay slot) and then subtracted from the remainder
! if the remainder was non-negative, or added if it was negative.
! "accumulated bits" in the comments is the running signed digit sum; the
! leaf adds (sum*2+1) or (sum*2-1), an odd value in -15..15, to %o2.
! Label naming: L.<depth>.<16 + accumulated bits> is the
! "remainder is negative" arm of the test made at that depth.
! On entry the condition codes must reflect %o3 (set by the tst above or
! by the tst in the delay slot of the bge at Lend_regular_divide).
150 Ldivloop:
151 sll %o2, 4, %o2
152 ! depth 1, accumulated bits 0
153 bl L.1.16
154 srl %o5,1,%o5
155 ! remainder is positive
156 subcc %o3,%o5,%o3
157 ! depth 2, accumulated bits 1
158 bl L.2.17
159 srl %o5,1,%o5
160 ! remainder is positive
161 subcc %o3,%o5,%o3
162 ! depth 3, accumulated bits 3
163 bl L.3.19
164 srl %o5,1,%o5
165 ! remainder is positive
166 subcc %o3,%o5,%o3
167 ! depth 4, accumulated bits 7
168 bl L.4.23
169 srl %o5,1,%o5
170 ! remainder is positive
171 subcc %o3,%o5,%o3
172 b 9f
173 add %o2, (7*2+1), %o2
174
175 L.4.23:
176 ! remainder is negative
177 addcc %o3,%o5,%o3
178 b 9f
179 add %o2, (7*2-1), %o2
180
181
182 L.3.19:
183 ! remainder is negative
184 addcc %o3,%o5,%o3
185 ! depth 4, accumulated bits 5
186 bl L.4.21
187 srl %o5,1,%o5
188 ! remainder is positive
189 subcc %o3,%o5,%o3
190 b 9f
191 add %o2, (5*2+1), %o2
192
193 L.4.21:
194 ! remainder is negative
195 addcc %o3,%o5,%o3
196 b 9f
197 add %o2, (5*2-1), %o2
198
199
200
201 L.2.17:
202 ! remainder is negative
203 addcc %o3,%o5,%o3
204 ! depth 3, accumulated bits 1
205 bl L.3.17
206 srl %o5,1,%o5
207 ! remainder is positive
208 subcc %o3,%o5,%o3
209 ! depth 4, accumulated bits 3
210 bl L.4.19
211 srl %o5,1,%o5
212 ! remainder is positive
213 subcc %o3,%o5,%o3
214 b 9f
215 add %o2, (3*2+1), %o2
216
217 L.4.19:
218 ! remainder is negative
219 addcc %o3,%o5,%o3
220 b 9f
221 add %o2, (3*2-1), %o2
222
223
224 L.3.17:
225 ! remainder is negative
226 addcc %o3,%o5,%o3
227 ! depth 4, accumulated bits 1
228 bl L.4.17
229 srl %o5,1,%o5
230 ! remainder is positive
231 subcc %o3,%o5,%o3
232 b 9f
233 add %o2, (1*2+1), %o2
234
235 L.4.17:
236 ! remainder is negative
237 addcc %o3,%o5,%o3
238 b 9f
239 add %o2, (1*2-1), %o2
240
241
242
243
244 L.1.16:
245 ! remainder is negative
246 addcc %o3,%o5,%o3
247 ! depth 2, accumulated bits -1
248 bl L.2.15
249 srl %o5,1,%o5
250 ! remainder is positive
251 subcc %o3,%o5,%o3
252 ! depth 3, accumulated bits -1
253 bl L.3.15
254 srl %o5,1,%o5
255 ! remainder is positive
256 subcc %o3,%o5,%o3
257 ! depth 4, accumulated bits -1
258 bl L.4.15
259 srl %o5,1,%o5
260 ! remainder is positive
261 subcc %o3,%o5,%o3
262 b 9f
263 add %o2, (-1*2+1), %o2
264
265 L.4.15:
266 ! remainder is negative
267 addcc %o3,%o5,%o3
268 b 9f
269 add %o2, (-1*2-1), %o2
270
271
272 L.3.15:
273 ! remainder is negative
274 addcc %o3,%o5,%o3
275 ! depth 4, accumulated bits -3
276 bl L.4.13
277 srl %o5,1,%o5
278 ! remainder is positive
279 subcc %o3,%o5,%o3
280 b 9f
281 add %o2, (-3*2+1), %o2
282
283 L.4.13:
284 ! remainder is negative
285 addcc %o3,%o5,%o3
286 b 9f
287 add %o2, (-3*2-1), %o2
288
289
290
291 L.2.15:
292 ! remainder is negative
293 addcc %o3,%o5,%o3
294 ! depth 3, accumulated bits -3
295 bl L.3.13
296 srl %o5,1,%o5
297 ! remainder is positive
298 subcc %o3,%o5,%o3
299 ! depth 4, accumulated bits -5
300 bl L.4.11
301 srl %o5,1,%o5
302 ! remainder is positive
303 subcc %o3,%o5,%o3
304 b 9f
305 add %o2, (-5*2+1), %o2
306
307 L.4.11:
308 ! remainder is negative
309 addcc %o3,%o5,%o3
310 b 9f
311 add %o2, (-5*2-1), %o2
312
313
314 L.3.13:
315 ! remainder is negative
316 addcc %o3,%o5,%o3
317 ! depth 4, accumulated bits -7
318 bl L.4.9
319 srl %o5,1,%o5
320 ! remainder is positive
321 subcc %o3,%o5,%o3
322 b 9f
323 add %o2, (-7*2+1), %o2
324
325 L.4.9:
326 ! remainder is negative
327 addcc %o3,%o5,%o3
328 b 9f
329 add %o2, (-7*2-1), %o2
330
331
332
333
! Loop tail, shared by the 4-bit loop and the single-bit path. Count down
! the remaining 4-bit groups; the delay-slot `tst %o3` sets the condition
! codes both for the next Ldivloop pass and for the final sign check.
! Final check: `bl,a` executes its delay-slot sub only when the branch is
! taken (remainder negative), decrementing the quotient once; when the
! remainder is non-negative the sub is annulled and control falls through.
334 9:
335 Lend_regular_divide:
336 subcc %o4, 1, %o4
337 bge Ldivloop
338 tst %o3
339 bl,a Lgot_result
340 ! non-restoring fixup here (one instruction only!)
341 sub %o2, 1, %o2
342
343
! Common exit: the quotient is copied to the return register in the retl
! delay slot.
344 Lgot_result:
345
346 retl
347 mov %o2, %o0