1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
!-----------------------------------------------------------------------
! .urem -- unsigned 32-bit remainder support routine (SPARC, no
!          hardware divide).
!
! In:    %o0 = dividend
!        %o1 = divisor
! Out:   %o0 = dividend mod divisor, both taken as unsigned values.
!        A zero divisor raises the ST_DIV0 software trap; if the trap
!        handler returns, the routine returns 0.
!
! Register roles inside the routine:
!        %o1  original divisor, kept intact for the final fixup
!        %o2  quotient accumulator (built up, but never returned)
!        %o3  running remainder; may be negative between steps
!        %o4  number of 4-bit iterations of Ldivloop still to run
!        %o5  divisor, scaled up and then shifted right one bit per step
!        %g1  scratch threshold constant
!        %o0  once copied to %o3: count of single-bit divide steps
!        integer condition codes are clobbered
!
! The single-bit step count lives in %o0 rather than in a global
! register.  %o0 is dead as soon as the dividend has been copied to
! %o3 and is overwritten with the result on every exit path, so the
! only registers modified are %o0, %o2-%o5 and %g1.  In particular
! %g5-%g7, which the SPARC ABI reserves for system software, are left
! alone.
!
! Method: non-restoring division producing 4 quotient bits per trip
! through Ldivloop.  Instructions indented by one extra space sit in
! the delay slot of the branch above them.
!-----------------------------------------------------------------------
	.globl	.urem
.urem:

	! Ready to divide.  Compute size of quotient; scale comparand.
	orcc	%o1, %g0, %o5		! %o5 = divisor, flags = (divisor == 0)
	bne	1f
	 mov	%o0, %o3		! %o3 = dividend (done on both paths)

	! Divide by zero trap.  If it returns, return 0 (about as
	! wrong as possible, but that is what SunOS does...).
	ta	ST_DIV0
	retl
	 clr	%o0

1:
	cmp	%o3, %o5		! if %o1 exceeds %o0, done
	blu	Lgot_result		! (and algorithm fails otherwise)
	 clr	%o2			! quotient accumulator = 0
	sethi	%hi(1 << (32 - 4 - 1)), %g1
	cmp	%o3, %g1
	blu	Lnot_really_big
	 clr	%o4			! iteration count = 0

	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
	! as our usual N-at-a-shot divide step will cause overflow and havoc.
	! The number of bits in the result here is N*ITER+SC, where SC <= N.
	! Compute ITER in an unorthodox manner: know we need to shift V into
	! the top decade: so do not even bother to compare to R.
	! (N is 4 here; ITER is %o4, SC is %o0, V is %o5, R is %o3.)
1:
	cmp	%o5, %g1
	bgeu	3f
	 mov	1, %o0			! single-bit step count starts at 1
	sll	%o5, 4, %o5		! scale the divisor by one more nibble
	b	1b
	 add	%o4, 1, %o4		! ...and account for one more iteration

	! Now compute the single-bit step count in %o0.
2:	addcc	%o5, %o5, %o5		! divisor <<= 1, carry = bit shifted out
	bcc	Lnot_too_big
	 add	%o0, 1, %o0

	! We get here if the %o1 overflowed while shifting.
	! This means that %o3 has the high-order bit set.
	! Restore %o5 and subtract from %o3.
	sll	%g1, 4, %g1		! high order bit
	srl	%o5, 1, %o5		! rest of %o5
	add	%o5, %g1, %o5
	b	Ldo_single_div
	 sub	%o0, 1, %o0		! undo the count for the lost shift

Lnot_too_big:
3:	cmp	%o5, %o3
	blu	2b			! divisor still below remainder: shift again
	 nop
	be	Ldo_single_div		! equal or greater: both continue at
	 nop				! Ldo_single_div (the fall-through)

	! %o5 > %o3: went too far: back up 1 step
	!	srl	%o5, 1, %o5
	!	dec	%o0
	! (the two instructions above are intentionally left disabled)
	! do single-bit divide steps
	!
	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
	! first divide step without thinking.  BUT, the others are conditional,
	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the
	! high-order bit set in the first step, just falling into the regular
	! division loop will mess up the first time around.
	! So we unroll slightly...
Ldo_single_div:
	subcc	%o0, 1, %o0
	bl	Lend_regular_divide	! no single-bit steps to do
	 nop
	sub	%o3, %o5, %o3		! unconditional first step
	mov	1, %o2
	b	Lend_single_divloop
	 nop
Lsingle_divloop:
	sll	%o2, 1, %o2		! make room for the next quotient bit
	bl	1f			! flags come from "tst %o3" below
	 srl	%o5, 1, %o5
	! %o3 >= 0
	sub	%o3, %o5, %o3
	b	2f
	 add	%o2, 1, %o2
1:	! %o3 < 0
	add	%o3, %o5, %o3
	sub	%o2, 1, %o2
2:
Lend_single_divloop:
	subcc	%o0, 1, %o0
	bge	Lsingle_divloop
	 tst	%o3			! sign of remainder steers the next step
	b,a	Lend_regular_divide	! annulled: delay slot is not executed

Lnot_really_big:
	! Scale the divisor up a nibble at a time until it exceeds the
	! dividend, counting the shifts in %o4.
1:
	sll	%o5, 4, %o5
	cmp	%o5, %o3
	bleu	1b
	 addcc	%o4, 1, %o4		! count the shift; sets flags for "be"
	be	Lgot_result		! taken only if the count came out zero
	 sub	%o4, 1, %o4		! undo the last increment

	tst	%o3			! set up for initial iteration
Ldivloop:
	! One trip produces 4 quotient bits.  Each level halves the divisor
	! and then subtracts it (remainder >= 0) or adds it back
	! (remainder < 0).  The leaf that is reached adds the resulting
	! signed digit to %o2.
	sll	%o2, 4, %o2
	! depth 1, accumulated bits 0
	bl	L.1.16
	 srl	%o5, 1, %o5
	! remainder is positive
	subcc	%o3, %o5, %o3
	! depth 2, accumulated bits 1
	bl	L.2.17
	 srl	%o5, 1, %o5
	! remainder is positive
	subcc	%o3, %o5, %o3
	! depth 3, accumulated bits 3
	bl	L.3.19
	 srl	%o5, 1, %o5
	! remainder is positive
	subcc	%o3, %o5, %o3
	! depth 4, accumulated bits 7
	bl	L.4.23
	 srl	%o5, 1, %o5
	! remainder is positive
	subcc	%o3, %o5, %o3
	b	9f
	 add	%o2, (7*2+1), %o2

L.4.23:
	! remainder is negative
	addcc	%o3, %o5, %o3
	b	9f
	 add	%o2, (7*2-1), %o2

L.3.19:
	! remainder is negative
	addcc	%o3, %o5, %o3
	! depth 4, accumulated bits 5
	bl	L.4.21
	 srl	%o5, 1, %o5
	! remainder is positive
	subcc	%o3, %o5, %o3
	b	9f
	 add	%o2, (5*2+1), %o2

L.4.21:
	! remainder is negative
	addcc	%o3, %o5, %o3
	b	9f
	 add	%o2, (5*2-1), %o2

L.2.17:
	! remainder is negative
	addcc	%o3, %o5, %o3
	! depth 3, accumulated bits 1
	bl	L.3.17
	 srl	%o5, 1, %o5
	! remainder is positive
	subcc	%o3, %o5, %o3
	! depth 4, accumulated bits 3
	bl	L.4.19
	 srl	%o5, 1, %o5
	! remainder is positive
	subcc	%o3, %o5, %o3
	b	9f
	 add	%o2, (3*2+1), %o2

L.4.19:
	! remainder is negative
	addcc	%o3, %o5, %o3
	b	9f
	 add	%o2, (3*2-1), %o2

L.3.17:
	! remainder is negative
	addcc	%o3, %o5, %o3
	! depth 4, accumulated bits 1
	bl	L.4.17
	 srl	%o5, 1, %o5
	! remainder is positive
	subcc	%o3, %o5, %o3
	b	9f
	 add	%o2, (1*2+1), %o2

L.4.17:
	! remainder is negative
	addcc	%o3, %o5, %o3
	b	9f
	 add	%o2, (1*2-1), %o2

L.1.16:
	! remainder is negative
	addcc	%o3, %o5, %o3
	! depth 2, accumulated bits -1
	bl	L.2.15
	 srl	%o5, 1, %o5
	! remainder is positive
	subcc	%o3, %o5, %o3
	! depth 3, accumulated bits -1
	bl	L.3.15
	 srl	%o5, 1, %o5
	! remainder is positive
	subcc	%o3, %o5, %o3
	! depth 4, accumulated bits -1
	bl	L.4.15
	 srl	%o5, 1, %o5
	! remainder is positive
	subcc	%o3, %o5, %o3
	b	9f
	 add	%o2, (-1*2+1), %o2

L.4.15:
	! remainder is negative
	addcc	%o3, %o5, %o3
	b	9f
	 add	%o2, (-1*2-1), %o2

L.3.15:
	! remainder is negative
	addcc	%o3, %o5, %o3
	! depth 4, accumulated bits -3
	bl	L.4.13
	 srl	%o5, 1, %o5
	! remainder is positive
	subcc	%o3, %o5, %o3
	b	9f
	 add	%o2, (-3*2+1), %o2

L.4.13:
	! remainder is negative
	addcc	%o3, %o5, %o3
	b	9f
	 add	%o2, (-3*2-1), %o2

L.2.15:
	! remainder is negative
	addcc	%o3, %o5, %o3
	! depth 3, accumulated bits -3
	bl	L.3.13
	 srl	%o5, 1, %o5
	! remainder is positive
	subcc	%o3, %o5, %o3
	! depth 4, accumulated bits -5
	bl	L.4.11
	 srl	%o5, 1, %o5
	! remainder is positive
	subcc	%o3, %o5, %o3
	b	9f
	 add	%o2, (-5*2+1), %o2

L.4.11:
	! remainder is negative
	addcc	%o3, %o5, %o3
	b	9f
	 add	%o2, (-5*2-1), %o2

L.3.13:
	! remainder is negative
	addcc	%o3, %o5, %o3
	! depth 4, accumulated bits -7
	bl	L.4.9
	 srl	%o5, 1, %o5
	! remainder is positive
	subcc	%o3, %o5, %o3
	b	9f
	 add	%o2, (-7*2+1), %o2

L.4.9:
	! remainder is negative
	addcc	%o3, %o5, %o3
	b	9f
	 add	%o2, (-7*2-1), %o2

9:
Lend_regular_divide:
	subcc	%o4, 1, %o4
	bge	Ldivloop		! more 4-bit iterations to run
	 tst	%o3			! flags = sign of remainder (both paths)
	bl,a	Lgot_result
	! non-restoring fixup here (one instruction only!)
	! Annulled delay slot: runs only when the remainder is negative,
	! adding the original divisor back once.
	 add	%o3, %o1, %o3

Lgot_result:
	retl
	 mov	%o3, %o0		! return the remainder