1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
! .urem -- unsigned 32-bit remainder, written as a leaf routine (returns with
! retl and never executes save/restore).
!
! In:   %o0 = dividend, %o1 = divisor
! Out:  %o0 = remainder; 0 if the divisor is 0 and the trap handler returns
!
! Registers written: %o2 (quotient digits, accumulated but never returned),
! %o3 (running remainder, may go negative), %o4 (number of 4-bit divide
! passes), %o5 (divisor, scaled by shifting), %g1 (threshold constant),
! %g7 (number of single-bit divide steps), plus the integer condition codes.
! %o1 is left untouched so that the final fix-up can add it back.
!
! Method: non-restoring division.  The divisor is shifted left until it
! exceeds the dividend, then the quotient is developed four bits per pass by
! the unrolled decision tree at Ldivloop.  Each step subtracts the halved
! divisor when the running remainder is non-negative and adds it otherwise.
! A remainder that ends up negative is corrected once by adding the divisor.
!
! Most branches below have a live delay slot: the instruction that follows
! the branch is executed as well, and several of those instructions set the
! condition codes that a later branch consumes.  Do not reorder.
!
! NOTE(review): every line carries a leading decimal number that looks like a
! leftover from a line-numbered listing; presumably it has to be stripped
! before the file can be assembled -- confirm.
! NOTE(review): %g7 is used as scratch here; some SPARC ABIs reserve it for
! the system -- confirm this is acceptable for the target.
42 .globl .urem
43 .urem:
44
45 ! Ready to divide. Compute size of quotient; scale comparand.
46 orcc %o1, %g0, %o5 ! %o5 = divisor; Z is set when the divisor is 0
47 bne 1f
48 mov %o0, %o3 ! (delay slot) %o3 = dividend, the running remainder
49
50 ! Divide by zero trap. If it returns, return 0 (about as
51 ! wrong as possible, but that is what SunOS does...).
52 ta ST_DIV0 ! ST_DIV0 is defined outside this view
53 retl
54 clr %o0 ! (delay slot) result = 0
55
56 1:
57 cmp %o3, %o5 ! if %o1 exceeds %o0, done
58 blu Lgot_result ! (and algorithm fails otherwise)
59 clr %o2 ! (delay slot) quotient accumulator = 0
60 sethi %hi(1 << (32 - 4 - 1)), %g1 ! %g1 = 1 << 27
61 cmp %o3, %g1
62 blu Lnot_really_big ! dividend < 1 << 27: 4-bit shifts cannot overflow
63 clr %o4 ! (delay slot) number of 4-bit passes = 0
64
65 ! Here the dividend is >= 2**(31-N) or so. We must be careful here,
66 ! as our usual N-at-a-shot divide step will cause overflow and havoc.
67 ! The number of bits in the result here is N*ITER+SC, where SC <= N.
68 ! Compute ITER in an unorthodox manner: know we need to shift V into
69 ! the top decade: so do not even bother to compare to R.
! In register terms: V is %o5, R is %o3, ITER is %o4, SC is %g7, and N is 4.
70 1:
71 cmp %o5, %g1
72 bgeu 3f ! divisor already has a bit at or above bit 27
73 mov 1, %g7 ! (delay slot) single-bit step count starts at 1
74 sll %o5, 4, %o5 ! scale the divisor by 16
75 b 1b
76 add %o4, 1, %o4 ! (delay slot) one more 4-bit pass will be needed
77
78 ! Now compute %g7.
79 2: addcc %o5, %o5, %o5 ! double the divisor; carry out means it overflowed
80 bcc Lnot_too_big
81 add %g7, 1, %g7 ! (delay slot) count the extra single-bit step
82
83 ! We get here if the %o1 overflowed while shifting.
84 ! This means that %o3 has the high-order bit set.
85 ! Restore %o5 and subtract from %o3.
86 sll %g1, 4, %g1 ! high order bit
87 srl %o5, 1, %o5 ! rest of %o5
88 add %o5, %g1, %o5
89 b Ldo_single_div
90 sub %g7, 1, %g7 ! (delay slot) undo the count for the overflowed shift
91
92 Lnot_too_big:
93 3: cmp %o5, %o3
94 blu 2b ! divisor still below the remainder: keep doubling
95 nop
96 be Ldo_single_div
97 nop
98
99
100 ! %o5 > %o3: went too far: back up 1 step
101 ! srl %o5, 1, %o5
102 ! dec %g7
103 ! do single-bit divide steps
104 !
105 ! We have to be careful here. We know that %o3 >= %o5, so we can do the
106 ! first divide step without thinking. BUT, the others are conditional,
107 ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
108 ! order bit set in the first step, just falling into the regular
109 ! division loop will mess up the first time around.
110 ! So we unroll slightly...
111 Ldo_single_div:
112 subcc %g7, 1, %g7
113 bl Lend_regular_divide ! no single-bit steps left to do
114 nop
115 sub %o3, %o5, %o3 ! first step: unconditional subtract
116 mov 1, %o2 ! and its quotient bit
117 b Lend_single_divloop
118 nop
119 Lsingle_divloop:
120 sll %o2, 1, %o2 ! make room for the next quotient bit
121 bl 1f ! tests the sign left by "tst %o3" in the loop branch's delay slot
122 srl %o5, 1, %o5 ! (delay slot) halve the divisor for this step
123 ! %o3 >= 0
124 sub %o3, %o5, %o3
125 b 2f
126 add %o2, 1, %o2
127 1: ! %o3 < 0
128 add %o3, %o5, %o3
129 sub %o2, 1, %o2
130 2:
131 Lend_single_divloop:
132 subcc %g7, 1, %g7
133 bge Lsingle_divloop
134 tst %o3 ! (delay slot) sign of the remainder for the next step
135 b,a Lend_regular_divide ! annulled: no delay-slot instruction is executed
136
137 Lnot_really_big:
138 1:
139 sll %o5, 4, %o5 ! scale the divisor by 16
140 cmp %o5, %o3
141 bleu 1b ! repeat until the divisor exceeds the remainder
142 addcc %o4, 1, %o4 ! (delay slot) count this pass, also on the final trip
143 be Lgot_result ! taken only if the count just incremented is zero
144 sub %o4, 1, %o4 ! (delay slot) drop the extra increment from the final trip
145
146 tst %o3 ! set up for initial iteration
! One pass of Ldivloop develops four quotient bits.  Every node of the tree
! halves the divisor in a delay slot and then subtracts or adds it according
! to the sign of the remainder; each leaf adds the resulting signed digit to
! %o2 and joins at label 9.  The L.<d>.<n> labels are entered at depth <d>
! when the remainder is negative; <n> appears to be 16 plus the accumulated
! bits at that point.
147 Ldivloop:
148 sll %o2, 4, %o2 ! make room for four more quotient bits
149 ! depth 1, accumulated bits 0
150 bl L.1.16
151 srl %o5,1,%o5
152 ! remainder is positive
153 subcc %o3,%o5,%o3
154 ! depth 2, accumulated bits 1
155 bl L.2.17
156 srl %o5,1,%o5
157 ! remainder is positive
158 subcc %o3,%o5,%o3
159 ! depth 3, accumulated bits 3
160 bl L.3.19
161 srl %o5,1,%o5
162 ! remainder is positive
163 subcc %o3,%o5,%o3
164 ! depth 4, accumulated bits 7
165 bl L.4.23
166 srl %o5,1,%o5
167 ! remainder is positive
168 subcc %o3,%o5,%o3
169 b 9f
170 add %o2, (7*2+1), %o2
171
172 L.4.23:
173 ! remainder is negative
174 addcc %o3,%o5,%o3
175 b 9f
176 add %o2, (7*2-1), %o2
177
178
179 L.3.19:
180 ! remainder is negative
181 addcc %o3,%o5,%o3
182 ! depth 4, accumulated bits 5
183 bl L.4.21
184 srl %o5,1,%o5
185 ! remainder is positive
186 subcc %o3,%o5,%o3
187 b 9f
188 add %o2, (5*2+1), %o2
189
190 L.4.21:
191 ! remainder is negative
192 addcc %o3,%o5,%o3
193 b 9f
194 add %o2, (5*2-1), %o2
195
196
197
198 L.2.17:
199 ! remainder is negative
200 addcc %o3,%o5,%o3
201 ! depth 3, accumulated bits 1
202 bl L.3.17
203 srl %o5,1,%o5
204 ! remainder is positive
205 subcc %o3,%o5,%o3
206 ! depth 4, accumulated bits 3
207 bl L.4.19
208 srl %o5,1,%o5
209 ! remainder is positive
210 subcc %o3,%o5,%o3
211 b 9f
212 add %o2, (3*2+1), %o2
213
214 L.4.19:
215 ! remainder is negative
216 addcc %o3,%o5,%o3
217 b 9f
218 add %o2, (3*2-1), %o2
219
220
221 L.3.17:
222 ! remainder is negative
223 addcc %o3,%o5,%o3
224 ! depth 4, accumulated bits 1
225 bl L.4.17
226 srl %o5,1,%o5
227 ! remainder is positive
228 subcc %o3,%o5,%o3
229 b 9f
230 add %o2, (1*2+1), %o2
231
232 L.4.17:
233 ! remainder is negative
234 addcc %o3,%o5,%o3
235 b 9f
236 add %o2, (1*2-1), %o2
237
238
239
240
241 L.1.16:
242 ! remainder is negative
243 addcc %o3,%o5,%o3
244 ! depth 2, accumulated bits -1
245 bl L.2.15
246 srl %o5,1,%o5
247 ! remainder is positive
248 subcc %o3,%o5,%o3
249 ! depth 3, accumulated bits -1
250 bl L.3.15
251 srl %o5,1,%o5
252 ! remainder is positive
253 subcc %o3,%o5,%o3
254 ! depth 4, accumulated bits -1
255 bl L.4.15
256 srl %o5,1,%o5
257 ! remainder is positive
258 subcc %o3,%o5,%o3
259 b 9f
260 add %o2, (-1*2+1), %o2
261
262 L.4.15:
263 ! remainder is negative
264 addcc %o3,%o5,%o3
265 b 9f
266 add %o2, (-1*2-1), %o2
267
268
269 L.3.15:
270 ! remainder is negative
271 addcc %o3,%o5,%o3
272 ! depth 4, accumulated bits -3
273 bl L.4.13
274 srl %o5,1,%o5
275 ! remainder is positive
276 subcc %o3,%o5,%o3
277 b 9f
278 add %o2, (-3*2+1), %o2
279
280 L.4.13:
281 ! remainder is negative
282 addcc %o3,%o5,%o3
283 b 9f
284 add %o2, (-3*2-1), %o2
285
286
287
288 L.2.15:
289 ! remainder is negative
290 addcc %o3,%o5,%o3
291 ! depth 3, accumulated bits -3
292 bl L.3.13
293 srl %o5,1,%o5
294 ! remainder is positive
295 subcc %o3,%o5,%o3
296 ! depth 4, accumulated bits -5
297 bl L.4.11
298 srl %o5,1,%o5
299 ! remainder is positive
300 subcc %o3,%o5,%o3
301 b 9f
302 add %o2, (-5*2+1), %o2
303
304 L.4.11:
305 ! remainder is negative
306 addcc %o3,%o5,%o3
307 b 9f
308 add %o2, (-5*2-1), %o2
309
310
311 L.3.13:
312 ! remainder is negative
313 addcc %o3,%o5,%o3
314 ! depth 4, accumulated bits -7
315 bl L.4.9
316 srl %o5,1,%o5
317 ! remainder is positive
318 subcc %o3,%o5,%o3
319 b 9f
320 add %o2, (-7*2+1), %o2
321
322 L.4.9:
323 ! remainder is negative
324 addcc %o3,%o5,%o3
325 b 9f
326 add %o2, (-7*2-1), %o2
327
328
329
330
331 9:
332 Lend_regular_divide:
333 subcc %o4, 1, %o4 ! one 4-bit pass consumed
334 bge Ldivloop ! more passes remain
335 tst %o3 ! (delay slot) sign of the remainder for the next branch
336 bl,a Lgot_result ! annulled: the add below runs only when the remainder is negative
337 ! non-restoring fixup here (one instruction only!)
338 add %o3, %o1, %o3 ! add the original divisor back once
339
340
341 Lgot_result:
342
343 retl
344 mov %o3, %o0 ! (delay slot) return the remainder in %o0