root/arch/sparc/lib/urem.S

/* [previous][next][first][last][top][bottom][index][help] */
   1 /* $Id: urem.S,v 1.2 1995/11/25 00:59:11 davem Exp $
   2  * urem.S:      This routine was taken from glibc-1.09 and is covered
   3  *              by the GNU Library General Public License Version 2.
   4  */
   5 
   6 /* This file is generated from divrem.m4; DO NOT EDIT! */
   7 /*
   8  * Division and remainder, from Appendix E of the Sparc Version 8
   9  * Architecture Manual, with fixes from Gordon Irlam.
  10  */
  11 
  12 /*
  13  * Input: dividend and divisor in %o0 and %o1 respectively.
  14  *
  15  * m4 parameters:
  16  *  NAME        name of function to generate (here: .urem)
  17  *  OP          OP=div => %o0 / %o1; OP=rem => %o0 % %o1 (here: rem)
  18  *  S           S=true => signed; S=false => unsigned (here: false)
  19  *
  20  * Algorithm parameters:
  21  *  N           how many bits per iteration we try to get (4)
  22  *  WORDSIZE    total number of bits (32)
  23  *
  24  * Derived constants:
  25  *  TOPBITS     number of bits in the top decade of a number
  26  *
  27  * Important variables:
  28  *  Q           the partial quotient under development (initially 0)
  29  *  R           the remainder so far, initially the dividend
  30  *  ITER        number of main division loop iterations required;
  31  *              equal to ceil(log2(quotient) / N).  Note that this
  32  *              is the log base (2^N) of the quotient.
  33  *  V           the current comparand, initially divisor*2^(ITER*N-1)
  34  *
  35  * Cost:
  36  *  Current estimate for non-large dividend is
  37  *      ceil(log2(quotient) / N) * (10 + 7N/2) + C
  38  *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
  39  *  different path, as the upper bits of the quotient must be developed
  40  *  one bit at a time.
  41  */
  42 
  43         .globl .urem
             /*
              * .urem -- unsigned 32-bit remainder.
              *   In:  %o0 = dividend, %o1 = divisor
              *   Out: %o0 = %o0 % %o1 (0 if the divide-by-zero trap returns)
              *
              * Register use in this routine (names from the header comment):
              *   %o2 = Q      partial quotient (built up but not returned here)
              *   %o3 = R      running remainder, starts as the dividend
              *   %o4 = ITER   number of 4-bit main-loop passes still to run
              *   %o5 = V      scaled copy of the divisor (the comparand)
              *   %g1 = 1 << (32 - 4 - 1), the "large dividend" threshold
              *   %g7 = count of single-bit steps on the large-dividend path
              *   %o1 is left untouched so it can be used for the final fixup.
              *
              * The routine issues no save/restore and returns with retl.
              * The instruction written after each branch is its delay slot.
              */
  44 .urem:
  45 
  46         ! Ready to divide.  Compute size of quotient; scale comparand.
             /* V = divisor with condition codes set from it; the delay slot */
             /* copies the dividend into R whether or not the branch is taken. */
  47         orcc    %o1, %g0, %o5
  48         bne     1f
  49         mov     %o0, %o3
  50 
  51                 ! Divide by zero trap.  If it returns, return 0 (about as
  52                 ! wrong as possible, but that is what SunOS does...).
                     /* NOTE(review): ST_DIV0 is not defined in this file; */
                     /* presumably supplied by an included header -- confirm. */
  53                 ta      ST_DIV0
  54                 retl
  55                 clr     %o0
  56 
  57 1:
             /* R < V (unsigned): the dividend is already the remainder. */
             /* The delay slot clears Q. */
  58         cmp     %o3, %o5                        ! if %o1 exceeds %o0, done
  59         blu     Lgot_result             ! (and algorithm fails otherwise)
  60         clr     %o2
             /* %g1 = 1 << 27.  Dividends below it take the 4-bits-per-pass */
             /* path at Lnot_really_big; the delay slot clears ITER. */
  61         sethi   %hi(1 << (32 - 4 - 1)), %g1
  62         cmp     %o3, %g1
  63         blu     Lnot_really_big
  64         clr     %o4
  65 
  66         ! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
  67         ! as our usual N-at-a-shot divide step will cause overflow and havoc.
  68         ! The number of bits in the result here is N*ITER+SC, where SC <= N.
  69         ! Compute ITER in an unorthodox manner: know we need to shift V into
  70         ! the top decade: so do not even bother to compare to R.
             /* Shift V left 4 bits per pass until V >= %g1, counting the */
             /* passes in ITER.  %g7 is set to 1 in the bgeu delay slot. */
  71         1:
  72                 cmp     %o5, %g1
  73                 bgeu    3f
  74                 mov     1, %g7
  75                 sll     %o5, 4, %o5
  76                 b       1b
  77                 add     %o4, 1, %o4
  78 
  79         ! Now compute %g7.
             /* Reached from the blu at 3: below while V < R: double V and */
             /* count the doubling in %g7 (delay slot).  A carry out of the */
             /* addcc means V no longer fits in 32 bits. */
  80         2:      addcc   %o5, %o5, %o5
  81                 bcc     Lnot_too_big
  82                 add     %g7, 1, %g7
  83 
  84                 ! We get here if the %o1 overflowed while shifting.
  85                 ! This means that %o3 has the high-order bit set.
  86                 ! Restore %o5 and subtract from %o3.
                     /* %g1 << 4 is 1 << 31: undo the doubling by shifting V */
                     /* right and putting the lost top bit back; the delay */
                     /* slot of the branch undoes the %g7 increment. */
  87                 sll     %g1, 4, %g1     ! high order bit
  88                 srl     %o5, 1, %o5             ! rest of %o5
  89                 add     %o5, %g1, %o5
  90                 b       Ldo_single_div
  91                 sub     %g7, 1, %g7
  92 
             /* V < R: keep doubling at 2:.  V == R: go straight to the */
             /* single-bit steps.  V > R: fall through into Ldo_single_div. */
  93         Lnot_too_big:
  94         3:      cmp     %o5, %o3
  95                 blu     2b
  96                 nop
  97                 be      Ldo_single_div
  98                 nop
  99         /* NB: these are commented out in the V8-Sparc manual as well */
 100         /* (I do not understand this) */
 101         ! %o5 > %o3: went too far: back up 1 step
 102         !       srl     %o5, 1, %o5
 103         !       dec     %g7
 104         ! do single-bit divide steps
 105         !
 106         ! We have to be careful here.  We know that %o3 >= %o5, so we can do the
 107         ! first divide step without thinking.  BUT, the others are conditional,
 108         ! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
 109         ! order bit set in the first step, just falling into the regular
 110         ! division loop will mess up the first time around.
 111         ! So we unroll slightly...
             /* %g7 -= 1; if that goes negative there are no single-bit */
             /* steps to do.  Otherwise the first step is unconditional: */
             /* R -= V and Q = 1, then join the loop at its counter test. */
 112         Ldo_single_div:
 113                 subcc   %g7, 1, %g7
 114                 bl      Lend_regular_divide
 115                 nop
 116                 sub     %o3, %o5, %o3
 117                 mov     1, %o2
 118                 b       Lend_single_divloop
 119                 nop
             /* One non-restoring step per pass: Q <<= 1 and V >>= 1, then */
             /* R -= V, Q += 1 if R was >= 0, otherwise R += V, Q -= 1. */
             /* The bl tests the condition codes left by the tst %o3 in the */
             /* delay slot of the bge below. */
 120         Lsingle_divloop:
 121                 sll     %o2, 1, %o2
 122                 bl      1f
 123                 srl     %o5, 1, %o5
 124                 ! %o3 >= 0
 125                 sub     %o3, %o5, %o3
 126                 b       2f
 127                 add     %o2, 1, %o2
 128         1:      ! %o3 < 0
 129                 add     %o3, %o5, %o3
 130                 sub     %o2, 1, %o2
 131         2:
             /* Count down %g7; while it stays >= 0 take another step.  The */
             /* final branch is annulled, so it has no live delay slot. */
 132         Lend_single_divloop:
 133                 subcc   %g7, 1, %g7
 134                 bge     Lsingle_divloop
 135                 tst     %o3
 136                 b,a     Lend_regular_divide
 137 
             /* Small-dividend path: shift V left 4 bits per pass while */
             /* V <= R, incrementing ITER in the delay slot.  The be is */
             /* taken only if that last addcc produced zero; either way its */
             /* delay slot takes ITER back down by one. */
 138 Lnot_really_big:
 139 1:
 140         sll     %o5, 4, %o5
 141         cmp     %o5, %o3
 142         bleu    1b
 143         addcc   %o4, 1, %o4
 144         be      Lgot_result
 145         sub     %o4, 1, %o4
 146 
 147         tst     %o3     ! set up for initial iteration
             /*
              * Main loop: each pass makes room for 4 quotient bits (Q <<= 4)
              * and performs 4 non-restoring steps as an unrolled binary
              * decision tree.  At every node V is halved in the delay slot
              * and the sign of R picks the branch: R >= 0 subtracts V,
              * R < 0 adds V.  Labels are L.<depth>.<16 + accumulated bits>;
              * each leaf adds (2 * accumulated bits +/- 1) to Q and jumps
              * to 9:.
              */
 148 Ldivloop:
 149         sll     %o2, 4, %o2
 150                 ! depth 1, accumulated bits 0
 151         bl      L.1.16
 152         srl     %o5,1,%o5
 153         ! remainder is positive
 154         subcc   %o3,%o5,%o3
 155                         ! depth 2, accumulated bits 1
 156         bl      L.2.17
 157         srl     %o5,1,%o5
 158         ! remainder is positive
 159         subcc   %o3,%o5,%o3
 160                         ! depth 3, accumulated bits 3
 161         bl      L.3.19
 162         srl     %o5,1,%o5
 163         ! remainder is positive
 164         subcc   %o3,%o5,%o3
 165                         ! depth 4, accumulated bits 7
 166         bl      L.4.23
 167         srl     %o5,1,%o5
 168         ! remainder is positive
 169         subcc   %o3,%o5,%o3
 170                 b       9f
 171                 add     %o2, (7*2+1), %o2
 172         
 173 L.4.23:
 174         ! remainder is negative
 175         addcc   %o3,%o5,%o3
 176                 b       9f
 177                 add     %o2, (7*2-1), %o2
 178         
 179         
 180 L.3.19:
 181         ! remainder is negative
 182         addcc   %o3,%o5,%o3
 183                         ! depth 4, accumulated bits 5
 184         bl      L.4.21
 185         srl     %o5,1,%o5
 186         ! remainder is positive
 187         subcc   %o3,%o5,%o3
 188                 b       9f
 189                 add     %o2, (5*2+1), %o2
 190         
 191 L.4.21:
 192         ! remainder is negative
 193         addcc   %o3,%o5,%o3
 194                 b       9f
 195                 add     %o2, (5*2-1), %o2
 196         
 197         
 198         
 199 L.2.17:
 200         ! remainder is negative
 201         addcc   %o3,%o5,%o3
 202                         ! depth 3, accumulated bits 1
 203         bl      L.3.17
 204         srl     %o5,1,%o5
 205         ! remainder is positive
 206         subcc   %o3,%o5,%o3
 207                         ! depth 4, accumulated bits 3
 208         bl      L.4.19
 209         srl     %o5,1,%o5
 210         ! remainder is positive
 211         subcc   %o3,%o5,%o3
 212                 b       9f
 213                 add     %o2, (3*2+1), %o2
 214         
 215 L.4.19:
 216         ! remainder is negative
 217         addcc   %o3,%o5,%o3
 218                 b       9f
 219                 add     %o2, (3*2-1), %o2
 220         
 221         
 222 L.3.17:
 223         ! remainder is negative
 224         addcc   %o3,%o5,%o3
 225                         ! depth 4, accumulated bits 1
 226         bl      L.4.17
 227         srl     %o5,1,%o5
 228         ! remainder is positive
 229         subcc   %o3,%o5,%o3
 230                 b       9f
 231                 add     %o2, (1*2+1), %o2
 232         
 233 L.4.17:
 234         ! remainder is negative
 235         addcc   %o3,%o5,%o3
 236                 b       9f
 237                 add     %o2, (1*2-1), %o2
 238         
 239         
 240         
 241         
             /* Negative half of the tree: R was < 0 on entry to this pass. */
 242 L.1.16:
 243         ! remainder is negative
 244         addcc   %o3,%o5,%o3
 245                         ! depth 2, accumulated bits -1
 246         bl      L.2.15
 247         srl     %o5,1,%o5
 248         ! remainder is positive
 249         subcc   %o3,%o5,%o3
 250                         ! depth 3, accumulated bits -1
 251         bl      L.3.15
 252         srl     %o5,1,%o5
 253         ! remainder is positive
 254         subcc   %o3,%o5,%o3
 255                         ! depth 4, accumulated bits -1
 256         bl      L.4.15
 257         srl     %o5,1,%o5
 258         ! remainder is positive
 259         subcc   %o3,%o5,%o3
 260                 b       9f
 261                 add     %o2, (-1*2+1), %o2
 262         
 263 L.4.15:
 264         ! remainder is negative
 265         addcc   %o3,%o5,%o3
 266                 b       9f
 267                 add     %o2, (-1*2-1), %o2
 268         
 269         
 270 L.3.15:
 271         ! remainder is negative
 272         addcc   %o3,%o5,%o3
 273                         ! depth 4, accumulated bits -3
 274         bl      L.4.13
 275         srl     %o5,1,%o5
 276         ! remainder is positive
 277         subcc   %o3,%o5,%o3
 278                 b       9f
 279                 add     %o2, (-3*2+1), %o2
 280         
 281 L.4.13:
 282         ! remainder is negative
 283         addcc   %o3,%o5,%o3
 284                 b       9f
 285                 add     %o2, (-3*2-1), %o2
 286         
 287         
 288         
 289 L.2.15:
 290         ! remainder is negative
 291         addcc   %o3,%o5,%o3
 292                         ! depth 3, accumulated bits -3
 293         bl      L.3.13
 294         srl     %o5,1,%o5
 295         ! remainder is positive
 296         subcc   %o3,%o5,%o3
 297                         ! depth 4, accumulated bits -5
 298         bl      L.4.11
 299         srl     %o5,1,%o5
 300         ! remainder is positive
 301         subcc   %o3,%o5,%o3
 302                 b       9f
 303                 add     %o2, (-5*2+1), %o2
 304         
 305 L.4.11:
 306         ! remainder is negative
 307         addcc   %o3,%o5,%o3
 308                 b       9f
 309                 add     %o2, (-5*2-1), %o2
 310         
 311         
 312 L.3.13:
 313         ! remainder is negative
 314         addcc   %o3,%o5,%o3
 315                         ! depth 4, accumulated bits -7
 316         bl      L.4.9
 317         srl     %o5,1,%o5
 318         ! remainder is positive
 319         subcc   %o3,%o5,%o3
 320                 b       9f
 321                 add     %o2, (-7*2+1), %o2
 322         
 323 L.4.9:
 324         ! remainder is negative
 325         addcc   %o3,%o5,%o3
 326                 b       9f
 327                 add     %o2, (-7*2-1), %o2
 328         
 329         
 330         
 331         
             /* Common tail of every leaf and of the single-bit path: */
             /* ITER -= 1 and run another 4-bit pass while it stays >= 0. */
             /* The tst in the delay slot sets the codes on R both for the */
             /* next pass and for the fixup test below. */
 332         9:
 333 Lend_regular_divide:
 334         subcc   %o4, 1, %o4
 335         bge     Ldivloop
 336         tst     %o3
             /* R < 0: annulled branch, so the add in its delay slot runs */
             /* only when the branch is taken, adding the original divisor */
             /* back to make the remainder non-negative. */
 337         bl,a    Lgot_result
 338         ! non-restoring fixup here (one instruction only!)
 339         add     %o3, %o1, %o3
 340 
 341 
             /* Return R in %o0; the mov runs in the delay slot of retl. */
 342 Lgot_result:
 343 
 344         retl
 345         mov %o3, %o0

/* [previous][next][first][last][top][bottom][index][help] */