root/arch/sparc/lib/urem.S

/* [previous][next][first][last][top][bottom][index][help] */
   1 /* urem.S:      This routine was taken from glibc-1.09 and is covered
   2  *              by the GNU Library General Public License Version 2.
   3  */
   4 
   5 /* This file is generated from divrem.m4; DO NOT EDIT! */
   6 /*
   7  * Division and remainder, from Appendix E of the Sparc Version 8
   8  * Architecture Manual, with fixes from Gordon Irlam.
   9  */
  10 
  11 /*
  12  * Input: dividend and divisor in %o0 and %o1 respectively.
  13  *
  14  * m4 parameters:
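  15  *  .urem       name of the function generated by this instantiation
  16  *  rem         operation selector: div => %o0 / %o1; rem => %o0 % %o1
  17  *              (this file was generated with rem)
  17  *  false       signedness selector: true => signed; false => unsigned
  17  *              (this file was generated with false, i.e. unsigned)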
  18  *
  19  * Algorithm parameters:
  20  *  N           how many bits per iteration we try to get (4)
  21  *  WORDSIZE    total number of bits (32)
  22  *
  23  * Derived constants:
  24  *  TOPBITS     number of bits in the top decade of a number
  25  *
  26  * Important variables:
  27  *  Q           the partial quotient under development (initially 0)
  28  *  R           the remainder so far, initially the dividend
  29  *  ITER        number of main division loop iterations required;
  30  *              equal to ceil(log2(quotient) / N).  Note that this
  31  *              is the log base (2^N) of the quotient.
  32  *  V           the current comparand, initially divisor*2^(ITER*N-1)
  33  *
  34  * Cost:
  35  *  Current estimate for non-large dividend is
  36  *      ceil(log2(quotient) / N) * (10 + 7N/2) + C
  37  *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
  38  *  different path, as the upper bits of the quotient must be developed
  39  *  one bit at a time.
  40  */
  41 
  42         .globl .urem
  43 .urem:
             /*
              * .urem: unsigned remainder, %o0 = %o0 % %o1.
              *
              * In:   %o0 = dividend, %o1 = divisor.
              * Out:  %o0 = remainder.  If the divisor is zero the routine
              *       executes "ta ST_DIV0" and, should that return, yields 0.
              *
              * Returns with retl and contains no save/restore.
              *
              * Registers written: %o0, %o2 (Q), %o3 (R), %o4 (ITER),
              * %o5 (V), %g1 (scratch constant), %g7 (count of single-bit
              * steps) and the integer condition codes.  %o1 is only read.
              * Q is accumulated in %o2 but never copied to %o0 here.
              *
              * Reading aid: the instruction written directly after a branch
              * sits in that branch's delay slot.
              *
              * NOTE(review): ST_DIV0 is not defined anywhere in this file;
              * presumably it comes from a header or the build -- confirm.
              */
  44 
  45         ! Ready to divide.  Compute size of quotient; scale comparand.
  46         orcc    %o1, %g0, %o5           /* V = divisor; Z set if it is 0 */
  47         bne     1f
  48         mov     %o0, %o3                /* delay slot: R = dividend */
  49 
  50                 ! Divide by zero trap.  If it returns, return 0 (about as
  51                 ! wrong as possible, but that is what SunOS does...).
  52                 ta      ST_DIV0
  53                 retl
  54                 clr     %o0             /* delay slot: return value 0 */
  55 
  56 1:
  57         cmp     %o3, %o5                        ! if %o1 exceeds %o0, done
  58         blu     Lgot_result             ! (and algorithm fails otherwise)
  59         clr     %o2                     /* delay slot: Q = 0 */
             /* %g1 = 1 << 27; dividends below it take Lnot_really_big. */
  60         sethi   %hi(1 << (32 - 4 - 1)), %g1
  61         cmp     %o3, %g1
  62         blu     Lnot_really_big
  63         clr     %o4                     /* delay slot: ITER = 0 */
  64 
  65         ! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
  66         ! as our usual N-at-a-shot divide step will cause overflow and havoc.
  67         ! The number of bits in the result here is N*ITER+SC, where SC <= N.
  68         ! Compute ITER in an unorthodox manner: know we need to shift V into
  69         ! the top decade: so do not even bother to compare to R.
             /* Shift V left 4 bits per pass until V >= %g1; %o4 counts passes. */
  70         1:
  71                 cmp     %o5, %g1
  72                 bgeu    3f
  73                 mov     1, %g7          /* delay slot: step count = 1 */
  74                 sll     %o5, 4, %o5
  75                 b       1b
  76                 add     %o4, 1, %o4     /* delay slot: ITER++ */
  77 
  78         ! Now compute %g7.
             /* Reached from 3: below while V < R: double V, count in %g7. */
  79         2:      addcc   %o5, %o5, %o5   /* V <<= 1; carry = old bit 31 */
  80                 bcc     Lnot_too_big
  81                 add     %g7, 1, %g7     /* delay slot: count this shift */
  82 
  83                 ! We get here if the %o1 overflowed while shifting.
  84                 ! This means that %o3 has the high-order bit set.
  85                 ! Restore %o5 and subtract from %o3.
                     /* (The register that overflowed is %o5, the shifted   */
                     /* copy of %o1; %g1 becomes (1 << 27) << 4 = 1 << 31.) */
  86                 sll     %g1, 4, %g1     ! high order bit
  87                 srl     %o5, 1, %o5             ! rest of %o5
  88                 add     %o5, %g1, %o5
  89                 b       Ldo_single_div
  90                 sub     %g7, 1, %g7     /* delay slot: uncount that shift */
  91 
  92         Lnot_too_big:
  93         3:      cmp     %o5, %o3
  94                 blu     2b              /* V < R (unsigned): shift again */
  95                 nop
  96                 be      Ldo_single_div
  97                 nop
             /* Taken or not, the be above reaches Ldo_single_div: every  */
             /* line between here and that label is a comment.            */
  98         /* NB: these are commented out in the V8-Sparc manual as well */
  99         /* (I do not understand this) */
 100         ! %o5 > %o3: went too far: back up 1 step
 101         !       srl     %o5, 1, %o5
 102         !       dec     %g7
 103         ! do single-bit divide steps
 104         !
 105         ! We have to be careful here.  We know that %o3 >= %o5, so we can do the
 106         ! first divide step without thinking.  BUT, the others are conditional,
 107         ! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
 108         ! order bit set in the first step, just falling into the regular
 109         ! division loop will mess up the first time around.
 110         ! So we unroll slightly...
 111         Ldo_single_div:
 112                 subcc   %g7, 1, %g7
 113                 bl      Lend_regular_divide     /* no single-bit steps */
 114                 nop
 115                 sub     %o3, %o5, %o3   /* first step, unconditional */
 116                 mov     1, %o2          /* Q = 1 */
 117                 b       Lend_single_divloop
 118                 nop
             /* One quotient bit per pass.  The bl below tests the N flag */
             /* left by "tst %o3" in the delay slot of the loop-closing   */
             /* bge; the sll in between does not write condition codes.   */
 119         Lsingle_divloop:
 120                 sll     %o2, 1, %o2
 121                 bl      1f
 122                 srl     %o5, 1, %o5     /* delay slot: V >>= 1 */
 123                 ! %o3 >= 0
 124                 sub     %o3, %o5, %o3
 125                 b       2f
 126                 add     %o2, 1, %o2     /* delay slot: Q += 1 */
 127         1:      ! %o3 < 0
 128                 add     %o3, %o5, %o3
 129                 sub     %o2, 1, %o2
 130         2:
 131         Lend_single_divloop:
 132                 subcc   %g7, 1, %g7
 133                 bge     Lsingle_divloop
 134                 tst     %o3             /* delay slot: flags for next bl */
 135                 b,a     Lend_regular_divide
 136 
             /* Dividend below 1 << 27: shift V left 4 bits per pass until */
             /* V > R (unsigned), counting passes in %o4.                  */
 137 Lnot_really_big:
 138 1:
 139         sll     %o5, 4, %o5
 140         cmp     %o5, %o3
 141         bleu    1b
 142         addcc   %o4, 1, %o4             /* delay slot: ITER++, sets Z */
 143         be      Lgot_result             /* Z from the addcc: ITER == 0 */
 144         sub     %o4, 1, %o4             /* delay slot: ITER-- */
 145 
 146         tst     %o3     ! set up for initial iteration
             /*
              * Main loop: each pass makes room for 4 bits in Q and runs four
              * steps.  Every step halves V in a delay slot, then subtracts V
              * from R when R was non-negative or adds V when R was negative.
              * The sixteen leaves add an odd value from -15 to +15 to Q,
              * written as (bits*2 +/- 1), and branch to 9: below.
              * Labels have the form L.<depth>.<number>; in every label here
              * the number equals 16 plus the accumulated bits named in the
              * comment at that branch.
              */
 147 Ldivloop:
 148         sll     %o2, 4, %o2
 149                 ! depth 1, accumulated bits 0
 150         bl      L.1.16
 151         srl     %o5,1,%o5
 152         ! remainder is positive
 153         subcc   %o3,%o5,%o3
 154                         ! depth 2, accumulated bits 1
 155         bl      L.2.17
 156         srl     %o5,1,%o5
 157         ! remainder is positive
 158         subcc   %o3,%o5,%o3
 159                         ! depth 3, accumulated bits 3
 160         bl      L.3.19
 161         srl     %o5,1,%o5
 162         ! remainder is positive
 163         subcc   %o3,%o5,%o3
 164                         ! depth 4, accumulated bits 7
 165         bl      L.4.23
 166         srl     %o5,1,%o5
 167         ! remainder is positive
 168         subcc   %o3,%o5,%o3
 169                 b       9f
 170                 add     %o2, (7*2+1), %o2
 171         
 172 L.4.23:
 173         ! remainder is negative
 174         addcc   %o3,%o5,%o3
 175                 b       9f
 176                 add     %o2, (7*2-1), %o2
 177         
 178         
 179 L.3.19:
 180         ! remainder is negative
 181         addcc   %o3,%o5,%o3
 182                         ! depth 4, accumulated bits 5
 183         bl      L.4.21
 184         srl     %o5,1,%o5
 185         ! remainder is positive
 186         subcc   %o3,%o5,%o3
 187                 b       9f
 188                 add     %o2, (5*2+1), %o2
 189         
 190 L.4.21:
 191         ! remainder is negative
 192         addcc   %o3,%o5,%o3
 193                 b       9f
 194                 add     %o2, (5*2-1), %o2
 195         
 196         
 197         
 198 L.2.17:
 199         ! remainder is negative
 200         addcc   %o3,%o5,%o3
 201                         ! depth 3, accumulated bits 1
 202         bl      L.3.17
 203         srl     %o5,1,%o5
 204         ! remainder is positive
 205         subcc   %o3,%o5,%o3
 206                         ! depth 4, accumulated bits 3
 207         bl      L.4.19
 208         srl     %o5,1,%o5
 209         ! remainder is positive
 210         subcc   %o3,%o5,%o3
 211                 b       9f
 212                 add     %o2, (3*2+1), %o2
 213         
 214 L.4.19:
 215         ! remainder is negative
 216         addcc   %o3,%o5,%o3
 217                 b       9f
 218                 add     %o2, (3*2-1), %o2
 219         
 220         
 221 L.3.17:
 222         ! remainder is negative
 223         addcc   %o3,%o5,%o3
 224                         ! depth 4, accumulated bits 1
 225         bl      L.4.17
 226         srl     %o5,1,%o5
 227         ! remainder is positive
 228         subcc   %o3,%o5,%o3
 229                 b       9f
 230                 add     %o2, (1*2+1), %o2
 231         
 232 L.4.17:
 233         ! remainder is negative
 234         addcc   %o3,%o5,%o3
 235                 b       9f
 236                 add     %o2, (1*2-1), %o2
 237         
 238         
 239         
 240         
 241 L.1.16:
 242         ! remainder is negative
 243         addcc   %o3,%o5,%o3
 244                         ! depth 2, accumulated bits -1
 245         bl      L.2.15
 246         srl     %o5,1,%o5
 247         ! remainder is positive
 248         subcc   %o3,%o5,%o3
 249                         ! depth 3, accumulated bits -1
 250         bl      L.3.15
 251         srl     %o5,1,%o5
 252         ! remainder is positive
 253         subcc   %o3,%o5,%o3
 254                         ! depth 4, accumulated bits -1
 255         bl      L.4.15
 256         srl     %o5,1,%o5
 257         ! remainder is positive
 258         subcc   %o3,%o5,%o3
 259                 b       9f
 260                 add     %o2, (-1*2+1), %o2
 261         
 262 L.4.15:
 263         ! remainder is negative
 264         addcc   %o3,%o5,%o3
 265                 b       9f
 266                 add     %o2, (-1*2-1), %o2
 267         
 268         
 269 L.3.15:
 270         ! remainder is negative
 271         addcc   %o3,%o5,%o3
 272                         ! depth 4, accumulated bits -3
 273         bl      L.4.13
 274         srl     %o5,1,%o5
 275         ! remainder is positive
 276         subcc   %o3,%o5,%o3
 277                 b       9f
 278                 add     %o2, (-3*2+1), %o2
 279         
 280 L.4.13:
 281         ! remainder is negative
 282         addcc   %o3,%o5,%o3
 283                 b       9f
 284                 add     %o2, (-3*2-1), %o2
 285         
 286         
 287         
 288 L.2.15:
 289         ! remainder is negative
 290         addcc   %o3,%o5,%o3
 291                         ! depth 3, accumulated bits -3
 292         bl      L.3.13
 293         srl     %o5,1,%o5
 294         ! remainder is positive
 295         subcc   %o3,%o5,%o3
 296                         ! depth 4, accumulated bits -5
 297         bl      L.4.11
 298         srl     %o5,1,%o5
 299         ! remainder is positive
 300         subcc   %o3,%o5,%o3
 301                 b       9f
 302                 add     %o2, (-5*2+1), %o2
 303         
 304 L.4.11:
 305         ! remainder is negative
 306         addcc   %o3,%o5,%o3
 307                 b       9f
 308                 add     %o2, (-5*2-1), %o2
 309         
 310         
 311 L.3.13:
 312         ! remainder is negative
 313         addcc   %o3,%o5,%o3
 314                         ! depth 4, accumulated bits -7
 315         bl      L.4.9
 316         srl     %o5,1,%o5
 317         ! remainder is positive
 318         subcc   %o3,%o5,%o3
 319                 b       9f
 320                 add     %o2, (-7*2+1), %o2
 321         
 322 L.4.9:
 323         ! remainder is negative
 324         addcc   %o3,%o5,%o3
 325                 b       9f
 326                 add     %o2, (-7*2-1), %o2
 327         
 328         
 329         
 330         
             /* Common loop tail: every leaf above and the single-bit path */
             /* arrive here.  Repeat Ldivloop while ITER - 1 >= 0.         */
 331         9:
 332 Lend_regular_divide:
 333         subcc   %o4, 1, %o4
 334         bge     Ldivloop
 335         tst     %o3                     /* delay slot: runs on both paths */
             /* Annulled branch: the add in its delay slot executes only   */
             /* when the branch is taken, i.e. when R is negative.         */
 336         bl,a    Lgot_result
 337         ! non-restoring fixup here (one instruction only!)
 338         add     %o3, %o1, %o3           /* R += original divisor %o1 */
 339 
 340 
 341 Lgot_result:
 342 
 343         retl
 344         mov %o3, %o0                    /* delay slot: return R */

/* [previous][next][first][last][top][bottom][index][help] */