root/arch/sparc/lib/strlen.S

/* [previous][next][first][last][top][bottom][index][help] */
   1 /* strlen.S: Sparc optimized strlen().
   2  *
   3  * This was hand optimized by davem@caip.rutgers.edu from
   4  * the C-code in GNU-libc.
   5  */
   6 
   7 #include <asm/cprefix.h>
   8 
   9 #define LO_MAGIC 0x01010101
  10 #define HI_MAGIC 0x80808080
  11 
  12         .align 4
  13         .global C_LABEL(strlen)
     /*
      * strlen: count the bytes that precede the first zero byte of a string.
      *
      * In:   %o0 = address of the first byte of the string
      * Out:  %o0 = number of non-zero bytes before the terminating zero byte
      * Uses: %o1 (copy of the start address), %o2 (LO_MAGIC), %o3 (HI_MAGIC),
      *       %g2 and %g3 (scratch), integer condition codes.
      *       Returns with retl and never touches %sp.
      *
      * Method:
      *   1. While %o0 is not word aligned, test one byte at a time.
      *   2. Once aligned, load a word at a time and compute
      *      (word - LO_MAGIC) & HI_MAGIC.  A zero byte anywhere in the
      *      word always makes this non-zero, so a zero result means the
      *      word can be skipped.
      *   3. A non-zero result is only a hint: some words without a zero
      *      byte (e.g. ones containing 0xff bytes) trip it too.  The four
      *      bytes are therefore checked one by one, and if none is zero
      *      the word loop is re-entered with the next word already loaded.
      *
      * Because whole aligned words are loaded, up to three bytes past the
      * terminating zero byte may be read.
      *
      * Delay slots: a branch written with ",a" executes its delay slot
      * only when the branch is taken; a plain branch always executes it.
      * Several paths below depend on that difference.
      */
  14 C_LABEL(strlen):
  15         mov     %o0,%o1                 /* %o1 = start address, kept for the final subtraction */
  16         andcc   %o0,3,%g0               ! and with %o0 so no dependency problems
  17         be      scan_words              /* taken when %o0 is already word aligned */
  18          sethi  %hi(HI_MAGIC),%g2       ! common case and most Sparcs predict taken
  19 
             /* Unaligned start: test single bytes until %o0 is word aligned. */
  20         ldsb    [%o0],%g2
  21 still_not_word_aligned:
  22         cmp     %g2,0
  23         bne,a   1f
  24          add    %o0,1,%o0               /* annulled slot: advance only when the byte was non-zero */
  25 
  26         /* Ok, so there are tons of quick interlocks above for the
  27          * < 4 length string unaligned... not too common so I'm not
  28          * very concerned.
  29          */
  30         retl
  31          sub    %o0,%o1,%o0             /* zero byte found before alignment: length = %o0 - start */
  32 
  33 1:
  34         andcc   %o0,3,%g0               /* is %o0 word aligned now? */
  35         bne,a   still_not_word_aligned
  36          ldsb   [%o0],%g2               /* annulled slot: fetch the next byte only when looping */
  37 
  38         /* HyperSparc executes each sethi/or pair in 1 cycle. */
  39         sethi   %hi(HI_MAGIC),%g2
  40 scan_words:
  41         or      %g2,%lo(HI_MAGIC),%o3   /* %o3 = HI_MAGIC; both entry paths leave %hi(HI_MAGIC) in %g2 */
  42         sethi   %hi(LO_MAGIC),%g3
  43         or      %g3,%lo(LO_MAGIC),%o2   /* %o2 = LO_MAGIC */
  44 next_word:
  45         ld      [%o0],%g2               ! no dependencies
  46 next_word_preloaded:
  47         sub     %g2,%o2,%g2             ! lots of locks here
  48         andcc   %g2,%o3,%g0             ! and I dont like it...
  49         be      next_word               /* zero result: this word holds no zero byte */
  50          add    %o0,4,%o0               /* always executed: %o0 now points just past the tested word */
  51 
  52         /* Check every byte of the flagged word, which lies at %o0-4 .. %o0-1. */
  53 byte_zero:
  54         ldsb    [%o0-4],%g2
  55         cmp     %g2,0
  56         bne     byte_one
  57          add    %o0,-4,%g3              /* always executed: %g3 = address of the flagged word */
  58 
  59         retl
  60          sub    %g3,%o1,%o0             /* zero at byte 0: length = word address - start */
  61 
  62 byte_one:
  63         ldsb    [%o0-3],%g2
  64         cmp     %g2,0
  65         bne,a   byte_two_and_three
  66          ldsb   [%o0-2],%g2             /* annulled slot: preload byte 2 only when byte 1 was non-zero */
  67 
  68         sub     %g3,%o1,%o0
  69         retl
  70          add    %o0,1,%o0               /* zero at byte 1: length = (word address - start) + 1 */
  71 
  72 byte_two_and_three:
  73         cmp     %g2,0                   /* %g2 = byte 2, loaded in the delay slot of the branch here */
  74         be,a    found_it
  75          sub    %g3,%o1,%o0             /* annulled slot: executed only when byte 2 is zero */
  76 
  77         ldsb    [%o0-1],%g2
  78         cmp     %g2,0
  79         bne,a   next_word_preloaded     /* false alarm: none of the four bytes was zero */
  80          ld     [%o0],%g2               /* annulled slot: load the next word before resuming the scan */
  81 
  82         sub     %g3,%o1,%o0
  83         retl
  84          add    %o0,3,%o0               /* zero at byte 3: length = (word address - start) + 3 */
  85 
  86 found_it:
  87         retl
  88          add    %o0,2,%o0               /* zero at byte 2: %o0 already holds word address - start */

/* [previous][next][first][last][top][bottom][index][help] */