root/arch/alpha/lib/memset.S

/* [previous][next][first][last][top][bottom][index][help] */
   1 /*
   2  * linux/arch/alpha/lib/memset.S
   3  *
   4  * This is an efficient (and small) implementation of the C library "memset()"
   5  * function for the alpha.
   6  *
   7  *      (C) Copyright 1996 Linus Torvalds
   8  *
   9  * This routine is "moral-ware": you are free to use it any way you wish, and
  10  * the only obligation I put on you is a moral one: if you make any improvements
  11  * to the routine, please send me your improvements for me to use similarly.
  12  *
  13  * The scheduling comments are according to the EV5 documentation (and done by
  14  * hand, so they might well be incorrect; please do tell me about it.)
  15  */
  16 
  17         .set noat
  18         .set noreorder
  19 .text
     /*
      * __memset / __constant_c_memset: fill $18 bytes starting at address $16.
      *
      * Entry points:
      *   __memset            - the low byte of $17 is the fill byte; it is
      *                         replicated into all 8 bytes of $17 and control
      *                         then falls through into __constant_c_memset.
      *   __constant_c_memset - all 64 bits of $17 are used as the store
      *                         pattern; no replication is done on this path.
      *                         NOTE(review): presumably callers pass an already
      *                         replicated byte pattern - confirm against callers.
      *
      * In:   $16 = destination address
      *       $17 = fill value (see entry points above)
      *       $18 = byte count, tested as a signed value: a count <= 0
      *             returns without storing anything
      * Out:  $0  = the original value of $16
      * Uses: $1-$7 as scratch; $16, $17 and $18 are overwritten.
      *       No stack frame is created; the routine returns through $26.
      *
      * Register roles once past the entry code:
      *   $6 = $16 + $18, the address one past the last byte to fill
      *   $5 = address used for the quadword (8-byte) stores
      *   $3 = number of whole quadwords left (in the store loop)
      *
      * The trailing E0/E1 remarks on each instruction are the original
      * author's hand-made EV5 issue-slot notes.
      */
  20         .globl __memset
  21         .globl __constant_c_memset
  22         .ent __memset
  23 .align 5
  24 __memset:
  25         .frame $30,0,$26,0
  26         .prologue 0
  27 
             /*
              * Replicate the fill byte: keep only byte 0 of $17, then double
              * the filled width three times (1 -> 2 -> 4 -> 8 bytes) with
              * shift-and-OR steps, using $1 as the temporary.
              */
  28         zapnot $17,1,$17        /* E0 */
  29         sll $17,8,$1            /* E1 (p-c latency, next cycle) */
  30         bis $17,$1,$17          /* E0 (p-c latency, next cycle) */
  31         sll $17,16,$1           /* E1 (p-c latency, next cycle) */
  32 
  33         bis $17,$1,$17          /* E0 (p-c latency, next cycle) */
  34         sll $17,32,$1           /* E1 (p-c latency, next cycle) */
  35         bis $17,$1,$17          /* E0 (p-c latency, next cycle) */
             /* The destination is $31, so the loaded value is discarded: a filler no-op. */
  36         ldq_u $31,0($30)        /* .. E1 */
  37 
  38 .align 5
  39 __constant_c_memset:
             /*
              * $6 = end address (one past the last byte), $0 = return value.
              * $1 = start XOR end; with the low three bits cleared it is zero
              * exactly when start and end lie in the same aligned quadword.
              */
  40         addq $18,$16,$6         /* E0 */
  41         bis $16,$16,$0          /* .. E1 */
  42         xor $16,$6,$1           /* E0 */
  43         ble $18,end             /* .. E1 */
  44 
  45         bic $1,7,$1             /* E0 */
  46         beq $1,within_one_quad  /* .. E1 (note EV5 zero-latency forwarding) */
             /* $3 = byte offset of the destination within its quadword (0 = aligned). */
  47         and $16,7,$3            /* E0 */
  48         beq $3,aligned          /* .. E1 (note EV5 zero-latency forwarding) */
  49 
             /*
              * Unaligned head: read the quadword containing the first byte,
              * keep the bytes that lie below the start offset, put the fill
              * pattern in the bytes from the start offset upward, and write
              * the merged quadword back.  $5 keeps the original address for
              * the store while $16 and $18 are moved on to the next quadword
              * boundary.
              */
  50         ldq_u $4,0($16)         /* E0 */
  51         bis $16,$16,$5          /* .. E1 */
  52         insql $17,$16,$2        /* E0 */
  53         subq $3,8,$3            /* .. E1 */
  54 
  55         addq $18,$3,$18         /* E0           $18 is new count ($3 is negative) */
  56         mskql $4,$16,$4         /* .. E1 (and possible load stall) */
  57         subq $16,$3,$16         /* E0           $16 is new aligned destination */
  58         bis $2,$4,$1            /* .. E1 */
  59 
             /* The bis into $31 and the load into $31 below are no-ops (results discarded). */
  60         bis $31,$31,$31         /* E0 */
  61         ldq_u $31,0($30)        /* .. E1 */
  62         stq_u $1,0($5)          /* E0 */
  63         bis $31,$31,$31         /* .. E1 */
  64 
  65 .align 4
  66 aligned:
             /*
              * $16 is quadword aligned here.  Split the remaining count into
              * $3 = whole quadwords and $18 = leftover bytes (0..7); $5 is the
              * store cursor.
              */
  67         sra $18,3,$3            /* E0 */
  68         and $18,7,$18           /* .. E1 */
  69         bis $16,$16,$5          /* E0 */
  70         beq $3,no_quad          /* .. E1 */
  71 
  72 .align 3
  73 loop:
             /* Store one full quadword of the pattern per iteration, $3 times. */
  74         stq $17,0($5)           /* E0 */
  75         subq $3,1,$3            /* .. E1 */
  76         addq $5,8,$5            /* E0 */
  77         bne $3,loop             /* .. E1 */
  78 
  79 no_quad:
             /*
              * Tail: if leftover bytes remain, read the quadword at $5, keep
              * its bytes from the end offset ($6 & 7) upward, place the fill
              * pattern in the bytes below the end offset, and store the merge.
              */
  80         bis $31,$31,$31         /* E0 */
  81         beq $18,end             /* .. E1 */
  82         ldq $7,0($5)            /* E0 */
  83         mskqh $7,$6,$2          /* .. E1 (and load stall) */
  84 
  85         insqh $17,$6,$4         /* E0 */
  86         bis $2,$4,$1            /* .. E1 */
  87         stq $1,0($5)            /* E0 */
  88         ret $31,($26),1         /* .. E1 */
  89 
  90 .align 3
  91 within_one_quad:
             /*
              * The whole range lies inside one aligned quadword.  First build
              * "original bytes below the start + pattern from the start
              * upward" in $2 ...
              */
  92         ldq_u $1,0($16)         /* E0 */
  93         insql $17,$16,$2        /* E1 */
  94         mskql $1,$16,$4         /* E0 (after load stall) */
  95         bis $2,$4,$2            /* E0 */
  96 
             /*
              * ... then cut that off at the end offset ($6 & 7) and restore
              * the original bytes from the end offset upward before storing.
              */
  97         mskql $2,$6,$4          /* E0 */
  98         mskqh $1,$6,$2          /* .. E1 */
  99         bis $2,$4,$1            /* E0 */
  100         stq_u $1,0($16)         /* E0 */
  101 
  102 end:
              /* Common return; $0 still holds the original destination. */
  103         ret $31,($26),1         /* E1 */
  104         .end __memset
  105 
      /* Export memset as a weak symbol tied to __memset. */
  106 .weakext memset, __memset

/* [previous][next][first][last][top][bottom][index][help] */