arch/alpha/lib/memcpy.c

/* */
This source file includes the following definitions.
__memcpy_unaligned
__memcpy_aligned
__memcpy
bcopy
   1 /*
   2  *  linux/arch/alpha/lib/memcpy.c
   3  *
   4  *  Copyright (C) 1995  Linus Torvalds
   5  */
   6 
   7 /*
   8  * This is a reasonably optimized memcpy() routine.
   9  */
  10 
  11 /*
  12  * Note that the C code is written to be optimized into good assembly. However,
  13  * at this point gcc is unable to sanely compile "if (n >= 0)", resulting in an
  14  * explicit compare against 0 (instead of just using the proper "blt reg, xx" or
  15  * "bge reg, xx"). I hope alpha-gcc will be fixed to notice this eventually..
  16  */
  17 
  18 #include <linux/types.h>
  19 
  20 /*
  21  * This should be done in one go with ldq_u*2/mask/stq_u. Do it
  22  * with a macro so that we can fix it up later..
  23  */
  24 #define ALIGN_DEST_TO8(d,s,n) \
  25         while (d & 7) { \
  26                 if (n <= 0) return; \
  27                 n--; \
  28                 *(char *) d = *(char *) s; \
  29                 d++; s++; \
  30         }
  31 
  32 /*
  33  * This should similarly be done with ldq_u*2/mask/stq. The destination
  34  * is aligned, but we don't fill in a full quad-word
  35  */
  36 #define DO_REST(d,s,n) \
  37         while (n > 0) { \
  38                 n--; \
  39                 *(char *) d = *(char *) s; \
  40                 d++; s++; \
  41         }
  42 
  43 /*
  44  * This should be done with ldq/mask/stq. The source and destination are
  45  * aligned, but we don't fill in a full quad-word
  46  */
  47 #define DO_REST_ALIGNED(d,s,n) DO_REST(d,s,n)
  48 
  49 /*
  50  * This does unaligned memory copies. We want to avoid storing to
  51  * an unaligned address, as that would do a read-modify-write cycle.
  52  * We also want to avoid double-reading the unaligned reads.
  53  *
  54  * Note the ordering to try to avoid load (and address generation) latencies.
  55  */
  56 static inline void __memcpy_unaligned(unsigned long d, unsigned long s, long n)
     /*
      * Copy n bytes from address s to address d for the case where the two
      * addresses differ in their low three bits (the only case in which
      * __memcpy calls this helper).  d and s are addresses held as
      * integers; n is signed so the loops below can test its sign.
      * Nothing is returned; d, s and n are local copies advanced in place.
      *
      * NOTE(review): the extql/extqh merge below appears to rely on s NOT
      * being 8-byte aligned once d has been aligned -- which the caller's
      * test guarantees.  Confirm before calling this from anywhere else.
      */
  57 {
             /* Byte-copy until d is 8-byte aligned; returns if n runs out first. */
  58         ALIGN_DEST_TO8(d,s,n);
  59         n -= 8;                 /* to avoid compare against 8 in the loop */
  60         if (n >= 0) {
  61                 unsigned long low_word, high_word;
                     /*
                      * Prime the loop: per the Alpha ISA, ldq_u ignores the
                      * low three address bits and fetches the aligned
                      * quadword that contains s.
                      */
  62                 __asm__("ldq_u %0,%1":"=r" (low_word):"m" (*(unsigned long *) s));
  63                 do {
  64                         unsigned long tmp;
                             /* Start loading the next source quadword early. */
  65                         __asm__("ldq_u %0,%1":"=r" (high_word):"m" (*(unsigned long *)(s+8)));
  66                         n -= 8;
                             /* Move the bytes from offset (s & 7) upward down to the bottom. */
  67                         __asm__("extql %1,%2,%0"
  68                                 :"=r" (low_word)
  69                                 :"r" (low_word), "r" (s));
                             /* Move the leading bytes of the next quadword up to the top. */
  70                         __asm__("extqh %1,%2,%0"
  71                                 :"=r" (tmp)
  72                                 :"r" (high_word), "r" (s));
  73                         s += 8;
                             /* d is 8-byte aligned here, so this is a whole-quadword store. */
  74                         *(unsigned long *) d = low_word | tmp;
  75                         d += 8;
                             /* Reuse the quadword just loaded so each one is read only once. */
  76                         low_word = high_word;
  77                 } while (n >= 0);
  78         }
             /* Undo the bias; n is now the tail of fewer than 8 bytes. */
  79         n += 8;
  80         DO_REST(d,s,n);
  81 }
  82 
  83 /*
  84  * Hmm.. Strange. The __asm__ here is there to make gcc use a integer register
  85  * for the load-store. I don't know why, but it would seem that using a floating
  86  * point register for the move seems to slow things down (very small difference,
  87  * though).
  88  *
  89  * Note the ordering to try to avoid load (and address generation) latencies.
  90  */
  91 static inline void __memcpy_aligned(unsigned long d, unsigned long s, long n)
     /*  */
  92 {
  93         ALIGN_DEST_TO8(d,s,n);
  94         n -= 8;
  95         while (n >= 0) {
  96                 unsigned long tmp;
  97                 __asm__("ldq %0,%1":"=r" (tmp):"m" (*(unsigned long *) s));
  98                 n -= 8;
  99                 s += 8;
 100                 *(unsigned long *) d = tmp;
 101                 d += 8;
 102         }
 103         n += 8;
 104         DO_REST_ALIGNED(d,s,n);
 105 }
 106 
 107 void * __memcpy(void * dest, const void *src, size_t n)
     /*  */
 108 {
 109         if (!(((unsigned long) dest ^ (unsigned long) src) & 7)) {
 110                 __memcpy_aligned((unsigned long) dest, (unsigned long) src, n);
 111                 return dest;
 112         }
 113         __memcpy_unaligned((unsigned long) dest, (unsigned long) src, n);
 114         return dest;
 115 }
 116 
 117 /*
 118  * Broken compiler uses "bcopy" to do internal
 119  * assignments. Silly OSF/1 BSDism.
 120  */
 121 char * bcopy(const char * src, char * dest, size_t n)
     /*  */
 122 {
 123         __memcpy(dest, src, n);
 124         return dest;
 125 }
 126 
 127 /*
 128  * gcc-2.7.1 and newer generate calls to memset and memcpy.  So we
 129  * need to define that here:
 130  */
 131 asm (".weakext memcpy, __memcpy");  /* NOTE(review): presumably exports memcpy as a
      * weak symbol bound to __memcpy, so compiler-generated memcpy calls
      * resolve here; .weakext is assembler-specific -- confirm against the
      * target assembler's documentation. */
/* */
root/arch/alpha/lib/memcpy.c

DEFINITIONS