root/arch/sparc/lib/memcpy.S

/* [previous][next][first][last][top][bottom][index][help] */
   1 ! Fast memmove/memcpy/bcopy
   2 ! Copyright Australian National University, 1995
   3 ! This file may be used under the terms of the GNU Public License
   4 ! Author: Paul Mackerras, September 95
   5 ! Minor beautifications David S. Miller
   6 
   7 #include <asm/cprefix.h>
   8 
   9         .globl  C_LABEL(bcopy)
     ! bcopy entry point: exchanges its first two arguments (%o0 and %o1),
     ! using %o3 as scratch, and then falls through into the memmove/memcpy
     ! code that follows.  That code stores through its first argument and
     ! loads through its second, so on entry here %o0 is the source pointer
     ! and %o1 is the destination pointer.
  10 C_LABEL(bcopy):
  11         mov     %o0,%o3         ! %o3 = first argument (scratch copy)
  12         mov     %o1,%o0         ! first argument  <- second argument
  13         mov     %o3,%o1         ! second argument <- original first argument
  14 
  15         .globl  C_LABEL(amemmove)
     ! Shared entry point for amemmove, memmove and memcpy.
     ! After the save: %i0 = destination, %i1 = source, %i2 = byte count
     ! (the code stores through %i0, loads through %i1 and counts %i2 down).
     ! %l7 keeps the original %i0; Lout hands it back as the return value.
     ! A backwards copy (Lback) is chosen only when the destination starts
     ! strictly inside the source area; otherwise the copy runs forwards,
     ! first moving single bytes until the destination is word aligned.
     ! On branches written with ",a" the delay-slot instruction takes effect
     ! only when the branch is taken.
     ! NOTE(review): every count test in this routine uses signed branches
     ! (ble/blt/bge/bgt), so a count with the top bit set is not handled as
     ! a large unsigned length -- confirm no caller passes one.
  16 C_LABEL(amemmove):
  17         .globl  C_LABEL(memmove)
  18         .globl  C_LABEL(memcpy)
  19 C_LABEL(memmove):
  20 C_LABEL(memcpy):
  21         save    %sp,-96,%sp     ! new register window, %sp lowered by 96
  22         mov     %i0,%l7         ! remember original dest for the return value
  23 
  24         cmp     %i0,%i1         ! check for dest within source area
  25         bleu,a  1f              ! dest <= src (unsigned): copy forwards
  26         andcc   %i0,3,%l1       ! (delay slot, taken only) %l1 = dest & 3
  27         add     %i1,%i2,%l0     ! %l0 = src + count
  28         cmp     %i0,%l0
  29         blu,a   Lback           ! dest < src + count: copy backwards
  30         mov     %l0,%i1         ! (delay slot, taken only) %i1 = end of source
  31 
  32         ! copying forwards
  33         ! first get dest to be word-aligned
  34         andcc   %i0,3,%l1       ! %l1 = dest & 3
  35 1:
  36         be,a    Lwalign         ! if dest already word-aligned
  37         cmp     %i2,4           ! (delay slot, taken only) flags for Lwalign
  38         mov     4,%l2
  39         sub     %l2,%l1,%l2     ! #bytes until word-aligned
  40         subcc   %i2,%l2,%i2     ! count -= alignment bytes
  41         ble,a   Lend            ! not copying enough to get past word bdry
  42         addcc   %i2,%l2,%i2     ! (delay slot, taken only) restore count, flags for Lend
  43 
  44 1:
  45         ldub    [%i1],%o0       ! copy single bytes until word-aligned
  46         add     %i1,1,%i1
  47         subcc   %l2,1,%l2       ! one alignment byte fewer to go
  48         stb     %o0,[%i0]
  49         bgt     1b              ! loop while alignment bytes remain
  50         add     %i0,1,%i0       ! (delay slot, always) advance dest
  51         cmp     %i2,4           ! flags for the blt at Lwalign
  52 
  53 Lwalign:                        ! dest now word aligned
     ! Reached with the condition codes of "cmp %i2,4".
     ! Fewer than 4 bytes left: finish bytewise at Lend.  Otherwise the
     ! source alignment selects the strategy: Ldoword when the source is
     ! word aligned as well, else the shift-and-merge code below.
     ! On ",a" branches the delay-slot instruction takes effect only when
     ! the branch is taken.
  54         blt,a   Lend            ! under 4 bytes left: byte tail
  55         orcc    %i2,%g0,%g0     ! (delay slot, taken only) flags from count
  56 
  57         andcc   %i1,3,%l0       ! %l0 = src & 3
  58         be,a    Ldoword         ! if dest word aligned wrt src
  59         andcc   %i0,4,%g0       ! (delay slot, taken only) test dest bit 2
  60 
  61         ! yucky cases where we have to shift
     ! Every destination word is merged from two neighbouring aligned source
     ! words: (earlier word << %l0) | (later word >> %l1).  %l6 is scratch.
  62 
  63         mov     4,%l2
  64         sub     %l2,%l0,%l2     ! address adjustment, used at Lendn
  65         sll     %l0,3,%l0       ! bit offset = shift left count
  66         sll     %l2,3,%l1       ! shift right count
  67         add     %i1,%l2,%i1     ! round up to next word
  68         ld      [%i1-4],%o0     ! get first word
  69 
  70         andcc   %i0,4,%g0       ! get destination double-word aligned
  71         be,a    1f              ! dest bit 2 clear: no single word needed
  72         andcc   %i1,4,%g0       ! (delay slot, taken only) test src bit 2
  73         ld      [%i1],%o1       ! by constructing and storing one word
  74         add     %i0,4,%i0
  75         add     %i1,4,%i1
  76         sub     %i2,4,%i2
  77         sll     %o0,%l0,%o0
  78         srl     %o1,%l1,%l6
  79         or      %o0,%l6,%o0
  80         st      %o0,[%i0-4]
  81         mov     %o1,%o0         ! later word becomes the carried word
  82 
  83         andcc   %i1,4,%g0       ! now construct & store pairs of double-words
  84 1:
  85         bne,a   3f              ! if source now not double-word aligned
  86         subcc   %i2,4,%i2       ! (delay slot, taken only) count -= 4
  87         subcc   %i2,16,%i2      ! count -= 16; negative means no full round
  88         blt     2f              ! under 16 bytes: word-at-a-time loop
  89         mov     %o0,%o1         ! (delay slot, always) carried word now in %o1
  90 4:                              ! 16 bytes per pass; %o1 = carried word
  91         ldd     [%i1],%o2       ! %o2,%o3 = next two source words
  92         sll     %o1,%l0,%o4     ! use old %o1 before the next ldd replaces it
  93         ldd     [%i1+8],%o0     ! %o0,%o1 = two more words
  94         add     %i0,16,%i0
  95         add     %i1,16,%i1
  96         subcc   %i2,16,%i2      ! flags consumed by the bge below
  97         srl     %o2,%l1,%l6
  98         or      %l6,%o4,%o4     ! word 0 = carried | %o2
  99         sll     %o2,%l0,%o5
 100         srl     %o3,%l1,%l6
 101         or      %l6,%o5,%o5     ! word 1 = %o2 | %o3
 102         std     %o4,[%i0-16]
 103         sll     %o3,%l0,%o4
 104         srl     %o0,%l1,%l6
 105         or      %l6,%o4,%o4     ! word 2 = %o3 | %o0
 106         sll     %o0,%l0,%o5
 107         srl     %o1,%l1,%l6
 108         or      %l6,%o5,%o5     ! word 3 = %o0 | %o1; %o1 carries over
 109         bge     4b              ! another 16 bytes available
 110         std     %o4,[%i0-8]     ! (delay slot, always) store words 2 and 3
 111 2:
 112         addcc   %i2,12,%i2      ! %i2 = bytes left - 4
 113         blt,a   Lendn           ! under 4 bytes left: byte tail
 114         addcc   %i2,4,%i2       ! (delay slot, taken only) %i2 = bytes left
 115 5:                              ! one word per pass; %o1 = carried word
 116         ld      [%i1],%o2
 117         add     %i0,4,%i0
 118         add     %i1,4,%i1
 119         subcc   %i2,4,%i2       ! flags consumed by the bge below
 120         sll     %o1,%l0,%o0
 121         srl     %o2,%l1,%o1
 122         or      %o1,%o0,%o0
 123         st      %o0,[%i0-4]
 124         bge     5b              ! at least one more word to do
 125         mov     %o2,%o1         ! (delay slot, always) new carried word
 126         ba      Lendn
 127         addcc   %i2,4,%i2       ! (delay slot, always) %i2 = bytes left
 128 
 129 3:                              ! source not double-word aligned; count already -4
 130         blt,a   Lendn           ! under 4 bytes left: byte tail
 131         addcc   %i2,4,%i2       ! (delay slot, taken only) %i2 = bytes left
 132         ld      [%i1],%o1       ! %o0,%o1 now hold two consecutive source words
 133         add     %i1,4,%i1
 134         subcc   %i2,16,%i2
 135         blt,a   8f              ! no room for a 16-byte round
 136         addcc   %i2,16,%i2      ! (delay slot, taken only) undo the subtract
 137 7:                              ! 16 bytes per pass; %o0,%o1 = carried words
 138         ldd     [%i1],%o2       ! %o2,%o3 = next two source words
 139         sll     %o0,%l0,%o4
 140         srl     %o1,%l1,%l6
 141         or      %l6,%o4,%o4     ! word 0 = %o0 | %o1
 142         sll     %o1,%l0,%o5     ! use old %o1 before the next ldd replaces it
 143         ldd     [%i1+8],%o0     ! %o0,%o1 = two more words (next carried pair)
 144         add     %i0,16,%i0
 145         add     %i1,16,%i1
 146         subcc   %i2,16,%i2      ! flags consumed by the bge below
 147         srl     %o2,%l1,%l6
 148         or      %l6,%o5,%o5     ! word 1 = old %o1 | %o2
 149         std     %o4,[%i0-16]
 150         sll     %o2,%l0,%o4
 151         srl     %o3,%l1,%l6
 152         or      %l6,%o4,%o4     ! word 2 = %o2 | %o3
 153         sll     %o3,%l0,%o5
 154         srl     %o0,%l1,%l6
 155         or      %l6,%o5,%o5     ! word 3 = %o3 | new %o0
 156         bge     7b              ! another 16 bytes available
 157         std     %o4,[%i0-8]     ! (delay slot, always) store words 2 and 3
 158         addcc   %i2,16,%i2      ! undo the last subtract
 159 8:                              ! one word per pass from the pair %o0,%o1
 160         sll     %o0,%l0,%o4
 161         srl     %o1,%l1,%l6
 162         or      %l6,%o4,%o4
 163         st      %o4,[%i0]
 164         add     %i0,4,%i0
 165         subcc   %i2,4,%i2
 166         blt,a   Lendn           ! no further whole word: byte tail
 167         addcc   %i2,4,%i2       ! (delay slot, taken only) %i2 = bytes left
 168         mov     %o1,%o0         ! slide the pair along by one word
 169         ld      [%i1],%o1
 170         ba      8b
 171         add     %i1,4,%i1       ! (delay slot, always) advance source
 172 
 173 
 174 Ldoword:
 175         ! here both dest and src are word-aligned
 176         ! make dest double-word aligned
     ! Reached with the condition codes of "andcc %i0,4,%g0" (dest bit 2).
     ! On ",a" branches the delay-slot instruction takes effect only when
     ! the branch is taken.
 177         be,a    1f              ! dest bit 2 clear: no single word needed
 178         andcc   %i1,4,%g0       ! (delay slot, taken only) test src bit 2
 179         ld      [%i1],%o0       ! copy one word so dest bit 2 becomes clear
 180         add     %i0,4,%i0
 181         add     %i1,4,%i1
 182         sub     %i2,4,%i2
 183         st      %o0,[%i0-4]
 184         cmp     %i2,4
 185         blt,a   Lend            ! under 4 bytes left: byte tail
 186         orcc    %i2,%g0,%g0     ! (delay slot, taken only) flags from count
 187         andcc   %i1,4,%g0       ! test src bit 2
 188 
 189 1:
 190         be,a    Ldodble         ! if source double-word aligned now
 191         subcc   %i2,32,%i2      ! (delay slot, taken only) count -= 32, flags for Ldodble
     ! Source is one word off double-word alignment: keep one loaded word
     ! pending in %o5 so that the ldd loads and std stores are both aligned.
 192         ld      [%i1],%o5       ! pending word
 193         add     %i1,4,%i1
 194         subcc   %i2,36,%i2      ! pending word plus one 32-byte round
 195         blt,a   3f              ! not enough for a 32-byte round
 196         add     %i2,32,%i2      ! (delay slot, taken only) %i2 = bytes left - 4
 197 2:                              ! 32 bytes per pass; %o5 = pending word
 198         ldd     [%i1],%o2
 199         add     %i1,32,%i1
 200         subcc   %i2,32,%i2      ! flags consumed by the bge below
 201         mov     %o5,%o0         ! pending word goes out first
 202         ldd     [%i1-24],%o4
 203         mov     %o2,%o1
 204         std     %o0,[%i0]
 205         mov     %o3,%o2
 206         ldd     [%i1-16],%o0
 207         mov     %o4,%o3
 208         std     %o2,[%i0+8]
 209         mov     %o5,%o2
 210         ldd     [%i1-8],%o4     ! %o5 becomes the next pending word
 211         mov     %o0,%o3
 212         std     %o2,[%i0+16]
 213         mov     %o1,%o0
 214         mov     %o4,%o1
 215         std     %o0,[%i0+24]
 216         bge     2b              ! another 32 bytes available
 217         add     %i0,32,%i0      ! (delay slot, always) advance dest
 218         add     %i2,32,%i2      ! undo the last subtract
 219 3:                              ! one word per pass; %o5 = pending word
 220         st      %o5,[%i0]
 221         add     %i0,4,%i0
 222         subcc   %i2,4,%i2
 223         blt,a   Lend            ! no further whole word: byte tail
 224         addcc   %i2,4,%i2       ! (delay slot, taken only) %i2 = bytes left
 225         ld      [%i1],%o5
 226         ba      3b
 227         add     %i1,4,%i1       ! (delay slot, always) advance source
 228 
 229 Ldodble:
 230         ! dest and source are both double-word aligned
     ! Reached with %i2 already reduced by 32 and the condition codes of
     ! that subtract.  Copies 32 bytes per pass, then single words, then
     ! leaves the remainder to the byte loop at Lend.
     ! On ",a" branches the delay-slot instruction takes effect only when
     ! the branch is taken.
 231         blt,a   2f              ! under 32 bytes: skip the big loop
 232         addcc   %i2,28,%i2      ! (delay slot, taken only) %i2 = bytes left - 4
 233 1:
 234         ldd     [%i1],%o0       ! copy sets of 4 double-words
 235         subcc   %i2,32,%i2      ! flags consumed by the bge below
 236         ldd     [%i1+8],%o2
 237         add     %i1,32,%i1
 238         ldd     [%i1-16],%o4
 239         add     %i0,32,%i0
 240         std     %o0,[%i0-32]
 241         ldd     [%i1-8],%o0     ! %o0,%o1 reused after their store above
 242         std     %o2,[%i0-24]
 243         std     %o4,[%i0-16]
 244         bge     1b              ! another 32 bytes available
 245         std     %o0,[%i0-8]     ! (delay slot, always) last double-word
 246         addcc   %i2,28,%i2      ! %i2 = bytes left - 4
 247 2:
 248         blt,a   Lend            ! under 4 bytes left: byte tail
 249         addcc   %i2,4,%i2       ! (delay slot, taken only) %i2 = bytes left
 250 3:
 251         ld      [%i1],%o0       ! copy words
 252         add     %i1,4,%i1
 253         add     %i0,4,%i0
 254         subcc   %i2,4,%i2       ! flags consumed by the bge below
 255         bge     3b              ! at least one more word to do
 256         st      %o0,[%i0-4]     ! (delay slot, always) store the word
 257         ba      Lend
 258         addcc   %i2,4,%i2       ! (delay slot, always) %i2 = bytes left
 259 
 260 Lendn:
     ! Tail of the forward copy.
     ! Lendn is used by the shifting code, which had rounded %i1 up by %l2;
     ! the sub undoes that and leaves the condition codes untouched.
     ! Lend expects the condition codes to reflect the byte count left in
     ! %i2; the remaining bytes are copied one at a time.
 261         sub     %i1,%l2,%i1     ! back to the true source byte address
 262 Lend:
 263         ble     Lout            ! nothing left to copy
 264         nop
 265 1:
 266         ldub    [%i1],%o0
 267         add     %i1,1,%i1
 268         subcc   %i2,1,%i2       ! one byte fewer to go
 269         stb     %o0,[%i0]
 270         bgt     1b              ! loop while bytes remain
 271         add     %i0,1,%i0       ! (delay slot, always) advance dest
 272 
 273         ba      Lout
 274         nop
 275 
 276 Lback:  ! Here we have to copy backwards
     ! Reached with %i1 already pointing one past the end of the source
     ! (set in the delay slot of the branch that comes here).  %i0 is moved
     ! to one past the end of the destination, and both pointers then walk
     ! downwards.  On ",a" branches the delay-slot instruction takes effect
     ! only when the branch is taken.
 277         add     %i0,%i2,%i0     ! %i0 = dest + count
 278         ! first get dest to be word-aligned
 279         andcc   %i0,3,%l2       ! #bytes until word-aligned
 280         be,a    Lbwal           ! if dest already word-aligned
 281         cmp     %i2,4           ! (delay slot, taken only) flags for Lbwal
 282         subcc   %i2,%l2,%i2     ! count -= alignment bytes
 283         ble,a   Lbend           ! not copying enough to get past word bdry
 284         addcc   %i2,%l2,%i2     ! (delay slot, taken only) restore count, flags for Lbend
 285 
 286 1:
 287         ldub    [%i1-1],%o0     ! copy single bytes until word-aligned
 288         sub     %i1,1,%i1
 289         subcc   %l2,1,%l2       ! one alignment byte fewer to go
 290         stb     %o0,[%i0-1]
 291         bgt     1b              ! loop while alignment bytes remain
 292         sub     %i0,1,%i0       ! (delay slot, always) step dest down
 293         cmp     %i2,4           ! flags for the blt at Lbwal
 294 
 295 Lbwal:                          ! dest now word aligned
     ! Reached with the condition codes of "cmp %i2,4".
     ! Fewer than 4 bytes left: finish bytewise at Lbend.  Otherwise the
     ! source alignment selects the strategy: Lbword when the source is
     ! word aligned as well, else the shift-and-merge code below.
     ! On ",a" branches the delay-slot instruction takes effect only when
     ! the branch is taken.
 296         blt,a   Lbend           ! under 4 bytes left: byte tail
 297         orcc    %i2,%g0,%g0     ! (delay slot, taken only) flags from count
 298 
 299         andcc   %i1,3,%l2       ! %l2 = src & 3
 300         be,a    Lbword          ! if dest word aligned wrt src
 301         andcc   %i0,4,%g0       ! (delay slot, taken only) test dest bit 2
 302 
 303         ! yucky cases where we have to shift
 304         ! note %l2 used below at Lbendn
     ! Every destination word is merged from two neighbouring aligned source
     ! words: (higher-address word >> %l0) | (lower-address word << %l1).
     ! %l6 is scratch.
 305 
 306         mov     4,%l0
 307         sub     %l0,%l2,%l0     ! # bytes to right of src in word
 308         sll     %l0,3,%l0       ! bit offset = shift right count
 309         sll     %l2,3,%l1       ! shift left count
 310         sub     %i1,%l2,%i1     ! round down to word boundary
 311         ld      [%i1],%o1       ! get first word
 312 
 313         andcc   %i0,4,%g0       ! get destination double-word aligned
 314         be,a    1f              ! dest bit 2 clear: no single word needed
 315         andcc   %i1,4,%g0       ! (delay slot, taken only) test src bit 2
 316         ld      [%i1-4],%o0     ! by constructing and storing one word
 317         sub     %i0,4,%i0
 318         sub     %i1,4,%i1
 319         sub     %i2,4,%i2
 320         srl     %o1,%l0,%o1
 321         sll     %o0,%l1,%l6
 322         or      %o1,%l6,%o1
 323         st      %o1,[%i0]
 324         mov     %o0,%o1         ! lower word becomes the carried word
 325 
 326         andcc   %i1,4,%g0       ! now construct & store pairs of double-words
 327 1:
 328         bne,a   3f              ! if source now not double-word aligned
 329         subcc   %i2,4,%i2       ! (delay slot, taken only) count -= 4
 330         subcc   %i2,16,%i2      ! count -= 16; negative means no full round
 331         blt     2f              ! under 16 bytes: word-at-a-time loop
 332         mov     %o1,%o0         ! (delay slot, always) carried word now in %o0
 333 4:                              ! 16 bytes per pass; %o0 = carried word
 334         ldd     [%i1-8],%o2     ! %o2,%o3 = two source words below %i1
 335         srl     %o0,%l0,%o5     ! use old %o0 before the next ldd replaces it
 336         ldd     [%i1-16],%o0    ! %o0,%o1 = the two words below those
 337         sub     %i0,16,%i0
 338         sub     %i1,16,%i1
 339         subcc   %i2,16,%i2      ! flags consumed by the bge below
 340         sll     %o3,%l1,%l6
 341         or      %l6,%o5,%o5     ! top word = carried | %o3
 342         srl     %o3,%l0,%o4
 343         sll     %o2,%l1,%l6
 344         or      %l6,%o4,%o4     ! next word = %o3 | %o2
 345         std     %o4,[%i0+8]
 346         srl     %o2,%l0,%o5
 347         sll     %o1,%l1,%l6
 348         or      %l6,%o5,%o5     ! next word = %o2 | %o1
 349         srl     %o1,%l0,%o4
 350         sll     %o0,%l1,%l6
 351         or      %l6,%o4,%o4     ! bottom word = %o1 | %o0; %o0 carries over
 352         bge     4b              ! another 16 bytes available
 353         std     %o4,[%i0]       ! (delay slot, always) store the lower pair
 354 2:
 355         addcc   %i2,12,%i2      ! %i2 = bytes left - 4
 356         blt,a   Lbendn          ! under 4 bytes left: byte tail
 357         addcc   %i2,4,%i2       ! (delay slot, taken only) %i2 = bytes left
 358 5:                              ! one word per pass; %o0 = carried word
 359         ld      [%i1-4],%o2
 360         sub     %i0,4,%i0
 361         sub     %i1,4,%i1
 362         subcc   %i2,4,%i2       ! flags consumed by the bge below
 363         srl     %o0,%l0,%o0
 364         sll     %o2,%l1,%o1
 365         or      %o1,%o0,%o0
 366         st      %o0,[%i0]
 367         bge     5b              ! at least one more word to do
 368         mov     %o2,%o0         ! (delay slot, always) new carried word
 369         ba      Lbendn
 370         addcc   %i2,4,%i2       ! (delay slot, always) %i2 = bytes left
 371 
 372 3:                              ! source not double-word aligned; count already -4
 373         blt,a   Lbendn          ! under 4 bytes left: byte tail
 374         addcc   %i2,4,%i2       ! (delay slot, taken only) %i2 = bytes left
 375         ld      [%i1-4],%o0     ! %o0,%o1 now hold two consecutive source words
 376         sub     %i1,4,%i1
 377         subcc   %i2,16,%i2
 378         blt,a   8f              ! no room for a 16-byte round
 379         addcc   %i2,16,%i2      ! (delay slot, taken only) undo the subtract
 380 7:                              ! 16 bytes per pass; %o0,%o1 = carried words
 381         ldd     [%i1-8],%o2     ! %o2,%o3 = two source words below %i1
 382         srl     %o1,%l0,%o5
 383         sll     %o0,%l1,%l6
 384         or      %l6,%o5,%o5     ! top word = %o1 | %o0
 385         srl     %o0,%l0,%o4     ! use old %o0 before the next ldd replaces it
 386         ldd     [%i1-16],%o0    ! %o0,%o1 = next carried pair
 387         sub     %i0,16,%i0
 388         sub     %i1,16,%i1
 389         subcc   %i2,16,%i2      ! flags consumed by the bge below
 390         sll     %o3,%l1,%l6
 391         or      %l6,%o4,%o4     ! next word = old %o0 | %o3
 392         std     %o4,[%i0+8]
 393         srl     %o3,%l0,%o5
 394         sll     %o2,%l1,%l6
 395         or      %l6,%o5,%o5     ! next word = %o3 | %o2
 396         srl     %o2,%l0,%o4
 397         sll     %o1,%l1,%l6
 398         or      %l6,%o4,%o4     ! bottom word = %o2 | new %o1
 399         bge     7b              ! another 16 bytes available
 400         std     %o4,[%i0]       ! (delay slot, always) store the lower pair
 401         addcc   %i2,16,%i2      ! undo the last subtract
 402 8:                              ! one word per pass from the pair %o0,%o1
 403         srl     %o1,%l0,%o5
 404         sll     %o0,%l1,%l6
 405         or      %l6,%o5,%o5
 406         st      %o5,[%i0-4]
 407         sub     %i0,4,%i0
 408         subcc   %i2,4,%i2
 409         blt,a   Lbendn          ! no further whole word: byte tail
 410         addcc   %i2,4,%i2       ! (delay slot, taken only) %i2 = bytes left
 411         mov     %o0,%o1         ! slide the pair down by one word
 412         ld      [%i1-4],%o0
 413         ba      8b
 414         sub     %i1,4,%i1       ! (delay slot, always) step source down
 415 
 416 
 417 Lbword:
 418         ! here both dest and src are word-aligned
 419         ! make dest double-word aligned
     ! Reached with the condition codes of "andcc %i0,4,%g0" (dest bit 2).
     ! Pointers walk downwards; %i0 and %i1 point just past the next
     ! bytes to copy.  On ",a" branches the delay-slot instruction takes
     ! effect only when the branch is taken.
 420         be,a    1f              ! dest bit 2 clear: no single word needed
 421         andcc   %i1,4,%g0       ! (delay slot, taken only) test src bit 2
 422         ld      [%i1-4],%o0     ! copy one word so dest bit 2 becomes clear
 423         sub     %i0,4,%i0
 424         sub     %i1,4,%i1
 425         sub     %i2,4,%i2
 426         st      %o0,[%i0]
 427         cmp     %i2,4
 428         blt,a   Lbend           ! under 4 bytes left: byte tail
 429         orcc    %i2,%g0,%g0     ! (delay slot, taken only) flags from count
 430         andcc   %i1,4,%g0       ! test src bit 2
 431 
 432 1:
 433         be,a    Lbdble          ! if source double-word aligned now
 434         subcc   %i2,32,%i2      ! (delay slot, taken only) count -= 32, flags for Lbdble
     ! Source is one word off double-word alignment: keep one loaded word
     ! pending in %o4 so that the ldd loads and std stores are both aligned.
 435         ld      [%i1-4],%o4     ! pending word
 436         sub     %i1,4,%i1
 437         subcc   %i2,36,%i2      ! pending word plus one 32-byte round
 438         blt,a   3f              ! not enough for a 32-byte round
 439         add     %i2,32,%i2      ! (delay slot, taken only) %i2 = bytes left - 4
 440 2:                              ! 32 bytes per pass; %o4 = pending word
 441         ldd     [%i1-8],%o2
 442         sub     %i1,32,%i1
 443         subcc   %i2,32,%i2      ! flags consumed by the bge below
 444         mov     %o4,%o1         ! pending word is the highest word stored
 445         ldd     [%i1+16],%o4
 446         mov     %o3,%o0
 447         std     %o0,[%i0-8]
 448         mov     %o2,%o3
 449         ldd     [%i1+8],%o0
 450         mov     %o5,%o2
 451         std     %o2,[%i0-16]
 452         mov     %o4,%o3
 453         ldd     [%i1],%o4       ! %o4 becomes the next pending word
 454         mov     %o1,%o2
 455         std     %o2,[%i0-24]
 456         mov     %o0,%o1
 457         mov     %o5,%o0
 458         std     %o0,[%i0-32]
 459         bge     2b              ! another 32 bytes available
 460         sub     %i0,32,%i0      ! (delay slot, always) step dest down
 461         add     %i2,32,%i2      ! undo the last subtract
 462 3:                              ! one word per pass; %o4 = pending word
 463         st      %o4,[%i0-4]
 464         sub     %i0,4,%i0
 465         subcc   %i2,4,%i2
 466         blt,a   Lbend           ! no further whole word: byte tail
 467         addcc   %i2,4,%i2       ! (delay slot, taken only) %i2 = bytes left
 468         ld      [%i1-4],%o4
 469         ba      3b
 470         sub     %i1,4,%i1       ! (delay slot, always) step source down
 471 
 472 Lbdble:
 473         ! dest and source are both double-word aligned
     ! Reached with %i2 already reduced by 32 and the condition codes of
     ! that subtract.  Copies 32 bytes per pass going downwards, then single
     ! words, then leaves the remainder to the byte loop at Lbend.
     ! On ",a" branches the delay-slot instruction takes effect only when
     ! the branch is taken.
 474         blt,a   2f              ! under 32 bytes: skip the big loop
 475         addcc   %i2,28,%i2      ! (delay slot, taken only) %i2 = bytes left - 4
 476 1:
 477         ldd     [%i1-8],%o0     ! copy sets of 4 double-words
 478         subcc   %i2,32,%i2      ! flags consumed by the bge below
 479         ldd     [%i1-16],%o2
 480         sub     %i1,32,%i1
 481         ldd     [%i1+8],%o4
 482         sub     %i0,32,%i0
 483         std     %o0,[%i0+24]
 484         ldd     [%i1],%o0       ! %o0,%o1 reused after their store above
 485         std     %o2,[%i0+16]
 486         std     %o4,[%i0+8]
 487         bge     1b              ! another 32 bytes available
 488         std     %o0,[%i0]       ! (delay slot, always) lowest double-word
 489         addcc   %i2,28,%i2      ! %i2 = bytes left - 4
 490 2:
 491         blt,a   Lbend           ! under 4 bytes left: byte tail
 492         addcc   %i2,4,%i2       ! (delay slot, taken only) %i2 = bytes left
 493 3:
 494         ld      [%i1-4],%o0     ! copy words
 495         sub     %i1,4,%i1
 496         sub     %i0,4,%i0
 497         subcc   %i2,4,%i2       ! flags consumed by the bge below
 498         bge     3b              ! at least one more word to do
 499         st      %o0,[%i0]       ! (delay slot, always) store the word
 500         ba      Lbend
 501         addcc   %i2,4,%i2       ! (delay slot, always) %i2 = bytes left
 502 
 503 Lbendn:
     ! Tail of the backward copy, followed by the common exit.
     ! Lbendn is used by the shifting code, which had rounded %i1 down by
     ! %l2; the add undoes that and leaves the condition codes untouched.
     ! Lbend expects the condition codes to reflect the byte count left in
     ! %i2; the remaining bytes are copied one at a time, going downwards.
 504         add     %i1,%l2,%i1     ! back to the true source byte address
 505 Lbend:
 506         ble     Lout            ! nothing left to copy
 507         nop
 508 1:
 509         ldub    [%i1-1],%o0
 510         sub     %i1,1,%i1
 511         subcc   %i2,1,%i2       ! one byte fewer to go
 512         stb     %o0,[%i0-1]
 513         bgt     1b              ! loop while bytes remain
 514         sub     %i0,1,%i0       ! (delay slot, always) step dest down
 515 
 516 Lout:                           ! common exit for forward and backward copies
 517         ret
 518         restore %l7,0,%o0       ! (delay slot) pop window; caller %o0 = value saved in %l7
 519 
 520 

/* [previous][next][first][last][top][bottom][index][help] */