! Fast memmove/memcpy/bcopy
! Copyright Australian National University, 1995
! This file may be used under the terms of the GNU Public License
! Author: Paul Mackerras, September 95
! Minor beautifications David S. Miller

#include <asm/cprefix.h>

! Overview
! --------
! One routine body with four entry points.  bcopy exchanges its first
! two arguments and falls into the memmove/memcpy/amemmove entry, so
! memcpy here is overlap-safe in exactly the same way as memmove.
!
! On entry to the common body (after the save):
!	%i0 = destination, %i1 = source, %i2 = byte count
! Result: the original destination pointer, passed back to the caller
! in %o0 by the restore at Lout.
!
! Register roles inside the body:
!	%i0	destination cursor
!	%i1	source cursor (in the shifting paths it is rounded to a
!		word boundary and corrected again at Lendn / Lbendn)
!	%i2	bytes still to copy.  Inside the unrolled loops it is
!		kept biased low (by the loop stride, and by 4 more while
!		one loaded word is still waiting to be stored) so the
!		subcc in the loop can be tested directly; every exit
!		path adds the bias back before reaching Lend / Lbend.
!	%l0,%l1	shift counts for the misaligned-source paths
!	%l2	byte adjustment between %i1 and the true source position
!	%l6	scratch for the half of a word being shifted in
!	%l7	saved original destination (return value)
!	%o0-%o5	data being moved
!
! Direction: forwards when dest <= src or when dest >= src + count,
! backwards when src < dest < src + count.
!
! Reading hints: a branch written with ,a executes its delay slot only
! when the branch is taken; a plain branch always executes it.  Many
! branches below test condition codes that were set in the delay slot
! of the branch that jumped to them.
!
! NOTE(review): every test on the count uses a signed branch
! (ble/blt/bge/bgt).  Behaviour for counts with the top bit set has
! not been checked here - confirm callers never pass such values.

	.globl C_LABEL(bcopy)
C_LABEL(bcopy):
	mov %o0,%o3		! exchange the first two arguments, then
	mov %o1,%o0		! fall through into the common body
	mov %o3,%o1

	.globl C_LABEL(amemmove)
C_LABEL(amemmove):
	.globl C_LABEL(memmove)
	.globl C_LABEL(memcpy)
C_LABEL(memmove):
C_LABEL(memcpy):
	save %sp,-96,%sp
	mov %i0,%l7		! remember original dest for the return value

	cmp %i0,%i1		! check for dest within source area
	bleu,a 1f		! dest <= src (unsigned): copy forwards
	andcc %i0,3,%l1		!   delay slot (taken only): %l1 = dest & 3
	add %i1,%i2,%l0		! %l0 = src + count
	cmp %i0,%l0
	blu,a Lback		! dest < src + count: must copy backwards
	mov %l0,%i1		!   delay slot (taken only): src cursor = src end

	! copying forwards
	! first get dest to be word-aligned
	andcc %i0,3,%l1
1:
	be,a Lwalign		! if dest already word-aligned
	cmp %i2,4		!   delay slot: flags for the blt at Lwalign
	mov 4,%l2
	sub %l2,%l1,%l2		! #bytes until word-aligned
	subcc %i2,%l2,%i2
	ble,a Lend		! not copying enough to get past word bdry
	addcc %i2,%l2,%i2	!   delay slot: restore count, flags for Lend

1:
	ldub [%i1],%o0		! copy single bytes until word-aligned
	add %i1,1,%i1
	subcc %l2,1,%l2
	stb %o0,[%i0]
	bgt 1b
	add %i0,1,%i0		!   delay slot: always executed
	cmp %i2,4		! flags for the blt at Lwalign

Lwalign:			! dest now word aligned
	blt,a Lend		! fewer than 4 bytes left: finish bytewise
	orcc %i2,%g0,%g0	!   delay slot: flags from count for Lend

	andcc %i1,3,%l0
	be,a Ldoword		! if dest word aligned wrt src
	andcc %i0,4,%g0		!   delay slot: flags for first branch at Ldoword

	! yucky cases where we have to shift
	! Each destination word is built from two neighbouring source
	! words: (earlier word << %l0) | (later word >> %l1).

	mov 4,%l2
	sub %l2,%l0,%l2		! address adjustment, used at Lendn
	sll %l0,3,%l0		! bit offset = shift left count
	sll %l2,3,%l1		! shift right count
	add %i1,%l2,%i1		! round up to next word
	ld [%i1-4],%o0		! get first word

	andcc %i0,4,%g0		! get destination double-word aligned
	be,a 1f
	andcc %i1,4,%g0		!   delay slot: flags for the bne at 1 below
	ld [%i1],%o1		! by constructing and storing one word
	add %i0,4,%i0
	add %i1,4,%i1
	sub %i2,4,%i2
	sll %o0,%l0,%o0
	srl %o1,%l1,%l6
	or %o0,%l6,%o0
	st %o0,[%i0-4]
	mov %o1,%o0		! %o0 = most recently loaded source word

	andcc %i1,4,%g0		! now construct & store pairs of double-words
1:
	bne,a 3f		! if source now not double-word aligned
	subcc %i2,4,%i2		!   delay slot (taken only): flags for blt at 3
	subcc %i2,16,%i2	! bias count by the 16-byte stride
	blt 2f
	mov %o0,%o1		!   delay slot (always): carried word moves to %o1
4:
	! 16 bytes per pass.  On entry %o1 is the word at [%i1-4];
	! on exit %o1 is again the last word loaded.
	ldd [%i1],%o2
	sll %o1,%l0,%o4
	ldd [%i1+8],%o0
	add %i0,16,%i0
	add %i1,16,%i1
	subcc %i2,16,%i2	! flags survive to the bge below
	srl %o2,%l1,%l6
	or %l6,%o4,%o4
	sll %o2,%l0,%o5
	srl %o3,%l1,%l6
	or %l6,%o5,%o5
	std %o4,[%i0-16]
	sll %o3,%l0,%o4
	srl %o0,%l1,%l6
	or %l6,%o4,%o4
	sll %o0,%l0,%o5
	srl %o1,%l1,%l6
	or %l6,%o5,%o5
	bge 4b
	std %o4,[%i0-8]		!   delay slot: always executed
2:
	addcc %i2,12,%i2	! bias is now 4 (one word)
	blt,a Lendn
	addcc %i2,4,%i2		!   delay slot: true count, flags for Lend
5:
	! one shifted word per pass; %o1 carries the previous source word
	ld [%i1],%o2
	add %i0,4,%i0
	add %i1,4,%i1
	subcc %i2,4,%i2
	sll %o1,%l0,%o0
	srl %o2,%l1,%o1
	or %o1,%o0,%o0
	st %o0,[%i0-4]
	bge 5b
	mov %o2,%o1		!   delay slot: always executed
	ba Lendn
	addcc %i2,4,%i2		!   delay slot: true count, flags for Lend

3:
	! Source is word but not double-word aligned.  The count has
	! already been reduced by 4 for the word that will be pending.
	blt,a Lendn
	addcc %i2,4,%i2		!   delay slot: undo, flags for Lend
	ld [%i1],%o1		! load one word ahead so ldd below is aligned
	add %i1,4,%i1
	subcc %i2,16,%i2
	blt,a 8f
	addcc %i2,16,%i2	!   delay slot (taken only): undo the 16 bias
7:
	! 16 bytes per pass.  %o0,%o1 are the two most recently loaded
	! source words on entry and again on exit.
	ldd [%i1],%o2
	sll %o0,%l0,%o4
	srl %o1,%l1,%l6
	or %l6,%o4,%o4
	sll %o1,%l0,%o5
	ldd [%i1+8],%o0
	add %i0,16,%i0
	add %i1,16,%i1
	subcc %i2,16,%i2	! flags survive to the bge below
	srl %o2,%l1,%l6
	or %l6,%o5,%o5
	std %o4,[%i0-16]
	sll %o2,%l0,%o4
	srl %o3,%l1,%l6
	or %l6,%o4,%o4
	sll %o3,%l0,%o5
	srl %o0,%l1,%l6
	or %l6,%o5,%o5
	bge 7b
	std %o4,[%i0-8]		!   delay slot: always executed
	addcc %i2,16,%i2
8:
	! store the pending word built from %o0,%o1, then load ahead
	! again if at least 4 more bytes remain
	sll %o0,%l0,%o4
	srl %o1,%l1,%l6
	or %l6,%o4,%o4
	st %o4,[%i0]
	add %i0,4,%i0
	subcc %i2,4,%i2
	blt,a Lendn
	addcc %i2,4,%i2		!   delay slot: true count, flags for Lend
	mov %o1,%o0
	ld [%i1],%o1
	ba 8b
	add %i1,4,%i1		!   delay slot: always executed


Ldoword:
	! here both dest and src are word-aligned
	! make dest double-word aligned
	! (flags on entry come from andcc %i0,4 in the delay slot above)
	be,a 1f
	andcc %i1,4,%g0		!   delay slot: flags for the be at 1 below
	ld [%i1],%o0
	add %i0,4,%i0
	add %i1,4,%i1
	sub %i2,4,%i2
	st %o0,[%i0-4]
	cmp %i2,4
	blt,a Lend
	orcc %i2,%g0,%g0	!   delay slot: flags from count for Lend
	andcc %i1,4,%g0

1:
	be,a Ldodble		! if source double-word aligned now
	subcc %i2,32,%i2	!   delay slot (taken only): bias, flags for Ldodble
	ld [%i1],%o5		! load one word ahead so ldd below is aligned
	add %i1,4,%i1
	subcc %i2,36,%i2	! 32-byte stride plus the pending word
	blt,a 3f
	add %i2,32,%i2		!   delay slot (taken only): leave bias of 4
2:
	! 32 bytes per pass.  %o5 holds the pending word on entry and
	! on exit; the mov instructions re-pair the loaded words so that
	! each std writes a double-word aligned to the destination.
	ldd [%i1],%o2
	add %i1,32,%i1
	subcc %i2,32,%i2	! flags survive to the bge below
	mov %o5,%o0
	ldd [%i1-24],%o4
	mov %o2,%o1
	std %o0,[%i0]
	mov %o3,%o2
	ldd [%i1-16],%o0
	mov %o4,%o3
	std %o2,[%i0+8]
	mov %o5,%o2
	ldd [%i1-8],%o4
	mov %o0,%o3
	std %o2,[%i0+16]
	mov %o1,%o0
	mov %o4,%o1
	std %o0,[%i0+24]
	bge 2b
	add %i0,32,%i0		!   delay slot: always executed
	add %i2,32,%i2		! leave bias of 4 for the pending word
3:
	! store the pending word, then load ahead again if at least
	! 4 more bytes remain
	st %o5,[%i0]
	add %i0,4,%i0
	subcc %i2,4,%i2
	blt,a Lend
	addcc %i2,4,%i2		!   delay slot: true count, flags for Lend
	ld [%i1],%o5
	ba 3b
	add %i1,4,%i1		!   delay slot: always executed

Ldodble:
	! dest and source are both double-word aligned
	! (flags on entry come from subcc %i2,32 in the delay slot above)
	blt,a 2f
	addcc %i2,28,%i2	!   delay slot (taken only): bias becomes 4
1:
	ldd [%i1],%o0		! copy sets of 4 double-words
	subcc %i2,32,%i2	! flags survive to the bge below
	ldd [%i1+8],%o2
	add %i1,32,%i1
	ldd [%i1-16],%o4
	add %i0,32,%i0
	std %o0,[%i0-32]
	ldd [%i1-8],%o0
	std %o2,[%i0-24]
	std %o4,[%i0-16]
	bge 1b
	std %o0,[%i0-8]		!   delay slot: always executed
	addcc %i2,28,%i2	! bias becomes 4
2:
	blt,a Lend
	addcc %i2,4,%i2		!   delay slot: true count, flags for Lend
3:
	ld [%i1],%o0		! copy words
	add %i1,4,%i1
	add %i0,4,%i0
	subcc %i2,4,%i2
	bge 3b
	st %o0,[%i0-4]		!   delay slot: always executed
	ba Lend
	addcc %i2,4,%i2		!   delay slot: true count, flags for Lend

Lendn:
	sub %i1,%l2,%i1		! undo the round-up done in the shifting path
Lend:
	! Forward tail.  Flags on entry reflect the remaining count.
	ble Lout
	nop
1:
	ldub [%i1],%o0		! copy the remaining bytes one at a time
	add %i1,1,%i1
	subcc %i2,1,%i2
	stb %o0,[%i0]
	bgt 1b
	add %i0,1,%i0		!   delay slot: always executed

	ba Lout
	nop

Lback:	! Here we have to copy backwards
	! %i1 already points just past the end of the source (set in the
	! delay slot of the branch here); both cursors now move downwards.
	add %i0,%i2,%i0		! dest cursor = just past the end of dest
	! first get dest to be word-aligned
	andcc %i0,3,%l2		! #bytes until word-aligned
	be,a Lbwal		! if dest already word-aligned
	cmp %i2,4		!   delay slot: flags for the blt at Lbwal
	subcc %i2,%l2,%i2
	ble,a Lbend		! not copying enough to get past word bdry
	addcc %i2,%l2,%i2	!   delay slot: restore count, flags for Lbend

1:
	ldub [%i1-1],%o0	! copy single bytes until word-aligned
	sub %i1,1,%i1
	subcc %l2,1,%l2
	stb %o0,[%i0-1]
	bgt 1b
	sub %i0,1,%i0		!   delay slot: always executed
	cmp %i2,4		! flags for the blt at Lbwal

Lbwal:	! dest now word aligned
	blt,a Lbend		! fewer than 4 bytes left: finish bytewise
	orcc %i2,%g0,%g0	!   delay slot: flags from count for Lbend

	andcc %i1,3,%l2
	be,a Lbword		! if dest word aligned wrt src
	andcc %i0,4,%g0		!   delay slot: flags for first branch at Lbword

	! yucky cases where we have to shift
	! note %l2 used below at Lbendn
	! Each destination word is built from two neighbouring source
	! words: (lower word << %l1) | (higher word >> %l0).

	mov 4,%l0
	sub %l0,%l2,%l0		! # bytes to right of src in word
	sll %l0,3,%l0		! bit offset = shift right count
	sll %l2,3,%l1		! shift left count
	sub %i1,%l2,%i1		! round down to word boundary
	ld [%i1],%o1		! get first word

	andcc %i0,4,%g0		! get destination double-word aligned
	be,a 1f
	andcc %i1,4,%g0		!   delay slot: flags for the bne at 1 below
	ld [%i1-4],%o0		! by constructing and storing one word
	sub %i0,4,%i0
	sub %i1,4,%i1
	sub %i2,4,%i2
	srl %o1,%l0,%o1
	sll %o0,%l1,%l6
	or %o1,%l6,%o1
	st %o1,[%i0]
	mov %o0,%o1		! %o1 = most recently loaded source word

	andcc %i1,4,%g0		! now construct & store pairs of double-words
1:
	bne,a 3f		! if source now not double-word aligned
	subcc %i2,4,%i2		!   delay slot (taken only): flags for blt at 3
	subcc %i2,16,%i2	! bias count by the 16-byte stride
	blt 2f
	mov %o1,%o0		!   delay slot (always): carried word moves to %o0
4:
	! 16 bytes per pass.  On entry %o0 is the word at [%i1];
	! on exit %o0 is again the last (lowest) word loaded.
	ldd [%i1-8],%o2
	srl %o0,%l0,%o5
	ldd [%i1-16],%o0
	sub %i0,16,%i0
	sub %i1,16,%i1
	subcc %i2,16,%i2	! flags survive to the bge below
	sll %o3,%l1,%l6
	or %l6,%o5,%o5
	srl %o3,%l0,%o4
	sll %o2,%l1,%l6
	or %l6,%o4,%o4
	std %o4,[%i0+8]
	srl %o2,%l0,%o5
	sll %o1,%l1,%l6
	or %l6,%o5,%o5
	srl %o1,%l0,%o4
	sll %o0,%l1,%l6
	or %l6,%o4,%o4
	bge 4b
	std %o4,[%i0]		!   delay slot: always executed
2:
	addcc %i2,12,%i2	! bias is now 4 (one word)
	blt,a Lbendn
	addcc %i2,4,%i2		!   delay slot: true count, flags for Lbend
5:
	! one shifted word per pass; %o0 carries the previous source word
	ld [%i1-4],%o2
	sub %i0,4,%i0
	sub %i1,4,%i1
	subcc %i2,4,%i2
	srl %o0,%l0,%o0
	sll %o2,%l1,%o1
	or %o1,%o0,%o0
	st %o0,[%i0]
	bge 5b
	mov %o2,%o0		!   delay slot: always executed
	ba Lbendn
	addcc %i2,4,%i2		!   delay slot: true count, flags for Lbend

3:
	! Source is word but not double-word aligned.  The count has
	! already been reduced by 4 for the word that will be pending.
	blt,a Lbendn
	addcc %i2,4,%i2		!   delay slot: undo, flags for Lbend
	ld [%i1-4],%o0		! load one word ahead so ldd below is aligned
	sub %i1,4,%i1
	subcc %i2,16,%i2
	blt,a 8f
	addcc %i2,16,%i2	!   delay slot (taken only): undo the 16 bias
7:
	! 16 bytes per pass.  %o0 (lower) and %o1 (higher) are the two
	! most recently loaded source words on entry and again on exit.
	ldd [%i1-8],%o2
	srl %o1,%l0,%o5
	sll %o0,%l1,%l6
	or %l6,%o5,%o5
	srl %o0,%l0,%o4
	ldd [%i1-16],%o0
	sub %i0,16,%i0
	sub %i1,16,%i1
	subcc %i2,16,%i2	! flags survive to the bge below
	sll %o3,%l1,%l6
	or %l6,%o4,%o4
	std %o4,[%i0+8]
	srl %o3,%l0,%o5
	sll %o2,%l1,%l6
	or %l6,%o5,%o5
	srl %o2,%l0,%o4
	sll %o1,%l1,%l6
	or %l6,%o4,%o4
	bge 7b
	std %o4,[%i0]		!   delay slot: always executed
	addcc %i2,16,%i2
8:
	! store the pending word built from %o0,%o1, then load ahead
	! again if at least 4 more bytes remain
	srl %o1,%l0,%o5
	sll %o0,%l1,%l6
	or %l6,%o5,%o5
	st %o5,[%i0-4]
	sub %i0,4,%i0
	subcc %i2,4,%i2
	blt,a Lbendn
	addcc %i2,4,%i2		!   delay slot: true count, flags for Lbend
	mov %o0,%o1
	ld [%i1-4],%o0
	ba 8b
	sub %i1,4,%i1		!   delay slot: always executed


Lbword:
	! here both dest and src are word-aligned
	! make dest double-word aligned
	! (flags on entry come from andcc %i0,4 in the delay slot above)
	be,a 1f
	andcc %i1,4,%g0		!   delay slot: flags for the be at 1 below
	ld [%i1-4],%o0
	sub %i0,4,%i0
	sub %i1,4,%i1
	sub %i2,4,%i2
	st %o0,[%i0]
	cmp %i2,4
	blt,a Lbend
	orcc %i2,%g0,%g0	!   delay slot: flags from count for Lbend
	andcc %i1,4,%g0

1:
	be,a Lbdble		! if source double-word aligned now
	subcc %i2,32,%i2	!   delay slot (taken only): bias, flags for Lbdble
	ld [%i1-4],%o4		! load one word ahead so ldd below is aligned
	sub %i1,4,%i1
	subcc %i2,36,%i2	! 32-byte stride plus the pending word
	blt,a 3f
	add %i2,32,%i2		!   delay slot (taken only): leave bias of 4
2:
	! 32 bytes per pass.  %o4 holds the pending word on entry and
	! on exit; the mov instructions re-pair the loaded words so that
	! each std writes a double-word aligned to the destination.
	ldd [%i1-8],%o2
	sub %i1,32,%i1
	subcc %i2,32,%i2	! flags survive to the bge below
	mov %o4,%o1
	ldd [%i1+16],%o4
	mov %o3,%o0
	std %o0,[%i0-8]
	mov %o2,%o3
	ldd [%i1+8],%o0
	mov %o5,%o2
	std %o2,[%i0-16]
	mov %o4,%o3
	ldd [%i1],%o4
	mov %o1,%o2
	std %o2,[%i0-24]
	mov %o0,%o1
	mov %o5,%o0
	std %o0,[%i0-32]
	bge 2b
	sub %i0,32,%i0		!   delay slot: always executed
	add %i2,32,%i2		! leave bias of 4 for the pending word
3:
	! store the pending word, then load ahead again if at least
	! 4 more bytes remain
	st %o4,[%i0-4]
	sub %i0,4,%i0
	subcc %i2,4,%i2
	blt,a Lbend
	addcc %i2,4,%i2		!   delay slot: true count, flags for Lbend
	ld [%i1-4],%o4
	ba 3b
	sub %i1,4,%i1		!   delay slot: always executed

Lbdble:
	! dest and source are both double-word aligned
	! (flags on entry come from subcc %i2,32 in the delay slot above)
	blt,a 2f
	addcc %i2,28,%i2	!   delay slot (taken only): bias becomes 4
1:
	ldd [%i1-8],%o0		! copy sets of 4 double-words
	subcc %i2,32,%i2	! flags survive to the bge below
	ldd [%i1-16],%o2
	sub %i1,32,%i1
	ldd [%i1+8],%o4
	sub %i0,32,%i0
	std %o0,[%i0+24]
	ldd [%i1],%o0
	std %o2,[%i0+16]
	std %o4,[%i0+8]
	bge 1b
	std %o0,[%i0]		!   delay slot: always executed
	addcc %i2,28,%i2	! bias becomes 4
2:
	blt,a Lbend
	addcc %i2,4,%i2		!   delay slot: true count, flags for Lbend
3:
	ld [%i1-4],%o0		! copy words
	sub %i1,4,%i1
	sub %i0,4,%i0
	subcc %i2,4,%i2
	bge 3b
	st %o0,[%i0]		!   delay slot: always executed
	ba Lbend
	addcc %i2,4,%i2		!   delay slot: true count, flags for Lbend

Lbendn:
	add %i1,%l2,%i1		! undo the round-down done in the shifting path
Lbend:
	! Backward tail.  Flags on entry reflect the remaining count.
	ble Lout
	nop
1:
	ldub [%i1-1],%o0	! copy the remaining bytes one at a time
	sub %i1,1,%i1
	subcc %i2,1,%i2
	stb %o0,[%i0-1]
	bgt 1b
	sub %i0,1,%i0		!   delay slot: always executed

Lout:
	ret
	restore %l7,0,%o0	! hand the original dest back in the caller %o0
