| Commit | Line | Data |
|---|---|---|
| f4a040fe PA |
1 | /* |
| 2 | * Written by J.T. Conklin <jtc@acorntoolworks.com> | |
| 3 | * Public domain. | |
| 4 | * | |
| 5 | * $NetBSD: strcpy.S,v 1.3 2004/07/19 20:04:41 drochner Exp $ | |
| 6 | * $FreeBSD: src/lib/libc/amd64/string/strcpy.S,v 1.3 2008/11/02 01:10:54 peter Exp $ | |
| 7 | */ | |
| 8 | ||
| 9 | #include <machine/asm.h> | |
| 10 | ||
| 11 | /* | |
| 12 | * This strcpy implementation copies a byte at a time until the | |
| 13 | * source pointer is aligned to a word boundary; it then copies by | |
| 14 | * words until it finds a word containing a zero byte, and finally | |
| 15 | * copies by bytes until the end of the string is reached. | |
| 16 | * | |
| 17 | * While this may result in unaligned stores if the source and | |
| 18 | * destination pointers are unaligned with respect to each other, | |
| 19 | * it is still faster than either byte copies or the overhead of | |
| 20 | * an implementation suitable for machines with strict alignment | |
| 21 | * requirements. | |
| 22 | */ | |
| 23 | ||
| 24 | ENTRY(strcpy) /* copies the NUL-terminated string read through %rsi to the buffer written through %rdi; returns the initial %rdi in %rax; uses %rcx, %rdx, %r8, %r9 as scratch */ | |
| 25 | movq %rdi,%rax /* return value: the destination pointer as passed in */ | |
| 26 | movabsq $0x0101010101010101,%r8 /* 0x01 in every byte; subtracted from each source word below */ | |
| 27 | movabsq $0x8080808080808080,%r9 /* 0x80 in every byte; selects the per-byte high bits */ | |
| 28 | ||
| 29 | /* | |
| 30 | * Align source to a word boundary. | |
| 31 | * Consider unrolling loop? | |
| 32 | */ | |
| 33 | .Lalign: | |
| 34 | testb $7,%sil /* low three bits of the source pointer clear => 8-byte aligned */ | |
| 35 | je .Lword_aligned | |
| 36 | movb (%rsi),%dl /* copy one byte and advance both pointers */ | |
| 37 | incq %rsi | |
| 38 | movb %dl,(%rdi) | |
| 39 | incq %rdi | |
| 40 | testb %dl,%dl /* was the byte just copied the terminating NUL? */ | |
| 41 | jne .Lalign | |
| 42 | ret /* string ended before the source became aligned */ | |
| 43 | ||
| 44 | .p2align 4 /* align the word loop to a 16-byte boundary */ | |
| 45 | .Lloop: | |
| 46 | movq %rdx,(%rdi) /* store the word loaded on the previous pass; the test found no zero byte in it */ | |
| 47 | addq $8,%rdi | |
| 48 | .Lword_aligned: | |
| 49 | movq (%rsi),%rdx /* load 8 source bytes; %rsi is 8-byte aligned on every path that reaches here */ | |
| 50 | movq %rdx,%rcx /* test a copy so %rdx keeps the data to be stored */ | |
| 51 | addq $8,%rsi | |
| 52 | subq %r8,%rcx /* subtract 0x01 from every byte; a zero byte wraps to 0xff */ | |
| 53 | testq %r9,%rcx /* is the high bit of any byte set after the subtraction? */ | |
| 54 | je .Lloop /* no: the word contains no zero byte, so store it whole and continue */ | |
| 55 | ||
| 56 | /* | |
| 57 | * The test above also fires when a byte is 0x81 or greater, so in rare cases the above loop may exit prematurely. | |
| 58 | * Copy the word one byte at a time, lowest-order byte first, stopping after a zero byte; we must return to the loop if none of the bytes in the word equal 0. | |
| 59 | */ | |
| 60 | ||
| 61 | movb %dl,(%rdi) | |
| 62 | incq %rdi | |
| 63 | testb %dl,%dl /* 1st byte == 0? */ | |
| 64 | je .Ldone | |
| 65 | ||
| 66 | shrq $8,%rdx /* shift the next source byte down into %dl */ | |
| 67 | movb %dl,(%rdi) | |
| 68 | incq %rdi | |
| 69 | testb %dl,%dl /* 2nd byte == 0? */ | |
| 70 | je .Ldone | |
| 71 | ||
| 72 | shrq $8,%rdx | |
| 73 | movb %dl,(%rdi) | |
| 74 | incq %rdi | |
| 75 | testb %dl,%dl /* 3rd byte == 0? */ | |
| 76 | je .Ldone | |
| 77 | ||
| 78 | shrq $8,%rdx | |
| 79 | movb %dl,(%rdi) | |
| 80 | incq %rdi | |
| 81 | testb %dl,%dl /* 4th byte == 0? */ | |
| 82 | je .Ldone | |
| 83 | ||
| 84 | shrq $8,%rdx | |
| 85 | movb %dl,(%rdi) | |
| 86 | incq %rdi | |
| 87 | testb %dl,%dl /* 5th byte == 0? */ | |
| 88 | je .Ldone | |
| 89 | ||
| 90 | shrq $8,%rdx | |
| 91 | movb %dl,(%rdi) | |
| 92 | incq %rdi | |
| 93 | testb %dl,%dl /* 6th byte == 0? */ | |
| 94 | je .Ldone | |
| 95 | ||
| 96 | shrq $8,%rdx | |
| 97 | movb %dl,(%rdi) | |
| 98 | incq %rdi | |
| 99 | testb %dl,%dl /* 7th byte == 0? */ | |
| 100 | je .Ldone | |
| 101 | ||
| 102 | shrq $8,%rdx | |
| 103 | movb %dl,(%rdi) | |
| 104 | incq %rdi | |
| 105 | testb %dl,%dl /* 8th byte == 0? */ | |
| 106 | jne .Lword_aligned /* all 8 bytes were non-zero (false positive): both pointers have advanced by 8, resume the word loop */ | |
| 107 | ||
| 108 | .Ldone: | |
| 109 | ret /* %rax still holds the destination pointer saved on entry */ | |
| 110 | END(strcpy) |