Add MD, optimized versions of string functions for amd64.
[dragonfly.git] / lib / libc / amd64 / string / strcmp.S
1 /*
2  * Written by J.T. Conklin <jtc@acorntoolworks.com>
3  * Public domain.
4  *
5  * $NetBSD: strcmp.S,v 1.3 2004/07/19 20:04:41 drochner Exp $
6  * $FreeBSD: src/lib/libc/amd64/string/strcmp.S,v 1.2 2008/11/02 01:10:54 peter Exp $
7  */
8
9 #include <machine/asm.h>
10
/*
 * int strcmp(const char *s1, const char *s2)
 *
 * ABI:      System V AMD64, AT&T syntax
 * In:       %rdi = s1, %rsi = s2
 * Out:      %rax = (unsigned char)*s1 - (unsigned char)*s2 at the first
 *           position where the strings differ or s1 ends; 0 if equal
 * Clobbers: %rdx, %rdi, %rsi, %r8, %r9, flags
 * Stack:    none (leaf function)
 */
ENTRY(strcmp)
        /*
         * Align s1 to a word boundary by comparing one byte at a time.
         * Both pointers advance in lockstep, so only s1's alignment is
         * forced here; s2's alignment is checked afterwards.
         * Consider unrolling loop?
         */
.Ls1align:
        testb   $7,%dil
        je      .Ls1aligned
        movb    (%rdi),%al
        incq    %rdi
        movb    (%rsi),%dl
        incq    %rsi
        testb   %al,%al                 /* end of s1? */
        je      .Ldone
        cmpb    %al,%dl
        je      .Ls1align
        jmp     .Ldone                  /* bytes differ */

        /*
         * Check whether s2 is aligned to a word boundary.  If it is, we
         * can compare by words.  Otherwise we have to compare by bytes.
         */
.Ls1aligned:
        testb   $7,%sil
        jne     .Lbyte_loop

        /*
         * %r8/%r9 hold the constants for the zero-byte test below.
         * Both pointers are biased by -8 so that the loop can load from
         * 8(%reg) and then advance; after the advance each pointer
         * addresses the word that was just loaded, which is exactly
         * where the byte loop has to resume.
         */
        movabsq $0x0101010101010101,%r8
        subq    $8,%rdi
        movabsq $0x8080808080808080,%r9
        subq    $8,%rsi

        /*
         * .p2align states the alignment as a power of two (16 bytes)
         * on every target; plain ".align 4" means only 4 bytes on
         * x86-64 ELF.
         */
        .p2align 4
.Lword_loop:
        movq    8(%rdi),%rax
        addq    $8,%rdi
        movq    8(%rsi),%rdx
        addq    $8,%rsi
        cmpq    %rax,%rdx
        jne     .Lbyte_loop             /* mismatch somewhere in this word */
        /*
         * The words are equal here (x = %rax = %rdx).  Compute
         * (x - 0x01..01) & ~x & 0x80..80, which is non-zero iff
         * some byte of x is zero, i.e. the strings end in this word.
         */
        subq    %r8,%rdx
        notq    %rax
        andq    %rax,%rdx
        testq   %r9,%rdx
        je      .Lword_loop             /* no NUL yet: next word */

        /*
         * Byte-wise comparison.  Entered when s2 is not word aligned,
         * or to locate the differing or terminating byte inside the
         * word the word loop stopped on.
         */
        .p2align 4
.Lbyte_loop:
        movb    (%rdi),%al
        incq    %rdi
        movb    (%rsi),%dl
        incq    %rsi
        testb   %al,%al                 /* end of s1? */
        je      .Ldone
        cmpb    %al,%dl
        je      .Lbyte_loop

        /*
         * %al and %dl hold the last byte read from s1 and s2.  Zero-extend
         * both so the subtraction compares them as unsigned chars.
         */
.Ldone:
        movzbq  %al,%rax
        movzbq  %dl,%rdx
        subq    %rdx,%rax
        ret
END(strcmp)