Add MD, optimized versions of string functions for amd64.
author Peter Avalos <pavalos@dragonflybsd.org>
Sun, 26 Apr 2009 04:46:03 +0000 (04:46 +0000)
committer Peter Avalos <pavalos@dragonflybsd.org>
Sun, 21 Jun 2009 08:12:08 +0000 (08:12 +0000)
Obtained-from: FreeBSD

lib/libc/amd64/string/Makefile.inc [new file with mode: 0644]
lib/libc/amd64/string/bcmp.S [new file with mode: 0644]
lib/libc/amd64/string/bcopy.S [new file with mode: 0644]
lib/libc/amd64/string/bzero.S [new file with mode: 0644]
lib/libc/amd64/string/memcmp.S [new file with mode: 0644]
lib/libc/amd64/string/memcpy.S [new file with mode: 0644]
lib/libc/amd64/string/memmove.S [new file with mode: 0644]
lib/libc/amd64/string/memset.S [new file with mode: 0644]
lib/libc/amd64/string/strcat.S [new file with mode: 0644]
lib/libc/amd64/string/strcmp.S [new file with mode: 0644]
lib/libc/amd64/string/strcpy.S [new file with mode: 0644]

diff --git a/lib/libc/amd64/string/Makefile.inc b/lib/libc/amd64/string/Makefile.inc
new file mode 100644 (file)
index 0000000..ac585a4
--- /dev/null
@@ -0,0 +1,4 @@
+# $FreeBSD: src/lib/libc/amd64/string/Makefile.inc,v 1.5 2005/04/10 18:58:49 alc Exp $
+
+MDSRCS+= bcmp.S bcopy.S bzero.S memcmp.S memcpy.S memmove.S memset.S \
+       strcat.S strcmp.S strcpy.S
diff --git a/lib/libc/amd64/string/bcmp.S b/lib/libc/amd64/string/bcmp.S
new file mode 100644 (file)
index 0000000..5174cd7
--- /dev/null
@@ -0,0 +1,25 @@
+/*
+ * $NetBSD: bcmp.S,v 1.1 2001/06/19 00:25:04 fvdl Exp $
+ * $FreeBSD: src/lib/libc/amd64/string/bcmp.S,v 1.3 2008/11/02 01:10:54 peter Exp $
+ */
+
+#include <machine/asm.h>
+
+ENTRY(bcmp)
+       /* int bcmp(%rdi = b1, %rsi = b2, %rdx = len): 0 if equal, else 1 */
+       cld                             /* walk both buffers upward */
+
+       movq    %rdx,%rcx
+       shrq    $3,%rcx                 /* %rcx = len / 8 quadwords */
+       repe cmpsq                      /* halt at the first unequal pair */
+       jne     1f                      /* ZF clear: a quadword differed */
+
+       movq    %rdx,%rcx
+       andq    $7,%rcx                 /* %rcx = len % 8 trailing bytes */
+       repe cmpsb
+       /* A zero count skips the compare; ZF then comes from shrq/andq. */
+1:
+       setne   %al                     /* 1 on mismatch, 0 on match */
+       movzbl  %al,%eax                /* widen to the int result */
+       ret
+END(bcmp)
diff --git a/lib/libc/amd64/string/bcopy.S b/lib/libc/amd64/string/bcopy.S
new file mode 100644 (file)
index 0000000..84f8351
--- /dev/null
@@ -0,0 +1,95 @@
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from locore.s.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $NetBSD: bcopy.S,v 1.2 2003/08/07 16:42:36 agc Exp $
+ * $FreeBSD: src/lib/libc/amd64/string/bcopy.S,v 1.3 2008/11/02 01:10:54 peter Exp $
+ */
+
+#include <machine/asm.h>
+
+       /*
+        * (ov)bcopy (src,dst,cnt)
+        *  ws@tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
+        */
+
+/*
+ * One body, three entry points, selected by the including file:
+ *   MEMCOPY: memcpy(dst, src, len), MEMMOVE: memmove(dst, src, len),
+ *   neither: bcopy(src, dst, len).  Overlap is handled in every variant.
+ */
+#if defined(MEMCOPY)
+ENTRY(memcpy)
+#elif defined(MEMMOVE)
+ENTRY(memmove)
+#else
+ENTRY(bcopy)
+#endif
+#if defined(MEMCOPY) || defined(MEMMOVE)
+       movq    %rdi,%rax               /* result is dst */
+#else
+       xchgq   %rdi,%rsi               /* bcopy takes src first: swap */
+#endif
+       /* From here on: %rdi = dst, %rsi = src, %rdx = len. */
+       movq    %rdx,%rcx
+       movq    %rdi,%r8
+       subq    %rsi,%r8                /* %r8 = dst - src (mod 2^64) */
+       cmpq    %rcx,%r8
+       jb      1f                      /* dst inside [src, src+len): go down */
+
+       cld                             /* ascending copy */
+       shrq    $3,%rcx
+       rep movsq                       /* len / 8 quadwords */
+       movq    %rdx,%rcx
+       andq    $7,%rcx
+       rep movsb                       /* len % 8 trailing bytes */
+       ret
+
+1:     /* Descending copy so the overlapped source is read before written. */
+       addq    %rcx,%rdi
+       addq    %rcx,%rsi
+       std
+       andq    $7,%rcx
+       decq    %rdi                    /* last byte of each buffer */
+       decq    %rsi
+       rep movsb                       /* len % 8 bytes from the top */
+       movq    %rdx,%rcx
+       shrq    $3,%rcx
+       subq    $7,%rsi                 /* step back to the start of the */
+       subq    $7,%rdi                 /* quadword ending at that byte */
+       rep movsq                       /* len / 8 quadwords */
+       cld                             /* leave the direction flag clear */
+       ret
+#if defined(MEMCOPY)
+END(memcpy)
+#elif defined(MEMMOVE)
+END(memmove)
+#else
+END(bcopy)
+#endif
diff --git a/lib/libc/amd64/string/bzero.S b/lib/libc/amd64/string/bzero.S
new file mode 100644 (file)
index 0000000..61e0427
--- /dev/null
@@ -0,0 +1,42 @@
+/*
+ * Written by J.T. Conklin <jtc@NetBSD.org>.
+ * Public domain.
+ * Adapted for NetBSD/x86_64 by Frank van der Linden <fvdl@wasabisystems.com>
+ *
+ * $NetBSD: bzero.S,v 1.2 2003/07/26 19:24:38 salo Exp $
+ * $FreeBSD: src/lib/libc/amd64/string/bzero.S,v 1.3 2008/11/02 01:10:54 peter Exp $
+ */
+
+#include <machine/asm.h>
+
+ENTRY(bzero)
+       /* void bzero(%rdi = buf, %rsi = len): store len zero bytes at buf */
+       cld                             /* stores advance upward */
+       xorl    %eax,%eax               /* fill value 0 (clears all of %rax) */
+
+       /*
+        * Fewer than 16 bytes: aligning to a word boundary costs more
+        * than it saves, so use the plain byte loop.
+        */
+       cmpq    $16,%rsi
+       jb      1f
+
+       /* Head: the 0..7 bytes in front of the next 8-byte boundary. */
+       movq    %rdi,%rcx
+       negq    %rcx
+       andq    $7,%rcx                 /* %rcx = (-buf) & 7 */
+       subq    %rcx,%rsi               /* len -= head */
+       rep stosb
+
+       /* Body: whole quadwords; keep only the tail count in %rsi. */
+       movq    %rsi,%rcx
+       shrq    $3,%rcx
+       andq    $7,%rsi
+       rep stosq
+
+       /* Tail, or the entire buffer when it was short. */
+1:
+       movq    %rsi,%rcx
+       rep stosb
+       ret
+END(bzero)
diff --git a/lib/libc/amd64/string/memcmp.S b/lib/libc/amd64/string/memcmp.S
new file mode 100644 (file)
index 0000000..9811028
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+ * Written by J.T. Conklin <jtc@NetBSD.org>.
+ * Public domain.
+ * Adapted for NetBSD/x86_64 by Frank van der Linden <fvdl@wasabisystems.com>
+ *
+ * $NetBSD: memcmp.S,v 1.2 2003/07/26 19:24:39 salo Exp $
+ * $FreeBSD: src/lib/libc/amd64/string/memcmp.S,v 1.2 2008/11/02 01:10:54 peter Exp $
+ */
+
+#include <machine/asm.h>
+
+ENTRY(memcmp)
+       /* int memcmp(%rdi = b1, %rsi = b2, %rdx = len) */
+       cld                             /* scan both buffers upward */
+       movq    %rdx,%rcx
+       shrq    $3,%rcx                 /* len / 8 quadwords first */
+       repe cmpsq
+       jne     1f                      /* some quadword differed */
+
+       movq    %rdx,%rcx
+       andq    $7,%rcx                 /* then the len % 8 tail bytes */
+       repe cmpsb
+       jne     2f                      /* some tail byte differed */
+
+       xorl    %eax,%eax               /* every byte equal: return 0 */
+       ret
+
+       /* Mismatch is in the quadword just passed: back up 8, rescan bytes. */
+1:
+       movl    $8,%ecx
+       subq    %rcx,%rdi
+       subq    %rcx,%rsi
+       repe cmpsb
+       /* Pointers are one past the mismatch; compare as unsigned. */
+2:
+       movzbl  -1(%rdi),%eax
+       movzbl  -1(%rsi),%edx
+       subl    %edx,%eax               /* b1 byte - b2 byte */
+       ret
+END(memcmp)
diff --git a/lib/libc/amd64/string/memcpy.S b/lib/libc/amd64/string/memcpy.S
new file mode 100644 (file)
index 0000000..5b0762f
--- /dev/null
@@ -0,0 +1,5 @@
+/*     $NetBSD: memcpy.S,v 1.1 2001/06/19 00:25:05 fvdl Exp $  */
+/*     $FreeBSD: src/lib/libc/amd64/string/memcpy.S,v 1.1 2005/04/07 03:56:03 alc Exp $ */
+
+#define MEMCOPY                        /* make bcopy.S emit ENTRY(memcpy) and return dst */
+#include "bcopy.S"
diff --git a/lib/libc/amd64/string/memmove.S b/lib/libc/amd64/string/memmove.S
new file mode 100644 (file)
index 0000000..93d8693
--- /dev/null
@@ -0,0 +1,5 @@
+/*     $NetBSD: memmove.S,v 1.1 2001/06/19 00:25:05 fvdl Exp $ */
+/*     $FreeBSD: src/lib/libc/amd64/string/memmove.S,v 1.1 2005/04/07 03:56:03 alc Exp $ */
+
+#define MEMMOVE                        /* make bcopy.S emit ENTRY(memmove) and return dst */
+#include "bcopy.S"
diff --git a/lib/libc/amd64/string/memset.S b/lib/libc/amd64/string/memset.S
new file mode 100644 (file)
index 0000000..96f186c
--- /dev/null
@@ -0,0 +1,59 @@
+/*
+ * Written by J.T. Conklin <jtc@NetBSD.org>.
+ * Public domain.
+ * Adapted for NetBSD/x86_64 by Frank van der Linden <fvdl@wasabisystems.com>
+ *
+ * $NetBSD: memset.S,v 1.3 2004/02/26 20:50:06 drochner Exp $
+ * $FreeBSD: src/lib/libc/amd64/string/memset.S,v 1.2 2008/11/02 01:10:54 peter Exp $
+ */
+
+#include <machine/asm.h>
+
+ENTRY(memset)
+       /* void *memset(%rdi = dst, %esi = c, %rdx = len); returns dst */
+       movq    %rsi,%rax
+       andq    $0xff,%rax              /* fill byte = c & 0xff */
+       movq    %rdx,%rcx               /* %rcx = len */
+       movq    %rdi,%r11               /* stosb/stosq advance %rdi: save dst */
+       cld                             /* set fill direction forward */
+
+       /*
+        * if the string is too short, it's really not worth the overhead
+        * of aligning to word boundaries, etc.  So we jump to a plain
+        * unaligned set.  len is unsigned, hence the unsigned branch.
+        */
+       cmpq    $0x0f,%rcx
+       jbe     L1
+
+       movb    %al,%ah                 /* copy char to all bytes in word */
+       movl    %eax,%edx
+       sall    $16,%eax
+       orl     %edx,%eax               /* low 32 bits = 4 copies */
+
+       movl    %eax,%edx
+       salq    $32,%rax
+       orq     %rdx,%rax               /* %rax = 8 copies */
+
+       movq    %rdi,%rdx               /* compute misalignment */
+       negq    %rdx
+       andq    $7,%rdx                 /* bytes up to the next 8-byte boundary */
+       movq    %rcx,%r8
+       subq    %rdx,%r8                /* %r8 = bytes left after the head */
+
+       movq    %rdx,%rcx               /* set until word aligned */
+       rep
+       stosb
+
+       movq    %r8,%rcx
+       shrq    $3,%rcx                 /* set by words */
+       rep
+       stosq
+
+       movq    %r8,%rcx                /* set remainder by bytes */
+       andq    $7,%rcx
+L1:    rep                             /* short fills enter here, %rcx = len */
+       stosb
+       movq    %r11,%rax               /* return the original dst */
+
+       ret
+END(memset)
diff --git a/lib/libc/amd64/string/strcat.S b/lib/libc/amd64/string/strcat.S
new file mode 100644 (file)
index 0000000..ee6109c
--- /dev/null
@@ -0,0 +1,164 @@
+/*
+ * Written by J.T. Conklin <jtc@acorntoolworks.com>
+ * Public domain.
+ *
+ * $NetBSD: strcat.S,v 1.4 2004/07/26 18:51:21 drochner Exp $
+ * $FreeBSD: src/lib/libc/amd64/string/strcat.S,v 1.2 2008/11/02 01:10:54 peter Exp $
+ */
+
+#include <machine/asm.h>
+
+ENTRY(strcat)
+       movq    %rdi,%rax               /* strcat(dst, src) returns dst */
+       movabsq $0x0101010101010101,%r8 /* 0x01 in every byte */
+       movabsq $0x8080808080808080,%r9 /* 0x80 in every byte */
+
+       /*
+        * char *strcat(dst = %rdi, src = %rsi).  Walk dst bytewise to a word
+        * boundary while looking for its NUL.  Consider unrolling loop?
+        */
+.Lscan:
+.Lscan_align:
+       testb   $7,%dil
+       je      .Lscan_aligned
+       cmpb    $0,(%rdi)
+       je      .Lcopy                  /* %rdi is at dst's terminator */
+       incq    %rdi
+       jmp     .Lscan_align
+
+       .p2align 4                      /* 16 bytes; ELF `.align 4' is only 4 */
+.Lscan_aligned:
+.Lscan_loop:
+       movq    (%rdi),%rdx
+       addq    $8,%rdi
+       subq    %r8,%rdx
+       testq   %r9,%rdx                /* high bit set in any (byte - 1)? */
+       je      .Lscan_loop
+
+       /*
+        * The test also fires for some words without a zero byte (e.g. a byte
+        * above 0x80).  Check each byte; rejoin the loop if none equals 0.
+        */
+
+       cmpb    $0,-8(%rdi)     /* 1st byte == 0? */
+       jne     1f
+       subq    $8,%rdi
+       jmp     .Lcopy
+
+1:     cmpb    $0,-7(%rdi)     /* 2nd byte == 0? */
+       jne     1f
+       subq    $7,%rdi
+       jmp     .Lcopy
+
+1:     cmpb    $0,-6(%rdi)     /* 3rd byte == 0? */
+       jne     1f
+       subq    $6,%rdi
+       jmp     .Lcopy
+
+1:     cmpb    $0,-5(%rdi)     /* 4th byte == 0? */
+       jne     1f
+       subq    $5,%rdi
+       jmp     .Lcopy
+
+1:     cmpb    $0,-4(%rdi)     /* 5th byte == 0? */
+       jne     1f
+       subq    $4,%rdi
+       jmp     .Lcopy
+
+1:     cmpb    $0,-3(%rdi)     /* 6th byte == 0? */
+       jne     1f
+       subq    $3,%rdi
+       jmp     .Lcopy
+
+1:     cmpb    $0,-2(%rdi)     /* 7th byte == 0? */
+       jne     1f
+       subq    $2,%rdi
+       jmp     .Lcopy
+
+1:     cmpb    $0,-1(%rdi)     /* 8th byte == 0? */
+       jne     .Lscan_loop
+       subq    $1,%rdi
+
+       /*
+        * %rdi now points at dst's NUL.  Copy src bytewise until src is
+        * word aligned.  Consider unrolling loop?
+        */
+.Lcopy:
+.Lcopy_align:
+       testb   $7,%sil
+       je      .Lcopy_aligned
+       movb    (%rsi),%dl
+       incq    %rsi
+       movb    %dl,(%rdi)
+       incq    %rdi
+       testb   %dl,%dl                 /* copied src's NUL? */
+       jne     .Lcopy_align
+       ret
+
+       .p2align 4                      /* 16 bytes; ELF `.align 4' is only 4 */
+.Lcopy_loop:
+       movq    %rdx,(%rdi)             /* word had no zero byte: store it */
+       addq    $8,%rdi
+.Lcopy_aligned:
+       movq    (%rsi),%rdx
+       movq    %rdx,%rcx
+       addq    $8,%rsi
+       subq    %r8,%rcx
+       testq   %r9,%rcx                /* same zero-byte test, on a copy */
+       je      .Lcopy_loop
+
+       /*
+        * The test also fires for some words without a zero byte (e.g. a byte
+        * above 0x80).  Copy bytewise; rejoin the loop if no byte was 0.
+        */
+
+       movb    %dl,(%rdi)
+       incq    %rdi
+       testb   %dl,%dl         /* 1st byte == 0? */
+       je      .Ldone
+
+       shrq    $8,%rdx
+       movb    %dl,(%rdi)
+       incq    %rdi
+       testb   %dl,%dl         /* 2nd byte == 0? */
+       je      .Ldone
+
+       shrq    $8,%rdx
+       movb    %dl,(%rdi)
+       incq    %rdi
+       testb   %dl,%dl         /* 3rd byte == 0? */
+       je      .Ldone
+
+       shrq    $8,%rdx
+       movb    %dl,(%rdi)
+       incq    %rdi
+       testb   %dl,%dl         /* 4th byte == 0? */
+       je      .Ldone
+
+       shrq    $8,%rdx
+       movb    %dl,(%rdi)
+       incq    %rdi
+       testb   %dl,%dl         /* 5th byte == 0? */
+       je      .Ldone
+
+       shrq    $8,%rdx
+       movb    %dl,(%rdi)
+       incq    %rdi
+       testb   %dl,%dl         /* 6th byte == 0? */
+       je      .Ldone
+
+       shrq    $8,%rdx
+       movb    %dl,(%rdi)
+       incq    %rdi
+       testb   %dl,%dl         /* 7th byte == 0? */
+       je      .Ldone
+
+       shrq    $8,%rdx
+       movb    %dl,(%rdi)
+       incq    %rdi
+       testb   %dl,%dl         /* 8th byte == 0? */
+       jne     .Lcopy_aligned
+
+.Ldone:
+       ret
+END(strcat)
diff --git a/lib/libc/amd64/string/strcmp.S b/lib/libc/amd64/string/strcmp.S
new file mode 100644 (file)
index 0000000..2c6bdaa
--- /dev/null
@@ -0,0 +1,72 @@
+/*
+ * Written by J.T. Conklin <jtc@acorntoolworks.com>
+ * Public domain.
+ *
+ * $NetBSD: strcmp.S,v 1.3 2004/07/19 20:04:41 drochner Exp $
+ * $FreeBSD: src/lib/libc/amd64/string/strcmp.S,v 1.2 2008/11/02 01:10:54 peter Exp $
+ */
+
+#include <machine/asm.h>
+
+ENTRY(strcmp)
+       /*
+        * int strcmp(s1 = %rdi, s2 = %rsi).  Compare bytewise until s1 is
+        * aligned to a word boundary.  Consider unrolling loop?
+        */
+.Ls1align:
+       testb   $7,%dil
+       je      .Ls1aligned
+       movb    (%rdi),%al
+       incq    %rdi
+       movb    (%rsi),%dl
+       incq    %rsi
+       testb   %al,%al                 /* end of s1? */
+       je      .Ldone
+       cmpb    %al,%dl
+       je      .Ls1align
+       jmp     .Ldone                  /* bytes differ */
+
+       /*
+        * Check whether s2 is aligned to a word boundary.  If it is, we
+        * can compare by words.  Otherwise we have to compare by bytes.
+        */
+.Ls1aligned:
+       testb   $7,%sil
+       jne     .Lbyte_loop
+
+       movabsq $0x0101010101010101,%r8 /* 0x01 in every byte */
+       subq    $8,%rdi                 /* bias: the loop reads 8(%rdi) */
+       movabsq $0x8080808080808080,%r9 /* 0x80 in every byte */
+       subq    $8,%rsi                 /* bias: the loop reads 8(%rsi) */
+
+       .p2align 4                      /* 16 bytes; ELF `.align 4' is only 4 */
+.Lword_loop:
+       movq    8(%rdi),%rax
+       addq    $8,%rdi                 /* %rdi -> word just loaded */
+       movq    8(%rsi),%rdx
+       addq    $8,%rsi                 /* %rsi -> word just loaded */
+       cmpq    %rax,%rdx
+       jne     .Lbyte_loop             /* words differ: locate the byte */
+       subq    %r8,%rdx
+       notq    %rax
+       andq    %rax,%rdx               /* (w - 0x01..) & ~w */
+       testq   %r9,%rdx                /* nonzero when w holds a NUL */
+       je      .Lword_loop
+
+       .p2align 4                      /* 16 bytes; ELF `.align 4' is only 4 */
+.Lbyte_loop:
+       movb    (%rdi),%al
+       incq    %rdi
+       movb    (%rsi),%dl
+       incq    %rsi
+       testb   %al,%al                 /* end of s1? */
+       je      .Ldone
+       cmpb    %al,%dl
+       je      .Lbyte_loop
+
+.Ldone:
+       movzbq  %al,%rax                /* last s1 byte, as unsigned */
+       movzbq  %dl,%rdx                /* last s2 byte, as unsigned */
+       subq    %rdx,%rax               /* result = s1 byte - s2 byte */
+       ret
+END(strcmp)
diff --git a/lib/libc/amd64/string/strcpy.S b/lib/libc/amd64/string/strcpy.S
new file mode 100644 (file)
index 0000000..d2963b0
--- /dev/null
@@ -0,0 +1,110 @@
+/*
+ * Written by J.T. Conklin <jtc@acorntoolworks.com>
+ * Public domain.
+ *
+ * $NetBSD: strcpy.S,v 1.3 2004/07/19 20:04:41 drochner Exp $
+ * $FreeBSD: src/lib/libc/amd64/string/strcpy.S,v 1.3 2008/11/02 01:10:54 peter Exp $
+ */
+
+#include <machine/asm.h>
+
+/*
+ * This strcpy implementation copies a byte at a time until the
+ * source pointer is aligned to a word boundary, it then copies by
+ * words until it finds a word containing a zero byte, and finally
+ * copies by bytes until the end of the string is reached.
+ *
+ * While this may result in unaligned stores if the source and
+ * destination pointers are unaligned with respect to each other,
+ * it is still faster than either byte copies or the overhead of
+ * an implementation suitable for machines with strict alignment
+ * requirements.
+ */
+
+ENTRY(strcpy)
+       movq    %rdi,%rax               /* strcpy(dst, src) returns dst */
+       movabsq $0x0101010101010101,%r8 /* 0x01 in every byte */
+       movabsq $0x8080808080808080,%r9 /* 0x80 in every byte */
+
+       /*
+        * char *strcpy(dst = %rdi, src = %rsi).  Copy bytewise until the
+        * source is word aligned.  Consider unrolling loop?
+        */
+.Lalign:
+       testb   $7,%sil
+       je      .Lword_aligned
+       movb    (%rsi),%dl
+       incq    %rsi
+       movb    %dl,(%rdi)
+       incq    %rdi
+       testb   %dl,%dl                 /* copied the terminating NUL? */
+       jne     .Lalign
+       ret
+
+       .p2align 4
+.Lloop:
+       movq    %rdx,(%rdi)             /* word had no zero byte: store it */
+       addq    $8,%rdi
+.Lword_aligned:
+       movq    (%rsi),%rdx
+       movq    %rdx,%rcx               /* test a copy; %rdx keeps the data */
+       addq    $8,%rsi
+       subq    %r8,%rcx
+       testq   %r9,%rcx                /* high bit set in any (byte - 1)? */
+       je      .Lloop
+
+       /*
+        * The test also fires for some words without a zero byte (e.g. a byte
+        * above 0x80).  Copy bytewise; rejoin the loop if no byte was 0.
+        */
+
+       movb    %dl,(%rdi)
+       incq    %rdi
+       testb   %dl,%dl         /* 1st byte == 0? */
+       je      .Ldone
+
+       shrq    $8,%rdx
+       movb    %dl,(%rdi)
+       incq    %rdi
+       testb   %dl,%dl         /* 2nd byte == 0? */
+       je      .Ldone
+
+       shrq    $8,%rdx
+       movb    %dl,(%rdi)
+       incq    %rdi
+       testb   %dl,%dl         /* 3rd byte == 0? */
+       je      .Ldone
+
+       shrq    $8,%rdx
+       movb    %dl,(%rdi)
+       incq    %rdi
+       testb   %dl,%dl         /* 4th byte == 0? */
+       je      .Ldone
+
+       shrq    $8,%rdx
+       movb    %dl,(%rdi)
+       incq    %rdi
+       testb   %dl,%dl         /* 5th byte == 0? */
+       je      .Ldone
+
+       shrq    $8,%rdx
+       movb    %dl,(%rdi)
+       incq    %rdi
+       testb   %dl,%dl         /* 6th byte == 0? */
+       je      .Ldone
+
+       shrq    $8,%rdx
+       movb    %dl,(%rdi)
+       incq    %rdi
+       testb   %dl,%dl         /* 7th byte == 0? */
+       je      .Ldone
+
+       shrq    $8,%rdx
+       movb    %dl,(%rdi)
+       incq    %rdi
+       testb   %dl,%dl         /* 8th byte == 0? */
+       jne     .Lword_aligned  /* no NUL in this word: resume word copy */
+
+.Ldone:
+       ret
+END(strcpy)