tests - Adjustments to memcpy/memzero test
author Matthew Dillon <dillon@apollo.backplane.com>
Fri, 15 Apr 2011 15:19:37 +0000 (08:19 -0700)
committer Matthew Dillon <dillon@apollo.backplane.com>
Fri, 15 Apr 2011 15:19:37 +0000 (08:19 -0700)
* Remove assembly from these tests so they compile on x86-64.

* Clean up some timing reporting issues.

test/sysperf/Makefile
test/sysperf/blib.c
test/sysperf/memcpy.S [deleted file]
test/sysperf/memcpy.c
test/sysperf/memzero.S [deleted file]
test/sysperf/memzero.c

index 485f4e8..34cbb78 100644 (file)
@@ -112,11 +112,11 @@ all:      $(TARGETS)
 /tmp/exec2: exec1.c blib.c
        $(CC) $(CFLAGS) exec1.c blib.c -o /tmp/exec2
 
-/tmp/mem1: memcpy.c memcpy.S blib.c
-       $(CC) $(CFLAGS) memcpy.c memcpy.S blib.c -o /tmp/mem1
+/tmp/mem1: memcpy.c blib.c
+       $(CC) $(CFLAGS) memcpy.c blib.c -o /tmp/mem1
 
-/tmp/mem2: memzero.c memzero.S blib.c
-       $(CC) $(CFLAGS) memzero.c memzero.S blib.c -o /tmp/mem2
+/tmp/mem2: memzero.c blib.c
+       $(CC) $(CFLAGS) memzero.c blib.c -o /tmp/mem2
 
 /tmp/read1: read1.c blib.c
        $(CC) $(CFLAGS) read1.c blib.c -o /tmp/read1
index c668a79..99ff282 100644 (file)
@@ -14,6 +14,7 @@
 
 static struct timeval tv1;
 static struct timeval tv2;
+static long long last_us;
 
 void
 start_timing(void)
@@ -29,6 +30,7 @@ stop_timing(long long count, const char *ctl, ...)
 
     gettimeofday(&tv2, NULL);
     us = (tv2.tv_usec - tv1.tv_usec) + (tv2.tv_sec - tv1.tv_sec) * 1000000LL;
+    last_us = us;
     if (ctl == NULL)   /* dummy call to pre-cache */
        return(us > 1000000);
 
@@ -67,11 +69,7 @@ stop_timing2(long long count, long long us, const char *ctl, ...)
 long long
 get_timing(void)
 {
-    long long us;
-
-    gettimeofday(&tv2, NULL);
-    us = (tv2.tv_usec - tv1.tv_usec) + (tv2.tv_sec - tv1.tv_sec) * 1000000LL;
-    return(us);
+    return (last_us);
 }
 
 void
diff --git a/test/sysperf/memcpy.S b/test/sysperf/memcpy.S
deleted file mode 100644 (file)
index bdc621c..0000000
+++ /dev/null
@@ -1,285 +0,0 @@
-
-               /*
-                * memcpy.S
-                *
-                * x86_64: MOVNTQ vs MOVQ, MOVNTDQ vs MOVDQ[A/U], PREFETCH[x]
-                *
-                *      NT stands for 'non-temportal', which basically means
-                *      'bypass L1 cache on write'.  Write bandwidth is
-                *      effectively reduced to the L2 cache bandwidth but
-                *      the L1 cache will not be wiped out by the copy.
-                *
-                *      DO NOT MIX 'nt' and standard writes!  Your performance
-                *      will go poof.
-                *
-                * PREFETCH[NTA,T0,T1,T2]
-                *
-                *      These instructions prefetch a cache line (typically
-                *      128 bytes).  'NT' means 'non-temporal', which bypasses
-                *      the L1 cache if the data is not already in the L1 
-                *      cache.  HOWEVER, using PREFETCHNT can put a slow memory
-                *      op in the cpu's memory request queue if a L1 or L2
-                *      miss occurs, and it can stall an L1-cache-hit access
-                *      for a small but noticeable period of time, so it is
-                *      a good idea not to put a memory op just after a 
-                *      prefetchnta instruction.
-                *
-                *      You can get better L2 bandwidth using prefetchnt but
-                *      it will not be much more then prefetcht0 and 
-                *      'prefetcht0' will give you better cache-miss
-                *      bandwidth.
-                *
-                *      The prefetch has to be done far enough ahead to do
-                *      some good, but it only has a significant effect when
-                *      it is able to move date from L2 to L1.  Prefetching
-                *      from main memory does not have a significant effect
-                *      durign a copy or zeroing operation because main
-                *      memory bandwidth is already saturated.
-                *
-                * $DragonFly: src/test/sysperf/memcpy.S,v 1.1 2004/04/29 16:14:53 dillon Exp $
-                */
-               .text
-               .globl  docopy1
-               .globl  docopy2
-               .globl  docopy3
-               .globl  docopy4
-               .globl  docopy5
-               .globl  docopy6
-               .globl  docopy7
-               .globl  fpcleanup
-
-               .p2align 4,0x90
-docopy1:
-               pushl   %esi
-               pushl   %edi
-               pushl   %ecx
-               pushl   %ebx
-
-               movl    4+16(%esp),%esi
-               movl    8+16(%esp),%edi
-               movl    12+16(%esp),%ecx
-               shrl    $2,%ecx
-               cld
-               rep
-               movsl
-               popl    %ebx
-               popl    %ecx
-               popl    %edi
-               popl    %esi
-               ret
-
-               .p2align 4,0x90
-docopy2:
-               pushl   %esi
-               pushl   %edi
-               pushl   %ecx
-               pushl   %ebx
-
-               movl    4+16(%esp),%esi
-               movl    8+16(%esp),%edi
-               movl    12+16(%esp),%ecx
-               addl    %ecx,%esi
-               addl    %ecx,%edi
-               shrl    $2,%ecx
-               std
-               rep
-               movsl
-               popl    %ebx
-               popl    %ecx
-               popl    %edi
-               popl    %esi
-               ret
-
-               .p2align 4,0x90
-docopy3:
-               pushl   %esi
-               pushl   %edi
-               pushl   %ecx
-               pushl   %ebx
-
-               movl    4+16(%esp),%esi
-               movl    8+16(%esp),%edi
-               movl    12+16(%esp),%ecx
-
-               .p2align 4,0x90
-1:
-               movl    (%esi),%eax
-               movl    4(%esi),%ebx
-               movl    8(%esi),%edx
-               movl    %eax,(%edi)
-               movl    12(%esi),%eax
-               movl    %ebx,4(%edi)
-               movl    16(%esi),%ebx
-               movl    %edx,8(%edi)
-               movl    20(%esi),%edx
-               movl    %eax,12(%edi)
-               movl    24(%esi),%eax
-               movl    %ebx,16(%edi)
-               movl    28(%esi),%ebx
-               movl    %edx,20(%edi)
-               prefetcht0 96(%esi)
-               subl    $32,%ecx
-               movl    %eax,24(%edi)
-               addl    $32,%esi
-               movl    %ebx,28(%edi)
-               addl    $32,%edi
-
-               testl   %ecx,%ecx
-               jnz     1b
-
-               popl    %ebx
-               popl    %ecx
-               popl    %edi
-               popl    %esi
-               ret
-
-               .p2align 4,0x90
-docopy4:
-               pushl   %esi
-               pushl   %edi
-               pushl   %ecx
-               pushl   %ebx
-
-               movl    4+16(%esp),%esi
-               movl    8+16(%esp),%edi
-               movl    12+16(%esp),%ecx
-
-               .p2align 4,0x90
-1:
-               movl    (%esi),%eax
-               movl    4(%esi),%ebx
-               addl    $8,%esi
-               prefetcht0 64(%esi)
-               subl    $8,%ecx
-               movl    %eax,(%edi)
-               movl    %ebx,4(%edi)
-               addl    $8,%edi
-               testl   %ecx,%ecx
-               jnz     1b
-
-               popl    %ebx
-               popl    %ecx
-               popl    %edi
-               popl    %esi
-               ret
-
-               .p2align 4,0x90
-docopy5:
-               pushl   %esi
-               pushl   %edi
-               pushl   %ecx
-               pushl   %ebx
-
-               movl    4+16(%esp),%esi
-               movl    8+16(%esp),%edi
-               movl    12+16(%esp),%ecx
-
-               .p2align 4,0x90
-1:
-               movq    (%esi),%mm0
-               movq    8(%esi),%mm1
-               movq    16(%esi),%mm2
-               movq    24(%esi),%mm3
-               movq    32(%esi),%mm4
-               movq    40(%esi),%mm5
-               movq    48(%esi),%mm6
-               movq    56(%esi),%mm7
-               prefetchnta 128(%esi)
-               subl    $64,%ecx
-               addl    $64,%esi
-               movq    %mm0,(%edi)
-               movq    %mm1,8(%edi)
-               movq    %mm2,16(%edi)
-               movq    %mm3,24(%edi)
-               movq    %mm4,32(%edi)
-               movq    %mm5,40(%edi)
-               movq    %mm6,48(%edi)
-               movq    %mm7,56(%edi)
-               addl    $64,%edi
-               testl   %ecx,%ecx
-               jnz     1b
-
-               popl    %ebx
-               popl    %ecx
-               popl    %edi
-               popl    %esi
-               ret
-
-               .p2align 4,0x90
-docopy6:
-               pushl   %esi
-               pushl   %edi
-               pushl   %ecx
-               pushl   %ebx
-
-               movl    4+16(%esp),%esi
-               movl    8+16(%esp),%edi
-               movl    12+16(%esp),%ecx
-               movl    $16,%eax
-
-               .p2align 4,0x90
-1:
-               prefetcht0 96(%esi)
-               subl    %eax,%ecx
-               movq    (%esi),%mm0
-               movq    8(%esi),%mm1
-               addl    %eax,%esi
-               movntq  %mm0,(%edi)
-               movntq  %mm1,8(%edi)
-               addl    %eax,%edi
-               testl   %ecx,%ecx
-               jnz     1b
-
-               popl    %ebx
-               popl    %ecx
-               popl    %edi
-               popl    %esi
-               ret
-
-               .p2align 4,0x90
-docopy7:
-               pushl   %esi
-               pushl   %edi
-               pushl   %ecx
-               pushl   %ebx
-
-               movl    4+16(%esp),%esi
-               movl    8+16(%esp),%edi
-               movl    12+16(%esp),%ecx
-               movl    $128,%eax
-
-               .p2align 4,0x90
-1:
-               movdqa  (%esi),%xmm0
-               movdqa  16(%esi),%xmm1
-               movdqa  32(%esi),%xmm2
-               movdqa  48(%esi),%xmm3
-               movdqa  64(%esi),%xmm4
-               movdqa  80(%esi),%xmm5
-               movdqa  96(%esi),%xmm6
-               movdqa  112(%esi),%xmm7
-               subl    %eax,%ecx
-               addl    %eax,%esi
-               movntdq  %xmm0,(%edi)
-               movntdq  %xmm1,16(%edi)
-               movntdq  %xmm2,32(%edi)
-               movntdq  %xmm3,48(%edi)
-               movntdq  %xmm4,64(%edi)
-               movntdq  %xmm5,80(%edi)
-               movntdq  %xmm6,96(%edi)
-               movntdq  %xmm7,112(%edi)
-               addl    %eax,%edi
-               testl   %ecx,%ecx
-               jnz     1b
-
-               popl    %ebx
-               popl    %ecx
-               popl    %edi
-               popl    %esi
-               ret
-
-               .p2align 4,0x90
-fpcleanup:
-               fninit
-               ret
-
index 997bc9a..f71dc63 100644 (file)
@@ -10,6 +10,7 @@ int glob[16384];
 
 void test_using(const char *ctl, char *buf, int bytes, void (*copyf)(const void *s1, void *d, size_t bytes));
 
+#if 0
 extern void docopy1(const void *s, void *d, size_t bytes);
 extern void docopy2(const void *s, void *d, size_t bytes);
 extern void docopy3(const void *s, void *d, size_t bytes);
@@ -18,6 +19,7 @@ extern void docopy5(const void *s, void *d, size_t bytes);
 extern void docopy6(const void *s, void *d, size_t bytes);
 extern void docopy7(const void *s, void *d, size_t bytes);
 extern void fpcleanup(void);
+#endif
 
 int
 main(int ac, char **av)
@@ -63,6 +65,7 @@ main(int ac, char **av)
     bzero(buf, bytes * 2);
 
     test_using("bcopy", buf, bytes, bcopy);
+#if 0
     test_using("docopy1", buf, bytes, docopy1);
     test_using("docopy2", buf, bytes, docopy2);
     test_using("docopy3", buf, bytes, docopy3);
@@ -70,6 +73,7 @@ main(int ac, char **av)
     test_using("docopy5", buf, bytes, docopy5);
     test_using("docopy6", buf, bytes, docopy6);
     test_using("docopy7", buf, bytes, docopy7);
+#endif
     return(0);
 }
 
@@ -90,7 +94,9 @@ test_using(const char *ctl, char *buf, int bytes, void (*copyf)(const void *s1,
     for (i = loops - 1; i >= 0; --i) {
        copyf(buf, buf + bytes, bytes);
     }
+#if 0
     fpcleanup();
+#endif
     stop_timing(loops, ctl);
     us = get_timing();
     printf("%s %d %5.2f MBytes/sec\n", ctl, bytes, 
diff --git a/test/sysperf/memzero.S b/test/sysperf/memzero.S
deleted file mode 100644 (file)
index e70923d..0000000
+++ /dev/null
@@ -1,205 +0,0 @@
-
-               /*
-                * memcpy.S
-                *
-                * $DragonFly: src/test/sysperf/memzero.S,v 1.1 2004/04/29 16:14:53 dillon Exp $
-                */
-               .text
-               .globl  dozero1
-               .globl  dozero2
-               .globl  dozero3
-               .globl  dozero4
-               .globl  dozero5
-               .globl  dozero6
-               .globl  dozero7
-               .globl  fpcleanup
-
-               .p2align 4,0x90
-dozero1:
-               pushl   %esi
-               pushl   %edi
-               pushl   %ecx
-               pushl   %ebx
-
-               movl    4+16(%esp),%edi
-               movl    8+16(%esp),%ecx
-               shrl    $2,%ecx
-               subl    %eax,%eax
-               cld
-               rep
-               stosl
-               popl    %ebx
-               popl    %ecx
-               popl    %edi
-               popl    %esi
-               ret
-
-               .p2align 4,0x90
-dozero2:
-               pushl   %esi
-               pushl   %edi
-               pushl   %ecx
-               pushl   %ebx
-
-               movl    4+16(%esp),%edi
-               movl    8+16(%esp),%ecx
-               addl    %ecx,%esi
-               addl    %ecx,%edi
-               shrl    $2,%ecx
-               subl    %eax,%eax
-               std
-               rep
-               stosl
-               popl    %ebx
-               popl    %ecx
-               popl    %edi
-               popl    %esi
-               ret
-
-               .p2align 4,0x90
-dozero3:
-               pushl   %esi
-               pushl   %edi
-               pushl   %ecx
-               pushl   %ebx
-
-               movl    4+16(%esp),%edi
-               movl    8+16(%esp),%ecx
-               movl    $8,%edx
-               subl    %eax,%eax
-               .p2align 4,0x90
-1:
-               subl    %edx,%ecx
-               movl    %eax,(%edi)
-               movl    %eax,4(%edi)
-               addl    %edx,%edi
-               testl   %ecx,%ecx
-               jnz     1b
-
-               popl    %ebx
-               popl    %ecx
-               popl    %edi
-               popl    %esi
-               ret
-
-               .p2align 4,0x90
-dozero4:
-               pushl   %esi
-               pushl   %edi
-               pushl   %ecx
-               pushl   %ebx
-
-               movl    4+16(%esp),%edi
-               movl    8+16(%esp),%ecx
-               subl    %eax,%eax
-               .p2align 4,0x90
-1:
-               subl    $16,%ecx
-               movnti  %eax,0(%edi)
-               movnti  %eax,4(%edi)
-               movnti  %eax,8(%edi)
-               movnti  %eax,12(%edi)
-               addl    $16,%edi
-               testl   %ecx,%ecx
-               jnz     1b
-
-               popl    %ebx
-               popl    %ecx
-               popl    %edi
-               popl    %esi
-               ret
-
-               .p2align 4,0x90
-dozero5:
-               pushl   %esi
-               pushl   %edi
-               pushl   %ecx
-               pushl   %ebx
-
-               movl    4+16(%esp),%edi
-               movl    8+16(%esp),%ecx
-
-               subl    $108,%esp
-               fnsave  0(%esp)
-               fninit
-               fldz
-               .p2align 4,0x90
-1:
-               subl    $64,%ecx
-               movq    %mm0,(%edi)
-               movq    %mm1,8(%edi)
-               movq    %mm2,16(%edi)
-               movq    %mm3,24(%edi)
-               movq    %mm4,32(%edi)
-               movq    %mm5,40(%edi)
-               movq    %mm6,48(%edi)
-               movq    %mm7,56(%edi)
-               addl    $64,%edi
-               testl   %ecx,%ecx
-               jnz     1b
-
-               frstor  0(%esp)
-               addl    $108,%esp
-
-               popl    %ebx
-               popl    %ecx
-               popl    %edi
-               popl    %esi
-               ret
-
-               .p2align 4,0x90
-dozero6:
-               pushl   %esi
-               pushl   %edi
-               pushl   %ecx
-               pushl   %ebx
-
-               movl    4+16(%esp),%edi
-               movl    8+16(%esp),%ecx
-               movl    $16,%eax
-               .p2align 4,0x90
-1:
-               subl    %eax,%ecx
-               movq    %mm0,(%edi)
-               movq    %mm1,8(%edi)
-               addl    %eax,%edi
-               testl   %ecx,%ecx
-               jnz     1b
-
-               popl    %ebx
-               popl    %ecx
-               popl    %edi
-               popl    %esi
-               ret
-
-               .p2align 4,0x90
-dozero7:
-               pushl   %esi
-               pushl   %edi
-               pushl   %ecx
-               pushl   %ebx
-
-               movl    4+16(%esp),%edi
-               movl    8+16(%esp),%ecx
-               movl    $32,%eax
-               .p2align 4,0x90
-1:
-               subl    %eax,%ecx
-               movntdq %xmm0,(%edi)
-               movntdq %xmm1,16(%edi)
-               addl    %eax,%edi
-               testl   %ecx,%ecx
-               jnz     1b
-               sfence
-
-               popl    %ebx
-               popl    %ecx
-               popl    %edi
-               popl    %esi
-               ret
-
-               .p2align 4,0x90
-fpcleanup:
-               fninit
-               ret
-
index 7731402..1055e4f 100644 (file)
@@ -64,6 +64,7 @@ main(int ac, char **av)
     bzero(buf, bytes * 2);
 
     test_using("bzero", buf, bytes, (void *)bzero);
+#if 0
     test_using("dozero1", buf, bytes, dozero1);
     test_using("dozero2", buf, bytes, dozero2);
     test_using("dozero3", buf, bytes, dozero3);
@@ -71,6 +72,7 @@ main(int ac, char **av)
     test_using("dozero5", buf, bytes, dozero5);
     test_using("dozero6", buf, bytes, dozero6);
     test_using("dozero7", buf, bytes, dozero7);
+#endif
     return(0);
 }
 
@@ -91,7 +93,9 @@ test_using(const char *ctl, char *buf, int bytes, void (*zerof)(void *d, size_t
     for (i = loops - 1; i >= 0; --i) {
        zerof(buf, bytes);
     }
+#if 0
     fpcleanup();
+#endif
     stop_timing(loops, ctl);
     us = get_timing();
     printf("%s %d %5.2f MBytes/sec\n", ctl, bytes,