From 7141674d65b7496763b13fff2f6448b667df2b48 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Fri, 15 Apr 2011 08:19:37 -0700 Subject: [PATCH] tests - Adjustments to memcpy/memzero test * Remove assembly from these tests so they compile on x86-64. * Clean up some timing reporting issues. --- test/sysperf/Makefile | 8 +- test/sysperf/blib.c | 8 +- test/sysperf/memcpy.S | 285 ----------------------------------------- test/sysperf/memcpy.c | 6 + test/sysperf/memzero.S | 205 ----------------------------- test/sysperf/memzero.c | 4 + 6 files changed, 17 insertions(+), 499 deletions(-) delete mode 100644 test/sysperf/memcpy.S delete mode 100644 test/sysperf/memzero.S diff --git a/test/sysperf/Makefile b/test/sysperf/Makefile index 485f4e8291..34cbb78fd3 100644 --- a/test/sysperf/Makefile +++ b/test/sysperf/Makefile @@ -112,11 +112,11 @@ all: $(TARGETS) /tmp/exec2: exec1.c blib.c $(CC) $(CFLAGS) exec1.c blib.c -o /tmp/exec2 -/tmp/mem1: memcpy.c memcpy.S blib.c - $(CC) $(CFLAGS) memcpy.c memcpy.S blib.c -o /tmp/mem1 +/tmp/mem1: memcpy.c blib.c + $(CC) $(CFLAGS) memcpy.c blib.c -o /tmp/mem1 -/tmp/mem2: memzero.c memzero.S blib.c - $(CC) $(CFLAGS) memzero.c memzero.S blib.c -o /tmp/mem2 +/tmp/mem2: memzero.c blib.c + $(CC) $(CFLAGS) memzero.c blib.c -o /tmp/mem2 /tmp/read1: read1.c blib.c $(CC) $(CFLAGS) read1.c blib.c -o /tmp/read1 diff --git a/test/sysperf/blib.c b/test/sysperf/blib.c index c668a7995b..99ff2824ba 100644 --- a/test/sysperf/blib.c +++ b/test/sysperf/blib.c @@ -14,6 +14,7 @@ static struct timeval tv1; static struct timeval tv2; +static long long last_us; void start_timing(void) @@ -29,6 +30,7 @@ stop_timing(long long count, const char *ctl, ...) gettimeofday(&tv2, NULL); us = (tv2.tv_usec - tv1.tv_usec) + (tv2.tv_sec - tv1.tv_sec) * 1000000LL; + last_us = us; if (ctl == NULL) /* dummy call to pre-cache */ return(us > 1000000); @@ -67,11 +69,7 @@ stop_timing2(long long count, long long us, const char *ctl, ...) long long get_timing(void) { - long long us; - - gettimeofday(&tv2, NULL); - us = (tv2.tv_usec - tv1.tv_usec) + (tv2.tv_sec - tv1.tv_sec) * 1000000LL; - return(us); + return (last_us); } void diff --git a/test/sysperf/memcpy.S b/test/sysperf/memcpy.S deleted file mode 100644 index bdc621c7a1..0000000000 --- a/test/sysperf/memcpy.S +++ /dev/null @@ -1,285 +0,0 @@ - - /* - * memcpy.S - * - * x86_64: MOVNTQ vs MOVQ, MOVNTDQ vs MOVDQ[A/U], PREFETCH[x] - * - * NT stands for 'non-temportal', which basically means - * 'bypass L1 cache on write'. Write bandwidth is - * effectively reduced to the L2 cache bandwidth but - * the L1 cache will not be wiped out by the copy. - * - * DO NOT MIX 'nt' and standard writes! Your performance - * will go poof. - * - * PREFETCH[NTA,T0,T1,T2] - * - * These instructions prefetch a cache line (typically - * 128 bytes). 'NT' means 'non-temporal', which bypasses - * the L1 cache if the data is not already in the L1 - * cache. HOWEVER, using PREFETCHNT can put a slow memory - * op in the cpu's memory request queue if a L1 or L2 - * miss occurs, and it can stall an L1-cache-hit access - * for a small but noticeable period of time, so it is - * a good idea not to put a memory op just after a - * prefetchnta instruction. - * - * You can get better L2 bandwidth using prefetchnt but - * it will not be much more then prefetcht0 and - * 'prefetcht0' will give you better cache-miss - * bandwidth. - * - * The prefetch has to be done far enough ahead to do - * some good, but it only has a significant effect when - * it is able to move date from L2 to L1. Prefetching - * from main memory does not have a significant effect - * durign a copy or zeroing operation because main - * memory bandwidth is already saturated. - * - * $DragonFly: src/test/sysperf/memcpy.S,v 1.1 2004/04/29 16:14:53 dillon Exp $ - */ - .text - .globl docopy1 - .globl docopy2 - .globl docopy3 - .globl docopy4 - .globl docopy5 - .globl docopy6 - .globl docopy7 - .globl fpcleanup - - .p2align 4,0x90 -docopy1: - pushl %esi - pushl %edi - pushl %ecx - pushl %ebx - - movl 4+16(%esp),%esi - movl 8+16(%esp),%edi - movl 12+16(%esp),%ecx - shrl $2,%ecx - cld - rep - movsl - popl %ebx - popl %ecx - popl %edi - popl %esi - ret - - .p2align 4,0x90 -docopy2: - pushl %esi - pushl %edi - pushl %ecx - pushl %ebx - - movl 4+16(%esp),%esi - movl 8+16(%esp),%edi - movl 12+16(%esp),%ecx - addl %ecx,%esi - addl %ecx,%edi - shrl $2,%ecx - std - rep - movsl - popl %ebx - popl %ecx - popl %edi - popl %esi - ret - - .p2align 4,0x90 -docopy3: - pushl %esi - pushl %edi - pushl %ecx - pushl %ebx - - movl 4+16(%esp),%esi - movl 8+16(%esp),%edi - movl 12+16(%esp),%ecx - - .p2align 4,0x90 -1: - movl (%esi),%eax - movl 4(%esi),%ebx - movl 8(%esi),%edx - movl %eax,(%edi) - movl 12(%esi),%eax - movl %ebx,4(%edi) - movl 16(%esi),%ebx - movl %edx,8(%edi) - movl 20(%esi),%edx - movl %eax,12(%edi) - movl 24(%esi),%eax - movl %ebx,16(%edi) - movl 28(%esi),%ebx - movl %edx,20(%edi) - prefetcht0 96(%esi) - subl $32,%ecx - movl %eax,24(%edi) - addl $32,%esi - movl %ebx,28(%edi) - addl $32,%edi - - testl %ecx,%ecx - jnz 1b - - popl %ebx - popl %ecx - popl %edi - popl %esi - ret - - .p2align 4,0x90 -docopy4: - pushl %esi - pushl %edi - pushl %ecx - pushl %ebx - - movl 4+16(%esp),%esi - movl 8+16(%esp),%edi - movl 12+16(%esp),%ecx - - .p2align 4,0x90 -1: - movl (%esi),%eax - movl 4(%esi),%ebx - addl $8,%esi - prefetcht0 64(%esi) - subl $8,%ecx - movl %eax,(%edi) - movl %ebx,4(%edi) - addl $8,%edi - testl %ecx,%ecx - jnz 1b - - popl %ebx - popl %ecx - popl %edi - popl %esi - ret - - .p2align 4,0x90 -docopy5: - pushl %esi - pushl %edi - pushl %ecx - pushl %ebx - - movl 4+16(%esp),%esi - movl 8+16(%esp),%edi - movl 12+16(%esp),%ecx - - .p2align 4,0x90 -1: - movq (%esi),%mm0 - movq 8(%esi),%mm1 - movq 16(%esi),%mm2 - movq 24(%esi),%mm3 - movq 32(%esi),%mm4 - movq 40(%esi),%mm5 - movq 48(%esi),%mm6 - movq 56(%esi),%mm7 - prefetchnta 128(%esi) - subl $64,%ecx - addl $64,%esi - movq %mm0,(%edi) - movq %mm1,8(%edi) - movq %mm2,16(%edi) - movq %mm3,24(%edi) - movq %mm4,32(%edi) - movq %mm5,40(%edi) - movq %mm6,48(%edi) - movq %mm7,56(%edi) - addl $64,%edi - testl %ecx,%ecx - jnz 1b - - popl %ebx - popl %ecx - popl %edi - popl %esi - ret - - .p2align 4,0x90 -docopy6: - pushl %esi - pushl %edi - pushl %ecx - pushl %ebx - - movl 4+16(%esp),%esi - movl 8+16(%esp),%edi - movl 12+16(%esp),%ecx - movl $16,%eax - - .p2align 4,0x90 -1: - prefetcht0 96(%esi) - subl %eax,%ecx - movq (%esi),%mm0 - movq 8(%esi),%mm1 - addl %eax,%esi - movntq %mm0,(%edi) - movntq %mm1,8(%edi) - addl %eax,%edi - testl %ecx,%ecx - jnz 1b - - popl %ebx - popl %ecx - popl %edi - popl %esi - ret - - .p2align 4,0x90 -docopy7: - pushl %esi - pushl %edi - pushl %ecx - pushl %ebx - - movl 4+16(%esp),%esi - movl 8+16(%esp),%edi - movl 12+16(%esp),%ecx - movl $128,%eax - - .p2align 4,0x90 -1: - movdqa (%esi),%xmm0 - movdqa 16(%esi),%xmm1 - movdqa 32(%esi),%xmm2 - movdqa 48(%esi),%xmm3 - movdqa 64(%esi),%xmm4 - movdqa 80(%esi),%xmm5 - movdqa 96(%esi),%xmm6 - movdqa 112(%esi),%xmm7 - subl %eax,%ecx - addl %eax,%esi - movntdq %xmm0,(%edi) - movntdq %xmm1,16(%edi) - movntdq %xmm2,32(%edi) - movntdq %xmm3,48(%edi) - movntdq %xmm4,64(%edi) - movntdq %xmm5,80(%edi) - movntdq %xmm6,96(%edi) - movntdq %xmm7,112(%edi) - addl %eax,%edi - testl %ecx,%ecx - jnz 1b - - popl %ebx - popl %ecx - popl %edi - popl %esi - ret - - .p2align 4,0x90 -fpcleanup: - fninit - ret - diff --git a/test/sysperf/memcpy.c b/test/sysperf/memcpy.c index 997bc9ab04..f71dc63156 100644 --- a/test/sysperf/memcpy.c +++ b/test/sysperf/memcpy.c @@ -10,6 +10,7 @@ int glob[16384]; void test_using(const char *ctl, char *buf, int bytes, void (*copyf)(const void *s1, void *d, size_t bytes)); +#if 0 extern void docopy1(const void *s, void *d, size_t bytes); extern void docopy2(const void *s, void *d, size_t bytes); extern void docopy3(const void *s, void *d, size_t bytes); @@ -18,6 +19,7 @@ extern void docopy5(const void *s, void *d, size_t bytes); extern void docopy6(const void *s, void *d, size_t bytes); extern void docopy7(const void *s, void *d, size_t bytes); extern void fpcleanup(void); +#endif int main(int ac, char **av) @@ -63,6 +65,7 @@ main(int ac, char **av) bzero(buf, bytes * 2); test_using("bcopy", buf, bytes, bcopy); +#if 0 test_using("docopy1", buf, bytes, docopy1); test_using("docopy2", buf, bytes, docopy2); test_using("docopy3", buf, bytes, docopy3); @@ -70,6 +73,7 @@ main(int ac, char **av) test_using("docopy5", buf, bytes, docopy5); test_using("docopy6", buf, bytes, docopy6); test_using("docopy7", buf, bytes, docopy7); +#endif return(0); } @@ -90,7 +94,9 @@ test_using(const char *ctl, char *buf, int bytes, void (*copyf)(const void *s1, for (i = loops - 1; i >= 0; --i) { copyf(buf, buf + bytes, bytes); } +#if 0 fpcleanup(); +#endif stop_timing(loops, ctl); us = get_timing(); printf("%s %d %5.2f MBytes/sec\n", ctl, bytes, diff --git a/test/sysperf/memzero.S b/test/sysperf/memzero.S deleted file mode 100644 index e70923dab7..0000000000 --- a/test/sysperf/memzero.S +++ /dev/null @@ -1,205 +0,0 @@ - - /* - * memcpy.S - * - * $DragonFly: src/test/sysperf/memzero.S,v 1.1 2004/04/29 16:14:53 dillon Exp $ - */ - .text - .globl dozero1 - .globl dozero2 - .globl dozero3 - .globl dozero4 - .globl dozero5 - .globl dozero6 - .globl dozero7 - .globl fpcleanup - - .p2align 4,0x90 -dozero1: - pushl %esi - pushl %edi - pushl %ecx - pushl %ebx - - movl 4+16(%esp),%edi - movl 8+16(%esp),%ecx - shrl $2,%ecx - subl %eax,%eax - cld - rep - stosl - popl %ebx - popl %ecx - popl %edi - popl %esi - ret - - .p2align 4,0x90 -dozero2: - pushl %esi - pushl %edi - pushl %ecx - pushl %ebx - - movl 4+16(%esp),%edi - movl 8+16(%esp),%ecx - addl %ecx,%esi - addl %ecx,%edi - shrl $2,%ecx - subl %eax,%eax - std - rep - stosl - popl %ebx - popl %ecx - popl %edi - popl %esi - ret - - .p2align 4,0x90 -dozero3: - pushl %esi - pushl %edi - pushl %ecx - pushl %ebx - - movl 4+16(%esp),%edi - movl 8+16(%esp),%ecx - movl $8,%edx - subl %eax,%eax - .p2align 4,0x90 -1: - subl %edx,%ecx - movl %eax,(%edi) - movl %eax,4(%edi) - addl %edx,%edi - testl %ecx,%ecx - jnz 1b - - popl %ebx - popl %ecx - popl %edi - popl %esi - ret - - .p2align 4,0x90 -dozero4: - pushl %esi - pushl %edi - pushl %ecx - pushl %ebx - - movl 4+16(%esp),%edi - movl 8+16(%esp),%ecx - subl %eax,%eax - .p2align 4,0x90 -1: - subl $16,%ecx - movnti %eax,0(%edi) - movnti %eax,4(%edi) - movnti %eax,8(%edi) - movnti %eax,12(%edi) - addl $16,%edi - testl %ecx,%ecx - jnz 1b - - popl %ebx - popl %ecx - popl %edi - popl %esi - ret - - .p2align 4,0x90 -dozero5: - pushl %esi - pushl %edi - pushl %ecx - pushl %ebx - - movl 4+16(%esp),%edi - movl 8+16(%esp),%ecx - - subl $108,%esp - fnsave 0(%esp) - fninit - fldz - .p2align 4,0x90 -1: - subl $64,%ecx - movq %mm0,(%edi) - movq %mm1,8(%edi) - movq %mm2,16(%edi) - movq %mm3,24(%edi) - movq %mm4,32(%edi) - movq %mm5,40(%edi) - movq %mm6,48(%edi) - movq %mm7,56(%edi) - addl $64,%edi - testl %ecx,%ecx - jnz 1b - - frstor 0(%esp) - addl $108,%esp - - popl %ebx - popl %ecx - popl %edi - popl %esi - ret - - .p2align 4,0x90 -dozero6: - pushl %esi - pushl %edi - pushl %ecx - pushl %ebx - - movl 4+16(%esp),%edi - movl 8+16(%esp),%ecx - movl $16,%eax - .p2align 4,0x90 -1: - subl %eax,%ecx - movq %mm0,(%edi) - movq %mm1,8(%edi) - addl %eax,%edi - testl %ecx,%ecx - jnz 1b - - popl %ebx - popl %ecx - popl %edi - popl %esi - ret - - .p2align 4,0x90 -dozero7: - pushl %esi - pushl %edi - pushl %ecx - pushl %ebx - - movl 4+16(%esp),%edi - movl 8+16(%esp),%ecx - movl $32,%eax - .p2align 4,0x90 -1: - subl %eax,%ecx - movntdq %xmm0,(%edi) - movntdq %xmm1,16(%edi) - addl %eax,%edi - testl %ecx,%ecx - jnz 1b - sfence - - popl %ebx - popl %ecx - popl %edi - popl %esi - ret - - .p2align 4,0x90 -fpcleanup: - fninit - ret - diff --git a/test/sysperf/memzero.c b/test/sysperf/memzero.c index 7731402cfe..1055e4ff5c 100644 --- a/test/sysperf/memzero.c +++ b/test/sysperf/memzero.c @@ -64,6 +64,7 @@ main(int ac, char **av) bzero(buf, bytes * 2); test_using("bzero", buf, bytes, (void *)bzero); +#if 0 test_using("dozero1", buf, bytes, dozero1); test_using("dozero2", buf, bytes, dozero2); test_using("dozero3", buf, bytes, dozero3); @@ -71,6 +72,7 @@ main(int ac, char **av) test_using("dozero5", buf, bytes, dozero5); test_using("dozero6", buf, bytes, dozero6); test_using("dozero7", buf, bytes, dozero7); +#endif return(0); } @@ -91,7 +93,9 @@ test_using(const char *ctl, char *buf, int bytes, void (*zerof)(void *d, size_t for (i = loops - 1; i >= 0; --i) { zerof(buf, bytes); } +#if 0 fpcleanup(); +#endif stop_timing(loops, ctl); us = get_timing(); printf("%s %d %5.2f MBytes/sec\n", ctl, bytes, -- 2.41.0