From 8120f5e2a46e669c06a7afdd7de60fa6d6996f9d Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Fri, 4 Jan 2013 14:24:12 -0800 Subject: [PATCH] libc - Add poor man's cache coloring optimization to nmalloc module. * A series of large allocations in excess of 32KB will be offset by 4K from each other. This fixes performance issues on SandyBridge and later CPUs related to large matrix operations. This eats an extra 4K of VM for such allocations but does not eat any additional real memory. * Greatly improves large FP matrix benchmarks. Real-world effects are more questionable. * The SandyBridge and later CPUs use a virtually indexed, physically tagged L1 cache, and tend to be sensitive to substantially different memory addresses winding up on the same cache line. Matrix operations (primarily benchmarks) can cause these sorts of effects. Reported-by: alexh --- lib/libc/stdlib/nmalloc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/lib/libc/stdlib/nmalloc.c b/lib/libc/stdlib/nmalloc.c index 6c98d907f6..b1a6f29e6e 100644 --- a/lib/libc/stdlib/nmalloc.c +++ b/lib/libc/stdlib/nmalloc.c @@ -821,7 +821,15 @@ _slaballoc(size_t size, int flags) bigalloc_t big; bigalloc_t *bigp; + /* + * Page-align and cache-color in case of virtually indexed + * physically tagged L1 caches (aka SandyBridge). No sweat + * otherwise, so just do it. + */ size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK; + if ((size & 8191) == 0) + size += 4096; + chunk = _vmem_alloc(size, PAGE_SIZE, flags); if (chunk == NULL) return(NULL); -- 2.41.0