From: Sepherosa Ziehau Date: Mon, 8 Oct 2012 12:14:56 +0000 (+0800) Subject: kmalloc: Use 'fls' to round up the size to the nearest power of 2 X-Git-Tag: v3.2.0~4 X-Git-Url: http://gitweb.dragonflybsd.org/dragonfly.git/commitdiff_plain/1e57f8673c953ace8a25ada0a39ab83008646ce3 kmalloc: Use 'fls' to round up the size to the nearest power of 2 In tests conducted on Intel i3, i7 and xeon-e3 in x86_64 mode, the fls version is on average 3 times faster than the simple loop version. Submitted-by: vsrinivas@ Also, the M_POWEROF2 flag is now used to round the size up to the nearest power of 2, instead of a separate function (was kmalloc_powerof2) Suggested-by: sjg@, vsrinivas@ --- diff --git a/sys/kern/kern_slaballoc.c b/sys/kern/kern_slaballoc.c index 2f3a853..0edfdc4 100644 --- a/sys/kern/kern_slaballoc.c +++ b/sys/kern/kern_slaballoc.c @@ -492,6 +492,22 @@ slab_record_source(SLZone *z, const char *file, int line) #endif +static __inline unsigned long +powerof2_size(unsigned long size) +{ + int i, wt; + + if (size == 0) + return 0; + + i = flsl(size); + wt = (size & ~(1 << (i - 1))); + if (!wt) + --i; + + return (1UL << i); +} + /* * kmalloc() (SLAB ALLOCATOR) * @@ -505,6 +521,7 @@ slab_record_source(SLZone *z, const char *file, int line) * M_ZERO - zero the returned memory. * M_USE_RESERVE - allow greater drawdown of the free list * M_USE_INTERRUPT_RESERVE - allow the freelist to be exhausted + * M_POWEROF2 - roundup size to the nearest power of 2 * * MPSAFE */ @@ -545,6 +562,9 @@ kmalloc(unsigned long size, struct malloc_type *type, int flags) } ++type->ks_calls; + if (flags & M_POWEROF2) + size = powerof2_size(size); + /* * Handle the case where the limit is reached. Panic if we can't return * NULL. 
The original malloc code looped, but this tended to @@ -1567,20 +1587,10 @@ kmem_slab_free(void *ptr, vm_size_t size) } void * -kmalloc_powerof2(unsigned long size_alloc, struct malloc_type *type, int flags) -{ - unsigned long size; - - for (size = 1; size < size_alloc; size <<= 1) - ; /* EMPTY */ - return kmalloc(size, type, flags); -} - -void * kmalloc_cachealign(unsigned long size_alloc, struct malloc_type *type, int flags) { if (size_alloc < __VM_CACHELINE_SIZE) size_alloc = __VM_CACHELINE_SIZE; - return kmalloc_powerof2(size_alloc, type, flags); + return kmalloc(size_alloc, type, flags | M_POWEROF2); } diff --git a/sys/platform/pc32/i386/busdma_machdep.c b/sys/platform/pc32/i386/busdma_machdep.c index ae451fb..f9dd80e 100644 --- a/sys/platform/pc32/i386/busdma_machdep.c +++ b/sys/platform/pc32/i386/busdma_machdep.c @@ -574,7 +574,8 @@ bus_dmamem_alloc(bus_dma_tag_t dmat, void **vaddr, int flags, maxsize = check_kmalloc(dmat, *vaddr, 0); if (maxsize) { kfree(*vaddr, M_DEVBUF); - *vaddr = kmalloc_powerof2(maxsize, M_DEVBUF, mflags); + *vaddr = kmalloc(maxsize, M_DEVBUF, + mflags | M_POWEROF2); check_kmalloc(dmat, *vaddr, 1); } } else { diff --git a/sys/platform/pc64/x86_64/busdma_machdep.c b/sys/platform/pc64/x86_64/busdma_machdep.c index 3a68b42..00304e2 100644 --- a/sys/platform/pc64/x86_64/busdma_machdep.c +++ b/sys/platform/pc64/x86_64/busdma_machdep.c @@ -574,7 +574,8 @@ bus_dmamem_alloc(bus_dma_tag_t dmat, void **vaddr, int flags, maxsize = check_kmalloc(dmat, *vaddr, 0); if (maxsize) { kfree(*vaddr, M_DEVBUF); - *vaddr = kmalloc_powerof2(maxsize, M_DEVBUF, mflags); + *vaddr = kmalloc(maxsize, M_DEVBUF, + mflags | M_POWEROF2); check_kmalloc(dmat, *vaddr, 1); } } else { diff --git a/sys/sys/malloc.h b/sys/sys/malloc.h index 520bb64..080e35a 100644 --- a/sys/sys/malloc.h +++ b/sys/sys/malloc.h @@ -63,6 +63,7 @@ #define M_PASSIVE_ZERO 0x0800 /* (internal to the slab code only) */ #define M_USE_INTERRUPT_RESERVE \ 0x1000 /* can exhaust free list entirely */ 
+#define M_POWEROF2 0x2000 /* roundup size to the nearest power of 2 */ /* * M_NOWAIT has to be a set of flags for equivalence to prior use. @@ -204,8 +205,6 @@ char *kstrdup (const char *, struct malloc_type *); #define kstrdup_debug(str, type, file, line) \ kstrdup(str, type) #endif -void *kmalloc_powerof2 (unsigned long size, struct malloc_type *type, - int flags); void *kmalloc_cachealign (unsigned long size, struct malloc_type *type, int flags); void kfree (void *addr, struct malloc_type *type);