/*
 * ALIST.C -	Bitmap allocator/deallocator, using a radix tree with hinting.
 *		Unlimited-size allocations, power-of-2 only, power-of-2
 *		aligned results only.
 *
 * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * This module has been adapted from the BLIST module, which was written
 * by Matthew Dillon many years ago.
 *
 * This module implements a general power-of-2 bitmap allocator/deallocator.
 * All allocations must be in powers of 2 and will return similarly aligned
 * results.  The module does not try to interpret the meaning of a 'block'
 * other than to return ALIST_BLOCK_NONE on an allocation failure.
 *
 * A maximum of 2 billion blocks is supported so, for example, if one block
 * represented 64 bytes a maximally sized ALIST would represent
 * 128 gigabytes.
 *
 * A radix tree is used to maintain the bitmap and is laid out in a manner
 * similar to the blist code.  Meta nodes use a radix of 16 and 2 bits per
 * block while leaf nodes use a radix of 32 and 1 bit per block (stored in
 * a 32 bit bitmap field).  Both meta and leaf nodes have a hint field.
 * This field gives us a hint as to the largest free contiguous range of
 * blocks under the node.  It may contain a value that is too high, but
 * will never contain a value that is too low.  When the radix tree is
 * searched, allocation failures in subtrees update the hint.
 *
 * The radix tree is laid out recursively using a linear array.  Each meta
 * node is immediately followed (laid out sequentially in memory) by
 * ALIST_META_RADIX lower-level nodes.  This is a recursive structure but one
 * that can be easily scanned through a very simple 'skip' calculation.  In
 * order to support large radixes, portions of the tree may reside outside our
 * memory allocation.  We handle this with an early-terminate optimization
 * in the meta-node.  The memory allocation is only large enough to cover
 * the number of blocks requested at creation time, even if it must be
 * encompassed by a larger root-node radix.
 *
 * This code can be compiled stand-alone for debugging.
 */
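
/*
 * A minimal worked sketch of the radix/skip layout described above (not
 * part of the original module; assumes ALIST_BMAP_RADIX == 32 and
 * ALIST_META_RADIX == 16, and the 1024-block size is illustrative only).
 * For 1024 blocks the creation loop settles on radix = 8192 and
 * skip = 272: the root meta node occupies array index 0, each child
 * subtree spans next_skip = 272 / 16 = 17 array slots, so the root's
 * children begin at indices 1, 18, 35, and so on.
 */
#if 0
static void
alist_layout_sketch(void)
{
	int radix = 32;		/* ALIST_BMAP_RADIX assumed */
	int skip = 0;
	int blocks = 1024;	/* hypothetical creation size */

	while (radix < blocks) {
		radix *= 16;	/* ALIST_META_RADIX assumed */
		skip = (skip + 1) * 16;
	}
	/* here radix == 8192, skip == 272, next_skip == 272 / 16 == 17 */
}
#endif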

#ifdef _KERNEL

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/alist.h>
#include <sys/malloc.h>

#include <vm/vm_object.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/vm_page.h>

#else

#ifndef ALIST_NO_DEBUG
#define ALIST_DEBUG
#endif

#include <sys/types.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <assert.h>

#define kmalloc(a,b,c)	malloc(a)
#define kfree(a,b)	free(a)
#define kprintf		printf
#define KKASSERT(exp)	assert(exp)
struct malloc_type;

typedef unsigned int u_daddr_t;

#include <sys/alist.h>

void panic(const char *ctl, ...);

#endif

/*
 * static support functions
 */
static daddr_t	alst_leaf_alloc(almeta_t *scan, daddr_t blk, int count);
static daddr_t	alst_meta_alloc(almeta_t *scan, daddr_t blk,
			daddr_t count, daddr_t radix, int skip);
static void	alst_leaf_free(almeta_t *scan, daddr_t relblk, int count);
static void	alst_meta_free(almeta_t *scan, daddr_t freeBlk, daddr_t count,
			daddr_t radix, int skip, daddr_t blk);
static daddr_t	alst_radix_init(almeta_t *scan, daddr_t radix,
			int skip, daddr_t count);
static void	alst_radix_print(almeta_t *scan, daddr_t blk,
			daddr_t radix, int skip, int tab);

/*
 * alist_create() -	create an alist capable of handling up to the
 *			specified number of blocks.
 *
 *			blocks must be greater than 0
 *
 *			The smallest alist consists of a single leaf node
 *			capable of managing ALIST_BMAP_RADIX blocks.
 */
alist_t
alist_create(daddr_t blocks, struct malloc_type *mtype)
{
	alist_t bl;
	int radix;
	int skip = 0;

	/*
	 * Calculate radix and skip field used for scanning.
	 */
	radix = ALIST_BMAP_RADIX;

	while (radix < blocks) {
		radix *= ALIST_META_RADIX;
		skip = (skip + 1) * ALIST_META_RADIX;
	}

	bl = kmalloc(sizeof(struct alist), mtype, M_WAITOK | M_ZERO);

	bl->bl_blocks = blocks;
	bl->bl_radix = radix;
	bl->bl_skip = skip;
	bl->bl_rootblks = 1 +
	    alst_radix_init(NULL, bl->bl_radix, bl->bl_skip, blocks);
	bl->bl_root = kmalloc(sizeof(almeta_t) * bl->bl_rootblks,
			      mtype, M_WAITOK);

#if defined(ALIST_DEBUG)
	kprintf(
		"ALIST representing %d blocks (%d MB of swap)"
		", requiring %dK (%d bytes) of ram\n",
		bl->bl_blocks,
		bl->bl_blocks * 4 / 1024,
		(bl->bl_rootblks * sizeof(almeta_t) + 1023) / 1024,
		(bl->bl_rootblks * sizeof(almeta_t))
	);
	kprintf("ALIST raw radix tree contains %d records\n", bl->bl_rootblks);
#endif
	alst_radix_init(bl->bl_root, bl->bl_radix, bl->bl_skip, blocks);

	return(bl);
}

void
alist_destroy(alist_t bl, struct malloc_type *mtype)
{
	kfree(bl->bl_root, mtype);
	kfree(bl, mtype);
}

/*
 * alist_alloc() -	reserve space in the block bitmap.  Return the base
 *			of a contiguous region or ALIST_BLOCK_NONE if space
 *			could not be allocated.
 */
daddr_t
alist_alloc(alist_t bl, daddr_t count)
{
	daddr_t blk = ALIST_BLOCK_NONE;

	/*
	 * Insist on a power-of-2 count: when count has a single bit set,
	 * count | (count - 1) fills in all the lower bits and equals
	 * (count << 1) - 1.
	 */
	KKASSERT((count | (count - 1)) == (count << 1) - 1);

	if (bl && count < bl->bl_radix) {
		if (bl->bl_radix == ALIST_BMAP_RADIX)
			blk = alst_leaf_alloc(bl->bl_root, 0, count);
		else
			blk = alst_meta_alloc(bl->bl_root, 0, count,
					      bl->bl_radix, bl->bl_skip);
		if (blk != ALIST_BLOCK_NONE)
			bl->bl_free -= count;
	}
	return(blk);
}
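
/*
 * A minimal stand-alone sketch of the power-of-2 assertion above (not
 * part of the original module).  Note the identity also holds for
 * count == 0, so callers are expected to pass a non-zero count.
 */
#if 0
static int
count_is_pow2_sketch(unsigned int count)
{
	/* e.g. count == 8: (8 | 7) == 15 == (8 << 1) - 1 */
	/* e.g. count == 6: (6 | 5) == 7, but (6 << 1) - 1 == 11 */
	return ((count | (count - 1)) == (count << 1) - 1);
}
#endif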

/*
 * alist_free() -	free up space in the block bitmap.  Return the base
 *			of a contiguous region.  Panic if an inconsistency
 *			is found.
 */
void
alist_free(alist_t bl, daddr_t blkno, daddr_t count)
{
	if (bl) {
		KKASSERT(blkno + count <= bl->bl_blocks);
		if (bl->bl_radix == ALIST_BMAP_RADIX)
			alst_leaf_free(bl->bl_root, blkno, count);
		else
			alst_meta_free(bl->bl_root, blkno, count,
				       bl->bl_radix, bl->bl_skip, 0);
		bl->bl_free += count;
	}
}
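
/*
 * A minimal usage sketch (not part of the original module, mirroring
 * the stand-alone test in main() below): a freshly created alist has
 * every block marked allocated, so the caller seeds it by freeing the
 * managed range before drawing power-of-2 allocations from it.  The
 * 1024-block size is illustrative only.
 */
#if 0
static void
alist_usage_sketch(struct malloc_type *mtype)
{
	alist_t bl;
	daddr_t blk;

	bl = alist_create(1024, mtype);	/* all blocks start allocated */
	alist_free(bl, 0, 1024);	/* seed the free space */

	blk = alist_alloc(bl, 16);	/* 16 blocks, 16-block aligned */
	if (blk != ALIST_BLOCK_NONE)
		alist_free(bl, blk, 16);
	alist_destroy(bl, mtype);
}
#endif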

/*
 * alist_print()  -	dump radix tree
 */
void
alist_print(alist_t bl)
{
	kprintf("ALIST {\n");
	alst_radix_print(bl->bl_root, 0, bl->bl_radix, bl->bl_skip, 4);
	kprintf("}\n");
}

/************************************************************************
 *			ALLOCATION SUPPORT FUNCTIONS			*
 ************************************************************************
 *
 *	These support functions do all the actual work.  They may seem
 *	rather longish, but that's because I've commented them up.  The
 *	actual code is straightforward.
 */

/*
 * alst_leaf_alloc() -	allocate at a leaf in the radix tree (a bitmap).
 *
 *	This is the core of the allocator and is optimized for the 1 block
 *	and the ALIST_BMAP_RADIX block allocation cases.  Other cases are
 *	somewhat slower.  The 1 block allocation case is log2 and extremely
 *	quick.
 */
static daddr_t
alst_leaf_alloc(almeta_t *scan, daddr_t blk, int count)
{
	u_daddr_t orig = scan->bm_bitmap;

	/*
	 * Optimize bitmap all-allocated case.  Also, count = 1
	 * case assumes at least 1 bit is free in the bitmap, so
	 * we have to take care of this case here.
	 */
	if (orig == 0) {
		scan->bm_bighint = 0;
		return(ALIST_BLOCK_NONE);
	}

	/*
	 * Optimized code to allocate one bit out of the bitmap
	 * (a binary search for the low-order free bit).
	 */
	if (count == 1) {
		u_daddr_t mask;
		int j = ALIST_BMAP_RADIX/2;
		int r = 0;

		mask = (u_daddr_t)-1 >> (ALIST_BMAP_RADIX/2);

		while (j) {
			if ((orig & mask) == 0) {
				r += j;
				orig >>= j;
			}
			j >>= 1;
			mask >>= j;
		}
		scan->bm_bitmap &= ~(1 << r);
		return(blk + r);
	}

	/*
	 * non-optimized code to allocate N bits out of the bitmap.
	 * The more bits, the faster the code runs.  It will run
	 * the slowest allocating 2 bits, but since there aren't any
	 * memory ops in the core loop (or shouldn't be, anyway),
	 * you probably won't notice the difference.
	 *
	 * Similar to the blist case, the alist code also requires
	 * allocations to be power-of-2 sized and aligned to the
	 * size of the allocation, which simplifies the algorithm.
	 */
	{
		int j;
		int n = ALIST_BMAP_RADIX - count;
		u_daddr_t mask;

		mask = (u_daddr_t)-1 >> n;

		for (j = 0; j <= n; j += count) {
			if ((orig & mask) == mask) {
				scan->bm_bitmap &= ~mask;
				return(blk + j);
			}
			mask = mask << count;
		}
	}

	/*
	 * We couldn't allocate count in this subtree, update bighint.
	 */
	scan->bm_bighint = count - 1;
	return(ALIST_BLOCK_NONE);
}
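
/*
 * A minimal stand-alone sketch of the aligned-window scan above (not
 * part of the original module).  Because allocations are power-of-2
 * sized and aligned, only windows at offsets 0, count, 2*count, ...
 * need testing, so a 32-bit leaf is scanned in at most 32/count steps.
 */
#if 0
static int
leaf_scan_sketch(unsigned int bitmap, int count)
{
	int j;

	for (j = 0; j + count <= 32; j += count) {
		unsigned int mask = (~0u >> (32 - count)) << j;

		if ((bitmap & mask) == mask)
			return (j);	/* whole window is free */
	}
	return (-1);			/* no aligned free window */
}
#endif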

/*
 * alst_meta_alloc() -	allocate at a meta in the radix tree.
 *
 *	Attempt to allocate at a meta node.  If we can't, we update
 *	bighint and return a failure.  Updating bighint optimizes future
 *	calls that hit this node.  We have to check for our collapse cases
 *	and we have a few optimizations strewn in as well.
 */
static daddr_t
alst_meta_alloc(almeta_t *scan, daddr_t blk, daddr_t count,
		daddr_t radix, int skip)
{
	int i;
	u_daddr_t mask;
	u_daddr_t pmask;
	int next_skip = ((u_int)skip / ALIST_META_RADIX);

	/*
	 * ALL-ALLOCATED special case
	 */
	if (scan->bm_bitmap == 0) {
		scan->bm_bighint = 0;
		return(ALIST_BLOCK_NONE);
	}

	radix /= ALIST_META_RADIX;

	/*
	 * Radix now represents each bitmap entry for this meta node.  If
	 * the number of blocks being allocated can be fully represented,
	 * we allocate directly out of this meta node.
	 *
	 * Meta node bitmaps use 2 bits per block.
	 *
	 *	00	ALL-ALLOCATED
	 *	01	PARTIALLY-FREE/PARTIALLY-ALLOCATED
	 *	10	(never set by this code)
	 *	11	ALL-FREE
	 */
	if (count >= radix) {
		int n = count / radix * 2;	/* number of bits */
		int j;

		mask = (u_daddr_t)-1 >> (ALIST_BMAP_RADIX - n);
		for (j = 0; j < ALIST_META_RADIX; j += n / 2) {
			if ((scan->bm_bitmap & mask) == mask) {
				scan->bm_bitmap &= ~mask;
				return(blk + j * radix);
			}
			mask <<= n;
		}
		if (scan->bm_bighint >= count)
			scan->bm_bighint = count >> 1;
		return(ALIST_BLOCK_NONE);
	}

	/*
	 * If not, we have to recurse.
	 */
	mask = 0x00000003;
	pmask = 0x00000001;
	for (i = 1; i <= skip; i += next_skip) {
		if (scan[i].bm_bighint == (daddr_t)-1) {
			/*
			 * Terminator
			 */
			break;
		}

		/*
		 * If the element is marked completely free (11), initialize
		 * the recursion level.
		 */
		if ((scan->bm_bitmap & mask) == mask) {
			scan[i].bm_bitmap = (u_daddr_t)-1;
			scan[i].bm_bighint = radix;
		}

		if ((scan->bm_bitmap & mask) == 0) {
			/*
			 * Object marked completely allocated, recursion
			 * contains garbage.  Skip it.
			 */
		} else if (count <= scan[i].bm_bighint) {
			/*
			 * count fits in object
			 */
			daddr_t r;

			if (next_skip == 1) {
				r = alst_leaf_alloc(&scan[i], blk, count);
			} else {
				r = alst_meta_alloc(&scan[i], blk, count,
						    radix, next_skip - 1);
			}
			if (r != ALIST_BLOCK_NONE) {
				if (scan[i].bm_bitmap == 0) {
					scan->bm_bitmap &= ~mask;
				} else {
					scan->bm_bitmap &= ~mask;
					scan->bm_bitmap |= pmask;
				}
				return(r);
			}
		}
		mask <<= 2;
		pmask <<= 2;
		blk += radix;
	}

	/*
	 * We couldn't allocate count in this subtree, update bighint.
	 */
	if (scan->bm_bighint >= count)
		scan->bm_bighint = count >> 1;
	return(ALIST_BLOCK_NONE);
}
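
/*
 * A minimal sketch of the 2-bit meta-node bookkeeping above (not part
 * of the original module): entry i of a meta node occupies bits 2i and
 * 2i+1 of bm_bitmap, so its full mask is 3 << (2*i) and its
 * partial-free (01) bit is 1 << (2*i).
 */
#if 0
static void
meta_mark_sketch(unsigned int *bitmap, int i, int nfree, int radix)
{
	unsigned int mask = 3u << (2 * i);	/* both bits of entry i */
	unsigned int pmask = 1u << (2 * i);	/* the 01 (partial) bit */

	*bitmap &= ~mask;			/* 00: all-allocated */
	if (nfree == radix)
		*bitmap |= mask;		/* 11: all-free */
	else if (nfree > 0)
		*bitmap |= pmask;		/* 01: partially free */
}
#endif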

/*
 * ALST_LEAF_FREE() - free allocated block from leaf bitmap
 */
static void
alst_leaf_free(almeta_t *scan, daddr_t blk, int count)
{
	/*
	 * free some data in this bitmap
	 *
	 * e.g.
	 *	0000111111111110000
	 *	    \_________/\__/
	 *	       count    n
	 */
	int n = blk & (ALIST_BMAP_RADIX - 1);
	u_daddr_t mask;

	mask = ((u_daddr_t)-1 << n) &
	    ((u_daddr_t)-1 >> (ALIST_BMAP_RADIX - count - n));

	if (scan->bm_bitmap & mask)
		panic("alst_leaf_free: freeing free block");
	scan->bm_bitmap |= mask;

	/*
	 * We could probably do a better job here.  We are required to make
	 * bighint at least as large as the biggest contiguous block of
	 * data.  If we just shoehorn it, a little extra overhead will
	 * be incurred on the next allocation (but only that one typically).
	 */
	scan->bm_bighint = ALIST_BMAP_RADIX;
}
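
/*
 * A minimal sketch of the free-mask construction above (not part of
 * the original module): for offset n = 4 and count = 11 within a
 * 32-bit leaf, (~0u << 4) & (~0u >> (32 - 11 - 4)) leaves ones in bit
 * positions 4..14, matching the 0000111111111110000 example.
 */
#if 0
static unsigned int
leaf_free_mask_sketch(int n, int count)
{
	return ((~0u << n) & (~0u >> (32 - count - n)));
}
#endif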

/*
 * ALST_META_FREE() - free allocated blocks from radix tree meta info
 *
 *	This support routine frees a range of blocks from the bitmap.
 *	The range must be entirely enclosed by this radix node.  If a
 *	meta node, we break the range down recursively to free blocks
 *	in subnodes (which means that this code can free an arbitrary
 *	range whereas the allocation code cannot allocate an arbitrary
 *	range).
 */
static void
alst_meta_free(almeta_t *scan, daddr_t freeBlk, daddr_t count,
	       daddr_t radix, int skip, daddr_t blk)
{
	int next_skip = ((u_int)skip / ALIST_META_RADIX);
	u_daddr_t mask;
	u_daddr_t pmask;
	int i;

	/*
	 * Break the free down into its components.  Because it is so easy
	 * to implement, frees are not limited to power-of-2 sizes.
	 *
	 * Each block in a meta-node bitmap takes two bits.
	 */
	radix /= ALIST_META_RADIX;

	i = (freeBlk - blk) / radix;
	blk += i * radix;
	mask = 0x00000003 << (i * 2);
	pmask = 0x00000001 << (i * 2);

	i = i * next_skip + 1;

	while (i <= skip && blk < freeBlk + count) {
		daddr_t v;

		v = blk + radix - freeBlk;
		if (v > count)
			v = count;

		if (scan->bm_bighint == (daddr_t)-1)
			panic("alst_meta_free: freeing unexpected range");

		if (freeBlk == blk && count >= radix) {
			/*
			 * All-free case, no need to update sub-tree
			 */
			scan->bm_bitmap |= mask;
			scan->bm_bighint = radix * ALIST_META_RADIX;/*XXX*/
		} else {
			/*
			 * If we were previously marked all-allocated, fix-up
			 * the next layer so we can recurse down into it.
			 */
			if ((scan->bm_bitmap & mask) == 0) {
				scan[i].bm_bitmap = (u_daddr_t)0;
				scan[i].bm_bighint = 0;
			}

			/*
			 * Recursion case
			 */
			if (next_skip == 1)
				alst_leaf_free(&scan[i], freeBlk, v);
			else
				alst_meta_free(&scan[i], freeBlk, v, radix,
					       next_skip - 1, blk);
			if (scan[i].bm_bitmap == (u_daddr_t)-1)
				scan->bm_bitmap |= mask;
			else
				scan->bm_bitmap |= pmask;
			if (scan->bm_bighint < scan[i].bm_bighint)
				scan->bm_bighint = scan[i].bm_bighint;
		}
		mask <<= 2;
		pmask <<= 2;
		count -= v;
		freeBlk += v;
		blk += radix;
		i += next_skip;
	}
}
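
/*
 * A minimal sketch of the range breakdown above (not part of the
 * original module): an arbitrary free range is clipped against
 * successive children of `radix' blocks each.  Freeing 31 blocks
 * starting at block 10 over radix-16 children, for example, proceeds
 * as chunks of 6, 16 and 9 blocks.
 */
#if 0
static void
meta_free_breakdown_sketch(int freeBlk, int count, int radix)
{
	int blk = (freeBlk / radix) * radix;	/* first child base */

	while (count > 0) {
		int v = blk + radix - freeBlk;	/* room left in child */

		if (v > count)
			v = count;
		/* a real tree recurses on (freeBlk, v) here */
		freeBlk += v;
		count -= v;
		blk += radix;
	}
}
#endif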

/*
 * ALST_RADIX_INIT() - initialize radix tree
 *
 *	Initialize our meta structures and bitmaps and calculate the exact
 *	amount of space required to manage 'count' blocks - this space may
 *	be considerably less than the calculated radix due to the large
 *	RADIX values we use.
 */
static daddr_t
alst_radix_init(almeta_t *scan, daddr_t radix, int skip, daddr_t count)
{
	int i;
	int next_skip;
	daddr_t memindex = 0;
	u_daddr_t pmask;

	/*
	 * Leaf node
	 */
	if (radix == ALIST_BMAP_RADIX) {
		if (scan) {
			scan->bm_bighint = 0;
			scan->bm_bitmap = 0;
		}
		return(memindex);
	}

	/*
	 * Meta node.  If allocating the entire object we can special
	 * case it.  However, we need to figure out how much memory
	 * is required to manage 'count' blocks, so we continue on anyway.
	 */
	if (scan) {
		scan->bm_bighint = 0;
		scan->bm_bitmap = 0;
	}

	radix /= ALIST_META_RADIX;
	next_skip = ((u_int)skip / ALIST_META_RADIX);
	pmask = 0x00000001;

	for (i = 1; i <= skip; i += next_skip) {
		if (count >= radix) {
			/*
			 * Allocate the entire object
			 */
			memindex = i + alst_radix_init(
			    ((scan) ? &scan[i] : NULL),
			    radix,
			    next_skip - 1,
			    radix
			);
			count -= radix;
			/* already marked as wholly allocated */
		} else if (count > 0) {
			/*
			 * Allocate a partial object
			 */
			memindex = i + alst_radix_init(
			    ((scan) ? &scan[i] : NULL),
			    radix,
			    next_skip - 1,
			    count
			);
			count = 0;

			/*
			 * Mark as partially allocated
			 */
			if (scan)
				scan->bm_bitmap |= pmask;
		} else {
			/*
			 * Add terminator and break out
			 */
			if (scan)
				scan[i].bm_bighint = (daddr_t)-1;
			/* already marked as wholly allocated */
			break;
		}
		pmask <<= 2;
	}
	if (memindex < i)
		memindex = i;
	return(memindex);
}
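
/*
 * A minimal sketch of the sizing pass above (not part of the original
 * module; the figures assume ALIST_BMAP_RADIX 32 and ALIST_META_RADIX
 * 16).  alist_create() first calls alst_radix_init(NULL, ...) purely
 * to count array slots.  For 1024 blocks under a radix-8192/skip-272
 * root, that works out to the root slot, two full radix-512 subtrees
 * of 17 slots each, and one terminator slot: roughly 36 almeta_t
 * records instead of the 273 a full radix-8192 tree would need.
 */
#if 0
static int
alist_sizing_sketch(void)
{
	return (1 + 2 * 17 + 1);	/* root + subtrees + terminator */
}
#endif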

static void
alst_radix_print(almeta_t *scan, daddr_t blk, daddr_t radix, int skip, int tab)
{
	int i;
	int next_skip;
	u_daddr_t mask;

	if (radix == ALIST_BMAP_RADIX) {
		kprintf("%*.*s(%04x,%d): bitmap %08x big=%d\n",
		    tab, tab, "", blk, radix,
		    scan->bm_bitmap, scan->bm_bighint);
		return;
	}

	if (scan->bm_bitmap == 0) {
		kprintf("%*.*s(%04x,%d) ALL ALLOCATED\n",
		    tab, tab, "", blk, radix);
		return;
	}
	if (scan->bm_bitmap == (u_daddr_t)-1) {
		kprintf("%*.*s(%04x,%d) ALL FREE\n",
		    tab, tab, "", blk, radix);
		return;
	}

	kprintf("%*.*s(%04x,%d): subtree (%d) bitmap=%08x big=%d {\n",
	    tab, tab, "", blk, radix, radix,
	    scan->bm_bitmap, scan->bm_bighint);

	radix /= ALIST_META_RADIX;
	next_skip = ((u_int)skip / ALIST_META_RADIX);
	tab += 4;
	mask = 0x00000003;

	for (i = 1; i <= skip; i += next_skip) {
		if (scan[i].bm_bighint == (daddr_t)-1) {
			kprintf("%*.*s(%04x,%d): Terminator\n",
			    tab, tab, "", blk, radix);
			break;
		}
		if ((scan->bm_bitmap & mask) == mask) {
			kprintf("%*.*s(%04x,%d): ALL FREE\n",
			    tab, tab, "", blk, radix);
		} else if ((scan->bm_bitmap & mask) == 0) {
			kprintf("%*.*s(%04x,%d): ALL ALLOCATED\n",
			    tab, tab, "", blk, radix);
		} else {
			alst_radix_print(&scan[i], blk, radix,
			    next_skip - 1, tab);
		}
		blk += radix;
		mask <<= 2;
	}
	tab -= 4;
	kprintf("%*.*s}\n", tab, tab, "");
}

#ifndef _KERNEL

/*
 * Stand-alone test harness.
 */
int
main(int ac, char **av)
{
	int size = 1024;
	int i;
	alist_t bl;

	for (i = 1; i < ac; ++i) {
		const char *ptr = av[i];
		if (*ptr != '-') {
			size = strtol(ptr, NULL, 0);
			continue;
		}
		ptr += 2;
		fprintf(stderr, "Bad option: %s\n", ptr - 2);
		exit(1);
	}
	bl = alist_create(size, NULL);
	alist_free(bl, 0, size);

	for (;;) {
		char buf[1024];
		daddr_t da = 0;
		daddr_t count = 0;

		kprintf("%d/%d/%d> ", bl->bl_free, size, bl->bl_radix);
		fflush(stdout);
		if (fgets(buf, sizeof(buf), stdin) == NULL)
			break;
		switch(buf[0]) {
		case 'p':
			alist_print(bl);
			break;
		case 'a':
			if (sscanf(buf + 1, "%d", &count) == 1) {
				daddr_t blk = alist_alloc(bl, count);
				kprintf("    R=%04x\n", blk);
			} else {
				kprintf("?\n");
			}
			break;
		case 'f':
			if (sscanf(buf + 1, "%x %d", &da, &count) == 2) {
				alist_free(bl, da, count);
			} else {
				kprintf("?\n");
			}
			break;
		default:
			kprintf("?\n");
			break;
		}
	}
	return(0);
}

/*
 * panic() - minimal stand-alone panic for the debug build
 */
void
panic(const char *ctl, ...)
{
	va_list va;

	va_start(va, ctl);
	vfprintf(stderr, ctl, va);
	fprintf(stderr, "\n");
	va_end(va);
	exit(1);
}

#endif