2 * Copyright (c) 2008 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.7 2008/03/19 20:18:17 dillon Exp $
/*
 * Forward declarations for the in-core allocation-hole tracking
 * helpers defined at the bottom of this file.
 */
42 static hammer_off_t hammer_find_hole(hammer_mount_t hmp,
43 hammer_holes_t holes, int bytes);
44 static void hammer_add_hole(hammer_mount_t hmp, hammer_holes_t holes,
45 hammer_off_t offset, int bytes);
48 * Allocate bytes from a zone
/*
 * NOTE(review): returns a zone-encoded blockmap offset for a freshly
 * allocated run of 'bytes' bytes (rounded to 8 bytes, <= HAMMER_BUFSIZE)
 * that never crosses a 16K buffer boundary; *errorp reflects the
 * underlying volume/buffer operations.  Serialized by blockmap_lock.
 */
51 hammer_blockmap_alloc(hammer_transaction_t trans, int zone,
52 int bytes, int *errorp)
54 hammer_volume_t root_volume;
55 hammer_blockmap_t rootmap;
56 struct hammer_blockmap_layer1 *layer1;
57 struct hammer_blockmap_layer2 *layer2;
58 hammer_buffer_t buffer1 = NULL;
59 hammer_buffer_t buffer2 = NULL;
60 hammer_buffer_t buffer3 = NULL;
61 hammer_off_t tmp_offset;
62 hammer_off_t next_offset;
63 hammer_off_t layer1_offset;
64 hammer_off_t layer2_offset;
65 hammer_off_t bigblock_offset;
/* Only blockmap-backed zones (B-tree and up) are legal here. */
70 KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
71 root_volume = hammer_get_root_volume(trans->hmp, errorp);
/*
 * Sanity-check the per-zone root blockmap: physical backing lives in
 * the raw-buffer zone while both iterators stay encoded in 'zone'.
 */
74 rootmap = &root_volume->ondisk->vol0_blockmap[zone];
75 KKASSERT(rootmap->phys_offset != 0);
76 KKASSERT(HAMMER_ZONE_DECODE(rootmap->phys_offset) ==
77 HAMMER_ZONE_RAW_BUFFER_INDEX);
78 KKASSERT(HAMMER_ZONE_DECODE(rootmap->alloc_offset) == zone);
79 KKASSERT(HAMMER_ZONE_DECODE(rootmap->next_offset) == zone);
82 * Deal with alignment and buffer-boundary issues.
84 * Be careful, certain primary alignments are used below to allocate
85 * new blockmap blocks.
/* Round the request up to the 8-byte allocation granularity. */
87 bytes = (bytes + 7) & ~7;
88 KKASSERT(bytes > 0 && bytes <= HAMMER_BUFSIZE);
/* All blockmap mutation is serialized under the per-mount lock. */
90 lockmgr(&trans->hmp->blockmap_lock, LK_EXCLUSIVE|LK_RETRY);
93 * Try to use a known-free hole, otherwise append.
95 next_offset = hammer_find_hole(trans->hmp, &trans->hmp->holes[zone],
97 if (next_offset == 0) {
98 next_offset = rootmap->next_offset;
106 * The allocation request may not cross a buffer boundary.
108 tmp_offset = next_offset + bytes - 1;
109 if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
/*
 * Record the skipped tail of the current 16K buffer as a
 * hole so smaller requests can reuse it later.
 */
110 skip_amount = HAMMER_BUFSIZE -
111 ((int)next_offset & HAMMER_BUFMASK);
112 hammer_add_hole(trans->hmp, &trans->hmp->holes[zone],
113 next_offset, skip_amount);
114 next_offset = tmp_offset & ~HAMMER_BUFMASK64;
118 * Dive layer 1. If we are starting a new layer 1 entry,
119 * allocate a layer 2 block for it.
121 layer1_offset = rootmap->phys_offset +
122 HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
123 layer1 = hammer_bread(trans->hmp, layer1_offset, errorp, &buffer1);
124 KKASSERT(*errorp == 0);
125 KKASSERT(next_offset <= rootmap->alloc_offset);
128 * Allocate layer2 backing store in layer1 if necessary. next_offset
129 * can skip to a bigblock boundary but alloc_offset is at least
130 * bigblock-aligned so that's ok.
132 if (next_offset == rootmap->alloc_offset &&
133 ((next_offset & HAMMER_BLOCKMAP_LAYER2_MASK) == 0 ||
134 layer1->phys_offset == HAMMER_BLOCKMAP_FREE)
136 KKASSERT((next_offset & HAMMER_BLOCKMAP_LAYER2_MASK) == 0);
137 hammer_modify_buffer(trans, buffer1, layer1, sizeof(*layer1));
138 bzero(layer1, sizeof(*layer1));
139 layer1->phys_offset =
140 hammer_freemap_alloc(trans, next_offset, errorp);
141 layer1->blocks_free = HAMMER_BLOCKMAP_RADIX2;
142 KKASSERT(*errorp == 0);
144 KKASSERT(layer1->phys_offset);
147 * If layer1 indicates no free blocks in layer2 and our alloc_offset
148 * is not in layer2, skip layer2 entirely.
150 if (layer1->blocks_free == 0 &&
151 ((next_offset ^ rootmap->alloc_offset) & ~HAMMER_BLOCKMAP_LAYER2_MASK) != 0) {
152 kprintf("blockmap skip1 %016llx\n", next_offset);
/* Advance to the next layer2 boundary, wrapping at the zone limit. */
153 next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2_MASK) &
154 ~HAMMER_BLOCKMAP_LAYER2_MASK;
155 if (next_offset >= trans->hmp->zone_limits[zone]) {
156 kprintf("blockmap wrap1\n");
157 next_offset = HAMMER_ZONE_ENCODE(zone, 0);
158 if (++loops == 2) { /* XXX poor-man's */
168 * Dive layer 2, each entry represents a large-block.
170 layer2_offset = layer1->phys_offset +
171 HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
172 layer2 = hammer_bread(trans->hmp, layer2_offset, errorp, &buffer2);
173 KKASSERT(*errorp == 0);
175 if ((next_offset & HAMMER_LARGEBLOCK_MASK64) == 0) {
177 * We are at the beginning of a new bigblock
179 if (next_offset == rootmap->alloc_offset ||
180 layer2->u.phys_offset == HAMMER_BLOCKMAP_FREE) {
182 * Allocate the bigblock in layer2 if diving into
183 * uninitialized space or if the block was previously
/* Charge the new big-block against layer1's free count. */
186 hammer_modify_buffer(trans, buffer1,
187 layer1, sizeof(*layer1));
188 KKASSERT(layer1->blocks_free);
189 --layer1->blocks_free;
190 hammer_modify_buffer(trans, buffer2,
191 layer2, sizeof(*layer2));
192 bzero(layer2, sizeof(*layer2));
193 layer2->u.phys_offset =
194 hammer_freemap_alloc(trans, next_offset,
196 layer2->bytes_free = HAMMER_LARGEBLOCK_SIZE;
197 KKASSERT(*errorp == 0);
198 } else if (layer2->bytes_free != HAMMER_LARGEBLOCK_SIZE) {
200 * We have encountered a block that is already
201 * partially allocated. We must skip this block.
203 kprintf("blockmap skip2 %016llx %d\n",
204 next_offset, layer2->bytes_free);
205 next_offset += HAMMER_LARGEBLOCK_SIZE;
206 if (next_offset >= trans->hmp->zone_limits[zone]) {
207 next_offset = HAMMER_ZONE_ENCODE(zone, 0);
208 kprintf("blockmap wrap2\n");
209 if (++loops == 2) { /* XXX poor-man's */
219 * We are appending within a bigblock.
221 KKASSERT(layer2->u.phys_offset != HAMMER_BLOCKMAP_FREE);
/* Debit the allocation from the big-block's free byte count. */
224 hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
225 layer2->bytes_free -= bytes;
226 KKASSERT(layer2->bytes_free >= 0);
229 * If the buffer was completely free we do not have to read it from
230 * disk, call hammer_bnew() to instantiate it.
232 if ((next_offset & HAMMER_BUFMASK) == 0) {
233 bigblock_offset = layer2->u.phys_offset +
234 (next_offset & HAMMER_LARGEBLOCK_MASK64);
235 hammer_bnew(trans->hmp, bigblock_offset, errorp, &buffer3);
239 * Adjust our iterator and alloc_offset. The layer1 and layer2
240 * space beyond alloc_offset is uninitialized. alloc_offset must
241 * be big-block aligned.
/* Hole-satisfied allocations do not advance the append iterators. */
243 if (used_hole == 0) {
244 hammer_modify_volume(trans, root_volume,
245 rootmap, sizeof(*rootmap));
246 rootmap->next_offset = next_offset + bytes;
247 if (rootmap->alloc_offset < rootmap->next_offset) {
248 rootmap->alloc_offset =
249 (rootmap->next_offset + HAMMER_LARGEBLOCK_MASK) &
250 ~HAMMER_LARGEBLOCK_MASK64;
/* Release held buffers/volume and drop the blockmap lock. */
255 hammer_rel_buffer(buffer1, 0);
257 hammer_rel_buffer(buffer2, 0);
259 hammer_rel_buffer(buffer3, 0);
260 hammer_rel_volume(root_volume, 0);
261 lockmgr(&trans->hmp->blockmap_lock, LK_RELEASE);
266 * Free (offset,bytes) in a zone
/*
 * NOTE(review): bmap_off is a zone-encoded offset previously handed out
 * by hammer_blockmap_alloc(); bytes is rounded to the same 8-byte
 * granularity so the per-big-block byte accounting balances.
 */
269 hammer_blockmap_free(hammer_transaction_t trans,
270 hammer_off_t bmap_off, int bytes)
272 hammer_volume_t root_volume;
273 hammer_blockmap_t rootmap;
274 struct hammer_blockmap_layer1 *layer1;
275 struct hammer_blockmap_layer2 *layer2;
276 hammer_buffer_t buffer1 = NULL;
277 hammer_buffer_t buffer2 = NULL;
278 hammer_off_t layer1_offset;
279 hammer_off_t layer2_offset;
/* Match the allocator's rounding before adjusting free counts. */
283 bytes = (bytes + 7) & ~7;
284 KKASSERT(bytes <= HAMMER_BUFSIZE);
285 zone = HAMMER_ZONE_DECODE(bmap_off);
286 KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
287 root_volume = hammer_get_root_volume(trans->hmp, &error);
291 lockmgr(&trans->hmp->blockmap_lock, LK_EXCLUSIVE|LK_RETRY);
293 rootmap = &root_volume->ondisk->vol0_blockmap[zone];
294 KKASSERT(rootmap->phys_offset != 0);
295 KKASSERT(HAMMER_ZONE_DECODE(rootmap->phys_offset) ==
296 HAMMER_ZONE_RAW_BUFFER_INDEX);
297 KKASSERT(HAMMER_ZONE_DECODE(rootmap->alloc_offset) == zone);
/* The freed range must not cross a large-block boundary. */
298 KKASSERT(((bmap_off ^ (bmap_off + (bytes - 1))) &
299 ~HAMMER_LARGEBLOCK_MASK64) == 0);
301 if (bmap_off >= rootmap->alloc_offset) {
302 panic("hammer_blockmap_lookup: %016llx beyond EOF %016llx",
303 bmap_off, rootmap->alloc_offset);
/*
 * Dive layer 1.
 */
310 layer1_offset = rootmap->phys_offset +
311 HAMMER_BLOCKMAP_LAYER1_OFFSET(bmap_off);
312 layer1 = hammer_bread(trans->hmp, layer1_offset, &error, &buffer1);
313 KKASSERT(error == 0);
314 KKASSERT(layer1->phys_offset);
317 * Dive layer 2, each entry represents a large-block.
319 layer2_offset = layer1->phys_offset +
320 HAMMER_BLOCKMAP_LAYER2_OFFSET(bmap_off);
321 layer2 = hammer_bread(trans->hmp, layer2_offset, &error, &buffer2);
323 KKASSERT(error == 0);
324 KKASSERT(layer2->u.phys_offset);
/* Credit the freed bytes back to the containing big-block. */
325 hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
326 layer2->bytes_free += bytes;
327 KKASSERT(layer2->bytes_free <= HAMMER_LARGEBLOCK_SIZE);
330 * If the big-block is free, return it to the free pool. If our
331 * iterator is in the wholly free block, leave the block intact
332 * and reset the iterator.
334 if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
335 if ((rootmap->next_offset ^ bmap_off) &
336 ~HAMMER_LARGEBLOCK_MASK64) {
337 hammer_freemap_free(trans, layer2->u.phys_offset,
339 layer2->u.phys_offset = HAMMER_BLOCKMAP_FREE;
341 hammer_modify_buffer(trans, buffer1,
342 layer1, sizeof(*layer1));
343 ++layer1->blocks_free;
346 * XXX Not working yet - we aren't clearing it when
347 * reallocating the block later on.
349 if (layer1->blocks_free == HAMMER_BLOCKMAP_RADIX2) {
351 trans, layer1->phys_offset,
352 bmap_off & ~HAMMER_BLOCKMAP_LAYER2_MASK,
354 layer1->phys_offset = HAMMER_BLOCKMAP_FREE;
/* Iterator sits inside the now-free big-block: rewind it instead. */
358 hammer_modify_volume(trans, root_volume,
359 rootmap, sizeof(*rootmap));
360 rootmap->next_offset &= ~HAMMER_LARGEBLOCK_MASK64;
364 lockmgr(&trans->hmp->blockmap_lock, LK_RELEASE);
367 hammer_rel_buffer(buffer1, 0);
369 hammer_rel_buffer(buffer2, 0);
370 hammer_rel_volume(root_volume, 0);
374 * Return the number of free bytes in the big-block containing the
375 * specified blockmap offset.
/*
 * NOTE(review): *curp appears to flag whether the zone's append
 * iterator currently sits in this big-block (see the XOR test below)
 * -- confirm against callers; *errorp reports buffer/volume errors.
 */
378 hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t bmap_off,
379 int *curp, int *errorp)
381 hammer_volume_t root_volume;
382 hammer_blockmap_t rootmap;
383 struct hammer_blockmap_layer1 *layer1;
384 struct hammer_blockmap_layer2 *layer2;
385 hammer_buffer_t buffer = NULL;
386 hammer_off_t layer1_offset;
387 hammer_off_t layer2_offset;
391 zone = HAMMER_ZONE_DECODE(bmap_off);
392 KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
393 root_volume = hammer_get_root_volume(hmp, errorp);
/* Sanity-check the zone's root blockmap before diving the layers. */
398 rootmap = &root_volume->ondisk->vol0_blockmap[zone];
399 KKASSERT(rootmap->phys_offset != 0);
400 KKASSERT(HAMMER_ZONE_DECODE(rootmap->phys_offset) ==
401 HAMMER_ZONE_RAW_BUFFER_INDEX);
402 KKASSERT(HAMMER_ZONE_DECODE(rootmap->alloc_offset) == zone);
404 if (bmap_off >= rootmap->alloc_offset) {
405 panic("hammer_blockmap_lookup: %016llx beyond EOF %016llx",
406 bmap_off, rootmap->alloc_offset);
/*
 * Dive layer 1.  Both layer reads share 'buffer'; the second
 * hammer_bread() replaces the first reference.
 */
415 layer1_offset = rootmap->phys_offset +
416 HAMMER_BLOCKMAP_LAYER1_OFFSET(bmap_off);
417 layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
418 KKASSERT(*errorp == 0);
419 KKASSERT(layer1->phys_offset);
422 * Dive layer 2, each entry represents a large-block.
424 layer2_offset = layer1->phys_offset +
425 HAMMER_BLOCKMAP_LAYER2_OFFSET(bmap_off);
426 layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
428 KKASSERT(*errorp == 0);
429 KKASSERT(layer2->u.phys_offset);
431 bytes = layer2->bytes_free;
/* Does the append iterator live in this big-block? */
433 if ((rootmap->next_offset ^ bmap_off) & ~HAMMER_LARGEBLOCK_MASK64)
439 hammer_rel_buffer(buffer, 0);
440 hammer_rel_volume(root_volume, 0);
441 if (hammer_debug_general & 0x0800) {
442 kprintf("hammer_blockmap_getfree: %016llx -> %d\n",
450 * Lookup a blockmap offset.
/*
 * NOTE(review): translates a zone-encoded blockmap offset into the
 * backing raw-buffer (physical) offset by walking the two-layer map.
 */
453 hammer_blockmap_lookup(hammer_mount_t hmp, hammer_off_t bmap_off, int *errorp)
455 hammer_volume_t root_volume;
456 hammer_blockmap_t rootmap;
457 struct hammer_blockmap_layer1 *layer1;
458 struct hammer_blockmap_layer2 *layer2;
459 hammer_buffer_t buffer = NULL;
460 hammer_off_t layer1_offset;
461 hammer_off_t layer2_offset;
462 hammer_off_t result_offset;
465 zone = HAMMER_ZONE_DECODE(bmap_off);
466 KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
467 root_volume = hammer_get_root_volume(hmp, errorp);
470 rootmap = &root_volume->ondisk->vol0_blockmap[zone];
471 KKASSERT(rootmap->phys_offset != 0);
472 KKASSERT(HAMMER_ZONE_DECODE(rootmap->phys_offset) ==
473 HAMMER_ZONE_RAW_BUFFER_INDEX);
474 KKASSERT(HAMMER_ZONE_DECODE(rootmap->alloc_offset) == zone);
476 if (bmap_off >= rootmap->alloc_offset) {
477 panic("hammer_blockmap_lookup: %016llx beyond EOF %016llx",
478 bmap_off, rootmap->alloc_offset);
/*
 * Dive layer 1.  Both layer reads share 'buffer'.
 */
486 layer1_offset = rootmap->phys_offset +
487 HAMMER_BLOCKMAP_LAYER1_OFFSET(bmap_off);
488 layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
489 KKASSERT(*errorp == 0);
490 KKASSERT(layer1->phys_offset);
493 * Dive layer 2, each entry represents a large-block.
495 layer2_offset = layer1->phys_offset +
496 HAMMER_BLOCKMAP_LAYER2_OFFSET(bmap_off);
497 layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
499 KKASSERT(*errorp == 0);
500 KKASSERT(layer2->u.phys_offset);
/* Physical address = big-block base + offset within the big-block. */
502 result_offset = layer2->u.phys_offset +
503 (bmap_off & HAMMER_LARGEBLOCK_MASK64);
506 hammer_rel_buffer(buffer, 0);
507 hammer_rel_volume(root_volume, 0);
508 if (hammer_debug_general & 0x0800) {
509 kprintf("hammer_blockmap_lookup: %016llx -> %016llx\n",
510 bmap_off, result_offset);
512 return(result_offset);
515 /************************************************************************
516 * IN-CORE TRACKING OF ALLOCATION HOLES *
517 ************************************************************************
519 * This is a temporary shim in need of a more permanent solution.
521 * As we allocate space holes are created due to having to align to a new
522 * 16K buffer when an allocation would otherwise cross the buffer boundary.
523 * These holes are recorded here and used to fulfill smaller requests as
524 * much as possible. Only a limited number of holes are recorded and these
525 * functions operate somewhat like a heuristic, where information is allowed
/*
 * Initialize a zone's in-core hole list to empty.
 */
530 hammer_init_holes(hammer_mount_t hmp, hammer_holes_t holes)
532 TAILQ_INIT(&holes->list);
/*
 * Destroy a zone's hole list, returning each tracking structure to
 * the M_HAMMER malloc type.
 */
537 hammer_free_holes(hammer_mount_t hmp, hammer_holes_t holes)
541 while ((hole = TAILQ_FIRST(&holes->list)) != NULL) {
542 TAILQ_REMOVE(&holes->list, hole, entry);
543 kfree(hole, M_HAMMER);
548 * Attempt to locate a hole with sufficient free space to accommodate the
549 * requested allocation. Return the offset or 0 if no hole could be found.
552 hammer_find_hole(hammer_mount_t hmp, hammer_holes_t holes, int bytes)
555 hammer_off_t result_off = 0;
/* First-fit scan; the chosen hole is shrunk in place by 'bytes'. */
557 TAILQ_FOREACH(hole, &holes->list, entry) {
558 if (bytes <= hole->bytes) {
559 result_off = hole->offset;
560 hole->offset += bytes;
561 hole->bytes -= bytes;
569 * If a newly created hole is reasonably sized then record it. We only
570 * keep track of a limited number of holes. Lost holes are recovered by
/*
 * NOTE(review): when the list is at HAMMER_MAX_HOLES capacity the
 * oldest entry (list head) is recycled for the new hole rather than
 * allocating another tracking structure.
 */
574 hammer_add_hole(hammer_mount_t hmp, hammer_holes_t holes,
575 hammer_off_t offset, int bytes)
582 if (holes->count < HAMMER_MAX_HOLES) {
583 hole = kmalloc(sizeof(*hole), M_HAMMER, M_WAITOK);
586 hole = TAILQ_FIRST(&holes->list);
587 TAILQ_REMOVE(&holes->list, hole, entry);
589 TAILQ_INSERT_TAIL(&holes->list, hole, entry);
590 hole->offset = offset;