/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.9 2008/04/29 01:10:37 dillon Exp $
 */

/*
 * HAMMER blockmap
 */
#include "hammer.h"

static hammer_off_t hammer_find_hole(hammer_mount_t hmp,
                                     hammer_holes_t holes, int bytes);
static void hammer_add_hole(hammer_mount_t hmp, hammer_holes_t holes,
                                     hammer_off_t offset, int bytes);
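
/*
 * Overview: a blockmap translates zone-encoded offsets into raw disk
 * offsets in two radix layers.  The high bits of a zone offset index a
 * layer1 entry, which points at an array of layer2 entries; each layer2
 * entry describes one large-block (physical offset plus a free-byte
 * count), and the low bits of the zone offset index into that
 * large-block itself.
 */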

/*
 * Allocate bytes from a zone
 */
hammer_off_t
hammer_blockmap_alloc(hammer_transaction_t trans, int zone,
                      int bytes, int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t rootmap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_buffer_t buffer3 = NULL;
        hammer_off_t tmp_offset;
        hammer_off_t next_offset;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t bigblock_offset;
        int loops = 0;
        int skip_amount;
        int used_hole;

        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = hammer_get_root_volume(trans->hmp, errorp);
        if (*errorp)
                return(0);

        rootmap = &trans->hmp->blockmap[zone];
        KKASSERT(rootmap->phys_offset != 0);
        KKASSERT(HAMMER_ZONE_DECODE(rootmap->phys_offset) ==
                 HAMMER_ZONE_RAW_BUFFER_INDEX);
        KKASSERT(HAMMER_ZONE_DECODE(rootmap->alloc_offset) == zone);
        KKASSERT(HAMMER_ZONE_DECODE(rootmap->next_offset) == zone);

        /*
         * Deal with alignment and buffer-boundary issues.
         *
         * Be careful, certain primary alignments are used below to allocate
         * new blockmap blocks.
         */
        bytes = (bytes + 7) & ~7;
        KKASSERT(bytes > 0 && bytes <= HAMMER_BUFSIZE);

        lockmgr(&trans->hmp->blockmap_lock, LK_EXCLUSIVE|LK_RETRY);
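
        /*
         * Note: the per-mount blockmap_lock serializes all blockmap
         * allocation and freeing as well as manipulation of the in-core
         * hole lists below.
         */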

        /*
         * Try to use a known-free hole, otherwise append.
         */
        next_offset = hammer_find_hole(trans->hmp, &trans->hmp->holes[zone],
                                       bytes);
        if (next_offset == 0) {
                next_offset = rootmap->next_offset;
                used_hole = 0;
        } else {
                used_hole = 1;
        }

again:
        /*
         * The allocation request may not cross a buffer boundary.
         */
        tmp_offset = next_offset + bytes - 1;
        if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
                skip_amount = HAMMER_BUFSIZE -
                              ((int)next_offset & HAMMER_BUFMASK);
                hammer_add_hole(trans->hmp, &trans->hmp->holes[zone],
                                next_offset, skip_amount);
                next_offset = tmp_offset & ~HAMMER_BUFMASK64;
        }
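
        /*
         * Worked example: with 16K buffers, next_offset = zone:0x3ff8 and
         * bytes = 64 give tmp_offset = zone:0x4037.  The XOR then has bits
         * set above HAMMER_BUFMASK64, so the 8 bytes remaining in the
         * current buffer are recorded as a hole and the allocation moves
         * up to the 0x4000 buffer boundary.
         */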

        /*
         * Dive layer 1.  If we are starting a new layer 1 entry,
         * allocate a layer 2 block for it.
         */
        layer1_offset = rootmap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
        layer1 = hammer_bread(trans->hmp, layer1_offset, errorp, &buffer1);
        KKASSERT(*errorp == 0);
        KKASSERT(next_offset <= rootmap->alloc_offset);

        /*
         * Allocate layer2 backing store in layer1 if necessary.  next_offset
         * can skip to a bigblock boundary but alloc_offset is at least
         * bigblock-aligned so that's ok.
         */
        if (next_offset == rootmap->alloc_offset &&
            ((next_offset & HAMMER_BLOCKMAP_LAYER2_MASK) == 0 ||
             layer1->phys_offset == HAMMER_BLOCKMAP_FREE)
        ) {
                KKASSERT((next_offset & HAMMER_BLOCKMAP_LAYER2_MASK) == 0);
                hammer_modify_buffer(trans, buffer1, layer1, sizeof(*layer1));
                bzero(layer1, sizeof(*layer1));
                layer1->phys_offset =
                        hammer_freemap_alloc(trans, next_offset, errorp);
                layer1->blocks_free = HAMMER_BLOCKMAP_RADIX2;
                hammer_modify_buffer_done(buffer1);
                KKASSERT(*errorp == 0);
        }
        KKASSERT(layer1->phys_offset);

        /*
         * If layer1 indicates no free blocks in layer2 and our alloc_offset
         * is not in layer2, skip layer2 entirely.
         */
        if (layer1->blocks_free == 0 &&
            ((next_offset ^ rootmap->alloc_offset) &
             ~HAMMER_BLOCKMAP_LAYER2_MASK) != 0) {
                kprintf("blockmap skip1 %016llx\n", next_offset);
                next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2_MASK) &
                              ~HAMMER_BLOCKMAP_LAYER2_MASK;
                if (next_offset >= trans->hmp->zone_limits[zone]) {
                        kprintf("blockmap wrap1\n");
                        next_offset = HAMMER_ZONE_ENCODE(zone, 0);
                        if (++loops == 2) {     /* XXX poor-man's */
                                next_offset = 0;
                                *errorp = ENOSPC;
                                goto done;
                        }
                }
                goto again;
        }
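
        /*
         * Note: the wrap logic above rounds next_offset up to the next
         * layer2 boundary; if that passes the zone limit the iterator
         * wraps to offset 0 of the zone, and wrapping a second time
         * means the zone is exhausted and we fail with ENOSPC.
         */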

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
        layer2 = hammer_bread(trans->hmp, layer2_offset, errorp, &buffer2);
        KKASSERT(*errorp == 0);

        if ((next_offset & HAMMER_LARGEBLOCK_MASK64) == 0) {
                /*
                 * We are at the beginning of a new bigblock
                 */
                if (next_offset == rootmap->alloc_offset ||
                    layer2->u.phys_offset == HAMMER_BLOCKMAP_FREE) {
                        /*
                         * Allocate the bigblock in layer2 if diving into
                         * uninitialized space or if the block was previously
                         * freed.
                         */
                        hammer_modify_buffer(trans, buffer1,
                                             layer1, sizeof(*layer1));
                        KKASSERT(layer1->blocks_free);
                        --layer1->blocks_free;
                        hammer_modify_buffer_done(buffer1);
                        hammer_modify_buffer(trans, buffer2,
                                             layer2, sizeof(*layer2));
                        bzero(layer2, sizeof(*layer2));
                        layer2->u.phys_offset =
                                hammer_freemap_alloc(trans, next_offset,
                                                     errorp);
                        layer2->bytes_free = HAMMER_LARGEBLOCK_SIZE;
                        hammer_modify_buffer_done(buffer2);
                        KKASSERT(*errorp == 0);
                } else if (layer2->bytes_free != HAMMER_LARGEBLOCK_SIZE) {
                        /*
                         * We have encountered a block that is already
                         * partially allocated.  We must skip this block.
                         */
                        kprintf("blockmap skip2 %016llx %d\n",
                                next_offset, layer2->bytes_free);
                        next_offset += HAMMER_LARGEBLOCK_SIZE;
                        if (next_offset >= trans->hmp->zone_limits[zone]) {
                                next_offset = HAMMER_ZONE_ENCODE(zone, 0);
                                kprintf("blockmap wrap2\n");
                                if (++loops == 2) {     /* XXX poor-man's */
                                        next_offset = 0;
                                        *errorp = ENOSPC;
                                        goto done;
                                }
                        }
                        goto again;
                }
        } else {
                /*
                 * We are appending within a bigblock.
                 */
                KKASSERT(layer2->u.phys_offset != HAMMER_BLOCKMAP_FREE);
        }
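
        /*
         * To summarize the boundary cases above: a virgin or previously
         * freed big-block gets fresh backing store from the freemap, a
         * partially allocated big-block encountered while iterating is
         * skipped, and otherwise we simply append within the big-block
         * the iterator is already in.
         */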

        hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
        layer2->bytes_free -= bytes;
        hammer_modify_buffer_done(buffer2);
        KKASSERT(layer2->bytes_free >= 0);

        /*
         * If the buffer was completely free we do not have to read it
         * from disk; call hammer_bnew() to instantiate it.
         */
        if ((next_offset & HAMMER_BUFMASK) == 0) {
                bigblock_offset = layer2->u.phys_offset +
                                  (next_offset & HAMMER_LARGEBLOCK_MASK64);
                hammer_bnew(trans->hmp, bigblock_offset, errorp, &buffer3);
        }

        /*
         * Adjust our iterator and alloc_offset.  The layer1 and layer2
         * space beyond alloc_offset is uninitialized.  alloc_offset must
         * be big-block aligned.
         */
        if (used_hole == 0) {
                hammer_modify_volume(trans, root_volume, NULL, 0);
                rootmap->next_offset = next_offset + bytes;
                if (rootmap->alloc_offset < rootmap->next_offset) {
                        rootmap->alloc_offset =
                            (rootmap->next_offset + HAMMER_LARGEBLOCK_MASK) &
                            ~HAMMER_LARGEBLOCK_MASK64;
                }
                hammer_modify_volume_done(root_volume);
        }
done:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        if (buffer3)
                hammer_rel_buffer(buffer3, 0);
        hammer_rel_volume(root_volume, 0);
        lockmgr(&trans->hmp->blockmap_lock, LK_RELEASE);
        return(next_offset);
}
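
/*
 * Usage sketch (hypothetical caller, for illustration only):
 *
 *      hammer_off_t zone_off;
 *      hammer_off_t raw_off;
 *      int error;
 *
 *      zone_off = hammer_blockmap_alloc(trans, HAMMER_ZONE_BTREE_INDEX,
 *                                       sizeof(struct hammer_node_ondisk),
 *                                       &error);
 *      if (error == 0)
 *              raw_off = hammer_blockmap_lookup(trans->hmp, zone_off,
 *                                               &error);
 */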

/*
 * Free (offset,bytes) in a zone
 */
void
hammer_blockmap_free(hammer_transaction_t trans,
                     hammer_off_t bmap_off, int bytes)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t rootmap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        int error;
        int zone;

        bytes = (bytes + 7) & ~7;
        KKASSERT(bytes <= HAMMER_BUFSIZE);
        zone = HAMMER_ZONE_DECODE(bmap_off);
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = hammer_get_root_volume(trans->hmp, &error);
        if (error)
                return;

        lockmgr(&trans->hmp->blockmap_lock, LK_EXCLUSIVE|LK_RETRY);

        rootmap = &trans->hmp->blockmap[zone];
        KKASSERT(rootmap->phys_offset != 0);
        KKASSERT(HAMMER_ZONE_DECODE(rootmap->phys_offset) ==
                 HAMMER_ZONE_RAW_BUFFER_INDEX);
        KKASSERT(HAMMER_ZONE_DECODE(rootmap->alloc_offset) == zone);
        KKASSERT(((bmap_off ^ (bmap_off + (bytes - 1))) &
                  ~HAMMER_LARGEBLOCK_MASK64) == 0);

        if (bmap_off >= rootmap->alloc_offset) {
                panic("hammer_blockmap_free: %016llx beyond EOF %016llx",
                      bmap_off, rootmap->alloc_offset);
                goto done;
        }
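
        /*
         * Note: the boundary KKASSERT above guarantees the freed range
         * lies entirely within a single large-block.
         */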

        /*
         * Dive layer 1.
         */
        layer1_offset = rootmap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(bmap_off);
        layer1 = hammer_bread(trans->hmp, layer1_offset, &error, &buffer1);
        KKASSERT(error == 0);
        KKASSERT(layer1->phys_offset);

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(bmap_off);
        layer2 = hammer_bread(trans->hmp, layer2_offset, &error, &buffer2);

        KKASSERT(error == 0);
        KKASSERT(layer2->u.phys_offset);
        hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
        layer2->bytes_free += bytes;
        KKASSERT(layer2->bytes_free <= HAMMER_LARGEBLOCK_SIZE);

        /*
         * If the big-block is free, return it to the free pool.  If our
         * iterator is in the wholly free block, leave the block intact
         * and reset the iterator.
         */
        if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
                if ((rootmap->next_offset ^ bmap_off) &
                    ~HAMMER_LARGEBLOCK_MASK64) {
                        hammer_freemap_free(trans, layer2->u.phys_offset,
                                            bmap_off, &error);
                        layer2->u.phys_offset = HAMMER_BLOCKMAP_FREE;

                        hammer_modify_buffer(trans, buffer1,
                                             layer1, sizeof(*layer1));
                        ++layer1->blocks_free;
#if 0
                        /*
                         * XXX Not working yet - we aren't clearing it when
                         * reallocating the block later on.
                         */
                        if (layer1->blocks_free == HAMMER_BLOCKMAP_RADIX2) {
                                hammer_freemap_free(
                                        trans, layer1->phys_offset,
                                        bmap_off & ~HAMMER_BLOCKMAP_LAYER2_MASK,
                                        &error);
                                layer1->phys_offset = HAMMER_BLOCKMAP_FREE;
                        }
#endif
                        hammer_modify_buffer_done(buffer1);
                } else {
                        /*
                         * Leave block intact and reset the iterator.
                         *
                         * XXX can't do this yet because if we allow data
                         * allocations they could overwrite deleted data
                         * that is still subject to an undo on reboot.
                         */
#if 0
                        hammer_modify_volume(trans, root_volume,
                                             NULL, 0);
                        rootmap->next_offset &= ~HAMMER_LARGEBLOCK_MASK64;
                        hammer_modify_volume_done(root_volume);
#endif
                }
        }
        hammer_modify_buffer_done(buffer2);

        lockmgr(&trans->hmp->blockmap_lock, LK_RELEASE);
done:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        hammer_rel_volume(root_volume, 0);
}

/*
 * Return the number of free bytes in the big-block containing the
 * specified blockmap offset.
 */
int
hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t bmap_off,
                        int *curp, int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t rootmap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        int bytes;
        int zone;
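
        /*
         * Note: *curp is set to 1 if the big-block containing bmap_off is
         * the one the allocation iterator is currently working in, else 0.
         */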
        zone = HAMMER_ZONE_DECODE(bmap_off);
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp) {
                *curp = 0;
                return(0);
        }
        rootmap = &hmp->blockmap[zone];
        KKASSERT(rootmap->phys_offset != 0);
        KKASSERT(HAMMER_ZONE_DECODE(rootmap->phys_offset) ==
                 HAMMER_ZONE_RAW_BUFFER_INDEX);
        KKASSERT(HAMMER_ZONE_DECODE(rootmap->alloc_offset) == zone);

        if (bmap_off >= rootmap->alloc_offset) {
                panic("hammer_blockmap_getfree: %016llx beyond EOF %016llx",
                      bmap_off, rootmap->alloc_offset);
                bytes = 0;
                *curp = 0;
                goto done;
        }

        /*
         * Dive layer 1.
         */
        layer1_offset = rootmap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(bmap_off);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
        KKASSERT(*errorp == 0);
        KKASSERT(layer1->phys_offset);

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(bmap_off);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);

        KKASSERT(*errorp == 0);
        KKASSERT(layer2->u.phys_offset);

        bytes = layer2->bytes_free;

        if ((rootmap->next_offset ^ bmap_off) & ~HAMMER_LARGEBLOCK_MASK64)
                *curp = 0;
        else
                *curp = 1;
done:
        if (buffer)
                hammer_rel_buffer(buffer, 0);
        hammer_rel_volume(root_volume, 0);
        if (hammer_debug_general & 0x0800) {
                kprintf("hammer_blockmap_getfree: %016llx -> %d\n",
                        bmap_off, bytes);
        }
        return(bytes);
}

/*
 * Lookup a blockmap offset.
 */
hammer_off_t
hammer_blockmap_lookup(hammer_mount_t hmp, hammer_off_t bmap_off, int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t rootmap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t result_offset;
        int zone;

        zone = HAMMER_ZONE_DECODE(bmap_off);
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp)
                return(0);
        rootmap = &hmp->blockmap[zone];
        KKASSERT(rootmap->phys_offset != 0);
        KKASSERT(HAMMER_ZONE_DECODE(rootmap->phys_offset) ==
                 HAMMER_ZONE_RAW_BUFFER_INDEX);
        KKASSERT(HAMMER_ZONE_DECODE(rootmap->alloc_offset) == zone);

        if (bmap_off >= rootmap->alloc_offset) {
                panic("hammer_blockmap_lookup: %016llx beyond EOF %016llx",
                      bmap_off, rootmap->alloc_offset);
                result_offset = 0;
                goto done;
        }

        /*
         * Dive layer 1.
         */
        layer1_offset = rootmap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(bmap_off);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
        KKASSERT(*errorp == 0);
        KKASSERT(layer1->phys_offset);

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(bmap_off);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);

        KKASSERT(*errorp == 0);
        KKASSERT(layer2->u.phys_offset);

        result_offset = layer2->u.phys_offset +
                        (bmap_off & HAMMER_LARGEBLOCK_MASK64);
done:
        if (buffer)
                hammer_rel_buffer(buffer, 0);
        hammer_rel_volume(root_volume, 0);
        if (hammer_debug_general & 0x0800) {
                kprintf("hammer_blockmap_lookup: %016llx -> %016llx\n",
                        bmap_off, result_offset);
        }
        return(result_offset);
}
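
/*
 * Example (assuming HAMMER's 8MB large-blocks): for a zone-8 offset such
 * as 0x8000000000012345, the high bits select the zone and the layer1
 * and layer2 entries, while the low 23 bits (0x12345 here) are added to
 * the large-block's physical offset to form result_offset.
 */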

/************************************************************************
 *              IN-CORE TRACKING OF ALLOCATION HOLES                   *
 ************************************************************************
 *
 * This is a temporary shim in need of a more permanent solution.
 *
 * As we allocate space holes are created due to having to align to a new
 * 16K buffer when an allocation would otherwise cross the buffer boundary.
 * These holes are recorded here and used to fulfill smaller requests as
 * much as possible.  Only a limited number of holes are recorded and these
 * functions operate somewhat like a heuristic, where information is
 * allowed to be thrown away.
 */
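
/*
 * Sketch of the supporting types as implied by their use below (the
 * authoritative definitions live in hammer.h):
 *
 *      struct hammer_hole {
 *              TAILQ_ENTRY(hammer_hole) entry;
 *              hammer_off_t             offset;
 *              int                      bytes;
 *      };
 *      typedef struct hammer_hole *hammer_hole_t;
 *
 *      struct hammer_holes {
 *              TAILQ_HEAD(, hammer_hole) list;
 *              int                       count;
 *      };
 */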

void
hammer_init_holes(hammer_mount_t hmp, hammer_holes_t holes)
{
        TAILQ_INIT(&holes->list);
        holes->count = 0;
}

void
hammer_free_holes(hammer_mount_t hmp, hammer_holes_t holes)
{
        hammer_hole_t hole;

        while ((hole = TAILQ_FIRST(&holes->list)) != NULL) {
                TAILQ_REMOVE(&holes->list, hole, entry);
                kfree(hole, M_HAMMER);
        }
}

/*
 * Attempt to locate a hole with sufficient free space to accommodate the
 * requested allocation.  Return the offset or 0 if no hole could be found.
 */
static hammer_off_t
hammer_find_hole(hammer_mount_t hmp, hammer_holes_t holes, int bytes)
{
        hammer_hole_t hole;
        hammer_off_t result_off = 0;

        TAILQ_FOREACH(hole, &holes->list, entry) {
                if (bytes <= hole->bytes) {
                        result_off = hole->offset;
                        hole->offset += bytes;
                        hole->bytes -= bytes;
                        break;
                }
        }
        return(result_off);
}

/*
 * If a newly created hole is reasonably sized then record it.  We only
 * keep track of a limited number of holes.  Lost holes are recovered
 * when the containing big-block is eventually freed as a whole.
 */
static void
hammer_add_hole(hammer_mount_t hmp, hammer_holes_t holes,
                hammer_off_t offset, int bytes)
{
        hammer_hole_t hole;

        if (bytes <= 128)
                return;

        if (holes->count < HAMMER_MAX_HOLES) {
                hole = kmalloc(sizeof(*hole), M_HAMMER, M_WAITOK);
                ++holes->count;
        } else {
                hole = TAILQ_FIRST(&holes->list);
                TAILQ_REMOVE(&holes->list, hole, entry);
        }
        TAILQ_INSERT_TAIL(&holes->list, hole, entry);
        hole->offset = offset;
        hole->bytes = bytes;
}