2 * Copyright (c) 2008 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.12 2008/05/06 00:21:07 dillon Exp $
42 static hammer_off_t hammer_find_hole(hammer_mount_t hmp,
43 hammer_holes_t holes, int bytes);
44 static void hammer_add_hole(hammer_mount_t hmp, hammer_holes_t holes,
45 hammer_off_t offset, int bytes);
48 * Allocate bytes from a zone
/*
 * hammer_blockmap_alloc - allocate "bytes" from the blockmap of "zone"
 * under the transaction "trans", reporting errors through *errorp.
 *
 * The request is rounded up to a multiple of 8 bytes and may not exceed
 * one buffer (HAMMER_BUFSIZE).  The allocator first tries to satisfy the
 * request from a previously recorded in-core hole; otherwise it appends
 * at rootmap->next_offset, diving the two-layer blockmap and allocating
 * layer2 / big-block backing store from the freemap as needed.
 *
 * NOTE(review): the embedded original line numbers in this listing are
 * non-contiguous -- braces, return statements and several declarations
 * (e.g. 'loops', 'skip_amount', 'used_hole') are elided.  Comments below
 * describe only the code that is visible; verify against the full file.
 */
51 hammer_blockmap_alloc(hammer_transaction_t trans, int zone,
52 int bytes, int *errorp)
54 hammer_volume_t root_volume;
55 hammer_blockmap_t rootmap;
56 struct hammer_blockmap_layer1 *layer1;
57 struct hammer_blockmap_layer2 *layer2;
58 hammer_buffer_t buffer1 = NULL;		/* holds the layer1 entry */
59 hammer_buffer_t buffer2 = NULL;		/* holds the layer2 entry */
60 hammer_buffer_t buffer3 = NULL;		/* instantiated target buffer */
61 hammer_off_t tmp_offset;
62 hammer_off_t next_offset;
63 hammer_off_t layer1_offset;
64 hammer_off_t layer2_offset;
65 hammer_off_t bigblock_offset;
/* Only the zoned (non-raw) blockmaps may be allocated from. */
70 KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
71 root_volume = hammer_get_root_volume(trans->hmp, errorp);
/*
 * Sanity-check the in-core blockmap for this zone: the physical
 * backing store is encoded in the raw-buffer zone while both
 * iterators (alloc_offset, next_offset) remain encoded in the
 * caller's zone.
 */
74 rootmap = &trans->hmp->blockmap[zone];
75 KKASSERT(rootmap->phys_offset != 0);
76 KKASSERT(HAMMER_ZONE_DECODE(rootmap->phys_offset) ==
77 HAMMER_ZONE_RAW_BUFFER_INDEX);
78 KKASSERT(HAMMER_ZONE_DECODE(rootmap->alloc_offset) == zone);
79 KKASSERT(HAMMER_ZONE_DECODE(rootmap->next_offset) == zone);
82 * Deal with alignment and buffer-boundary issues.
84 * Be careful, certain primary alignments are used below to allocate
85 * new blockmap blocks.
/* Round the request up to the 8-byte allocation granularity. */
87 bytes = (bytes + 7) & ~7;
88 KKASSERT(bytes > 0 && bytes <= HAMMER_BUFSIZE);
/* All blockmap manipulation is serialized by the per-mount lock. */
90 lockmgr(&trans->hmp->blockmap_lock, LK_EXCLUSIVE|LK_RETRY);
93 * Try to use a known-free hole, otherwise append.
95 next_offset = hammer_find_hole(trans->hmp, &trans->hmp->holes[zone],
97 if (next_offset == 0) {
98 next_offset = rootmap->next_offset;
106 * The allocation request may not cross a buffer boundary.
108 tmp_offset = next_offset + bytes - 1;
109 if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
/*
 * Record the skipped tail of the current buffer as a hole so
 * smaller future requests can reclaim it, then advance to the
 * next buffer boundary.
 */
110 skip_amount = HAMMER_BUFSIZE -
111 ((int)next_offset & HAMMER_BUFMASK);
112 hammer_add_hole(trans->hmp, &trans->hmp->holes[zone],
113 next_offset, skip_amount);
114 next_offset = tmp_offset & ~HAMMER_BUFMASK64;
118 * Dive layer 1. If we are starting a new layer 1 entry,
119 * allocate a layer 2 block for it.
121 layer1_offset = rootmap->phys_offset +
122 HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
123 layer1 = hammer_bread(trans->hmp, layer1_offset, errorp, &buffer1);
124 KKASSERT(*errorp == 0);
125 KKASSERT(next_offset <= rootmap->alloc_offset);
128 * Check CRC if not allocating into uninitialized space
130 if ((next_offset != rootmap->alloc_offset) ||
131 (next_offset & HAMMER_BLOCKMAP_LAYER2_MASK)) {
132 if (layer1->layer1_crc != crc32(layer1,
133 HAMMER_LAYER1_CRCSIZE)) {
134 Debugger("CRC FAILED: LAYER1");
139 * Allocate layer2 backing store in layer1 if necessary. next_offset
140 * can skip to a bigblock boundary but alloc_offset is at least
141 * bigblock=aligned so that's ok.
143 if (next_offset == rootmap->alloc_offset &&
144 ((next_offset & HAMMER_BLOCKMAP_LAYER2_MASK) == 0 ||
145 layer1->phys_offset == HAMMER_BLOCKMAP_FREE)
147 KKASSERT((next_offset & HAMMER_BLOCKMAP_LAYER2_MASK) == 0);
/* Initialize a fresh layer1 entry backed by a new freemap block. */
148 hammer_modify_buffer(trans, buffer1, layer1, sizeof(*layer1));
149 bzero(layer1, sizeof(*layer1));
150 layer1->phys_offset =
151 hammer_freemap_alloc(trans, next_offset, errorp);
152 layer1->blocks_free = HAMMER_BLOCKMAP_RADIX2;
153 layer1->layer1_crc = crc32(layer1, HAMMER_LAYER1_CRCSIZE);
154 hammer_modify_buffer_done(buffer1);
155 KKASSERT(*errorp == 0);
157 KKASSERT(layer1->phys_offset);
160 * If layer1 indicates no free blocks in layer2 and our alloc_offset
161 * is not in layer2, skip layer2 entirely.
163 if (layer1->blocks_free == 0 &&
164 ((next_offset ^ rootmap->alloc_offset) & ~HAMMER_BLOCKMAP_LAYER2_MASK) != 0) {
165 hkprintf("blockmap skip1 %016llx\n", next_offset);
/* Round up to the next layer2 boundary and retry. */
166 next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2_MASK) &
167 ~HAMMER_BLOCKMAP_LAYER2_MASK;
168 if (next_offset >= trans->hmp->zone_limits[zone]) {
169 hkprintf("blockmap wrap1\n");
/* Wrap to the start of the zone; a second wrap means exhaustion. */
170 next_offset = HAMMER_ZONE_ENCODE(zone, 0);
171 if (++loops == 2) { /* XXX poor-man's */
181 * Dive layer 2, each entry represents a large-block.
183 layer2_offset = layer1->phys_offset +
184 HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
185 layer2 = hammer_bread(trans->hmp, layer2_offset, errorp, &buffer2);
186 KKASSERT(*errorp == 0);
189 * Check CRC if not allocating into uninitialized space
191 if (next_offset != rootmap->alloc_offset ||
192 (next_offset & HAMMER_LARGEBLOCK_MASK64)) {
193 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
194 Debugger("CRC FAILED: LAYER2");
198 if ((next_offset & HAMMER_LARGEBLOCK_MASK64) == 0) {
200 * We are at the beginning of a new bigblock
202 if (next_offset == rootmap->alloc_offset ||
203 layer2->u.phys_offset == HAMMER_BLOCKMAP_FREE) {
205 * Allocate the bigblock in layer2 if diving into
206 * uninitialized space or if the block was previously
/* Consume one free-block credit in layer1 and re-CRC it. */
209 hammer_modify_buffer(trans, buffer1,
210 layer1, sizeof(*layer1));
211 KKASSERT(layer1->blocks_free);
212 --layer1->blocks_free;
213 layer1->layer1_crc = crc32(layer1,
214 HAMMER_LAYER1_CRCSIZE);
215 hammer_modify_buffer_done(buffer1);
/* Initialize the layer2 entry with a freshly allocated big-block. */
216 hammer_modify_buffer(trans, buffer2,
217 layer2, sizeof(*layer2));
218 bzero(layer2, sizeof(*layer2));
219 layer2->u.phys_offset =
220 hammer_freemap_alloc(trans, next_offset,
222 layer2->bytes_free = HAMMER_LARGEBLOCK_SIZE;
223 layer2->entry_crc = crc32(layer2,
224 HAMMER_LAYER2_CRCSIZE);
225 hammer_modify_buffer_done(buffer2);
226 KKASSERT(*errorp == 0);
227 } else if (layer2->bytes_free != HAMMER_LARGEBLOCK_SIZE) {
229 * We have encountered a block that is already
230 * partially allocated. We must skip this block.
232 hkprintf("blockmap skip2 %016llx %d\n",
233 next_offset, layer2->bytes_free);
234 next_offset += HAMMER_LARGEBLOCK_SIZE;
235 if (next_offset >= trans->hmp->zone_limits[zone]) {
236 next_offset = HAMMER_ZONE_ENCODE(zone, 0);
237 hkprintf("blockmap wrap2\n");
238 if (++loops == 2) { /* XXX poor-man's */
248 * We are appending within a bigblock. It is possible that
249 * the blockmap has been marked completely free via a prior
250 * pruning operation. We no longer reset the append index
251 * for that case because it compromises the UNDO by allowing
255 KKASSERT(layer2->u.phys_offset != HAMMER_BLOCKMAP_FREE);
/* Debit the allocation from the big-block's free-byte count. */
259 hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
260 layer2->bytes_free -= bytes;
261 layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
262 hammer_modify_buffer_done(buffer2);
263 KKASSERT(layer2->bytes_free >= 0);
266 * If the buffer was completely free we do not have to read it from
267 * disk, call hammer_bnew() to instantiate it.
269 if ((next_offset & HAMMER_BUFMASK) == 0) {
270 bigblock_offset = layer2->u.phys_offset +
271 (next_offset & HAMMER_LARGEBLOCK_MASK64);
272 hammer_bnew(trans->hmp, bigblock_offset, errorp, &buffer3);
276 * Adjust our iterator and alloc_offset. The layer1 and layer2
277 * space beyond alloc_offset is uninitialized. alloc_offset must
278 * be big-block aligned.
/*
 * Only advance the append iterator when the space did not come
 * from a recorded hole (presumably 'used_hole' is set on the
 * hole path above -- its assignment is elided from this listing).
 */
280 if (used_hole == 0) {
281 hammer_modify_volume(trans, root_volume, NULL, 0);
282 rootmap->next_offset = next_offset + bytes;
283 if (rootmap->alloc_offset < rootmap->next_offset) {
284 rootmap->alloc_offset =
285 (rootmap->next_offset + HAMMER_LARGEBLOCK_MASK) &
286 ~HAMMER_LARGEBLOCK_MASK64;
288 hammer_modify_volume_done(root_volume);
/* Release held buffers/volume and drop the blockmap lock. */
292 hammer_rel_buffer(buffer1, 0);
294 hammer_rel_buffer(buffer2, 0);
296 hammer_rel_buffer(buffer3, 0);
297 hammer_rel_volume(root_volume, 0);
298 lockmgr(&trans->hmp->blockmap_lock, LK_RELEASE);
303 * Free (offset,bytes) in a zone
/*
 * hammer_blockmap_free - release (bmap_off, bytes) back to its zone's
 * blockmap.
 *
 * Credits the bytes back to the governing layer2 entry.  If that makes
 * the big-block wholly free it is either returned to the freemap, or --
 * when the zone's append iterator currently sits inside it -- left
 * intact with the iterator backed up to the start of the big-block.
 *
 * NOTE(review): embedded original line numbers are non-contiguous;
 * braces, returns and some declarations (e.g. 'zone', 'error') are
 * elided from this listing.  Comments describe only visible code.
 */
306 hammer_blockmap_free(hammer_transaction_t trans,
307 hammer_off_t bmap_off, int bytes)
309 hammer_volume_t root_volume;
310 hammer_blockmap_t rootmap;
311 struct hammer_blockmap_layer1 *layer1;
312 struct hammer_blockmap_layer2 *layer2;
313 hammer_buffer_t buffer1 = NULL;
314 hammer_buffer_t buffer2 = NULL;
315 hammer_off_t layer1_offset;
316 hammer_off_t layer2_offset;
/* Round to the 8-byte allocation granularity used by the allocator. */
320 bytes = (bytes + 7) & ~7;
321 KKASSERT(bytes <= HAMMER_BUFSIZE);
/* The zone is recovered from the encoded blockmap offset itself. */
322 zone = HAMMER_ZONE_DECODE(bmap_off);
323 KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
324 root_volume = hammer_get_root_volume(trans->hmp, &error);
328 lockmgr(&trans->hmp->blockmap_lock, LK_EXCLUSIVE|LK_RETRY);
330 rootmap = &trans->hmp->blockmap[zone];
331 KKASSERT(rootmap->phys_offset != 0);
332 KKASSERT(HAMMER_ZONE_DECODE(rootmap->phys_offset) ==
333 HAMMER_ZONE_RAW_BUFFER_INDEX);
334 KKASSERT(HAMMER_ZONE_DECODE(rootmap->alloc_offset) == zone);
/* The freed range must not cross a big-block boundary. */
335 KKASSERT(((bmap_off ^ (bmap_off + (bytes - 1))) &
336 ~HAMMER_LARGEBLOCK_MASK64) == 0);
338 if (bmap_off >= rootmap->alloc_offset) {
339 panic("hammer_blockmap_lookup: %016llx beyond EOF %016llx",
340 bmap_off, rootmap->alloc_offset);
/* Dive layer 1 to locate the layer2 block governing bmap_off. */
347 layer1_offset = rootmap->phys_offset +
348 HAMMER_BLOCKMAP_LAYER1_OFFSET(bmap_off);
349 layer1 = hammer_bread(trans->hmp, layer1_offset, &error, &buffer1);
350 KKASSERT(error == 0);
351 KKASSERT(layer1->phys_offset);
352 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
353 Debugger("CRC FAILED: LAYER1");
357 * Dive layer 2, each entry represents a large-block.
359 layer2_offset = layer1->phys_offset +
360 HAMMER_BLOCKMAP_LAYER2_OFFSET(bmap_off);
361 layer2 = hammer_bread(trans->hmp, layer2_offset, &error, &buffer2);
362 KKASSERT(error == 0);
363 KKASSERT(layer2->u.phys_offset);
364 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
365 Debugger("CRC FAILED: LAYER2");
/* Credit the freed bytes back to the big-block's free count. */
368 hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
369 layer2->bytes_free += bytes;
370 KKASSERT(layer2->bytes_free <= HAMMER_LARGEBLOCK_SIZE);
373 * If the big-block is free, return it to the free pool. If our
374 * iterator is in the wholely free block, leave the block intact
375 * and reset the iterator.
377 if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
/* Iterator is NOT inside this big-block: really free it. */
378 if ((rootmap->next_offset ^ bmap_off) &
379 ~HAMMER_LARGEBLOCK_MASK64) {
380 hammer_freemap_free(trans, layer2->u.phys_offset,
382 layer2->u.phys_offset = HAMMER_BLOCKMAP_FREE;
384 hammer_modify_buffer(trans, buffer1,
385 layer1, sizeof(*layer1));
386 ++layer1->blocks_free;
389 * XXX Not working yet - we aren't clearing it when
390 * reallocating the block later on.
/* If every layer2 entry is now free, release the layer2 block too. */
392 if (layer1->blocks_free == HAMMER_BLOCKMAP_RADIX2) {
394 trans, layer1->phys_offset,
395 bmap_off & ~HAMMER_BLOCKMAP_LAYER2_MASK,
397 layer1->phys_offset = HAMMER_BLOCKMAP_FREE;
400 layer1->layer1_crc = crc32(layer1,
401 HAMMER_LAYER1_CRCSIZE);
402 hammer_modify_buffer_done(buffer1);
405 * Leave block intact and reset the iterator.
407 * XXX can't do this yet because if we allow data
408 * allocations they could overwrite deleted data
409 * that is still subject to an undo on reboot.
412 hammer_modify_volume(trans, root_volume,
/* Back the append iterator up to the start of the big-block. */
414 rootmap->next_offset &= ~HAMMER_LARGEBLOCK_MASK64;
415 hammer_modify_volume_done(root_volume);
419 layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
420 hammer_modify_buffer_done(buffer2);
422 lockmgr(&trans->hmp->blockmap_lock, LK_RELEASE);
425 hammer_rel_buffer(buffer1, 0);
427 hammer_rel_buffer(buffer2, 0);
428 hammer_rel_volume(root_volume, 0);
432 * Return the number of free bytes in the big-block containing the
433 * specified blockmap offset.
/*
 * hammer_blockmap_getfree - return the number of free bytes in the
 * big-block containing bmap_off.  Errors are reported through *errorp.
 *
 * NOTE(review): embedded original line numbers are non-contiguous; the
 * declarations of 'zone'/'bytes', the assignment through *curp, and the
 * return path are elided from this listing.  The visible test against
 * rootmap->next_offset presumably feeds *curp (whether the iterator is
 * currently inside this big-block) -- confirm against the full file.
 */
436 hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t bmap_off,
437 int *curp, int *errorp)
439 hammer_volume_t root_volume;
440 hammer_blockmap_t rootmap;
441 struct hammer_blockmap_layer1 *layer1;
442 struct hammer_blockmap_layer2 *layer2;
443 hammer_buffer_t buffer = NULL;		/* reused for both layer reads */
444 hammer_off_t layer1_offset;
445 hammer_off_t layer2_offset;
449 zone = HAMMER_ZONE_DECODE(bmap_off);
450 KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
451 root_volume = hammer_get_root_volume(hmp, errorp);
/* Same zone blockmap sanity checks as the allocator performs. */
456 rootmap = &hmp->blockmap[zone];
457 KKASSERT(rootmap->phys_offset != 0);
458 KKASSERT(HAMMER_ZONE_DECODE(rootmap->phys_offset) ==
459 HAMMER_ZONE_RAW_BUFFER_INDEX);
460 KKASSERT(HAMMER_ZONE_DECODE(rootmap->alloc_offset) == zone);
462 if (bmap_off >= rootmap->alloc_offset) {
463 panic("hammer_blockmap_lookup: %016llx beyond EOF %016llx",
464 bmap_off, rootmap->alloc_offset);
/* Dive layer 1. */
473 layer1_offset = rootmap->phys_offset +
474 HAMMER_BLOCKMAP_LAYER1_OFFSET(bmap_off);
475 layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
476 KKASSERT(*errorp == 0);
477 KKASSERT(layer1->phys_offset);
478 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
479 Debugger("CRC FAILED: LAYER1");
483 * Dive layer 2, each entry represents a large-block.
485 layer2_offset = layer1->phys_offset +
486 HAMMER_BLOCKMAP_LAYER2_OFFSET(bmap_off);
487 layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
488 KKASSERT(*errorp == 0);
489 KKASSERT(layer2->u.phys_offset);
490 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
491 Debugger("CRC FAILED: LAYER2");
494 bytes = layer2->bytes_free;
/* Is the zone's append iterator outside this big-block? */
496 if ((rootmap->next_offset ^ bmap_off) & ~HAMMER_LARGEBLOCK_MASK64)
502 hammer_rel_buffer(buffer, 0);
503 hammer_rel_volume(root_volume, 0);
504 if (hammer_debug_general & 0x0800) {
505 kprintf("hammer_blockmap_getfree: %016llx -> %d\n",
513 * Lookup a blockmap offset.
/*
 * hammer_blockmap_lookup - translate a zone-encoded blockmap offset into
 * the underlying raw-buffer offset.
 *
 * Dives layer1 then layer2 of the zone's blockmap and combines the
 * big-block's physical offset with the intra-big-block offset bits of
 * bmap_off.  Errors are reported through *errorp; a CRC mismatch on
 * either layer drops into the debugger.
 *
 * NOTE(review): embedded original line numbers are non-contiguous;
 * braces and the 'zone' declaration are elided from this listing.
 */
516 hammer_blockmap_lookup(hammer_mount_t hmp, hammer_off_t bmap_off, int *errorp)
518 hammer_volume_t root_volume;
519 hammer_blockmap_t rootmap;
520 struct hammer_blockmap_layer1 *layer1;
521 struct hammer_blockmap_layer2 *layer2;
522 hammer_buffer_t buffer = NULL;		/* reused for both layer reads */
523 hammer_off_t layer1_offset;
524 hammer_off_t layer2_offset;
525 hammer_off_t result_offset;
528 zone = HAMMER_ZONE_DECODE(bmap_off);
529 KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
530 root_volume = hammer_get_root_volume(hmp, errorp);
/* Same zone blockmap sanity checks as the allocator performs. */
533 rootmap = &hmp->blockmap[zone];
534 KKASSERT(rootmap->phys_offset != 0);
535 KKASSERT(HAMMER_ZONE_DECODE(rootmap->phys_offset) ==
536 HAMMER_ZONE_RAW_BUFFER_INDEX);
537 KKASSERT(HAMMER_ZONE_DECODE(rootmap->alloc_offset) == zone);
/* Lookups beyond the allocated extent of the zone are fatal. */
539 if (bmap_off >= rootmap->alloc_offset) {
540 panic("hammer_blockmap_lookup: %016llx beyond EOF %016llx",
541 bmap_off, rootmap->alloc_offset);
/* Dive layer 1. */
549 layer1_offset = rootmap->phys_offset +
550 HAMMER_BLOCKMAP_LAYER1_OFFSET(bmap_off);
551 layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
552 KKASSERT(*errorp == 0);
553 KKASSERT(layer1->phys_offset);
554 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
555 Debugger("CRC FAILED: LAYER1");
559 * Dive layer 2, each entry represents a large-block.
561 layer2_offset = layer1->phys_offset +
562 HAMMER_BLOCKMAP_LAYER2_OFFSET(bmap_off);
563 layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
565 KKASSERT(*errorp == 0);
566 KKASSERT(layer2->u.phys_offset);
567 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
568 Debugger("CRC FAILED: LAYER2");
/* Physical big-block base + offset within the big-block. */
571 result_offset = layer2->u.phys_offset +
572 (bmap_off & HAMMER_LARGEBLOCK_MASK64);
575 hammer_rel_buffer(buffer, 0);
576 hammer_rel_volume(root_volume, 0);
577 if (hammer_debug_general & 0x0800) {
578 kprintf("hammer_blockmap_lookup: %016llx -> %016llx\n",
579 bmap_off, result_offset);
581 return(result_offset);
584 /************************************************************************
585 * IN-CORE TRACKING OF ALLOCATION HOLES *
586 ************************************************************************
588 * This is a temporary shim in need of a more permanent solution.
590 * As we allocate space holes are created due to having to align to a new
591 * 16K buffer when an allocation would otherwise cross the buffer boundary.
592 * These holes are recorded here and used to fullfill smaller requests as
593 * much as possible. Only a limited number of holes are recorded and these
594 * functions operate somewhat like a heuristic, where information is allowed
/*
 * hammer_init_holes - initialize a per-zone hole-tracking structure
 * (empties its TAILQ of recorded holes).  The 'hmp' argument is unused
 * by the visible code.
 */
599 hammer_init_holes(hammer_mount_t hmp, hammer_holes_t holes)
601 TAILQ_INIT(&holes->list);
/*
 * hammer_free_holes - tear down a hole-tracking structure, unlinking
 * and kfree()ing every recorded hole.
 *
 * NOTE(review): the 'hole' declaration and closing brace are elided
 * from this listing.
 */
606 hammer_free_holes(hammer_mount_t hmp, hammer_holes_t holes)
610 while ((hole = TAILQ_FIRST(&holes->list)) != NULL) {
611 TAILQ_REMOVE(&holes->list, hole, entry);
612 kfree(hole, M_HAMMER);
617 * Attempt to locate a hole with sufficient free space to accomodate the
618 * requested allocation. Return the offset or 0 if no hole could be found.
/*
 * hammer_find_hole - first-fit search of the zone's recorded holes for
 * one large enough to hold 'bytes'.  On a hit the bytes are carved off
 * the front of the hole (its offset advances, its size shrinks) and the
 * original offset is returned; 0 is returned when no hole fits.
 *
 * NOTE(review): the 'hole' declaration, loop break and the final
 * return of result_off are elided from this listing.
 */
621 hammer_find_hole(hammer_mount_t hmp, hammer_holes_t holes, int bytes)
624 hammer_off_t result_off = 0;
626 TAILQ_FOREACH(hole, &holes->list, entry) {
627 if (bytes <= hole->bytes) {
/* Carve the request off the front of this hole. */
628 result_off = hole->offset;
629 hole->offset += bytes;
630 hole->bytes -= bytes;
638 * If a newly created hole is reasonably sized then record it. We only
639 * keep track of a limited number of holes. Lost holes are recovered by
643 hammer_add_hole(hammer_mount_t hmp, hammer_holes_t holes,
644 hammer_off_t offset, int bytes)
651 if (holes->count < HAMMER_MAX_HOLES) {
652 hole = kmalloc(sizeof(*hole), M_HAMMER, M_WAITOK);
655 hole = TAILQ_FIRST(&holes->list);
656 TAILQ_REMOVE(&holes->list, hole, entry);
658 TAILQ_INSERT_TAIL(&holes->list, hole, entry);
659 hole->offset = offset;