/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.10 2008/05/03 05:28:55 dillon Exp $
 */

/*
 * HAMMER blockmap
 */
#include "hammer.h"
static hammer_off_t hammer_find_hole(hammer_mount_t hmp,
                                     hammer_holes_t holes, int bytes);
static void hammer_add_hole(hammer_mount_t hmp, hammer_holes_t holes,
                            hammer_off_t offset, int bytes);
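
/*
 * Overview: a zone-encoded blockmap offset is translated to a raw buffer
 * offset in two steps.  The top bits index layer 1
 * (HAMMER_BLOCKMAP_LAYER1_OFFSET), yielding the physical location of a
 * layer 2 array; the middle bits index layer 2
 * (HAMMER_BLOCKMAP_LAYER2_OFFSET), yielding the physical location of a
 * large-block; the low bits (HAMMER_LARGEBLOCK_MASK64) select the byte
 * within that large-block.  hammer_blockmap_lookup() below performs
 * exactly this translation.
 */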

/*
 * Allocate bytes from a zone
 */
hammer_off_t
hammer_blockmap_alloc(hammer_transaction_t trans, int zone,
                      int bytes, int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t rootmap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_buffer_t buffer3 = NULL;
        hammer_off_t tmp_offset;
        hammer_off_t next_offset;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t bigblock_offset;
        int loops = 0;
        int skip_amount;
        int used_hole;

        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = hammer_get_root_volume(trans->hmp, errorp);
        if (*errorp)
                return(0);

        rootmap = &trans->hmp->blockmap[zone];
        KKASSERT(rootmap->phys_offset != 0);
        KKASSERT(HAMMER_ZONE_DECODE(rootmap->phys_offset) ==
                 HAMMER_ZONE_RAW_BUFFER_INDEX);
        KKASSERT(HAMMER_ZONE_DECODE(rootmap->alloc_offset) == zone);
        KKASSERT(HAMMER_ZONE_DECODE(rootmap->next_offset) == zone);

        /*
         * Deal with alignment and buffer-boundary issues.
         *
         * Be careful, certain primary alignments are used below to allocate
         * new blockmap blocks.
         */
        bytes = (bytes + 7) & ~7;
        KKASSERT(bytes > 0 && bytes <= HAMMER_BUFSIZE);
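
        /*
         * Example (illustrative numbers): a 13-byte request rounds up to
         * 16 bytes.  If next_offset then sits 8 bytes short of a 16K
         * buffer boundary the request cannot be satisfied there; the
         * boundary check below hands the unused tail of the buffer to
         * hammer_add_hole() (which only keeps holes large enough to be
         * worth reusing) and advances next_offset to the next buffer.
         */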

        lockmgr(&trans->hmp->blockmap_lock, LK_EXCLUSIVE|LK_RETRY);

        /*
         * Try to use a known-free hole, otherwise append.
         */
        next_offset = hammer_find_hole(trans->hmp, &trans->hmp->holes[zone],
                                       bytes);
        if (next_offset == 0) {
                next_offset = rootmap->next_offset;
                used_hole = 0;
        } else {
                used_hole = 1;
        }

again:
        /*
         * The allocation request may not cross a buffer boundary.
         */
        tmp_offset = next_offset + bytes - 1;
        if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
                skip_amount = HAMMER_BUFSIZE -
                              ((int)next_offset & HAMMER_BUFMASK);
                hammer_add_hole(trans->hmp, &trans->hmp->holes[zone],
                                next_offset, skip_amount);
                next_offset = tmp_offset & ~HAMMER_BUFMASK64;
        }

        /*
         * Dive layer 1.  If we are starting a new layer 1 entry,
         * allocate a layer 2 block for it.
         */
        layer1_offset = rootmap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
        layer1 = hammer_bread(trans->hmp, layer1_offset, errorp, &buffer1);
        KKASSERT(*errorp == 0);
        KKASSERT(next_offset <= rootmap->alloc_offset);

        /*
         * Allocate layer2 backing store in layer1 if necessary.  next_offset
         * can skip to a bigblock boundary but alloc_offset is at least
         * bigblock-aligned so that's ok.
         */
        if (next_offset == rootmap->alloc_offset &&
            ((next_offset & HAMMER_BLOCKMAP_LAYER2_MASK) == 0 ||
             layer1->phys_offset == HAMMER_BLOCKMAP_FREE)
        ) {
                KKASSERT((next_offset & HAMMER_BLOCKMAP_LAYER2_MASK) == 0);
                hammer_modify_buffer(trans, buffer1, layer1, sizeof(*layer1));
                bzero(layer1, sizeof(*layer1));
                layer1->phys_offset =
                        hammer_freemap_alloc(trans, next_offset, errorp);
                layer1->blocks_free = HAMMER_BLOCKMAP_RADIX2;
                hammer_modify_buffer_done(buffer1);
                KKASSERT(*errorp == 0);
        }
        KKASSERT(layer1->phys_offset);
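
        /*
         * layer1 is now valid.  A freshly allocated layer2 array starts
         * with all HAMMER_BLOCKMAP_RADIX2 big-blocks free; blocks_free is
         * decremented below each time a big-block is assigned out of it.
         */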

        /*
         * If layer1 indicates no free blocks in layer2 and our alloc_offset
         * is not in layer2, skip layer2 entirely.
         */
        if (layer1->blocks_free == 0 &&
            ((next_offset ^ rootmap->alloc_offset) &
             ~HAMMER_BLOCKMAP_LAYER2_MASK) != 0) {
                kprintf("blockmap skip1 %016llx\n", next_offset);
                next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2_MASK) &
                              ~HAMMER_BLOCKMAP_LAYER2_MASK;
                if (next_offset >= trans->hmp->zone_limits[zone]) {
                        kprintf("blockmap wrap1\n");
                        next_offset = HAMMER_ZONE_ENCODE(zone, 0);
                        if (++loops == 2) {     /* XXX poor-man's */
                                next_offset = 0;
                                *errorp = ENOSPC;
                                goto done;
                        }
                }
                goto again;
        }

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
        layer2 = hammer_bread(trans->hmp, layer2_offset, errorp, &buffer2);
        KKASSERT(*errorp == 0);

        if ((next_offset & HAMMER_LARGEBLOCK_MASK64) == 0) {
                /*
                 * We are at the beginning of a new bigblock
                 */
                if (next_offset == rootmap->alloc_offset ||
                    layer2->u.phys_offset == HAMMER_BLOCKMAP_FREE) {
                        /*
                         * Allocate the bigblock in layer2 if diving into
                         * uninitialized space or if the block was previously
                         * freed.
                         */
                        hammer_modify_buffer(trans, buffer1,
                                             layer1, sizeof(*layer1));
                        KKASSERT(layer1->blocks_free);
                        --layer1->blocks_free;
                        hammer_modify_buffer_done(buffer1);
                        hammer_modify_buffer(trans, buffer2,
                                             layer2, sizeof(*layer2));
                        bzero(layer2, sizeof(*layer2));
                        layer2->u.phys_offset =
                                hammer_freemap_alloc(trans, next_offset,
                                                     errorp);
                        layer2->bytes_free = HAMMER_LARGEBLOCK_SIZE;
                        hammer_modify_buffer_done(buffer2);
                        KKASSERT(*errorp == 0);
                } else if (layer2->bytes_free != HAMMER_LARGEBLOCK_SIZE) {
                        /*
                         * We have encountered a block that is already
                         * partially allocated.  We must skip this block.
                         */
                        kprintf("blockmap skip2 %016llx %d\n",
                                next_offset, layer2->bytes_free);
                        next_offset += HAMMER_LARGEBLOCK_SIZE;
                        if (next_offset >= trans->hmp->zone_limits[zone]) {
                                next_offset = HAMMER_ZONE_ENCODE(zone, 0);
                                kprintf("blockmap wrap2\n");
                                if (++loops == 2) {     /* XXX poor-man's */
                                        next_offset = 0;
                                        *errorp = ENOSPC;
                                        goto done;
                                }
                        }
                        goto again;
                }
        } else {
                /*
                 * We are appending within a bigblock.  It is possible that
                 * the blockmap has been marked completely free via a prior
                 * pruning operation.  We no longer reset the append index
                 * for that case because it compromises the UNDO by allowing
                 * data overwrites.
                 */
                KKASSERT(layer2->u.phys_offset != HAMMER_BLOCKMAP_FREE);
        }

        hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
        layer2->bytes_free -= bytes;
        hammer_modify_buffer_done(buffer2);
        KKASSERT(layer2->bytes_free >= 0);

        /*
         * If the buffer was completely free we do not have to read it from
         * disk; call hammer_bnew() to instantiate it.
         */
        if ((next_offset & HAMMER_BUFMASK) == 0) {
                bigblock_offset = layer2->u.phys_offset +
                                  (next_offset & HAMMER_LARGEBLOCK_MASK64);
                hammer_bnew(trans->hmp, bigblock_offset, errorp, &buffer3);
        }

        /*
         * Adjust our iterator and alloc_offset.  The layer1 and layer2
         * space beyond alloc_offset is uninitialized.  alloc_offset must
         * be big-block aligned.
         */
        if (used_hole == 0) {
                hammer_modify_volume(trans, root_volume, NULL, 0);
                rootmap->next_offset = next_offset + bytes;
                if (rootmap->alloc_offset < rootmap->next_offset) {
                        rootmap->alloc_offset =
                            (rootmap->next_offset + HAMMER_LARGEBLOCK_MASK) &
                            ~HAMMER_LARGEBLOCK_MASK64;
                }
                hammer_modify_volume_done(root_volume);
        }

done:
        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        if (buffer3)
                hammer_rel_buffer(buffer3, 0);
        hammer_rel_volume(root_volume, 0);
        lockmgr(&trans->hmp->blockmap_lock, LK_RELEASE);
        return(next_offset);
}
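
/*
 * Hypothetical usage sketch (error handling elided; the zone and size
 * shown are illustrative only):
 *
 *      hammer_off_t rec_off;
 *      int error;
 *
 *      rec_off = hammer_blockmap_alloc(trans, HAMMER_ZONE_RECORD_INDEX,
 *                                      sizeof(union hammer_record_ondisk),
 *                                      &error);
 *
 * The returned offset remains zone-encoded; hammer_blockmap_lookup()
 * translates it to a raw buffer offset when one is needed.
 */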

/*
 * Free (offset,bytes) in a zone
 */
void
hammer_blockmap_free(hammer_transaction_t trans,
                     hammer_off_t bmap_off, int bytes)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t rootmap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer1 = NULL;
        hammer_buffer_t buffer2 = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        int error;
        int zone;

        bytes = (bytes + 7) & ~7;
        KKASSERT(bytes <= HAMMER_BUFSIZE);
        zone = HAMMER_ZONE_DECODE(bmap_off);
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = hammer_get_root_volume(trans->hmp, &error);
        if (error)
                return;

        lockmgr(&trans->hmp->blockmap_lock, LK_EXCLUSIVE|LK_RETRY);

        rootmap = &trans->hmp->blockmap[zone];
        KKASSERT(rootmap->phys_offset != 0);
        KKASSERT(HAMMER_ZONE_DECODE(rootmap->phys_offset) ==
                 HAMMER_ZONE_RAW_BUFFER_INDEX);
        KKASSERT(HAMMER_ZONE_DECODE(rootmap->alloc_offset) == zone);
        KKASSERT(((bmap_off ^ (bmap_off + (bytes - 1))) &
                  ~HAMMER_LARGEBLOCK_MASK64) == 0);

        if (bmap_off >= rootmap->alloc_offset) {
                panic("hammer_blockmap_free: %016llx beyond EOF %016llx",
                      bmap_off, rootmap->alloc_offset);
        }

        /*
         * Dive layer 1.
         */
        layer1_offset = rootmap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(bmap_off);
        layer1 = hammer_bread(trans->hmp, layer1_offset, &error, &buffer1);
        KKASSERT(error == 0);
        KKASSERT(layer1->phys_offset);

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(bmap_off);
        layer2 = hammer_bread(trans->hmp, layer2_offset, &error, &buffer2);

        KKASSERT(error == 0);
        KKASSERT(layer2->u.phys_offset);
        hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
        layer2->bytes_free += bytes;
        KKASSERT(layer2->bytes_free <= HAMMER_LARGEBLOCK_SIZE);

        /*
         * If the big-block is free, return it to the free pool.  If our
         * iterator is in the wholly free block, leave the block intact
         * and reset the iterator.
         */
        if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
                if ((rootmap->next_offset ^ bmap_off) &
                    ~HAMMER_LARGEBLOCK_MASK64) {
                        hammer_freemap_free(trans, layer2->u.phys_offset,
                                            bmap_off, &error);
                        layer2->u.phys_offset = HAMMER_BLOCKMAP_FREE;

                        hammer_modify_buffer(trans, buffer1,
                                             layer1, sizeof(*layer1));
                        ++layer1->blocks_free;
#if 0
                        /*
                         * XXX Not working yet - we aren't clearing it when
                         * reallocating the block later on.
                         */
                        if (layer1->blocks_free == HAMMER_BLOCKMAP_RADIX2) {
                                hammer_freemap_free(
                                        trans, layer1->phys_offset,
                                        bmap_off & ~HAMMER_BLOCKMAP_LAYER2_MASK,
                                        &error);
                                layer1->phys_offset = HAMMER_BLOCKMAP_FREE;
                        }
#endif
                        hammer_modify_buffer_done(buffer1);
                } else {
                        /*
                         * Leave block intact and reset the iterator.
                         *
                         * XXX can't do this yet because if we allow data
                         * allocations they could overwrite deleted data
                         * that is still subject to an undo on reboot.
                         */
#if 0
                        hammer_modify_volume(trans, root_volume,
                                             NULL, 0);
                        rootmap->next_offset &= ~HAMMER_LARGEBLOCK_MASK64;
                        hammer_modify_volume_done(root_volume);
#endif
                }
        }
        hammer_modify_buffer_done(buffer2);

        lockmgr(&trans->hmp->blockmap_lock, LK_RELEASE);

        if (buffer1)
                hammer_rel_buffer(buffer1, 0);
        if (buffer2)
                hammer_rel_buffer(buffer2, 0);
        hammer_rel_volume(root_volume, 0);
}
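
/*
 * Note: freeing is accounting-only at layer 2 granularity.  Returning
 * the last allocated bytes of a big-block makes the whole block eligible
 * for the freemap again, unless the allocation iterator still points
 * into it, in which case the block is deliberately left intact (see
 * above).
 */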

/*
 * Return the number of free bytes in the big-block containing the
 * specified blockmap offset.
 */
int
hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t bmap_off,
                        int *curp, int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t rootmap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        int bytes;
        int zone;

        zone = HAMMER_ZONE_DECODE(bmap_off);
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp) {
                *curp = 0;
                return(0);
        }

        rootmap = &hmp->blockmap[zone];
        KKASSERT(rootmap->phys_offset != 0);
        KKASSERT(HAMMER_ZONE_DECODE(rootmap->phys_offset) ==
                 HAMMER_ZONE_RAW_BUFFER_INDEX);
        KKASSERT(HAMMER_ZONE_DECODE(rootmap->alloc_offset) == zone);

        if (bmap_off >= rootmap->alloc_offset) {
                panic("hammer_blockmap_getfree: %016llx beyond EOF %016llx",
                      bmap_off, rootmap->alloc_offset);
                bytes = 0;
                *curp = 0;
                goto done;
        }

        /*
         * Dive layer 1.
         */
        layer1_offset = rootmap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(bmap_off);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
        KKASSERT(*errorp == 0);
        KKASSERT(layer1->phys_offset);

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(bmap_off);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);

        KKASSERT(*errorp == 0);
        KKASSERT(layer2->u.phys_offset);

        bytes = layer2->bytes_free;

        if ((rootmap->next_offset ^ bmap_off) & ~HAMMER_LARGEBLOCK_MASK64)
                *curp = 0;
        else
                *curp = 1;
done:
        if (buffer)
                hammer_rel_buffer(buffer, 0);
        hammer_rel_volume(root_volume, 0);
        if (hammer_debug_general & 0x0800) {
                kprintf("hammer_blockmap_getfree: %016llx -> %d\n",
                        bmap_off, bytes);
        }
        return(bytes);
}

/*
 * Lookup a blockmap offset.
 */
hammer_off_t
hammer_blockmap_lookup(hammer_mount_t hmp, hammer_off_t bmap_off, int *errorp)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t rootmap;
        struct hammer_blockmap_layer1 *layer1;
        struct hammer_blockmap_layer2 *layer2;
        hammer_buffer_t buffer = NULL;
        hammer_off_t layer1_offset;
        hammer_off_t layer2_offset;
        hammer_off_t result_offset;
        int zone;

        zone = HAMMER_ZONE_DECODE(bmap_off);
        KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
        root_volume = hammer_get_root_volume(hmp, errorp);
        if (*errorp)
                return(0);

        rootmap = &hmp->blockmap[zone];
        KKASSERT(rootmap->phys_offset != 0);
        KKASSERT(HAMMER_ZONE_DECODE(rootmap->phys_offset) ==
                 HAMMER_ZONE_RAW_BUFFER_INDEX);
        KKASSERT(HAMMER_ZONE_DECODE(rootmap->alloc_offset) == zone);

        if (bmap_off >= rootmap->alloc_offset) {
                panic("hammer_blockmap_lookup: %016llx beyond EOF %016llx",
                      bmap_off, rootmap->alloc_offset);
                result_offset = 0;
                goto done;
        }

        /*
         * Dive layer 1.
         */
        layer1_offset = rootmap->phys_offset +
                        HAMMER_BLOCKMAP_LAYER1_OFFSET(bmap_off);
        layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
        KKASSERT(*errorp == 0);
        KKASSERT(layer1->phys_offset);

        /*
         * Dive layer 2, each entry represents a large-block.
         */
        layer2_offset = layer1->phys_offset +
                        HAMMER_BLOCKMAP_LAYER2_OFFSET(bmap_off);
        layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);

        KKASSERT(*errorp == 0);
        KKASSERT(layer2->u.phys_offset);

        result_offset = layer2->u.phys_offset +
                        (bmap_off & HAMMER_LARGEBLOCK_MASK64);
done:
        if (buffer)
                hammer_rel_buffer(buffer, 0);
        hammer_rel_volume(root_volume, 0);
        if (hammer_debug_general & 0x0800) {
                kprintf("hammer_blockmap_lookup: %016llx -> %016llx\n",
                        bmap_off, result_offset);
        }
        return(result_offset);
}
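
/*
 * Worked example (illustrative): if the layer 2 entry for bmap_off
 * records a large-block at physical offset P, and the low bits of
 * bmap_off within that large-block are 0x1234, the function returns
 * P + 0x1234.  The layer 1 and layer 2 reads above exist only to
 * locate P.
 */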

/************************************************************************
 *                  IN-CORE TRACKING OF ALLOCATION HOLES                *
 ************************************************************************
 *
 * This is a temporary shim in need of a more permanent solution.
 *
 * As we allocate space, holes are created due to having to align to a new
 * 16K buffer when an allocation would otherwise cross the buffer boundary.
 * These holes are recorded here and used to fulfill smaller requests as
 * much as possible.  Only a limited number of holes are recorded and these
 * functions operate somewhat like a heuristic, where information is allowed
 * to be thrown away.
 */

void
hammer_init_holes(hammer_mount_t hmp, hammer_holes_t holes)
{
        TAILQ_INIT(&holes->list);
        holes->count = 0;
}

void
hammer_free_holes(hammer_mount_t hmp, hammer_holes_t holes)
{
        hammer_hole_t hole;

        while ((hole = TAILQ_FIRST(&holes->list)) != NULL) {
                TAILQ_REMOVE(&holes->list, hole, entry);
                kfree(hole, M_HAMMER);
        }
}

/*
 * Attempt to locate a hole with sufficient free space to accommodate the
 * requested allocation.  Return the offset or 0 if no hole could be found.
 */
static hammer_off_t
hammer_find_hole(hammer_mount_t hmp, hammer_holes_t holes, int bytes)
{
        hammer_hole_t hole;
        hammer_off_t result_off = 0;

        TAILQ_FOREACH(hole, &holes->list, entry) {
                if (bytes <= hole->bytes) {
                        result_off = hole->offset;
                        hole->offset += bytes;
                        hole->bytes -= bytes;
                        break;
                }
        }
        return(result_off);
}

/*
 * If a newly created hole is reasonably sized then record it.  We only
 * keep track of a limited number of holes.  Lost holes are recovered by
 * reblocking.
 */
static void
hammer_add_hole(hammer_mount_t hmp, hammer_holes_t holes,
                hammer_off_t offset, int bytes)
{
        hammer_hole_t hole;

        if (bytes <= 128)
                return;

        if (holes->count < HAMMER_MAX_HOLES) {
                hole = kmalloc(sizeof(*hole), M_HAMMER, M_WAITOK);
                ++holes->count;
        } else {
                hole = TAILQ_FIRST(&holes->list);
                TAILQ_REMOVE(&holes->list, hole, entry);
        }
        TAILQ_INSERT_TAIL(&holes->list, hole, entry);
        hole->offset = offset;
        hole->bytes = bytes;
}
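
/*
 * Design note: when the hole table is full the oldest entry (the head
 * of the TAILQ) is recycled for the new hole, so the table behaves like
 * a small FIFO cache.  A dropped hole does not leak space; it is simply
 * forgotten until reblocking recovers it.
 */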