2 * Copyright (c) 2008 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.27 2008/07/31 22:30:33 dillon Exp $
42 static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);
43 static void hammer_reserve_setdelay_offset(hammer_mount_t hmp,
44 hammer_off_t base_offset, int zone,
45 struct hammer_blockmap_layer2 *layer2);
46 static void hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv);
/*
 * Red-black tree support for big-block reservations (hammer_reserve_t).
 * RB_GENERATE2 emits the tree operations keyed directly on zone_offset,
 * enabling RB_LOOKUP by hammer_off_t without a dummy node.
 */
49 * Reserved big-blocks red-black tree support
51 RB_GENERATE2(hammer_res_rb_tree, hammer_reserve, rb_node,
52 hammer_res_rb_compare, hammer_off_t, zone_offset);
/*
 * Three-way comparator ordering reservations by their zone_offset key.
 * NOTE(review): the -1/+1/0 return statements appear elided from this
 * capture; only the two comparisons are visible.
 */
55 hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
57 if (res1->zone_offset < res2->zone_offset)
59 if (res1->zone_offset > res2->zone_offset)
/*
 * hammer_blockmap_alloc - allocate 'bytes' out of the given blockmap zone.
 *
 * trans:  active transaction; supplies hmp and the root volume.
 * zone:   target zone index (must be >= BTREE index and < MAX_ZONES).
 * bytes:  request size, rounded up to 16 bytes below.
 * hint:   optional zone offset used to localize the allocation.
 * errorp: receives I/O errors from the layer1/layer2 reads.
 *
 * Returns result_offset, the zone-encoded offset of the allocation.
 *
 * NOTE(review): this capture appears to have lines elided (braces,
 * loop labels, gotos, returns); comments annotate visible code only.
 */
65 * Allocate bytes from a zone
68 hammer_blockmap_alloc(hammer_transaction_t trans, int zone, int bytes,
69 hammer_off_t hint, int *errorp)
72 hammer_volume_t root_volume;
73 hammer_blockmap_t blockmap;
74 hammer_blockmap_t freemap;
75 hammer_reserve_t resv;
76 struct hammer_blockmap_layer1 *layer1;
77 struct hammer_blockmap_layer2 *layer2;
78 hammer_buffer_t buffer1 = NULL;
79 hammer_buffer_t buffer2 = NULL;
80 hammer_buffer_t buffer3 = NULL;
81 hammer_off_t tmp_offset;
82 hammer_off_t next_offset;
83 hammer_off_t result_offset;
84 hammer_off_t layer1_offset;
85 hammer_off_t layer2_offset;
86 hammer_off_t base_off;
88 int offset; /* offset within big-block */
94 * Deal with alignment and buffer-boundary issues.
96 * Be careful, certain primary alignments are used below to allocate
97 * new blockmap blocks.
/* Round the request to a 16-byte boundary and bound-check it. */
99 bytes = (bytes + 15) & ~15;
100 KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
101 KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
106 root_volume = trans->rootvol;
108 blockmap = &hmp->blockmap[zone];
109 freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
110 KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
113 * Use the hint if we have one.
/* A hint is only honored when it encodes the same zone we allocate from. */
115 if (hint && HAMMER_ZONE_DECODE(hint) == zone) {
116 next_offset = (hint + 15) & ~(hammer_off_t)15;
119 next_offset = blockmap->next_offset;
125 * use_hint is turned off if we leave the hinted big-block.
/* Fall back to the zone iterator once we wander out of the hint block. */
127 if (use_hint && ((next_offset ^ hint) & ~HAMMER_HINTBLOCK_MASK64)) {
128 next_offset = blockmap->next_offset;
/* Wrap the iterator back to the start of the zone when it runs off the end. */
135 if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
141 next_offset = HAMMER_ZONE_ENCODE(zone, 0);
145 * The allocation request may not cross a buffer boundary. Special
146 * large allocations must not cross a large-block boundary.
148 tmp_offset = next_offset + bytes - 1;
149 if (bytes <= HAMMER_BUFSIZE) {
/* Small allocation: bump next_offset to the next buffer if it would span one. */
150 if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
151 next_offset = tmp_offset & ~HAMMER_BUFMASK64;
/* Large allocation: similarly avoid spanning a large-block boundary. */
155 if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
156 next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
160 offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;
/*
 * Dive layer 1 of the freemap to find the layer2 big-block map.
 */
165 layer1_offset = freemap->phys_offset +
166 HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
168 layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
/*
 * CRC check can race a writer; re-check under the blockmap lock
 * before declaring corruption.
 */
177 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
178 hammer_lock_ex(&hmp->blkmap_lock);
179 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
180 panic("CRC FAILED: LAYER1");
181 hammer_unlock(&hmp->blkmap_lock);
185 * If we are at a big-block boundary and layer1 indicates no
186 * free big-blocks, then we cannot allocate a new bigblock in
187 * layer2, skip to the next layer1 entry.
189 if (offset == 0 && layer1->blocks_free == 0) {
190 next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
191 ~HAMMER_BLOCKMAP_LAYER2_MASK;
194 KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
197 * Skip this layer1 entry if it is pointing to a layer2 big-block
198 * on a volume that we are currently trying to remove from the
199 * file-system. This is used by the volume-del code together with
200 * the reblocker to free up a volume.
202 if ((int)HAMMER_VOL_DECODE(layer1->phys_offset) ==
203 hmp->volume_to_remove) {
204 next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
205 ~HAMMER_BLOCKMAP_LAYER2_MASK;
210 * Dive layer 2, each entry represents a large-block.
212 layer2_offset = layer1->phys_offset +
213 HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
214 layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
221 * Check CRC. This can race another thread holding the lock
222 * and in the middle of modifying layer2.
224 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
225 hammer_lock_ex(&hmp->blkmap_lock);
226 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
227 panic("CRC FAILED: LAYER2");
228 hammer_unlock(&hmp->blkmap_lock);
232 * Skip the layer if the zone is owned by someone other then us.
234 if (layer2->zone && layer2->zone != zone) {
235 next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
/* Cannot allocate below the big-block's current append point. */
238 if (offset < layer2->append_off) {
239 next_offset += layer2->append_off - offset;
244 * If operating in the current non-hint blockmap block, do not
245 * allow it to get over-full. Also drop any active hinting so
246 * blockmap->next_offset is updated at the end.
248 * We do this for B-Tree and meta-data allocations to provide
249 * localization for updates.
251 if ((zone == HAMMER_ZONE_BTREE_INDEX ||
252 zone == HAMMER_ZONE_META_INDEX) &&
253 offset >= HAMMER_LARGEBLOCK_OVERFILL &&
254 !((next_offset ^ blockmap->next_offset) & ~HAMMER_LARGEBLOCK_MASK64)
256 if (offset >= HAMMER_LARGEBLOCK_OVERFILL) {
257 next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
264 * We need the lock from this point on. We have to re-check zone
265 * ownership after acquiring the lock and also check for reservations.
267 hammer_lock_ex(&hmp->blkmap_lock);
/* Re-run the unlocked checks now that we hold the blockmap lock. */
269 if (layer2->zone && layer2->zone != zone) {
270 hammer_unlock(&hmp->blkmap_lock);
271 next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
274 if (offset < layer2->append_off) {
275 hammer_unlock(&hmp->blkmap_lock);
276 next_offset += layer2->append_off - offset;
281 * The bigblock might be reserved by another zone. If it is reserved
282 * by our zone we may have to move next_offset past the append_off.
/* base_off is the raw-buffer-zone address of the containing big-block. */
284 base_off = (next_offset &
285 (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
286 HAMMER_ZONE_RAW_BUFFER;
287 resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
289 if (resv->zone != zone) {
290 hammer_unlock(&hmp->blkmap_lock);
291 next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
292 ~HAMMER_LARGEBLOCK_MASK64;
295 if (offset < resv->append_off) {
296 hammer_unlock(&hmp->blkmap_lock);
297 next_offset += resv->append_off - offset;
304 * Ok, we can allocate out of this layer2 big-block. Assume ownership
305 * of the layer for real. At this point we've validated any
306 * reservation that might exist and can just ignore resv.
308 if (layer2->zone == 0) {
310 * Assign the bigblock to our zone
/* Newly-claimed big-block: layer1's free count drops by one. */
312 hammer_modify_buffer(trans, buffer1,
313 layer1, sizeof(*layer1));
314 --layer1->blocks_free;
315 layer1->layer1_crc = crc32(layer1,
316 HAMMER_LAYER1_CRCSIZE);
317 hammer_modify_buffer_done(buffer1);
318 hammer_modify_buffer(trans, buffer2,
319 layer2, sizeof(*layer2));
321 KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
322 KKASSERT(layer2->append_off == 0);
/* Keep the root volume's free-big-block statistic in sync. */
323 hammer_modify_volume_field(trans, trans->rootvol,
324 vol0_stat_freebigblocks);
325 --root_volume->ondisk->vol0_stat_freebigblocks;
326 hmp->copy_stat_freebigblocks =
327 root_volume->ondisk->vol0_stat_freebigblocks;
328 hammer_modify_volume_done(trans->rootvol);
330 hammer_modify_buffer(trans, buffer2,
331 layer2, sizeof(*layer2));
333 KKASSERT(layer2->zone == zone);
336 * NOTE: bytes_free can legally go negative due to de-dup.
338 layer2->bytes_free -= bytes;
339 KKASSERT(layer2->append_off <= offset);
340 layer2->append_off = offset + bytes;
341 layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
342 hammer_modify_buffer_done(buffer2);
345 * We hold the blockmap lock and should be the only ones
346 * capable of modifying resv->append_off. Track the allocation
349 KKASSERT(bytes != 0);
351 KKASSERT(resv->append_off <= offset);
352 resv->append_off = offset + bytes;
353 resv->flags &= ~HAMMER_RESF_LAYER2FREE;
354 hammer_blockmap_reserve_complete(hmp, resv);
358 * If we are allocating from the base of a new buffer we can avoid
359 * a disk read by calling hammer_bnew().
361 if ((next_offset & HAMMER_BUFMASK) == 0) {
362 hammer_bnew_ext(trans->hmp, next_offset, bytes,
365 result_offset = next_offset;
368 * If we weren't supplied with a hint or could not use the hint
369 * then we wound up using blockmap->next_offset as the hint and
/* Persist the advanced iterator in the root volume header. */
373 hammer_modify_volume(NULL, root_volume, NULL, 0);
374 blockmap->next_offset = next_offset + bytes;
375 hammer_modify_volume_done(root_volume);
377 hammer_unlock(&hmp->blkmap_lock);
/* Cleanup: release any layer buffers we instantiated. */
384 hammer_rel_buffer(buffer1, 0);
386 hammer_rel_buffer(buffer2, 0);
388 hammer_rel_buffer(buffer3, 0);
390 return(result_offset);
/*
 * hammer_blockmap_reserve - frontend reservation of 'bytes' in a zone.
 *
 * Reserves space without committing meta-data changes so the frontend
 * can issue direct data writes; the backend finalizes later via
 * hammer_blockmap_finalize(). *zone_offp receives the reserved offset.
 *
 * NOTE(review): this capture appears to have lines elided (braces,
 * loop labels, returns); comments annotate visible code only.
 */
394 * Frontend function - Reserve bytes in a zone.
396 * This code reserves bytes out of a blockmap without committing to any
397 * meta-data modifications, allowing the front-end to directly issue disk
398 * write I/O for large blocks of data
400 * The backend later finalizes the reservation with hammer_blockmap_finalize()
401 * upon committing the related record.
404 hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes,
405 hammer_off_t *zone_offp, int *errorp)
407 hammer_volume_t root_volume;
408 hammer_blockmap_t blockmap;
409 hammer_blockmap_t freemap;
410 struct hammer_blockmap_layer1 *layer1;
411 struct hammer_blockmap_layer2 *layer2;
412 hammer_buffer_t buffer1 = NULL;
413 hammer_buffer_t buffer2 = NULL;
414 hammer_buffer_t buffer3 = NULL;
415 hammer_off_t tmp_offset;
416 hammer_off_t next_offset;
417 hammer_off_t layer1_offset;
418 hammer_off_t layer2_offset;
419 hammer_off_t base_off;
420 hammer_reserve_t resv;
421 hammer_reserve_t resx;
428 KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
429 root_volume = hammer_get_root_volume(hmp, errorp);
432 blockmap = &hmp->blockmap[zone];
433 freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
434 KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
437 * Deal with alignment and buffer-boundary issues.
439 * Be careful, certain primary alignments are used below to allocate
440 * new blockmap blocks.
/* Round request to a 16-byte boundary, same policy as blockmap_alloc(). */
442 bytes = (bytes + 15) & ~15;
443 KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
445 next_offset = blockmap->next_offset;
/* Wrap back to the start of the zone if the iterator ran off the end. */
451 if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
456 next_offset = HAMMER_ZONE_ENCODE(zone, 0);
460 * The allocation request may not cross a buffer boundary. Special
461 * large allocations must not cross a large-block boundary.
463 tmp_offset = next_offset + bytes - 1;
464 if (bytes <= HAMMER_BUFSIZE) {
465 if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
466 next_offset = tmp_offset & ~HAMMER_BUFMASK64;
470 if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
471 next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
475 offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;
/*
 * Dive layer 1 of the freemap.
 */
480 layer1_offset = freemap->phys_offset +
481 HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
482 layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
/* CRC check may race a writer; re-check under the lock before panicking. */
489 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
490 hammer_lock_ex(&hmp->blkmap_lock);
491 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
492 panic("CRC FAILED: LAYER1");
493 hammer_unlock(&hmp->blkmap_lock);
497 * If we are at a big-block boundary and layer1 indicates no
498 * free big-blocks, then we cannot allocate a new bigblock in
499 * layer2, skip to the next layer1 entry.
501 if ((next_offset & HAMMER_LARGEBLOCK_MASK) == 0 &&
502 layer1->blocks_free == 0) {
503 next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
504 ~HAMMER_BLOCKMAP_LAYER2_MASK;
507 KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
510 * Dive layer 2, each entry represents a large-block.
512 layer2_offset = layer1->phys_offset +
513 HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
514 layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
519 * Check CRC if not allocating into uninitialized space (which we
520 * aren't when reserving space).
522 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
523 hammer_lock_ex(&hmp->blkmap_lock);
524 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
525 panic("CRC FAILED: LAYER2");
526 hammer_unlock(&hmp->blkmap_lock);
530 * Skip the layer if the zone is owned by someone other then us.
532 if (layer2->zone && layer2->zone != zone) {
533 next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
/* Cannot reserve below the big-block's current append point. */
536 if (offset < layer2->append_off) {
537 next_offset += layer2->append_off - offset;
542 * We need the lock from this point on. We have to re-check zone
543 * ownership after acquiring the lock and also check for reservations.
545 hammer_lock_ex(&hmp->blkmap_lock);
547 if (layer2->zone && layer2->zone != zone) {
548 hammer_unlock(&hmp->blkmap_lock);
549 next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
552 if (offset < layer2->append_off) {
553 hammer_unlock(&hmp->blkmap_lock);
554 next_offset += layer2->append_off - offset;
559 * The bigblock might be reserved by another zone. If it is reserved
560 * by our zone we may have to move next_offset past the append_off.
562 base_off = (next_offset &
563 (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
564 HAMMER_ZONE_RAW_BUFFER;
565 resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
567 if (resv->zone != zone) {
568 hammer_unlock(&hmp->blkmap_lock);
569 next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
570 ~HAMMER_LARGEBLOCK_MASK64;
573 if (offset < resv->append_off) {
574 hammer_unlock(&hmp->blkmap_lock);
575 next_offset += resv->append_off - offset;
/*
 * No reservation exists yet: allocate and insert a fresh one keyed
 * on base_off. M_USE_RESERVE because this can run in low-memory paths.
 */
581 resx = kmalloc(sizeof(*resv), hmp->m_misc,
582 M_WAITOK | M_ZERO | M_USE_RESERVE);
585 resx->zone_offset = base_off;
586 if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
587 resx->flags |= HAMMER_RESF_LAYER2FREE;
/* RB_INSERT returns an existing colliding node; NULL means success. */
588 resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
589 KKASSERT(resv == NULL);
591 ++hammer_count_reservations;
593 resv->append_off = offset + bytes;
596 * If we are not reserving a whole buffer but are at the start of
597 * a new block, call hammer_bnew() to avoid a disk read.
599 * If we are reserving a whole buffer (or more), the caller will
600 * probably use a direct read, so do nothing.
602 if (bytes < HAMMER_BUFSIZE && (next_offset & HAMMER_BUFMASK) == 0) {
603 hammer_bnew(hmp, next_offset, errorp, &buffer3);
607 * Adjust our iterator and alloc_offset. The layer1 and layer2
608 * space beyond alloc_offset is uninitialized. alloc_offset must
609 * be big-block aligned.
611 blockmap->next_offset = next_offset + bytes;
612 hammer_unlock(&hmp->blkmap_lock);
/* Cleanup: release buffers and the root volume reference. */
616 hammer_rel_buffer(buffer1, 0);
618 hammer_rel_buffer(buffer2, 0);
620 hammer_rel_buffer(buffer3, 0);
621 hammer_rel_volume(root_volume, 0);
622 *zone_offp = next_offset;
/*
 * hammer_blockmap_reserve_complete - drop one reference on a reservation.
 *
 * On the final reference, if the underlying big-block is entirely free
 * (HAMMER_RESF_LAYER2FREE), delete related HAMMER buffers and put the
 * reservation on the flush-delay list before destroying it.
 *
 * NOTE(review): some lines (braces, else arms) appear elided from this
 * capture; comments annotate visible code only.
 */
628 * Dereference a reservation structure. Upon the final release the
629 * underlying big-block is checked and if it is entirely free we delete
630 * any related HAMMER buffers to avoid potential conflicts with future
631 * reuse of the big-block.
634 hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
636 hammer_off_t base_offset;
639 KKASSERT(resv->refs > 0);
640 KKASSERT((resv->zone_offset & HAMMER_OFF_ZONE_MASK) ==
641 HAMMER_ZONE_RAW_BUFFER);
644 * Setting append_off to the max prevents any new allocations
645 * from occuring while we are trying to dispose of the reservation,
646 * allowing us to safely delete any related HAMMER buffers.
648 * If we are unable to clean out all related HAMMER buffers we
651 if (resv->refs == 1 && (resv->flags & HAMMER_RESF_LAYER2FREE)) {
652 resv->append_off = HAMMER_LARGEBLOCK_SIZE;
/* Re-encode the raw-buffer offset back into the reservation's zone. */
653 base_offset = resv->zone_offset & ~HAMMER_OFF_ZONE_MASK;
654 base_offset = HAMMER_ZONE_ENCODE(resv->zone, base_offset);
655 error = hammer_del_buffers(hmp, base_offset,
657 HAMMER_LARGEBLOCK_SIZE,
659 if (hammer_debug_general & 0x20000) {
660 kprintf("hammer: dellgblk %016jx error %d\n",
661 (intmax_t)base_offset, error);
664 hammer_reserve_setdelay(hmp, resv);
/* Final reference: remove from the tree and free the structure. */
666 if (--resv->refs == 0) {
667 if (hammer_debug_general & 0x20000) {
668 kprintf("hammer: delresvr %016jx zone %02x\n",
669 (intmax_t)resv->zone_offset, resv->zone);
671 KKASSERT((resv->flags & HAMMER_RESF_ONDELAY) == 0);
672 RB_REMOVE(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
673 kfree(resv, hmp->m_misc);
674 --hammer_count_reservations;
/*
 * hammer_reserve_setdelay_offset - delay reuse of a potentially-free
 * big-block at base_offset until related flushes have cycled, creating
 * a covering reservation if one does not already exist.
 *
 * NOTE(review): some lines (braces, else arms) appear elided from this
 * capture; comments annotate visible code only.
 */
679 * Prevent a potentially free big-block from being reused until after
680 * the related flushes have completely cycled, otherwise crash recovery
681 * could resurrect a data block that was already reused and overwritten.
683 * The caller might reset the underlying layer2 entry's append_off to 0, so
684 * our covering append_off must be set to max to prevent any reallocation
685 * until after the flush delays complete, not to mention proper invalidation
686 * of any underlying cached blocks.
689 hammer_reserve_setdelay_offset(hammer_mount_t hmp, hammer_off_t base_offset,
690 int zone, struct hammer_blockmap_layer2 *layer2)
692 hammer_reserve_t resv;
695 * Allocate the reservation if necessary.
697 * NOTE: need lock in future around resv lookup/allocation and
698 * the setdelay call, currently refs is not bumped until the call.
701 resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_offset);
/* Not found: build a new covering reservation for this big-block. */
703 resv = kmalloc(sizeof(*resv), hmp->m_misc,
704 M_WAITOK | M_ZERO | M_USE_RESERVE);
706 resv->zone_offset = base_offset;
/* Max append_off blocks all reallocation until the delay completes. */
708 resv->append_off = HAMMER_LARGEBLOCK_SIZE;
710 if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
711 resv->flags |= HAMMER_RESF_LAYER2FREE;
/* Insert can race another creator; on collision free our copy. */
712 if (RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv)) {
713 kfree(resv, hmp->m_misc);
716 ++hammer_count_reservations;
/* Existing reservation: just refresh the layer2-free flag. */
718 if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
719 resv->flags |= HAMMER_RESF_LAYER2FREE;
721 hammer_reserve_setdelay(hmp, resv);
/*
 * hammer_reserve_setdelay - place (or re-queue) a reservation on the
 * mount's flush-delay list, tagged with the next flush group so it is
 * not completed until that group has cycled.
 */
725 * Enter the reservation on the on-delay list, or move it if it
726 * is already on the list.
729 hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv)
/* Already queued: move to the tail with an updated flush group. */
731 if (resv->flags & HAMMER_RESF_ONDELAY) {
732 TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
733 resv->flush_group = hmp->flusher.next + 1;
734 TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
/* First time on the list: account it and mark ONDELAY. */
737 ++hmp->rsv_fromdelay;
738 resv->flags |= HAMMER_RESF_ONDELAY;
739 resv->flush_group = hmp->flusher.next + 1;
740 TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
/*
 * hammer_reserve_clrdelay - the reservation's flush point has been
 * reached; take it off the delay list and hand its (inherited)
 * reference to hammer_blockmap_reserve_complete() for final disposal.
 */
745 * Reserve has reached its flush point, remove it from the delay list
746 * and finish it off. hammer_blockmap_reserve_complete() inherits
747 * the ondelay reference.
750 hammer_reserve_clrdelay(hammer_mount_t hmp, hammer_reserve_t resv)
752 KKASSERT(resv->flags & HAMMER_RESF_ONDELAY);
753 resv->flags &= ~HAMMER_RESF_ONDELAY;
754 TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
755 --hmp->rsv_fromdelay;
756 hammer_blockmap_reserve_complete(hmp, resv);
/*
 * hammer_blockmap_free - backend free of (zone_offset, bytes).
 *
 * Returns the space to the big-block's layer2 accounting; if the
 * big-block becomes entirely free, sets a reuse-delay reservation and
 * returns the block to layer1/volume free statistics.
 *
 * NOTE(review): this capture appears to have lines elided (braces,
 * returns); comments annotate visible code only.
 */
760 * Backend function - free (offset, bytes) in a zone.
765 hammer_blockmap_free(hammer_transaction_t trans,
766 hammer_off_t zone_offset, int bytes)
769 hammer_volume_t root_volume;
770 hammer_blockmap_t blockmap;
771 hammer_blockmap_t freemap;
772 struct hammer_blockmap_layer1 *layer1;
773 struct hammer_blockmap_layer2 *layer2;
774 hammer_buffer_t buffer1 = NULL;
775 hammer_buffer_t buffer2 = NULL;
776 hammer_off_t layer1_offset;
777 hammer_off_t layer2_offset;
778 hammer_off_t base_off;
/* Round to 16 bytes; the freed range must stay inside one big-block. */
789 bytes = (bytes + 15) & ~15;
790 KKASSERT(bytes <= HAMMER_XBUFSIZE);
791 KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
792 ~HAMMER_LARGEBLOCK_MASK64) == 0);
795 * Basic zone validation & locking
797 zone = HAMMER_ZONE_DECODE(zone_offset);
798 KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
799 root_volume = trans->rootvol;
802 blockmap = &hmp->blockmap[zone];
803 freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
/*
 * Dive layer 1 of the freemap.
 */
808 layer1_offset = freemap->phys_offset +
809 HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
810 layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
813 KKASSERT(layer1->phys_offset &&
814 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
/* CRC check may race a writer; re-check under the lock. */
815 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
816 hammer_lock_ex(&hmp->blkmap_lock);
817 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
818 panic("CRC FAILED: LAYER1");
819 hammer_unlock(&hmp->blkmap_lock);
823 * Dive layer 2, each entry represents a large-block.
825 layer2_offset = layer1->phys_offset +
826 HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
827 layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
830 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
831 hammer_lock_ex(&hmp->blkmap_lock);
832 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
833 panic("CRC FAILED: LAYER2");
834 hammer_unlock(&hmp->blkmap_lock);
837 hammer_lock_ex(&hmp->blkmap_lock);
839 hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
842 * Free space previously allocated via blockmap_alloc().
844 * NOTE: bytes_free can be and remain negative due to de-dup ops
845 * but can never become larger than HAMMER_LARGEBLOCK_SIZE.
847 KKASSERT(layer2->zone == zone);
848 layer2->bytes_free += bytes;
849 KKASSERT(layer2->bytes_free <= HAMMER_LARGEBLOCK_SIZE);
852 * If a big-block becomes entirely free we must create a covering
853 * reservation to prevent premature reuse. Note, however, that
854 * the big-block and/or reservation may still have an append_off
855 * that allows further (non-reused) allocations.
857 * Once the reservation has been made we re-check layer2 and if
858 * the big-block is still entirely free we reset the layer2 entry.
859 * The reservation will prevent premature reuse.
861 * NOTE: hammer_buffer's are only invalidated when the reservation
862 * is completed, if the layer2 entry is still completely free at
863 * that time. Any allocations from the reservation that may have
864 * occured in the mean time, or active references on the reservation
865 * from new pending allocations, will prevent the invalidation from
868 if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
869 base_off = (zone_offset & (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) | HAMMER_ZONE_RAW_BUFFER;
871 hammer_reserve_setdelay_offset(hmp, base_off, zone, layer2);
/* Still fully free after the setdelay: return the big-block to layer1. */
872 if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
874 layer2->append_off = 0;
875 hammer_modify_buffer(trans, buffer1,
876 layer1, sizeof(*layer1));
877 ++layer1->blocks_free;
878 layer1->layer1_crc = crc32(layer1,
879 HAMMER_LAYER1_CRCSIZE);
880 hammer_modify_buffer_done(buffer1);
881 hammer_modify_volume_field(trans,
883 vol0_stat_freebigblocks);
884 ++root_volume->ondisk->vol0_stat_freebigblocks;
885 hmp->copy_stat_freebigblocks =
886 root_volume->ondisk->vol0_stat_freebigblocks;
887 hammer_modify_volume_done(trans->rootvol);
890 layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
891 hammer_modify_buffer_done(buffer2);
892 hammer_unlock(&hmp->blkmap_lock);
/* Cleanup: release the layer buffers. */
896 hammer_rel_buffer(buffer1, 0);
898 hammer_rel_buffer(buffer2, 0);
/*
 * hammer_blockmap_dedup - account a de-dup of (zone_offset, bytes).
 *
 * Subtracts bytes from the big-block's bytes_free (which may legally
 * go and stay negative for de-dup) with an explicit underflow guard.
 *
 * NOTE(review): some lines (braces, returns, underflow handling)
 * appear elided from this capture; comments annotate visible code only.
 */
902 hammer_blockmap_dedup(hammer_transaction_t trans,
903 hammer_off_t zone_offset, int bytes)
906 hammer_volume_t root_volume;
907 hammer_blockmap_t blockmap;
908 hammer_blockmap_t freemap;
909 struct hammer_blockmap_layer1 *layer1;
910 struct hammer_blockmap_layer2 *layer2;
911 hammer_buffer_t buffer1 = NULL;
912 hammer_buffer_t buffer2 = NULL;
913 hammer_off_t layer1_offset;
914 hammer_off_t layer2_offset;
/* Round to 16 bytes; the range must stay inside one big-block. */
926 bytes = (bytes + 15) & ~15;
927 KKASSERT(bytes <= HAMMER_LARGEBLOCK_SIZE);
928 KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
929 ~HAMMER_LARGEBLOCK_MASK64) == 0);
932 * Basic zone validation & locking
934 zone = HAMMER_ZONE_DECODE(zone_offset);
935 KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
936 root_volume = trans->rootvol;
939 blockmap = &hmp->blockmap[zone];
940 freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
/*
 * Dive layer 1 of the freemap.
 */
945 layer1_offset = freemap->phys_offset +
946 HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
947 layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
950 KKASSERT(layer1->phys_offset &&
951 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
/* CRC check may race a writer; re-check under the lock. */
952 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
953 hammer_lock_ex(&hmp->blkmap_lock);
954 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
955 panic("CRC FAILED: LAYER1");
956 hammer_unlock(&hmp->blkmap_lock);
960 * Dive layer 2, each entry represents a large-block.
962 layer2_offset = layer1->phys_offset +
963 HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
964 layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
967 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
968 hammer_lock_ex(&hmp->blkmap_lock);
969 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
970 panic("CRC FAILED: LAYER2");
971 hammer_unlock(&hmp->blkmap_lock);
974 hammer_lock_ex(&hmp->blkmap_lock);
976 hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
979 * Free space previously allocated via blockmap_alloc().
981 * NOTE: bytes_free can be and remain negative due to de-dup ops
982 * but can never become larger than HAMMER_LARGEBLOCK_SIZE.
984 KKASSERT(layer2->zone == zone);
/*
 * Underflow guard: compute the would-be lower bound into 'temp' and
 * compare; cpu_ccfence() keeps the compiler from folding the check
 * away. The recovery path appears elided from this capture.
 */
985 temp = layer2->bytes_free - HAMMER_LARGEBLOCK_SIZE * 2;
986 cpu_ccfence(); /* prevent gcc from optimizing temp out */
987 if (temp > layer2->bytes_free) {
991 layer2->bytes_free -= bytes;
993 KKASSERT(layer2->bytes_free <= HAMMER_LARGEBLOCK_SIZE);
995 layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
997 hammer_modify_buffer_done(buffer2);
998 hammer_unlock(&hmp->blkmap_lock);
/* Cleanup: release the layer buffers. */
1002 hammer_rel_buffer(buffer1, 0);
1004 hammer_rel_buffer(buffer2, 0);
/*
 * hammer_blockmap_finalize - backend finalization of a frontend
 * reservation: commit (zone_offset, bytes) to the blockmap meta-data,
 * claiming big-block ownership if the layer2 entry is still unassigned.
 *
 * NOTE(review): this capture appears to have lines elided (braces,
 * returns, else arms); comments annotate visible code only.
 */
1009 * Backend function - finalize (offset, bytes) in a zone.
1011 * Allocate space that was previously reserved by the frontend.
1014 hammer_blockmap_finalize(hammer_transaction_t trans,
1015 hammer_reserve_t resv,
1016 hammer_off_t zone_offset, int bytes)
1019 hammer_volume_t root_volume;
1020 hammer_blockmap_t blockmap;
1021 hammer_blockmap_t freemap;
1022 struct hammer_blockmap_layer1 *layer1;
1023 struct hammer_blockmap_layer2 *layer2;
1024 hammer_buffer_t buffer1 = NULL;
1025 hammer_buffer_t buffer2 = NULL;
1026 hammer_off_t layer1_offset;
1027 hammer_off_t layer2_offset;
/* Round to 16 bytes, same policy as the reserve path. */
1039 bytes = (bytes + 15) & ~15;
1040 KKASSERT(bytes <= HAMMER_XBUFSIZE);
1043 * Basic zone validation & locking
1045 zone = HAMMER_ZONE_DECODE(zone_offset);
1046 KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1047 root_volume = trans->rootvol;
1050 blockmap = &hmp->blockmap[zone];
1051 freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
/*
 * Dive layer 1 of the freemap.
 */
1056 layer1_offset = freemap->phys_offset +
1057 HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1058 layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1061 KKASSERT(layer1->phys_offset &&
1062 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
/* CRC check may race a writer; re-check under the lock. */
1063 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1064 hammer_lock_ex(&hmp->blkmap_lock);
1065 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1066 panic("CRC FAILED: LAYER1");
1067 hammer_unlock(&hmp->blkmap_lock);
1071 * Dive layer 2, each entry represents a large-block.
1073 layer2_offset = layer1->phys_offset +
1074 HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1075 layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
1078 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1079 hammer_lock_ex(&hmp->blkmap_lock);
1080 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1081 panic("CRC FAILED: LAYER2");
1082 hammer_unlock(&hmp->blkmap_lock);
1085 hammer_lock_ex(&hmp->blkmap_lock);
1087 hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1090 * Finalize some or all of the space covered by a current
1091 * reservation. An allocation in the same layer may have
1092 * already assigned ownership.
/* Unowned big-block: claim it for this zone and update statistics. */
1094 if (layer2->zone == 0) {
1095 hammer_modify_buffer(trans, buffer1,
1096 layer1, sizeof(*layer1));
1097 --layer1->blocks_free;
1098 layer1->layer1_crc = crc32(layer1,
1099 HAMMER_LAYER1_CRCSIZE);
1100 hammer_modify_buffer_done(buffer1);
1101 layer2->zone = zone;
1102 KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
1103 KKASSERT(layer2->append_off == 0);
1104 hammer_modify_volume_field(trans,
1106 vol0_stat_freebigblocks);
1107 --root_volume->ondisk->vol0_stat_freebigblocks;
1108 hmp->copy_stat_freebigblocks =
1109 root_volume->ondisk->vol0_stat_freebigblocks;
1110 hammer_modify_volume_done(trans->rootvol);
/* Diagnostic print before the assertion fires, to aid debugging. */
1112 if (layer2->zone != zone)
1113 kprintf("layer2 zone mismatch %d %d\n", layer2->zone, zone);
1114 KKASSERT(layer2->zone == zone);
1115 KKASSERT(bytes != 0);
1116 layer2->bytes_free -= bytes;
1118 resv->flags &= ~HAMMER_RESF_LAYER2FREE;
1121 * Finalizations can occur out of order, or combined with allocations.
1122 * append_off must be set to the highest allocated offset.
1124 offset = ((int)zone_offset & HAMMER_LARGEBLOCK_MASK) + bytes;
1125 if (layer2->append_off < offset)
1126 layer2->append_off = offset;
1128 layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
1129 hammer_modify_buffer_done(buffer2);
1130 hammer_unlock(&hmp->blkmap_lock);
/* Cleanup: release the layer buffers. */
1134 hammer_rel_buffer(buffer1, 0);
1136 hammer_rel_buffer(buffer2, 0);
/*
 * hammer_blockmap_getfree - report the approximate free byte count of
 * the big-block containing zone_offset (reblocker support only).
 * *curp presumably flags whether the block is the zone's current
 * iterator target — TODO confirm, the assignment is elided here.
 *
 * NOTE(review): some lines (braces, returns) appear elided from this
 * capture; comments annotate visible code only.
 */
1141 * Return the approximate number of free bytes in the big-block
1142 * containing the specified blockmap offset.
1144 * WARNING: A negative number can be returned if data de-dup exists,
1145 * and the result will also not represent he actual number
1146 * of free bytes in this case.
1148 * This code is used only by the reblocker.
1151 hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
1152 int *curp, int *errorp)
1154 hammer_volume_t root_volume;
1155 hammer_blockmap_t blockmap;
1156 hammer_blockmap_t freemap;
1157 struct hammer_blockmap_layer1 *layer1;
1158 struct hammer_blockmap_layer2 *layer2;
1159 hammer_buffer_t buffer = NULL;
1160 hammer_off_t layer1_offset;
1161 hammer_off_t layer2_offset;
1165 zone = HAMMER_ZONE_DECODE(zone_offset);
1166 KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1167 root_volume = hammer_get_root_volume(hmp, errorp);
1172 blockmap = &hmp->blockmap[zone];
1173 freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
/*
 * Dive layer 1 of the freemap.
 */
1178 layer1_offset = freemap->phys_offset +
1179 HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1180 layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1185 KKASSERT(layer1->phys_offset);
/* CRC check may race a writer; re-check under the lock. */
1186 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1187 hammer_lock_ex(&hmp->blkmap_lock);
1188 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1189 panic("CRC FAILED: LAYER1");
1190 hammer_unlock(&hmp->blkmap_lock);
1194 * Dive layer 2, each entry represents a large-block.
1196 * (reuse buffer, layer1 pointer becomes invalid)
1198 layer2_offset = layer1->phys_offset +
1199 HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1200 layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1205 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1206 hammer_lock_ex(&hmp->blkmap_lock);
1207 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1208 panic("CRC FAILED: LAYER2");
1209 hammer_unlock(&hmp->blkmap_lock);
1211 KKASSERT(layer2->zone == zone);
1213 bytes = layer2->bytes_free;
/* Compare against the zone iterator's big-block to derive *curp. */
1215 if ((blockmap->next_offset ^ zone_offset) & ~HAMMER_LARGEBLOCK_MASK64)
/* Cleanup: release the shared buffer and root volume reference. */
1221 hammer_rel_buffer(buffer, 0);
1222 hammer_rel_volume(root_volume, 0);
1223 if (hammer_debug_general & 0x0800) {
1224 kprintf("hammer_blockmap_getfree: %016llx -> %d\n",
1225 (long long)zone_offset, bytes);
1232 * Lookup a blockmap offset.
1235 hammer_blockmap_lookup(hammer_mount_t hmp, hammer_off_t zone_offset,
/*
 * Translate a zoned blockmap offset into the equivalent zone-2
 * (raw-buffer) offset.  The translation itself is a direct mapping;
 * the remainder of the function only cross-checks the freemap
 * layer1/layer2 metadata when hammer_verify_zone is enabled.
 *
 * NOTE(review): error-check branches after hammer_bread and the
 * zone-0 reservation handling appear partially elided in this
 * excerpt -- verify against the full source.
 */
1238 hammer_volume_t root_volume;
1239 hammer_blockmap_t freemap;
1240 struct hammer_blockmap_layer1 *layer1;
1241 struct hammer_blockmap_layer2 *layer2;
1242 hammer_buffer_t buffer = NULL;
1243 hammer_off_t layer1_offset;
1244 hammer_off_t layer2_offset;
1245 hammer_off_t result_offset;
1246 hammer_off_t base_off;
1247 hammer_reserve_t resv;
1251 * Calculate the zone-2 offset.
1253 zone = HAMMER_ZONE_DECODE(zone_offset);
1254 KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
/* Direct map: keep the offset bits, substitute the raw-buffer zone. */
1256 result_offset = (zone_offset & ~HAMMER_OFF_ZONE_MASK) |
1257 HAMMER_ZONE_RAW_BUFFER;
1260 * We can actually stop here, normal blockmaps are now direct-mapped
1261 * onto the freemap and so represent zone-2 addresses.
1263 if (hammer_verify_zone == 0) {
1265 return(result_offset);
1269 * Validate the allocation zone
1271 root_volume = hammer_get_root_volume(hmp, errorp);
1274 freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1275 KKASSERT(freemap->phys_offset != 0);
/*
 * Dive layer 1 of the freemap, with the same double-checked CRC
 * pattern used elsewhere in this file (recheck under blkmap_lock
 * before panicing to filter races with concurrent updaters).
 */
1280 layer1_offset = freemap->phys_offset +
1281 HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1282 layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1285 KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1286 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1287 hammer_lock_ex(&hmp->blkmap_lock);
1288 if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1289 panic("CRC FAILED: LAYER1");
1290 hammer_unlock(&hmp->blkmap_lock);
1294 * Dive layer 2, each entry represents a large-block.
1296 layer2_offset = layer1->phys_offset +
1297 HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1298 layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
/*
 * A zero layer2->zone means the big-block is not yet finalized; in
 * that case an in-memory reservation (rb_resv_root, keyed by the
 * zone-2 base offset of the big-block) must exist and must agree on
 * the zone.  A non-zero mismatch is metadata corruption.
 */
1302 if (layer2->zone == 0) {
1303 base_off = (zone_offset & (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) | HAMMER_ZONE_RAW_BUFFER;
1304 resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
1306 KKASSERT(resv && resv->zone == zone);
1308 } else if (layer2->zone != zone) {
1309 panic("hammer_blockmap_lookup: bad zone %d/%d\n",
1310 layer2->zone, zone);
/* Same double-checked CRC pattern for the layer2 entry. */
1312 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1313 hammer_lock_ex(&hmp->blkmap_lock);
1314 if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1315 panic("CRC FAILED: LAYER2");
1316 hammer_unlock(&hmp->blkmap_lock);
/* Release the meta buffer and root volume ref, optionally trace. */
1321 hammer_rel_buffer(buffer, 0);
1322 hammer_rel_volume(root_volume, 0);
1323 if (hammer_debug_general & 0x0800) {
1324 kprintf("hammer_blockmap_lookup: %016llx -> %016llx\n",
1325 (long long)zone_offset, (long long)result_offset);
1327 return(result_offset);
1332 * Check space availability
1334 * MPSAFE - does not require fs_token
1337 _hammer_checkspace(hammer_mount_t hmp, int slop, int64_t *resp)
1339 const int in_size = sizeof(struct hammer_inode_data) +
1340 sizeof(union hammer_btree_elm);
1341 const int rec_size = (sizeof(union hammer_btree_elm) * 2);
1344 usedbytes = hmp->rsv_inodes * in_size +
1345 hmp->rsv_recs * rec_size +
1346 hmp->rsv_databytes +
1347 ((int64_t)hmp->rsv_fromdelay << HAMMER_LARGEBLOCK_BITS) +
1348 ((int64_t)hidirtybufspace << 2) +
1349 (slop << HAMMER_LARGEBLOCK_BITS);
1351 hammer_count_extra_space_used = usedbytes; /* debugging */
1355 if (hmp->copy_stat_freebigblocks >=
1356 (usedbytes >> HAMMER_LARGEBLOCK_BITS)) {