/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
38 #include <vm/vm_page2.h>
42 static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);
43 static void hammer_reserve_setdelay_offset(hammer_mount_t hmp,
44 hammer_off_t base_offset, int zone,
45 hammer_blockmap_layer2_t layer2);
46 static void hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv);
47 static int hammer_check_volume(hammer_mount_t, hammer_off_t*);
48 static void hammer_skip_volume(hammer_off_t *offsetp);
51 * Reserved big-blocks red-black tree support
53 RB_GENERATE2(hammer_res_rb_tree, hammer_reserve, rb_node,
54 hammer_res_rb_compare, hammer_off_t, zone_offset);
57 hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
59 if (res1->zone_offset < res2->zone_offset)
61 if (res1->zone_offset > res2->zone_offset)
67 * Allocate bytes from a zone
70 hammer_blockmap_alloc(hammer_transaction_t trans, int zone, int bytes,
71 hammer_off_t hint, int *errorp)
74 hammer_volume_t root_volume;
75 hammer_blockmap_t blockmap;
76 hammer_blockmap_t freemap;
77 hammer_reserve_t resv;
78 hammer_blockmap_layer1_t layer1;
79 hammer_blockmap_layer2_t layer2;
80 hammer_buffer_t buffer1 = NULL;
81 hammer_buffer_t buffer2 = NULL;
82 hammer_buffer_t buffer3 = NULL;
83 hammer_off_t tmp_offset;
84 hammer_off_t next_offset;
85 hammer_off_t result_offset;
86 hammer_off_t layer1_offset;
87 hammer_off_t layer2_offset;
88 hammer_off_t base_off;
90 int offset; /* offset within big-block */
96 * Deal with alignment and buffer-boundary issues.
98 * Be careful, certain primary alignments are used below to allocate
99 * new blockmap blocks.
101 bytes = HAMMER_DATA_DOALIGN(bytes);
102 KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
103 KKASSERT(hammer_is_index_record(zone));
108 root_volume = trans->rootvol;
110 blockmap = &hmp->blockmap[zone];
111 freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
112 KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
115 * Use the hint if we have one.
117 if (hint && HAMMER_ZONE_DECODE(hint) == zone) {
118 next_offset = HAMMER_DATA_DOALIGN_WITH(hammer_off_t, hint);
121 next_offset = blockmap->next_offset;
127 * use_hint is turned off if we leave the hinted big-block.
129 if (use_hint && ((next_offset ^ hint) & ~HAMMER_HINTBLOCK_MASK64)) {
130 next_offset = blockmap->next_offset;
137 if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
139 hmkprintf(hmp, "No space left for zone %d "
140 "allocation\n", zone);
145 next_offset = HAMMER_ZONE_ENCODE(zone, 0);
149 * The allocation request may not cross a buffer boundary. Special
150 * large allocations must not cross a big-block boundary.
152 tmp_offset = next_offset + bytes - 1;
153 if (bytes <= HAMMER_BUFSIZE) {
154 if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
155 next_offset = tmp_offset & ~HAMMER_BUFMASK64;
159 if ((next_offset ^ tmp_offset) & ~HAMMER_BIGBLOCK_MASK64) {
160 next_offset = tmp_offset & ~HAMMER_BIGBLOCK_MASK64;
164 offset = (int)next_offset & HAMMER_BIGBLOCK_MASK;
169 layer1_offset = freemap->phys_offset +
170 HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
172 layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
181 if (!hammer_crc_test_layer1(hmp->version, layer1)) {
182 hammer_lock_ex(&hmp->blkmap_lock);
183 if (!hammer_crc_test_layer1(hmp->version, layer1))
184 hpanic("CRC FAILED: LAYER1");
185 hammer_unlock(&hmp->blkmap_lock);
189 * If we are at a big-block boundary and layer1 indicates no
190 * free big-blocks, then we cannot allocate a new big-block in
191 * layer2, skip to the next layer1 entry.
193 if (offset == 0 && layer1->blocks_free == 0) {
194 next_offset = HAMMER_ZONE_LAYER1_NEXT_OFFSET(next_offset);
195 if (hammer_check_volume(hmp, &next_offset)) {
201 KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
204 * Skip the whole volume if it is pointing to a layer2 big-block
205 * on a volume that we are currently trying to remove from the
206 * file-system. This is used by the volume-del code together with
207 * the reblocker to free up a volume.
209 if (HAMMER_VOL_DECODE(layer1->phys_offset) == hmp->volume_to_remove) {
210 hammer_skip_volume(&next_offset);
215 * Dive layer 2, each entry represents a big-block.
217 layer2_offset = layer1->phys_offset +
218 HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
219 layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
226 * Check CRC. This can race another thread holding the lock
227 * and in the middle of modifying layer2.
229 if (!hammer_crc_test_layer2(hmp->version, layer2)) {
230 hammer_lock_ex(&hmp->blkmap_lock);
231 if (!hammer_crc_test_layer2(hmp->version, layer2))
232 hpanic("CRC FAILED: LAYER2");
233 hammer_unlock(&hmp->blkmap_lock);
237 * Skip the layer if the zone is owned by someone other then us.
239 if (layer2->zone && layer2->zone != zone) {
240 next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
243 if (offset < layer2->append_off) {
244 next_offset += layer2->append_off - offset;
250 * If operating in the current non-hint blockmap block, do not
251 * allow it to get over-full. Also drop any active hinting so
252 * blockmap->next_offset is updated at the end.
254 * We do this for B-Tree and meta-data allocations to provide
255 * localization for updates.
257 if ((zone == HAMMER_ZONE_BTREE_INDEX ||
258 zone == HAMMER_ZONE_META_INDEX) &&
259 offset >= HAMMER_BIGBLOCK_OVERFILL &&
260 !((next_offset ^ blockmap->next_offset) & ~HAMMER_BIGBLOCK_MASK64)) {
261 if (offset >= HAMMER_BIGBLOCK_OVERFILL) {
262 next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
270 * We need the lock from this point on. We have to re-check zone
271 * ownership after acquiring the lock and also check for reservations.
273 hammer_lock_ex(&hmp->blkmap_lock);
275 if (layer2->zone && layer2->zone != zone) {
276 hammer_unlock(&hmp->blkmap_lock);
277 next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
280 if (offset < layer2->append_off) {
281 hammer_unlock(&hmp->blkmap_lock);
282 next_offset += layer2->append_off - offset;
287 * The big-block might be reserved by another zone. If it is reserved
288 * by our zone we may have to move next_offset past the append_off.
290 base_off = hammer_xlate_to_zone2(next_offset & ~HAMMER_BIGBLOCK_MASK64);
291 resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
293 if (resv->zone != zone) {
294 hammer_unlock(&hmp->blkmap_lock);
295 next_offset = HAMMER_ZONE_LAYER2_NEXT_OFFSET(next_offset);
298 if (offset < resv->append_off) {
299 hammer_unlock(&hmp->blkmap_lock);
300 next_offset += resv->append_off - offset;
307 * Ok, we can allocate out of this layer2 big-block. Assume ownership
308 * of the layer for real. At this point we've validated any
309 * reservation that might exist and can just ignore resv.
311 if (layer2->zone == 0) {
313 * Assign the big-block to our zone
315 hammer_modify_buffer(trans, buffer1, layer1, sizeof(*layer1));
316 --layer1->blocks_free;
317 hammer_crc_set_layer1(hmp->version, layer1);
318 hammer_modify_buffer_done(buffer1);
319 hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
321 KKASSERT(layer2->bytes_free == HAMMER_BIGBLOCK_SIZE);
322 KKASSERT(layer2->append_off == 0);
323 hammer_modify_volume_field(trans, trans->rootvol,
324 vol0_stat_freebigblocks);
325 --root_volume->ondisk->vol0_stat_freebigblocks;
326 hmp->copy_stat_freebigblocks =
327 root_volume->ondisk->vol0_stat_freebigblocks;
328 hammer_modify_volume_done(trans->rootvol);
330 hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
332 KKASSERT(layer2->zone == zone);
335 * NOTE: bytes_free can legally go negative due to de-dup.
337 layer2->bytes_free -= bytes;
338 KKASSERT(layer2->append_off <= offset);
339 layer2->append_off = offset + bytes;
340 hammer_crc_set_layer2(hmp->version, layer2);
341 hammer_modify_buffer_done(buffer2);
344 * We hold the blockmap lock and should be the only ones
345 * capable of modifying resv->append_off. Track the allocation
348 KKASSERT(bytes != 0);
350 KKASSERT(resv->append_off <= offset);
351 resv->append_off = offset + bytes;
352 resv->flags &= ~HAMMER_RESF_LAYER2FREE;
353 hammer_blockmap_reserve_complete(hmp, resv);
357 * If we are allocating from the base of a new buffer we can avoid
358 * a disk read by calling hammer_bnew_ext().
360 if ((next_offset & HAMMER_BUFMASK) == 0) {
361 hammer_bnew_ext(trans->hmp, next_offset, bytes,
368 result_offset = next_offset;
371 * If we weren't supplied with a hint or could not use the hint
372 * then we wound up using blockmap->next_offset as the hint and
376 hammer_modify_volume_noundo(NULL, root_volume);
377 blockmap->next_offset = next_offset + bytes;
378 hammer_modify_volume_done(root_volume);
380 hammer_unlock(&hmp->blkmap_lock);
387 hammer_rel_buffer(buffer1, 0);
389 hammer_rel_buffer(buffer2, 0);
391 hammer_rel_buffer(buffer3, 0);
393 return(result_offset);
397 * Frontend function - Reserve bytes in a zone.
399 * This code reserves bytes out of a blockmap without committing to any
400 * meta-data modifications, allowing the front-end to directly issue disk
401 * write I/O for big-blocks of data
403 * The backend later finalizes the reservation with hammer_blockmap_finalize()
404 * upon committing the related record.
407 hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes,
408 hammer_off_t *zone_offp, int *errorp)
410 hammer_volume_t root_volume;
411 hammer_blockmap_t blockmap;
412 hammer_blockmap_t freemap;
413 hammer_blockmap_layer1_t layer1;
414 hammer_blockmap_layer2_t layer2;
415 hammer_buffer_t buffer1 = NULL;
416 hammer_buffer_t buffer2 = NULL;
417 hammer_buffer_t buffer3 = NULL;
418 hammer_off_t tmp_offset;
419 hammer_off_t next_offset;
420 hammer_off_t layer1_offset;
421 hammer_off_t layer2_offset;
422 hammer_off_t base_off;
423 hammer_reserve_t resv;
424 hammer_reserve_t resx = NULL;
431 KKASSERT(hammer_is_index_record(zone));
432 root_volume = hammer_get_root_volume(hmp, errorp);
435 blockmap = &hmp->blockmap[zone];
436 freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
437 KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
440 * Deal with alignment and buffer-boundary issues.
442 * Be careful, certain primary alignments are used below to allocate
443 * new blockmap blocks.
445 bytes = HAMMER_DATA_DOALIGN(bytes);
446 KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
448 next_offset = blockmap->next_offset;
454 if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
456 hmkprintf(hmp, "No space left for zone %d "
457 "reservation\n", zone);
461 next_offset = HAMMER_ZONE_ENCODE(zone, 0);
465 * The allocation request may not cross a buffer boundary. Special
466 * large allocations must not cross a big-block boundary.
468 tmp_offset = next_offset + bytes - 1;
469 if (bytes <= HAMMER_BUFSIZE) {
470 if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
471 next_offset = tmp_offset & ~HAMMER_BUFMASK64;
475 if ((next_offset ^ tmp_offset) & ~HAMMER_BIGBLOCK_MASK64) {
476 next_offset = tmp_offset & ~HAMMER_BIGBLOCK_MASK64;
480 offset = (int)next_offset & HAMMER_BIGBLOCK_MASK;
485 layer1_offset = freemap->phys_offset +
486 HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
487 layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
494 if (!hammer_crc_test_layer1(hmp->version, layer1)) {
495 hammer_lock_ex(&hmp->blkmap_lock);
496 if (!hammer_crc_test_layer1(hmp->version, layer1))
497 hpanic("CRC FAILED: LAYER1");
498 hammer_unlock(&hmp->blkmap_lock);
502 * If we are at a big-block boundary and layer1 indicates no
503 * free big-blocks, then we cannot allocate a new big-block in
504 * layer2, skip to the next layer1 entry.
506 if ((next_offset & HAMMER_BIGBLOCK_MASK) == 0 &&
507 layer1->blocks_free == 0) {
508 next_offset = HAMMER_ZONE_LAYER1_NEXT_OFFSET(next_offset);
509 if (hammer_check_volume(hmp, &next_offset))
513 KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
516 * Dive layer 2, each entry represents a big-block.
518 layer2_offset = layer1->phys_offset +
519 HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
520 layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
525 * Check CRC if not allocating into uninitialized space (which we
526 * aren't when reserving space).
528 if (!hammer_crc_test_layer2(hmp->version, layer2)) {
529 hammer_lock_ex(&hmp->blkmap_lock);
530 if (!hammer_crc_test_layer2(hmp->version, layer2))
531 hpanic("CRC FAILED: LAYER2");
532 hammer_unlock(&hmp->blkmap_lock);
536 * Skip the layer if the zone is owned by someone other then us.
538 if (layer2->zone && layer2->zone != zone) {
539 next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
542 if (offset < layer2->append_off) {
543 next_offset += layer2->append_off - offset;
548 * We need the lock from this point on. We have to re-check zone
549 * ownership after acquiring the lock and also check for reservations.
551 hammer_lock_ex(&hmp->blkmap_lock);
553 if (layer2->zone && layer2->zone != zone) {
554 hammer_unlock(&hmp->blkmap_lock);
555 next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
558 if (offset < layer2->append_off) {
559 hammer_unlock(&hmp->blkmap_lock);
560 next_offset += layer2->append_off - offset;
565 * The big-block might be reserved by another zone. If it is reserved
566 * by our zone we may have to move next_offset past the append_off.
568 base_off = hammer_xlate_to_zone2(next_offset & ~HAMMER_BIGBLOCK_MASK64);
569 resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
571 if (resv->zone != zone) {
572 hammer_unlock(&hmp->blkmap_lock);
573 next_offset = HAMMER_ZONE_LAYER2_NEXT_OFFSET(next_offset);
576 if (offset < resv->append_off) {
577 hammer_unlock(&hmp->blkmap_lock);
578 next_offset += resv->append_off - offset;
583 resx = kmalloc(sizeof(*resv), hmp->m_misc,
584 M_WAITOK | M_ZERO | M_USE_RESERVE);
587 resx->zone_offset = base_off;
588 if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
589 resx->flags |= HAMMER_RESF_LAYER2FREE;
590 resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
591 KKASSERT(resv == NULL);
593 ++hammer_count_reservations;
595 resv->append_off = offset + bytes;
598 * If we are not reserving a whole buffer but are at the start of
599 * a new block, call hammer_bnew() to avoid a disk read.
601 * If we are reserving a whole buffer (or more), the caller will
602 * probably use a direct read, so do nothing.
604 * If we do not have a whole lot of system memory we really can't
605 * afford to block while holding the blkmap_lock!
607 if (bytes < HAMMER_BUFSIZE && (next_offset & HAMMER_BUFMASK) == 0) {
608 if (!vm_paging_min_dnc(HAMMER_BUFSIZE / PAGE_SIZE)) {
609 hammer_bnew(hmp, next_offset, errorp, &buffer3);
615 blockmap->next_offset = next_offset + bytes;
616 hammer_unlock(&hmp->blkmap_lock);
620 hammer_rel_buffer(buffer1, 0);
622 hammer_rel_buffer(buffer2, 0);
624 hammer_rel_buffer(buffer3, 0);
625 hammer_rel_volume(root_volume, 0);
626 *zone_offp = next_offset;
632 * Dereference a reservation structure. Upon the final release the
633 * underlying big-block is checked and if it is entirely free we delete
634 * any related HAMMER buffers to avoid potential conflicts with future
635 * reuse of the big-block.
638 hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
640 hammer_off_t base_offset;
643 KKASSERT(resv->refs > 0);
644 KKASSERT(hammer_is_zone_raw_buffer(resv->zone_offset));
647 * Setting append_off to the max prevents any new allocations
648 * from occuring while we are trying to dispose of the reservation,
649 * allowing us to safely delete any related HAMMER buffers.
651 * If we are unable to clean out all related HAMMER buffers we
654 if (resv->refs == 1 && (resv->flags & HAMMER_RESF_LAYER2FREE)) {
655 resv->append_off = HAMMER_BIGBLOCK_SIZE;
656 base_offset = hammer_xlate_to_zoneX(resv->zone, resv->zone_offset);
657 error = hammer_del_buffers(hmp, base_offset,
659 HAMMER_BIGBLOCK_SIZE,
661 if (hammer_debug_general & 0x20000) {
662 hkprintf("delbgblk %016jx error %d\n",
663 (intmax_t)base_offset, error);
666 hammer_reserve_setdelay(hmp, resv);
668 if (--resv->refs == 0) {
669 if (hammer_debug_general & 0x20000) {
670 hkprintf("delresvr %016jx zone %02x\n",
671 (intmax_t)resv->zone_offset, resv->zone);
673 KKASSERT((resv->flags & HAMMER_RESF_ONDELAY) == 0);
674 RB_REMOVE(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
675 kfree(resv, hmp->m_misc);
676 --hammer_count_reservations;
681 * Prevent a potentially free big-block from being reused until after
682 * the related flushes have completely cycled, otherwise crash recovery
683 * could resurrect a data block that was already reused and overwritten.
685 * The caller might reset the underlying layer2 entry's append_off to 0, so
686 * our covering append_off must be set to max to prevent any reallocation
687 * until after the flush delays complete, not to mention proper invalidation
688 * of any underlying cached blocks.
691 hammer_reserve_setdelay_offset(hammer_mount_t hmp, hammer_off_t base_offset,
692 int zone, hammer_blockmap_layer2_t layer2)
694 hammer_reserve_t resv;
697 * Allocate the reservation if necessary.
699 * NOTE: need lock in future around resv lookup/allocation and
700 * the setdelay call, currently refs is not bumped until the call.
703 resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_offset);
705 resv = kmalloc(sizeof(*resv), hmp->m_misc,
706 M_WAITOK | M_ZERO | M_USE_RESERVE);
708 resv->zone_offset = base_offset;
710 resv->append_off = HAMMER_BIGBLOCK_SIZE;
712 if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
713 resv->flags |= HAMMER_RESF_LAYER2FREE;
714 if (RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv)) {
715 kfree(resv, hmp->m_misc);
718 ++hammer_count_reservations;
720 if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
721 resv->flags |= HAMMER_RESF_LAYER2FREE;
723 hammer_reserve_setdelay(hmp, resv);
727 * Enter the reservation on the on-delay list, or move it if it
728 * is already on the list.
731 hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv)
733 if (resv->flags & HAMMER_RESF_ONDELAY) {
734 TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
735 resv->flg_no = hmp->flusher.next + 1;
736 TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
739 ++hmp->rsv_fromdelay;
740 resv->flags |= HAMMER_RESF_ONDELAY;
741 resv->flg_no = hmp->flusher.next + 1;
742 TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
747 * Reserve has reached its flush point, remove it from the delay list
748 * and finish it off. hammer_blockmap_reserve_complete() inherits
749 * the ondelay reference.
752 hammer_reserve_clrdelay(hammer_mount_t hmp, hammer_reserve_t resv)
754 KKASSERT(resv->flags & HAMMER_RESF_ONDELAY);
755 resv->flags &= ~HAMMER_RESF_ONDELAY;
756 TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
757 --hmp->rsv_fromdelay;
758 hammer_blockmap_reserve_complete(hmp, resv);
762 * Backend function - free (offset, bytes) in a zone.
767 hammer_blockmap_free(hammer_transaction_t trans,
768 hammer_off_t zone_offset, int bytes)
771 hammer_volume_t root_volume;
772 hammer_blockmap_t freemap;
773 hammer_blockmap_layer1_t layer1;
774 hammer_blockmap_layer2_t layer2;
775 hammer_buffer_t buffer1 = NULL;
776 hammer_buffer_t buffer2 = NULL;
777 hammer_off_t layer1_offset;
778 hammer_off_t layer2_offset;
779 hammer_off_t base_off;
790 bytes = HAMMER_DATA_DOALIGN(bytes);
791 KKASSERT(bytes <= HAMMER_XBUFSIZE);
792 KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
793 ~HAMMER_BIGBLOCK_MASK64) == 0);
796 * Basic zone validation & locking
798 zone = HAMMER_ZONE_DECODE(zone_offset);
799 KKASSERT(hammer_is_index_record(zone));
800 root_volume = trans->rootvol;
803 freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
808 layer1_offset = freemap->phys_offset +
809 HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
810 layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
813 KKASSERT(layer1->phys_offset &&
814 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
815 if (!hammer_crc_test_layer1(hmp->version, layer1)) {
816 hammer_lock_ex(&hmp->blkmap_lock);
817 if (!hammer_crc_test_layer1(hmp->version, layer1))
818 hpanic("CRC FAILED: LAYER1");
819 hammer_unlock(&hmp->blkmap_lock);
823 * Dive layer 2, each entry represents a big-block.
825 layer2_offset = layer1->phys_offset +
826 HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
827 layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
830 if (!hammer_crc_test_layer2(hmp->version, layer2)) {
831 hammer_lock_ex(&hmp->blkmap_lock);
832 if (!hammer_crc_test_layer2(hmp->version, layer2))
833 hpanic("CRC FAILED: LAYER2");
834 hammer_unlock(&hmp->blkmap_lock);
837 hammer_lock_ex(&hmp->blkmap_lock);
839 hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
842 * Free space previously allocated via blockmap_alloc().
844 * NOTE: bytes_free can be and remain negative due to de-dup ops
845 * but can never become larger than HAMMER_BIGBLOCK_SIZE.
847 KKASSERT(layer2->zone == zone);
848 layer2->bytes_free += bytes;
849 KKASSERT(layer2->bytes_free <= HAMMER_BIGBLOCK_SIZE);
852 * If a big-block becomes entirely free we must create a covering
853 * reservation to prevent premature reuse. Note, however, that
854 * the big-block and/or reservation may still have an append_off
855 * that allows further (non-reused) allocations.
857 * Once the reservation has been made we re-check layer2 and if
858 * the big-block is still entirely free we reset the layer2 entry.
859 * The reservation will prevent premature reuse.
861 * NOTE: hammer_buffer's are only invalidated when the reservation
862 * is completed, if the layer2 entry is still completely free at
863 * that time. Any allocations from the reservation that may have
864 * occured in the mean time, or active references on the reservation
865 * from new pending allocations, will prevent the invalidation from
868 if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE) {
869 base_off = hammer_xlate_to_zone2(zone_offset &
870 ~HAMMER_BIGBLOCK_MASK64);
872 hammer_reserve_setdelay_offset(hmp, base_off, zone, layer2);
873 if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE) {
875 layer2->append_off = 0;
876 hammer_modify_buffer(trans, buffer1,
877 layer1, sizeof(*layer1));
878 ++layer1->blocks_free;
879 hammer_crc_set_layer1(hmp->version, layer1);
880 hammer_modify_buffer_done(buffer1);
881 hammer_modify_volume_field(trans,
883 vol0_stat_freebigblocks);
884 ++root_volume->ondisk->vol0_stat_freebigblocks;
885 hmp->copy_stat_freebigblocks =
886 root_volume->ondisk->vol0_stat_freebigblocks;
887 hammer_modify_volume_done(trans->rootvol);
890 hammer_crc_set_layer2(hmp->version, layer2);
891 hammer_modify_buffer_done(buffer2);
892 hammer_unlock(&hmp->blkmap_lock);
896 hammer_rel_buffer(buffer1, 0);
898 hammer_rel_buffer(buffer2, 0);
902 hammer_blockmap_dedup(hammer_transaction_t trans,
903 hammer_off_t zone_offset, int bytes)
906 hammer_blockmap_t freemap;
907 hammer_blockmap_layer1_t layer1;
908 hammer_blockmap_layer2_t layer2;
909 hammer_buffer_t buffer1 = NULL;
910 hammer_buffer_t buffer2 = NULL;
911 hammer_off_t layer1_offset;
912 hammer_off_t layer2_offset;
924 bytes = HAMMER_DATA_DOALIGN(bytes);
925 KKASSERT(bytes <= HAMMER_BIGBLOCK_SIZE);
926 KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
927 ~HAMMER_BIGBLOCK_MASK64) == 0);
930 * Basic zone validation & locking
932 zone = HAMMER_ZONE_DECODE(zone_offset);
933 KKASSERT(hammer_is_index_record(zone));
936 freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
941 layer1_offset = freemap->phys_offset +
942 HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
943 layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
946 KKASSERT(layer1->phys_offset &&
947 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
948 if (!hammer_crc_test_layer1(hmp->version, layer1)) {
949 hammer_lock_ex(&hmp->blkmap_lock);
950 if (!hammer_crc_test_layer1(hmp->version, layer1))
951 hpanic("CRC FAILED: LAYER1");
952 hammer_unlock(&hmp->blkmap_lock);
956 * Dive layer 2, each entry represents a big-block.
958 layer2_offset = layer1->phys_offset +
959 HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
960 layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
963 if (!hammer_crc_test_layer2(hmp->version, layer2)) {
964 hammer_lock_ex(&hmp->blkmap_lock);
965 if (!hammer_crc_test_layer2(hmp->version, layer2))
966 hpanic("CRC FAILED: LAYER2");
967 hammer_unlock(&hmp->blkmap_lock);
970 hammer_lock_ex(&hmp->blkmap_lock);
972 hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
975 * Free space previously allocated via blockmap_alloc().
977 * NOTE: bytes_free can be and remain negative due to de-dup ops
978 * but can never become larger than HAMMER_BIGBLOCK_SIZE.
980 KKASSERT(layer2->zone == zone);
981 temp = layer2->bytes_free - HAMMER_BIGBLOCK_SIZE * 2;
982 cpu_ccfence(); /* prevent gcc from optimizing temp out */
983 if (temp > layer2->bytes_free) {
987 layer2->bytes_free -= bytes;
989 KKASSERT(layer2->bytes_free <= HAMMER_BIGBLOCK_SIZE);
991 hammer_crc_set_layer2(hmp->version, layer2);
993 hammer_modify_buffer_done(buffer2);
994 hammer_unlock(&hmp->blkmap_lock);
998 hammer_rel_buffer(buffer1, 0);
1000 hammer_rel_buffer(buffer2, 0);
1005 * Backend function - finalize (offset, bytes) in a zone.
1007 * Allocate space that was previously reserved by the frontend.
1010 hammer_blockmap_finalize(hammer_transaction_t trans,
1011 hammer_reserve_t resv,
1012 hammer_off_t zone_offset, int bytes)
1015 hammer_volume_t root_volume;
1016 hammer_blockmap_t freemap;
1017 hammer_blockmap_layer1_t layer1;
1018 hammer_blockmap_layer2_t layer2;
1019 hammer_buffer_t buffer1 = NULL;
1020 hammer_buffer_t buffer2 = NULL;
1021 hammer_off_t layer1_offset;
1022 hammer_off_t layer2_offset;
1034 bytes = HAMMER_DATA_DOALIGN(bytes);
1035 KKASSERT(bytes <= HAMMER_XBUFSIZE);
1038 * Basic zone validation & locking
1040 zone = HAMMER_ZONE_DECODE(zone_offset);
1041 KKASSERT(hammer_is_index_record(zone));
1042 root_volume = trans->rootvol;
1045 freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1050 layer1_offset = freemap->phys_offset +
1051 HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1052 layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1055 KKASSERT(layer1->phys_offset &&
1056 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1057 if (!hammer_crc_test_layer1(hmp->version, layer1)) {
1058 hammer_lock_ex(&hmp->blkmap_lock);
1059 if (!hammer_crc_test_layer1(hmp->version, layer1))
1060 hpanic("CRC FAILED: LAYER1");
1061 hammer_unlock(&hmp->blkmap_lock);
1065 * Dive layer 2, each entry represents a big-block.
1067 layer2_offset = layer1->phys_offset +
1068 HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1069 layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
1072 if (!hammer_crc_test_layer2(hmp->version, layer2)) {
1073 hammer_lock_ex(&hmp->blkmap_lock);
1074 if (!hammer_crc_test_layer2(hmp->version, layer2))
1075 hpanic("CRC FAILED: LAYER2");
1076 hammer_unlock(&hmp->blkmap_lock);
1079 hammer_lock_ex(&hmp->blkmap_lock);
1081 hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1084 * Finalize some or all of the space covered by a current
1085 * reservation. An allocation in the same layer may have
1086 * already assigned ownership.
1088 if (layer2->zone == 0) {
1089 hammer_modify_buffer(trans, buffer1, layer1, sizeof(*layer1));
1090 --layer1->blocks_free;
1091 hammer_crc_set_layer1(hmp->version, layer1);
1092 hammer_modify_buffer_done(buffer1);
1093 layer2->zone = zone;
1094 KKASSERT(layer2->bytes_free == HAMMER_BIGBLOCK_SIZE);
1095 KKASSERT(layer2->append_off == 0);
1096 hammer_modify_volume_field(trans,
1098 vol0_stat_freebigblocks);
1099 --root_volume->ondisk->vol0_stat_freebigblocks;
1100 hmp->copy_stat_freebigblocks =
1101 root_volume->ondisk->vol0_stat_freebigblocks;
1102 hammer_modify_volume_done(trans->rootvol);
1104 if (layer2->zone != zone)
1105 hdkprintf("layer2 zone mismatch %d %d\n", layer2->zone, zone);
1106 KKASSERT(layer2->zone == zone);
1107 KKASSERT(bytes != 0);
1108 layer2->bytes_free -= bytes;
1110 resv->flags &= ~HAMMER_RESF_LAYER2FREE;
1113 * Finalizations can occur out of order, or combined with allocations.
1114 * append_off must be set to the highest allocated offset.
1116 offset = ((int)zone_offset & HAMMER_BIGBLOCK_MASK) + bytes;
1117 if (layer2->append_off < offset)
1118 layer2->append_off = offset;
1120 hammer_crc_set_layer2(hmp->version, layer2);
1121 hammer_modify_buffer_done(buffer2);
1122 hammer_unlock(&hmp->blkmap_lock);
1126 hammer_rel_buffer(buffer1, 0);
1128 hammer_rel_buffer(buffer2, 0);
1133 * Return the approximate number of free bytes in the big-block
1134 * containing the specified blockmap offset.
1136 * WARNING: A negative number can be returned if data de-dup exists,
1137 * and the result will also not represent he actual number
1138 * of free bytes in this case.
1140 * This code is used only by the reblocker.
1143 hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
1144 int *curp, int *errorp)
1146 hammer_volume_t root_volume;
1147 hammer_blockmap_t blockmap;
1148 hammer_blockmap_t freemap;
1149 hammer_blockmap_layer1_t layer1;
1150 hammer_blockmap_layer2_t layer2;
1151 hammer_buffer_t buffer = NULL;
1152 hammer_off_t layer1_offset;
1153 hammer_off_t layer2_offset;
/*
 * zone_offset must be a record-zone encoded offset; decode the zone
 * and assert it is one of the per-zone record indices.
 */
1157 zone = HAMMER_ZONE_DECODE(zone_offset);
1158 KKASSERT(hammer_is_index_record(zone));
1159 root_volume = hammer_get_root_volume(hmp, errorp);
1164 blockmap = &hmp->blockmap[zone];
1165 freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
/*
 * Dive layer 1 of the freemap to locate the layer2 array covering
 * zone_offset.
 */
1170 layer1_offset = freemap->phys_offset +
1171 HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1172 layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1178 KKASSERT(layer1->phys_offset);
/*
 * CRC-check layer1.  The test is retried under blkmap_lock so a
 * concurrent updater can finish before we declare corruption.
 */
1179 if (!hammer_crc_test_layer1(hmp->version, layer1)) {
1180 hammer_lock_ex(&hmp->blkmap_lock);
1181 if (!hammer_crc_test_layer1(hmp->version, layer1))
1182 hpanic("CRC FAILED: LAYER1");
1183 hammer_unlock(&hmp->blkmap_lock);
1187 * Dive layer 2, each entry represents a big-block.
1189 * (reuse buffer, layer1 pointer becomes invalid)
1191 layer2_offset = layer1->phys_offset +
1192 HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1193 layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
/*
 * Same double-checked CRC pattern for layer2.
 */
1199 if (!hammer_crc_test_layer2(hmp->version, layer2)) {
1200 hammer_lock_ex(&hmp->blkmap_lock);
1201 if (!hammer_crc_test_layer2(hmp->version, layer2))
1202 hpanic("CRC FAILED: LAYER2");
1203 hammer_unlock(&hmp->blkmap_lock);
1205 KKASSERT(layer2->zone == zone);
/*
 * bytes_free may be negative when de-dup exists (see function header
 * comment); it is reported as-is.
 */
1207 bytes = layer2->bytes_free;
1210 * *curp becomes 1 only when no error and,
1211 * next_offset and zone_offset are in the same big-block.
/*
 * XOR of the two offsets masked to big-block granularity is non-zero
 * exactly when they fall in different big-blocks.
 */
1213 if ((blockmap->next_offset ^ zone_offset) & ~HAMMER_BIGBLOCK_MASK64)
1214 *curp = 0; /* not same */
/*
 * Release buffer/volume references; 'bytes' is the result
 * (return path not visible in this excerpt).
 */
1219 hammer_rel_buffer(buffer, 0);
1220 hammer_rel_volume(root_volume, 0);
1221 if (hammer_debug_general & 0x4000) {
1222 hdkprintf("%016jx -> %d\n", (intmax_t)zone_offset, bytes);
1229 * Lookup a blockmap offset and verify blockmap layers.
1232 hammer_blockmap_lookup_verify(hammer_mount_t hmp, hammer_off_t zone_offset,
1235 hammer_volume_t root_volume;
1236 hammer_blockmap_t freemap;
1237 hammer_blockmap_layer1_t layer1;
1238 hammer_blockmap_layer2_t layer2;
1239 hammer_buffer_t buffer = NULL;
1240 hammer_off_t layer1_offset;
1241 hammer_off_t layer2_offset;
1242 hammer_off_t result_offset;
1243 hammer_off_t base_off;
1244 hammer_reserve_t resv __debugvar;
1248 * Calculate the zone-2 offset.
1250 zone = HAMMER_ZONE_DECODE(zone_offset);
/*
 * The translated zone-2 (raw device) offset is the return value;
 * everything below only verifies the freemap layers agree with it.
 */
1251 result_offset = hammer_xlate_to_zone2(zone_offset);
1254 * Validate the allocation zone
1256 root_volume = hammer_get_root_volume(hmp, errorp);
1259 freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1260 KKASSERT(freemap->phys_offset != 0);
/*
 * Dive layer 1 of the freemap.
 */
1265 layer1_offset = freemap->phys_offset +
1266 HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1267 layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1270 KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
/*
 * CRC retried under blkmap_lock before panicking, to rule out a
 * racing in-progress update.
 */
1271 if (!hammer_crc_test_layer1(hmp->version, layer1)) {
1272 hammer_lock_ex(&hmp->blkmap_lock);
1273 if (!hammer_crc_test_layer1(hmp->version, layer1))
1274 hpanic("CRC FAILED: LAYER1");
1275 hammer_unlock(&hmp->blkmap_lock);
1279 * Dive layer 2, each entry represents a big-block.
1281 layer2_offset = layer1->phys_offset +
1282 HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1283 layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
/*
 * zone==0 means the big-block is not finalized yet; it must then be
 * covered by an in-memory reservation for our zone in rb_resv_root.
 * Any other mismatched zone is fatal.
 */
1287 if (layer2->zone == 0) {
1288 base_off = hammer_xlate_to_zone2(zone_offset &
1289 ~HAMMER_BIGBLOCK_MASK64);
1290 resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
1292 KKASSERT(resv && resv->zone == zone);
1294 } else if (layer2->zone != zone) {
1295 hpanic("bad zone %d/%d", layer2->zone, zone);
1297 if (!hammer_crc_test_layer2(hmp->version, layer2)) {
1298 hammer_lock_ex(&hmp->blkmap_lock);
1299 if (!hammer_crc_test_layer2(hmp->version, layer2))
1300 hpanic("CRC FAILED: LAYER2");
1301 hammer_unlock(&hmp->blkmap_lock);
/*
 * Drop references and optionally trace the translation.
 */
1306 hammer_rel_buffer(buffer, 0);
1307 hammer_rel_volume(root_volume, 0);
1308 if (hammer_debug_general & 0x0800) {
1309 hdkprintf("%016jx -> %016jx\n",
1310 (intmax_t)zone_offset, (intmax_t)result_offset);
1312 return(result_offset);
1317 * Check space availability
1319 * MPSAFE - does not require fs_token
1322 _hammer_checkspace(hammer_mount_t hmp, int slop, int64_t *resp)
/*
 * Estimated on-media cost of one inode (inode data + one btree elm)
 * and of one record (two btree elms, accounting for splits).
 */
1324 const int in_size = sizeof(struct hammer_inode_data) +
1325 sizeof(union hammer_btree_elm);
1326 const int rec_size = (sizeof(union hammer_btree_elm) * 2);
/*
 * Sum all outstanding reservations in bytes: reserved inodes,
 * reserved records, reserved data, delayed-reuse big-blocks, the
 * dirty-buffer limit, plus the caller-supplied slop in big-blocks.
 *
 * NOTE(review): 'slop << HAMMER_BIGBLOCK_BITS' shifts an int before
 * the int64_t addition — fine for small slop values, but would
 * overflow for large ones; confirm slop's range at call sites.
 */
1329 usedbytes = hmp->rsv_inodes * in_size +
1330 hmp->rsv_recs * rec_size +
1331 hmp->rsv_databytes +
1332 ((int64_t)hmp->rsv_fromdelay << HAMMER_BIGBLOCK_BITS) +
1333 ((int64_t)hammer_limit_dirtybufspace) +
1334 (slop << HAMMER_BIGBLOCK_BITS);
/*
 * Space is sufficient when the cached free big-block count covers
 * the reservation total (converted to big-blocks).
 */
1339 if (hmp->copy_stat_freebigblocks >=
1340 (usedbytes >> HAMMER_BIGBLOCK_BITS)) {
1348 hammer_check_volume(hammer_mount_t hmp, hammer_off_t *offsetp)
1350 hammer_blockmap_t freemap;
1351 hammer_blockmap_layer1_t layer1;
1352 hammer_buffer_t buffer1 = NULL;
1353 hammer_off_t layer1_offset;
1356 freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
/*
 * Read the layer1 freemap entry covering *offsetp.
 */
1358 layer1_offset = freemap->phys_offset +
1359 HAMMER_BLOCKMAP_LAYER1_OFFSET(*offsetp);
1360 layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1365 * No more physically available space in layer1s
1366 * of the current volume, go to the next volume.
/*
 * hammer_skip_volume() advances *offsetp to the start of the next
 * volume (same zone, offset 0).
 */
1368 if (layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL)
1369 hammer_skip_volume(offsetp);
1372 hammer_rel_buffer(buffer1, 0);
1377 hammer_skip_volume(hammer_off_t *offsetp)
1379 hammer_off_t offset;
/*
 * Keep the zone from the current offset and step to the next volume
 * number; volume numbers past HAMMER_MAX_VOLUMES wrap (wrap handling
 * is elided in this excerpt).
 */
1383 zone = HAMMER_ZONE_DECODE(offset);
1384 vol_no = HAMMER_VOL_DECODE(offset) + 1;
1385 KKASSERT(vol_no <= HAMMER_MAX_VOLUMES);
1387 if (vol_no == HAMMER_MAX_VOLUMES) { /* wrap */
/*
 * Re-encode: same zone, next volume, in-volume offset 0.
 */
1392 *offsetp = HAMMER_ENCODE(zone, vol_no, 0);